diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index f20b587c1..840c4babf 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -65,6 +65,9 @@ #include #include + +#include + #include #include @@ -84,30 +87,30 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include #include -#include -#include #include #include #include #include - -#ifdef ppc -#include <ppc/mappings.h> -#endif /* ppc */ +#include #include #include +#include + +extern u_int32_t random(void); /* from <libkern/libkern.h> */ /* Internal prototypes */ @@ -123,7 +126,7 @@ static boolean_t vm_map_range_check( vm_map_entry_t *entry); static vm_map_entry_t _vm_map_entry_create( - struct vm_map_header *map_header); + struct vm_map_header *map_header, boolean_t map_locked); static void _vm_map_entry_dispose( struct vm_map_header *map_header, @@ -162,7 +165,8 @@ static kern_return_t vm_map_copy_overwrite_unaligned( vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, - vm_map_address_t start); + vm_map_address_t start, + boolean_t discard_on_success); static kern_return_t vm_map_copy_overwrite_aligned( vm_map_t dst_map, @@ -182,7 +186,8 @@ static kern_return_t vm_map_copyout_kernel_buffer( vm_map_t map, vm_map_address_t *addr, /* IN/OUT */ vm_map_copy_t copy, - boolean_t overwrite); + boolean_t overwrite, + boolean_t consume_on_success); static void vm_map_fork_share( vm_map_t old_map, @@ -205,16 +210,18 @@ void vm_map_region_walk( vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended, - boolean_t look_for_pages); + boolean_t look_for_pages, + mach_msg_type_number_t count); static kern_return_t vm_map_wire_nested( vm_map_t map, vm_map_offset_t start, vm_map_offset_t end, - vm_prot_t access_type, + vm_prot_t caller_prot, boolean_t user_wire, pmap_t map_pmap, - vm_map_offset_t pmap_addr); + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p); static kern_return_t vm_map_unwire_nested( vm_map_t map, @@ -234,7 +241,8 @@ static kern_return_t vm_map_copy_overwrite_nested( vm_map_offset_t dst_addr, vm_map_copy_t copy, boolean_t interruptible, - pmap_t pmap); + pmap_t pmap, + boolean_t discard_on_success); static kern_return_t vm_map_remap_extract( vm_map_t map, @@ -252,7 +260,7 @@ static kern_return_t vm_map_remap_range_allocate( vm_map_address_t *address, vm_map_size_t size, vm_map_offset_t mask, - boolean_t anywhere, + int flags, vm_map_entry_t *map_entry); static void vm_map_region_look_for_page( @@ -262,12 +270,41 @@ static void vm_map_region_look_for_page( vm_object_offset_t offset, int max_refcnt, int depth, - vm_region_extended_info_t extended); + vm_region_extended_info_t extended, + mach_msg_type_number_t count); static int vm_map_region_count_obj_refs( vm_map_entry_t entry, vm_object_t object); + +static kern_return_t vm_map_willneed( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_reuse_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_reusable_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_can_reuse( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + 
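The vm_map_willneed/vm_map_reuse_pages/vm_map_reusable_pages/vm_map_can_reuse entry points declared above back the VM_BEHAVIOR_WILLNEED/REUSE/REUSABLE/CAN_REUSE advice values; from user space they are normally reached through madvise(2). A minimal user-space sketch (the Darwin-specific MADV_FREE_REUSABLE/MADV_FREE_REUSE flags are assumed available in <sys/mman.h>):

#include <sys/mman.h>
#include <stddef.h>

int advice_demo(void)
{
    size_t len = 1024 * 1024;
    void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_ANON | MAP_PRIVATE, -1, 0);
    if (buf == MAP_FAILED)
        return -1;
    madvise(buf, len, MADV_WILLNEED);       /* -> vm_map_willneed() */
    madvise(buf, len, MADV_FREE_REUSABLE);  /* -> vm_map_reusable_pages() */
    madvise(buf, len, MADV_FREE_REUSE);     /* -> vm_map_reuse_pages() */
    return munmap(buf, len);
}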
+#if MACH_ASSERT +static kern_return_t vm_map_pageout( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); +#endif /* MACH_ASSERT */ + /* * Macros to copy a vm_map_entry. We must be careful to correctly * manage the wired page count. vm_map_entry_copy() creates a new @@ -277,17 +314,30 @@ static int vm_map_region_count_obj_refs( * wire count; it's used for map splitting and zone changing in * vm_map_copyout. */ -#define vm_map_entry_copy(NEW,OLD) \ -MACRO_BEGIN \ + +#define vm_map_entry_copy(NEW,OLD) \ +MACRO_BEGIN \ +boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \ *(NEW) = *(OLD); \ (NEW)->is_shared = FALSE; \ (NEW)->needs_wakeup = FALSE; \ (NEW)->in_transition = FALSE; \ (NEW)->wired_count = 0; \ (NEW)->user_wired_count = 0; \ + (NEW)->permanent = FALSE; \ + (NEW)->used_for_jit = FALSE; \ + (NEW)->from_reserved_zone = _vmec_reserved; \ + (NEW)->iokit_acct = FALSE; \ + (NEW)->vme_resilient_codesign = FALSE; \ + (NEW)->vme_resilient_media = FALSE; \ MACRO_END -#define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD)) +#define vm_map_entry_copy_full(NEW,OLD) \ +MACRO_BEGIN \ +boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \ +(*(NEW) = *(OLD)); \ +(NEW)->from_reserved_zone = _vmecf_reserved; \ +MACRO_END /* * Decide if we want to allow processes to execute from their data or stack areas. @@ -311,7 +361,12 @@ MACRO_END * execute from a page that lacks execute permission. * * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the - * default behavior for both 32 and 64 bit apps on a system-wide basis. + * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore, + * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow + * execution from data areas for a particular binary even if the arch normally permits it. As + * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit + * to support some complicated use cases, notably browsers with out-of-process plugins that + * are not all NX-safe. */ extern int allow_data_exec, allow_stack_exec; @@ -321,6 +376,8 @@ override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */ { int current_abi; + if (map->pmap == kernel_pmap) return FALSE; + /* * Determine if the app is running in 32 or 64 bit mode. */ @@ -338,7 +395,7 @@ override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */ if (user_tag == VM_MEMORY_STACK) return allow_stack_exec & current_abi; - return allow_data_exec & current_abi; + return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE); } @@ -387,8 +444,10 @@ override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */ static zone_t vm_map_zone; /* zone for vm_map structures */ static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */ -static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */ +static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking + * allocations */ static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ +zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */ /* @@ -399,177 +458,429 @@ static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ vm_object_t vm_submap_object; -/* - * vm_map_init: - * - * Initialize the vm_map module. Must be called before - * any other vm_map routines. - * - * Map and entry structures are allocated from zones -- we must - * initialize those zones. 
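The override_nx() logic above decides whether a no-execute fault should be forgiven: allow_stack_exec and allow_data_exec are sysctl-settable bitmasks tested against the ABI of the faulting map, and the new map_disallow_data_exec flag (driven by MH_NO_HEAP_EXECUTION and the posix_spawn override) can veto the data case. A standalone model of that policy, with the 32-bit/64-bit bit assignments assumed for illustration:

#define DEMO_ABI_32  0x1   /* assumed value of the kernel's 32-bit ABI bit */
#define DEMO_ABI_64  0x2   /* assumed value of the kernel's 64-bit ABI bit */

static int demo_allow_data_exec;   /* mirrors sysctl vm.allow_data_exec */
static int demo_allow_stack_exec;  /* mirrors sysctl vm.allow_stack_exec */

/* nonzero: let the thread execute from this data/stack page anyway */
static int
demo_override_nx(int abi, int is_stack_page, int map_disallow_data_exec)
{
    if (is_stack_page)
        return demo_allow_stack_exec & abi;
    return (demo_allow_data_exec & abi) && !map_disallow_data_exec;
}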
- * - * There are three zones of interest: - * - * vm_map_zone: used to allocate maps. - * vm_map_entry_zone: used to allocate map entries. - * vm_map_kentry_zone: used to allocate map entries for the kernel. - * - * The kernel allocates map entries from a special zone that is initially - * "crammed" with memory. It would be difficult (perhaps impossible) for - * the kernel to allocate more memory to a entry zone when it became - * empty since the very act of allocating memory implies the creation - * of a new entry. - */ - static void *map_data; -static vm_map_size_t map_data_size; +static vm_size_t map_data_size; static void *kentry_data; -static vm_map_size_t kentry_data_size; -static int kentry_count = 2048; /* to init kentry_data_size */ - -#define NO_COALESCE_LIMIT (1024 * 128) +static vm_size_t kentry_data_size; +static void *map_holes_data; +static vm_size_t map_holes_data_size; +#define NO_COALESCE_LIMIT ((1024 * 128) - 1) /* Skip acquiring locks if we're in the midst of a kernel core dump */ -extern unsigned int not_in_kdp; +unsigned int not_in_kdp = 1; + +unsigned int vm_map_set_cache_attr_count = 0; -#ifdef __i386__ kern_return_t -vm_map_apple_protected( +vm_map_set_cache_attr( vm_map_t map, - vm_map_offset_t start, - vm_map_offset_t end) + vm_map_offset_t va) { - boolean_t map_locked; - kern_return_t kr; vm_map_entry_t map_entry; - memory_object_t protected_mem_obj; - vm_object_t protected_object; - vm_map_offset_t map_addr; + vm_object_t object; + kern_return_t kr = KERN_SUCCESS; vm_map_lock_read(map); - map_locked = TRUE; - /* lookup the protected VM object */ - if (!vm_map_lookup_entry(map, - start, - &map_entry) || - map_entry->vme_end != end || + if (!vm_map_lookup_entry(map, va, &map_entry) || map_entry->is_sub_map) { - /* that memory is not properly mapped */ - kr = KERN_INVALID_ARGUMENT; - goto done; - } - protected_object = map_entry->object.vm_object; - if (protected_object == VM_OBJECT_NULL) { - /* there should be a VM object here at this point */ + /* + * that memory is not properly mapped + */ kr = KERN_INVALID_ARGUMENT; goto done; } + object = VME_OBJECT(map_entry); - /* - * Lookup (and create if necessary) the protected memory object - * matching that VM object. - * If successful, this also grabs a reference on the memory object, - * to guarantee that it doesn't go away before we get a chance to map - * it. - */ - - protected_mem_obj = apple_protect_pager_setup(protected_object); - if (protected_mem_obj == NULL) { - kr = KERN_FAILURE; + if (object == VM_OBJECT_NULL) { + /* + * there should be a VM object here at this point + */ + kr = KERN_INVALID_ARGUMENT; goto done; } + vm_object_lock(object); + object->set_cache_attr = TRUE; + vm_object_unlock(object); + vm_map_set_cache_attr_count++; +done: vm_map_unlock_read(map); + + return kr; +} + + +#if CONFIG_CODE_DECRYPTION +/* + * vm_map_apple_protected: + * This remaps the requested part of the object with an object backed by + * the decrypting pager. + * crypt_info contains entry points and session data for the crypt module. + * The crypt_info block will be copied by vm_map_apple_protected. The data structures + * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called. 
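vm_map_apple_protected(), whose contract is spelled out in the comment above, is reached from the Mach-O loader when it maps DSMOS-protected or encrypted executable segments. A hypothetical caller, assuming the pager_crypt_info layout from kern/page_decrypt.h (a page_decrypt callback, a crypt_end teardown hook, and an opaque crypt_ops cookie); the struct itself may live on the stack because, per the comment above, the block is copied:

#include <kern/page_decrypt.h>

/* hypothetical no-op decrypter: treats pages as already plaintext */
static kern_return_t
demo_page_decrypt(const void *src, void *dst,
                  unsigned long long src_offset, void *crypt_ops)
{
    (void)src_offset; (void)crypt_ops;
    memcpy(dst, src, PAGE_SIZE);
    return KERN_SUCCESS;
}
static void demo_crypt_end(void *crypt_ops) { (void)crypt_ops; }

static kern_return_t
demo_protect(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
    struct pager_crypt_info crypt_info = {
        .page_decrypt = demo_page_decrypt,
        .crypt_end    = demo_crypt_end,
        .crypt_ops    = NULL,
    };
    /* (vm_object_offset_t)-1 asks the function to derive the crypto
     * backing offset from the existing map entry, per the code below */
    return vm_map_apple_protected(map, start, end,
                                  (vm_object_offset_t)-1, &crypt_info);
}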
+ */ +kern_return_t +vm_map_apple_protected( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_object_offset_t crypto_backing_offset, + struct pager_crypt_info *crypt_info) +{ + boolean_t map_locked; + kern_return_t kr; + vm_map_entry_t map_entry; + struct vm_map_entry tmp_entry; + memory_object_t unprotected_mem_obj; + vm_object_t protected_object; + vm_map_offset_t map_addr; + vm_map_offset_t start_aligned, end_aligned; + vm_object_offset_t crypto_start, crypto_end; + int vm_flags; + map_locked = FALSE; + unprotected_mem_obj = MEMORY_OBJECT_NULL; - /* map this memory object in place of the current one */ - map_addr = start; - kr = vm_map_enter_mem_object(map, - &map_addr, - end - start, - (mach_vm_offset_t) 0, - VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, - (ipc_port_t) protected_mem_obj, - (map_entry->offset + - (start - map_entry->vme_start)), - TRUE, - map_entry->protection, - map_entry->max_protection, - map_entry->inheritance); - assert(map_addr == start); - if (kr == KERN_SUCCESS) { - /* let the pager know that this mem_obj is mapped */ - apple_protect_pager_map(protected_mem_obj); + start_aligned = vm_map_trunc_page(start, PAGE_MASK_64); + end_aligned = vm_map_round_page(end, PAGE_MASK_64); + start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map)); + end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map)); + + assert(start_aligned == start); + assert(end_aligned == end); + + map_addr = start_aligned; + for (map_addr = start_aligned; + map_addr < end; + map_addr = tmp_entry.vme_end) { + vm_map_lock(map); + map_locked = TRUE; + + /* lookup the protected VM object */ + if (!vm_map_lookup_entry(map, + map_addr, + &map_entry) || + map_entry->is_sub_map || + VME_OBJECT(map_entry) == VM_OBJECT_NULL || + !(map_entry->protection & VM_PROT_EXECUTE)) { + /* that memory is not properly mapped */ + kr = KERN_INVALID_ARGUMENT; + goto done; + } + + /* get the protected object to be decrypted */ + protected_object = VME_OBJECT(map_entry); + if (protected_object == VM_OBJECT_NULL) { + /* there should be a VM object here at this point */ + kr = KERN_INVALID_ARGUMENT; + goto done; + } + /* ensure protected object stays alive while map is unlocked */ + vm_object_reference(protected_object); + + /* limit the map entry to the area we want to cover */ + vm_map_clip_start(map, map_entry, start_aligned); + vm_map_clip_end(map, map_entry, end_aligned); + + tmp_entry = *map_entry; + map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */ + vm_map_unlock(map); + map_locked = FALSE; + + /* + * This map entry might be only partially encrypted + * (if not fully "page-aligned"). + */ + crypto_start = 0; + crypto_end = tmp_entry.vme_end - tmp_entry.vme_start; + if (tmp_entry.vme_start < start) { + if (tmp_entry.vme_start != start_aligned) { + kr = KERN_INVALID_ADDRESS; + } + crypto_start += (start - tmp_entry.vme_start); + } + if (tmp_entry.vme_end > end) { + if (tmp_entry.vme_end != end_aligned) { + kr = KERN_INVALID_ADDRESS; + } + crypto_end -= (tmp_entry.vme_end - end); + } + + /* + * This "extra backing offset" is needed to get the decryption + * routine to use the right key. It adjusts for the possibly + * relative offset of an interposed "4K" pager... + */ + if (crypto_backing_offset == (vm_object_offset_t) -1) { + crypto_backing_offset = VME_OFFSET(&tmp_entry); + } + + /* + * Lookup (and create if necessary) the protected memory object + * matching that VM object. 
+ * If successful, this also grabs a reference on the memory object, + * to guarantee that it doesn't go away before we get a chance to map + * it. + */ + unprotected_mem_obj = apple_protect_pager_setup( + protected_object, + VME_OFFSET(&tmp_entry), + crypto_backing_offset, + crypt_info, + crypto_start, + crypto_end); + + /* release extra ref on protected object */ + vm_object_deallocate(protected_object); + + if (unprotected_mem_obj == NULL) { + kr = KERN_FAILURE; + goto done; + } + + vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; + + /* map this memory object in place of the current one */ + map_addr = tmp_entry.vme_start; + kr = vm_map_enter_mem_object(map, + &map_addr, + (tmp_entry.vme_end - + tmp_entry.vme_start), + (mach_vm_offset_t) 0, + vm_flags, + (ipc_port_t) unprotected_mem_obj, + 0, + TRUE, + tmp_entry.protection, + tmp_entry.max_protection, + tmp_entry.inheritance); + assert(kr == KERN_SUCCESS); + assert(map_addr == tmp_entry.vme_start); + +#if VM_MAP_DEBUG_APPLE_PROTECT + printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p: " + "backing:[object:%p,offset:0x%llx," + "crypto_backing_offset:0x%llx," + "crypto_start:0x%llx,crypto_end:0x%llx]\n", + map, + (uint64_t) map_addr, + (uint64_t) (map_addr + (tmp_entry.vme_end - + tmp_entry.vme_start)), + unprotected_mem_obj, + protected_object, + VME_OFFSET(&tmp_entry), + crypto_backing_offset, + crypto_start, + crypto_end); +#endif /* VM_MAP_DEBUG_APPLE_PROTECT */ + + /* + * Release the reference obtained by + * apple_protect_pager_setup(). + * The mapping (if it succeeded) is now holding a reference on + * the memory object. + */ + memory_object_deallocate(unprotected_mem_obj); + unprotected_mem_obj = MEMORY_OBJECT_NULL; + + /* continue with next map entry */ + crypto_backing_offset += (tmp_entry.vme_end - + tmp_entry.vme_start); + crypto_backing_offset -= crypto_start; } - /* - * Release the reference obtained by apple_protect_pager_setup(). - * The mapping (if it succeeded) is now holding a reference on the - * memory object. - */ - memory_object_deallocate(protected_mem_obj); + kr = KERN_SUCCESS; done: if (map_locked) { - vm_map_unlock_read(map); + vm_map_unlock(map); } return kr; } -#endif /* __i386__ */ +#endif /* CONFIG_CODE_DECRYPTION */ + +lck_grp_t vm_map_lck_grp; +lck_grp_attr_t vm_map_lck_grp_attr; +lck_attr_t vm_map_lck_attr; +lck_attr_t vm_map_lck_rw_attr; + +/* + * vm_map_init: + * + * Initialize the vm_map module. Must be called before + * any other vm_map routines. + * + * Map and entry structures are allocated from zones -- we must + * initialize those zones. + * + * There are three zones of interest: + * + * vm_map_zone: used to allocate maps. + * vm_map_entry_zone: used to allocate map entries. + * vm_map_entry_reserved_zone: fallback zone for kernel map entries + * + * The kernel allocates map entries from a special zone that is initially + * "crammed" with memory. It would be difficult (perhaps impossible) for + * the kernel to allocate more memory to a entry zone when it became + * empty since the very act of allocating memory implies the creation + * of a new entry. 
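The bootstrap constraint described above is circular: growing the map-entry zone requires a kernel map allocation, which itself consumes a map entry. vm_map_init() below therefore seeds the zones with memory stolen at boot and pins the reserved zone. The pattern, reduced to its essentials (calls and names taken from the code below):

/* element size, max zone size, allocation chunk, name */
vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
                                   kentry_data_size * 64, kentry_data_size,
                                   "Reserved VM map entries");
zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE); /* never grow at run time */
/* seed with pages stolen in vm_map_steal_memory() */
zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);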
+ */ void vm_map_init( void) { + vm_size_t entry_zone_alloc_size; + const char *mez_name = "VM map entries"; + vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024, PAGE_SIZE, "maps"); - + zone_change(vm_map_zone, Z_NOENCRYPT, TRUE); +#if defined(__LP64__) + entry_zone_alloc_size = PAGE_SIZE * 5; +#else + entry_zone_alloc_size = PAGE_SIZE * 6; +#endif vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), - 1024*1024, PAGE_SIZE*5, - "non-kernel map entries"); + 1024*1024, entry_zone_alloc_size, + mez_name); + zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE); + zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE); + zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE); - vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), - kentry_data_size, kentry_data_size, - "kernel map entries"); + vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), + kentry_data_size * 64, kentry_data_size, + "Reserved VM map entries"); + zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE); vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy), - 16*1024, PAGE_SIZE, "map copies"); + 16*1024, PAGE_SIZE, "VM map copies"); + zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE); + + vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links), + 16*1024, PAGE_SIZE, "VM map holes"); + zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE); /* * Cram the map and kentry zones with initial data. - * Set kentry_zone non-collectible to aid zone_gc(). + * Set reserved_zone non-collectible to aid zone_gc(). */ zone_change(vm_map_zone, Z_COLLECT, FALSE); - zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE); - zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE); - zcram(vm_map_zone, map_data, map_data_size); - zcram(vm_map_kentry_zone, kentry_data, kentry_data_size); + + zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE); + zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE); + zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE); + zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE); + zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ + zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ + zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE); + + zone_change(vm_map_holes_zone, Z_COLLECT, TRUE); + zone_change(vm_map_holes_zone, Z_EXPAND, TRUE); + zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE); + zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE); + zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE); + zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE); + + /* + * Add the stolen memory to zones, adjust zone size and stolen counts. 
+ */ + zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size); + zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size); + zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size); + VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size)); + + lck_grp_attr_setdefault(&vm_map_lck_grp_attr); + lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr); + lck_attr_setdefault(&vm_map_lck_attr); + + lck_attr_setdefault(&vm_map_lck_rw_attr); + lck_attr_cleardebug(&vm_map_lck_rw_attr); + +#if CONFIG_FREEZE + default_freezer_init(); +#endif /* CONFIG_FREEZE */ } void vm_map_steal_memory( void) { - map_data_size = vm_map_round_page(10 * sizeof(struct _vm_map)); + uint32_t kentry_initial_pages; + + map_data_size = round_page(10 * sizeof(struct _vm_map)); map_data = pmap_steal_memory(map_data_size); -#if 0 /* - * Limiting worst case: vm_map_kentry_zone needs to map each "available" - * physical page (i.e. that beyond the kernel image and page tables) - * individually; we guess at most one entry per eight pages in the - * real world. This works out to roughly .1 of 1% of physical memory, - * or roughly 1900 entries (64K) for a 64M machine with 4K pages. + * kentry_initial_pages corresponds to the number of kernel map entries + * required during bootstrap until the asynchronous replenishment + * scheme is activated and/or entries are available from the general + * map entry pool. */ +#if defined(__LP64__) + kentry_initial_pages = 10; +#else + kentry_initial_pages = 6; #endif - kentry_count = pmap_free_pages() / 8; +#if CONFIG_GZALLOC + /* If using the guard allocator, reserve more memory for the kernel + * reserved map entry pool. + */ + if (gzalloc_enabled()) + kentry_initial_pages *= 1024; +#endif - kentry_data_size = - vm_map_round_page(kentry_count * sizeof(struct vm_map_entry)); + kentry_data_size = kentry_initial_pages * PAGE_SIZE; kentry_data = pmap_steal_memory(kentry_data_size); + + map_holes_data_size = kentry_data_size; + map_holes_data = pmap_steal_memory(map_holes_data_size); +} + +void +vm_kernel_reserved_entry_init(void) { + zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry)); + zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links)); +} + +void +vm_map_disable_hole_optimization(vm_map_t map) +{ + vm_map_entry_t head_entry, hole_entry, next_hole_entry; + + if (map->holelistenabled) { + + head_entry = hole_entry = (vm_map_entry_t) map->holes_list; + + while (hole_entry != NULL) { + + next_hole_entry = hole_entry->vme_next; + + hole_entry->vme_next = NULL; + hole_entry->vme_prev = NULL; + zfree(vm_map_holes_zone, hole_entry); + + if (next_hole_entry == head_entry) { + hole_entry = NULL; + } else { + hole_entry = next_hole_entry; + } + } + + map->holes_list = NULL; + map->holelistenabled = FALSE; + + map->first_free = vm_map_first_entry(map); + SAVE_HINT_HOLE_WRITE(map, NULL); + } +} + +boolean_t +vm_kernel_map_is_kernel(vm_map_t map) { + return (map->pmap == kernel_pmap); } /* @@ -579,6 +890,9 @@ vm_map_steal_memory( * the given physical map structure, and having * the given lower and upper address bounds. 
*/ + +boolean_t vm_map_supports_hole_optimization = TRUE; + vm_map_t vm_map_create( pmap_t pmap, @@ -588,6 +902,7 @@ vm_map_create( { static int color_seed = 0; register vm_map_t result; + struct vm_map_links *hole_entry = NULL; result = (vm_map_t) zalloc(vm_map_zone); if (result == VM_MAP_NULL) @@ -598,6 +913,10 @@ vm_map_create( result->hdr.nentries = 0; result->hdr.entries_pageable = pageable; + vm_map_store_init( &(result->hdr) ); + + result->hdr.page_shift = PAGE_SHIFT; + result->size = 0; result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */ result->user_wire_size = 0; @@ -611,14 +930,37 @@ vm_map_create( result->max_offset = max; result->wiring_required = FALSE; result->no_zero_fill = FALSE; - result->mapped = FALSE; + result->mapped_in_other_pmaps = FALSE; result->wait_for_space = FALSE; + result->switch_protect = FALSE; + result->disable_vmentry_reuse = FALSE; + result->map_disallow_data_exec = FALSE; + result->highest_entry_end = 0; result->first_free = vm_map_to_entry(result); result->hint = vm_map_to_entry(result); result->color_rr = (color_seed++) & vm_color_mask; - vm_map_lock_init(result); - mutex_init(&result->s_lock, 0); + result->jit_entry_exists = FALSE; + + if (vm_map_supports_hole_optimization && pmap != kernel_pmap) { + hole_entry = zalloc(vm_map_holes_zone); + + hole_entry->start = min; + hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS; + result->holes_list = result->hole_hint = hole_entry; + hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry; + result->holelistenabled = TRUE; + + } else { + + result->holelistenabled = FALSE; + } +#if CONFIG_FREEZE + result->default_freezer_handle = NULL; +#endif + vm_map_lock_init(result); + lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr); + return(result); } @@ -628,28 +970,47 @@ vm_map_create( * Allocates a VM map entry for insertion in the * given map (or map copy). No fields are filled. */ -#define vm_map_entry_create(map) \ - _vm_map_entry_create(&(map)->hdr) +#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked) -#define vm_map_copy_entry_create(copy) \ - _vm_map_entry_create(&(copy)->cpy_hdr) +#define vm_map_copy_entry_create(copy, map_locked) \ + _vm_map_entry_create(&(copy)->cpy_hdr, map_locked) +unsigned reserved_zalloc_count, nonreserved_zalloc_count; static vm_map_entry_t _vm_map_entry_create( - register struct vm_map_header *map_header) + struct vm_map_header *map_header, boolean_t __unused map_locked) { - register zone_t zone; - register vm_map_entry_t entry; + zone_t zone; + vm_map_entry_t entry; - if (map_header->entries_pageable) - zone = vm_map_entry_zone; - else - zone = vm_map_kentry_zone; + zone = vm_map_entry_zone; + + assert(map_header->entries_pageable ? 
!map_locked : TRUE); + + if (map_header->entries_pageable) { + entry = (vm_map_entry_t) zalloc(zone); + } + else { + entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE); + + if (entry == VM_MAP_ENTRY_NULL) { + zone = vm_map_entry_reserved_zone; + entry = (vm_map_entry_t) zalloc(zone); + OSAddAtomic(1, &reserved_zalloc_count); + } else + OSAddAtomic(1, &nonreserved_zalloc_count); + } - entry = (vm_map_entry_t) zalloc(zone); if (entry == VM_MAP_ENTRY_NULL) panic("vm_map_entry_create"); + entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone); + vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE); +#if MAP_ENTRY_CREATION_DEBUG + entry->vme_creation_maphdr = map_header; + fastbacktrace(&entry->vme_creation_bt[0], + (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t))); +#endif return(entry); } @@ -663,13 +1024,7 @@ _vm_map_entry_create( * of the stores */ #define vm_map_entry_dispose(map, entry) \ - MACRO_BEGIN \ - if((entry) == (map)->first_free) \ - (map)->first_free = vm_map_to_entry(map); \ - if((entry) == (map)->hint) \ - (map)->hint = vm_map_to_entry(map); \ - _vm_map_entry_dispose(&(map)->hdr, (entry)); \ - MACRO_END + _vm_map_entry_dispose(&(map)->hdr, (entry)) #define vm_map_copy_entry_dispose(map, entry) \ _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry)) @@ -681,125 +1036,40 @@ _vm_map_entry_dispose( { register zone_t zone; - if (map_header->entries_pageable) + if (map_header->entries_pageable || !(entry->from_reserved_zone)) zone = vm_map_entry_zone; else - zone = vm_map_kentry_zone; + zone = vm_map_entry_reserved_zone; + + if (!map_header->entries_pageable) { + if (zone == vm_map_entry_zone) + OSAddAtomic(-1, &nonreserved_zalloc_count); + else + OSAddAtomic(-1, &reserved_zalloc_count); + } zfree(zone, entry); } #if MACH_ASSERT -static boolean_t first_free_is_valid(vm_map_t map); /* forward */ static boolean_t first_free_check = FALSE; -static boolean_t +boolean_t first_free_is_valid( vm_map_t map) { - vm_map_entry_t entry, next; - if (!first_free_check) return TRUE; - entry = vm_map_to_entry(map); - next = entry->vme_next; - while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) || - (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) && - next != vm_map_to_entry(map))) { - entry = next; - next = entry->vme_next; - if (entry == vm_map_to_entry(map)) - break; - } - if (map->first_free != entry) { - printf("Bad first_free for map %p: %p should be %p\n", - map, map->first_free, entry); - return FALSE; - } - return TRUE; + return( first_free_is_valid_store( map )); } #endif /* MACH_ASSERT */ -/* - * UPDATE_FIRST_FREE: - * - * Updates the map->first_free pointer to the - * entry immediately before the first hole in the map. - * The map should be locked. 
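_vm_map_entry_create() above now prefers the ordinary entry zone and falls back to the pre-seeded reserve only when a non-blocking allocation fails, so that allocations made with the map locked can still make progress. The core of that fallback, isolated (same calls and counters as the function above):

entry = (vm_map_entry_t) zalloc_canblock(vm_map_entry_zone, FALSE); /* don't block */
if (entry == VM_MAP_ENTRY_NULL) {
    /* non-blocking attempt failed: dip into the crammed reserve */
    entry = (vm_map_entry_t) zalloc(vm_map_entry_reserved_zone);
    OSAddAtomic(1, &reserved_zalloc_count);
} else {
    OSAddAtomic(1, &nonreserved_zalloc_count);
}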
- */ -#define UPDATE_FIRST_FREE(map, new_first_free) \ - MACRO_BEGIN \ - vm_map_t UFF_map; \ - vm_map_entry_t UFF_first_free; \ - vm_map_entry_t UFF_next_entry; \ - UFF_map = (map); \ - UFF_first_free = (new_first_free); \ - UFF_next_entry = UFF_first_free->vme_next; \ - while (vm_map_trunc_page(UFF_next_entry->vme_start) == \ - vm_map_trunc_page(UFF_first_free->vme_end) || \ - (vm_map_trunc_page(UFF_next_entry->vme_start) == \ - vm_map_trunc_page(UFF_first_free->vme_start) && \ - UFF_next_entry != vm_map_to_entry(UFF_map))) { \ - UFF_first_free = UFF_next_entry; \ - UFF_next_entry = UFF_first_free->vme_next; \ - if (UFF_first_free == vm_map_to_entry(UFF_map)) \ - break; \ - } \ - UFF_map->first_free = UFF_first_free; \ - assert(first_free_is_valid(UFF_map)); \ - MACRO_END - -/* - * vm_map_entry_{un,}link: - * - * Insert/remove entries from maps (or map copies). - */ -#define vm_map_entry_link(map, after_where, entry) \ - MACRO_BEGIN \ - vm_map_t VMEL_map; \ - vm_map_entry_t VMEL_entry; \ - VMEL_map = (map); \ - VMEL_entry = (entry); \ - _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \ - UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \ - MACRO_END - #define vm_map_copy_entry_link(copy, after_where, entry) \ - _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry)) - -#define _vm_map_entry_link(hdr, after_where, entry) \ - MACRO_BEGIN \ - (hdr)->nentries++; \ - (entry)->vme_prev = (after_where); \ - (entry)->vme_next = (after_where)->vme_next; \ - (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \ - MACRO_END - -#define vm_map_entry_unlink(map, entry) \ - MACRO_BEGIN \ - vm_map_t VMEU_map; \ - vm_map_entry_t VMEU_entry; \ - vm_map_entry_t VMEU_first_free; \ - VMEU_map = (map); \ - VMEU_entry = (entry); \ - if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \ - VMEU_first_free = VMEU_entry->vme_prev; \ - else \ - VMEU_first_free = VMEU_map->first_free; \ - _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \ - UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \ - MACRO_END + _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry)) #define vm_map_copy_entry_unlink(copy, entry) \ - _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry)) - -#define _vm_map_entry_unlink(hdr, entry) \ - MACRO_BEGIN \ - (hdr)->nentries--; \ - (entry)->vme_next->vme_prev = (entry)->vme_prev; \ - (entry)->vme_prev->vme_next = (entry)->vme_next; \ - MACRO_END + _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry)) #if MACH_ASSERT && TASK_SWAPPER /* @@ -817,10 +1087,10 @@ void vm_map_res_reference(register vm_map_t map) assert(map->res_count >= 0); assert(map->ref_count >= map->res_count); if (map->res_count == 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); vm_map_lock(map); vm_map_swapin(map); - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); ++map->res_count; vm_map_unlock(map); } else @@ -838,12 +1108,12 @@ void vm_map_res_reference(register vm_map_t map) void vm_map_reference_swap(register vm_map_t map) { assert(map != VM_MAP_NULL); - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); assert(map->res_count >= 0); assert(map->ref_count >= map->res_count); map->ref_count++; vm_map_res_reference(map); - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); } /* @@ -860,11 +1130,11 @@ void vm_map_res_deallocate(register vm_map_t map) { assert(map->res_count > 0); if (--map->res_count == 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); vm_map_lock(map); vm_map_swapout(map); vm_map_unlock(map); - 
mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); } assert(map->ref_count >= map->res_count); } @@ -882,26 +1152,23 @@ vm_map_destroy( { vm_map_lock(map); + /* final cleanup: no need to unnest shared region */ + flags |= VM_MAP_REMOVE_NO_UNNESTING; + /* clean up regular map entries */ (void) vm_map_delete(map, map->min_offset, map->max_offset, flags, VM_MAP_NULL); /* clean up leftover special mappings (commpage, etc...) */ -#ifdef __ppc__ - /* - * PPC51: ppc64 is limited to 51-bit addresses. - * Memory beyond this 51-bit limit is mapped specially at the - * pmap level, so do not interfere. - * On PPC64, the commpage is mapped beyond the addressable range - * via a special pmap hack, so ask pmap to clean it explicitly... - */ - if (map->pmap) { - pmap_unmap_sharedpage(map->pmap); - } - /* ... and do not let regular pmap cleanup apply here */ - flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP; -#endif /* __ppc__ */ (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL, flags, VM_MAP_NULL); + +#if CONFIG_FREEZE + if (map->default_freezer_handle) { + default_freezer_handle_deallocate(map->default_freezer_handle); + map->default_freezer_handle = NULL; + } +#endif + vm_map_disable_hole_optimization(map); vm_map_unlock(map); assert(map->hdr.nentries == 0); @@ -1007,14 +1274,14 @@ void vm_map_swapin (vm_map_t map) entry = vm_map_first_entry(map); while (entry != vm_map_to_entry(map)) { - if (entry->object.vm_object != VM_OBJECT_NULL) { + if (VME_OBJECT(entry) != VM_OBJECT_NULL) { if (entry->is_sub_map) { - vm_map_t lmap = entry->object.sub_map; - mutex_lock(&lmap->s_lock); + vm_map_t lmap = VME_SUBMAP(entry); + lck_mtx_lock(&lmap->s_lock); vm_map_res_reference(lmap); - mutex_unlock(&lmap->s_lock); + lck_mtx_unlock(&lmap->s_lock); } else { - vm_object_t object = entry->object.vm_object; + vm_object_t object = VME_OBJECT(entry); vm_object_lock(object); /* * This call may iterate through the @@ -1040,12 +1307,12 @@ void vm_map_swapout(vm_map_t map) * If we raced with a swapin and lost, the residence count * will have been incremented to 1, and we simply return. */ - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); if (map->res_count != 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); return; } - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); /* * There are no intermediate states of a map going out or @@ -1068,14 +1335,14 @@ void vm_map_swapout(vm_map_t map) entry = vm_map_first_entry(map); while (entry != vm_map_to_entry(map)) { - if (entry->object.vm_object != VM_OBJECT_NULL) { + if (VME_OBJECT(entry) != VM_OBJECT_NULL) { if (entry->is_sub_map) { - vm_map_t lmap = entry->object.sub_map; - mutex_lock(&lmap->s_lock); + vm_map_t lmap = VME_SUBMAP(entry); + lck_mtx_lock(&lmap->s_lock); vm_map_res_deallocate(lmap); - mutex_unlock(&lmap->s_lock); + lck_mtx_unlock(&lmap->s_lock); } else { - vm_object_t object = entry->object.vm_object; + vm_object_t object = VME_OBJECT(entry); vm_object_lock(object); /* * This call may take a long time, @@ -1095,42 +1362,12 @@ void vm_map_swapout(vm_map_t map) #endif /* TASK_SWAPPER */ - /* - * SAVE_HINT_MAP_READ: + * vm_map_lookup_entry: [ internal use only ] * - * Saves the specified entry as the hint for - * future lookups. only a read lock is held on map, - * so make sure the store is atomic... OSCompareAndSwap - * guarantees this...
also, we don't care if we collide - * and someone else wins and stores their 'hint' - */ -#define SAVE_HINT_MAP_READ(map,value) \ - MACRO_BEGIN \ - OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \ - MACRO_END - - -/* - * SAVE_HINT_MAP_WRITE: - * - * Saves the specified entry as the hint for - * future lookups. write lock held on map, - * so no one else can be writing or looking - * until the lock is dropped, so it's safe - * to just do an assignment - */ -#define SAVE_HINT_MAP_WRITE(map,value) \ - MACRO_BEGIN \ - (map)->hint = (value); \ - MACRO_END - -/* - * vm_map_lookup_entry: [ internal use only ] - * - * Finds the map entry containing (or - * immediately preceding) the specified address - * in the given map; the entry is returned + * Calls into the vm map store layer to find the map + * entry containing (or immediately preceding) the + * specified address in the given map; the entry is returned * in the "entry" parameter. The boolean * result indicates whether the address is * actually contained in the map. @@ -1141,69 +1378,7 @@ vm_map_lookup_entry( register vm_map_offset_t address, vm_map_entry_t *entry) /* OUT */ { - register vm_map_entry_t cur; - register vm_map_entry_t last; - - /* - * Start looking either from the head of the - * list, or from the hint. - */ - cur = map->hint; - - if (cur == vm_map_to_entry(map)) - cur = cur->vme_next; - - if (address >= cur->vme_start) { - /* - * Go from hint to end of list. - * - * But first, make a quick check to see if - * we are already looking at the entry we - * want (which is usually the case). - * Note also that we don't need to save the hint - * here... it is the same hint (unless we are - * at the header, in which case the hint didn't - * buy us anything anyway). 
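The UInt32 casts in the SAVE_HINT_MAP_READ macro being removed here truncate pointers on LP64 kernels; a pointer-width compare-and-swap keeps the same lock-free publication (losing the race to another reader is harmless) without that assumption. A sketch using OSCompareAndSwapPtr from <libkern/OSAtomic.h>:

static void
demo_save_hint_read(vm_map_t map, vm_map_entry_t value)
{
    /* only a read lock is held: publish atomically, ignore CAS failure */
    (void) OSCompareAndSwapPtr((void *) map->hint, (void *) value,
                               (void * volatile *) &map->hint);
}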
- */ - last = vm_map_to_entry(map); - if ((cur != last) && (cur->vme_end > address)) { - *entry = cur; - return(TRUE); - } - } - else { - /* - * Go from start to hint, *inclusively* - */ - last = cur->vme_next; - cur = vm_map_first_entry(map); - } - - /* - * Search linearly - */ - - while (cur != last) { - if (cur->vme_end > address) { - if (address >= cur->vme_start) { - /* - * Save this lookup for future - * hints, and return - */ - - *entry = cur; - SAVE_HINT_MAP_READ(map, cur); - - return(TRUE); - } - break; - } - cur = cur->vme_next; - } - *entry = cur->vme_prev; - SAVE_HINT_MAP_READ(map, *entry); - - return(FALSE); + return ( vm_map_store_lookup_entry( map, address, entry )); } /* @@ -1228,9 +1403,10 @@ vm_map_find_space( int flags, vm_map_entry_t *o_entry) /* OUT */ { - register vm_map_entry_t entry, new_entry; + vm_map_entry_t entry, new_entry; register vm_map_offset_t start; register vm_map_offset_t end; + vm_map_entry_t hole_entry; if (size == 0) { *address = 0; @@ -1239,10 +1415,10 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_AFTER) { /* account for the back guard page in the size */ - size += PAGE_SIZE_64; + size += VM_MAP_PAGE_SIZE(map); } - new_entry = vm_map_entry_create(map); + new_entry = vm_map_entry_create(map, FALSE); /* * Look for the first possible address; if there's already @@ -1251,11 +1427,31 @@ vm_map_find_space( vm_map_lock(map); - assert(first_free_is_valid(map)); - if ((entry = map->first_free) == vm_map_to_entry(map)) - start = map->min_offset; - else - start = entry->vme_end; + if( map->disable_vmentry_reuse == TRUE) { + VM_MAP_HIGHEST_ENTRY(map, entry, start); + } else { + if (map->holelistenabled) { + hole_entry = (vm_map_entry_t)map->holes_list; + + if (hole_entry == NULL) { + /* + * No more space in the map? + */ + vm_map_entry_dispose(map, new_entry); + vm_map_unlock(map); + return(KERN_NO_SPACE); + } + + entry = hole_entry; + start = entry->vme_start; + } else { + assert(first_free_is_valid(map)); + if ((entry = map->first_free) == vm_map_to_entry(map)) + start = map->min_offset; + else + start = entry->vme_end; + } + } /* * In any case, the "entry" always precedes @@ -1273,7 +1469,7 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_BEFORE) { /* reserve space for the front guard page */ - start += PAGE_SIZE_64; + start += VM_MAP_PAGE_SIZE(map); } end = ((start + mask) & ~mask); @@ -1291,28 +1487,53 @@ vm_map_find_space( return(KERN_NO_SPACE); } - /* - * If there are no more entries, we must win. - */ - next = entry->vme_next; - if (next == vm_map_to_entry(map)) - break; - /* - * If there is another entry, it must be - * after the end of the potential new region. - */ + if (map->holelistenabled) { + if (entry->vme_end >= end) + break; + } else { + /* + * If there are no more entries, we must win. + * + * OR + * + * If there is another entry, it must be + * after the end of the potential new region. + */ - if (next->vme_start >= end) - break; + if (next == vm_map_to_entry(map)) + break; + + if (next->vme_start >= end) + break; + } /* * Didn't fit -- move to the next entry. 
*/ entry = next; - start = entry->vme_end; + + if (map->holelistenabled) { + if (entry == (vm_map_entry_t) map->holes_list) { + /* + * Wrapped around + */ + vm_map_entry_dispose(map, new_entry); + vm_map_unlock(map); + return(KERN_NO_SPACE); + } + start = entry->vme_start; + } else { + start = entry->vme_end; + } + } + + if (map->holelistenabled) { + if (vm_map_lookup_entry(map, entry->vme_start, &entry)) { + panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start); + } } /* @@ -1327,20 +1548,25 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_BEFORE) { /* go back for the front guard page */ - start -= PAGE_SIZE_64; + start -= VM_MAP_PAGE_SIZE(map); } *address = start; + assert(start < end); new_entry->vme_start = start; new_entry->vme_end = end; assert(page_aligned(new_entry->vme_start)); assert(page_aligned(new_entry->vme_end)); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, + VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, + VM_MAP_PAGE_MASK(map))); new_entry->is_shared = FALSE; new_entry->is_sub_map = FALSE; - new_entry->use_pmap = FALSE; - new_entry->object.vm_object = VM_OBJECT_NULL; - new_entry->offset = (vm_object_offset_t) 0; + new_entry->use_pmap = TRUE; + VME_OBJECT_SET(new_entry, VM_OBJECT_NULL); + VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0); new_entry->needs_copy = FALSE; @@ -1354,16 +1580,29 @@ vm_map_find_space( new_entry->in_transition = FALSE; new_entry->needs_wakeup = FALSE; new_entry->no_cache = FALSE; + new_entry->permanent = FALSE; + new_entry->superpage_size = FALSE; + if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) { + new_entry->map_aligned = TRUE; + } else { + new_entry->map_aligned = FALSE; + } - new_entry->alias = 0; + new_entry->used_for_jit = FALSE; + new_entry->zero_wired_pages = FALSE; + new_entry->iokit_acct = FALSE; + new_entry->vme_resilient_codesign = FALSE; + new_entry->vme_resilient_media = FALSE; - VM_GET_FLAGS_ALIAS(flags, new_entry->alias); + int alias; + VM_GET_FLAGS_ALIAS(flags, alias); + VME_ALIAS_SET(new_entry, alias); /* * Insert the new entry into the list */ - vm_map_entry_link(map, entry, new_entry); + vm_map_store_entry_link(map, entry, new_entry); map->size += size; @@ -1393,7 +1632,7 @@ int vm_map_pmap_enter_enable = FALSE; * In/out conditions: * The source map should not be locked on entry. */ -static void +__unused static void vm_map_pmap_enter( vm_map_t map, register vm_map_offset_t addr, @@ -1411,6 +1650,17 @@ vm_map_pmap_enter( while (addr < end_addr) { register vm_page_t m; + + /* + * TODO: + * From vm_map_enter(), we come into this function without the map + * lock held or the object lock held. + * We haven't taken a reference on the object either. + * We should do a proper lookup on the map to make sure + * that things are sane before we go locking objects that + * could have been deallocated from under us. + */ + vm_object_lock(object); m = vm_page_lookup(object, offset); @@ -1432,8 +1682,11 @@ vm_map_pmap_enter( map, (unsigned long long)addr, object, (unsigned long long)offset); } type_of_fault = DBG_CACHE_HIT_FAULT; - kr = vm_fault_enter(m, map->pmap, addr, protection, - m->wire_count != 0, FALSE, FALSE, + kr = vm_fault_enter(m, map->pmap, addr, protection, protection, + VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, + 0, /* XXX need user tag / alias? */ + 0, /* alternate accounting? 
*/ + NULL, &type_of_fault); vm_object_unlock(object); @@ -1478,6 +1731,59 @@ boolean_t vm_map_pmap_is_empty( #endif /* MACHINE_PMAP_IS_EMPTY */ } +#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000 +kern_return_t +vm_map_random_address_for_size( + vm_map_t map, + vm_map_offset_t *address, + vm_map_size_t size) +{ + kern_return_t kr = KERN_SUCCESS; + int tries = 0; + vm_map_offset_t random_addr = 0; + vm_map_offset_t hole_end; + + vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL; + vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL; + vm_map_size_t vm_hole_size = 0; + vm_map_size_t addr_space_size; + + addr_space_size = vm_map_max(map) - vm_map_min(map); + + assert(page_aligned(size)); + + while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) { + random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT; + random_addr = vm_map_trunc_page( + vm_map_min(map) +(random_addr % addr_space_size), + VM_MAP_PAGE_MASK(map)); + + if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) { + if (prev_entry == vm_map_to_entry(map)) { + next_entry = vm_map_first_entry(map); + } else { + next_entry = prev_entry->vme_next; + } + if (next_entry == vm_map_to_entry(map)) { + hole_end = vm_map_max(map); + } else { + hole_end = next_entry->vme_start; + } + vm_hole_size = hole_end - random_addr; + if (vm_hole_size >= size) { + *address = random_addr; + break; + } + } + tries++; + } + + if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) { + kr = KERN_NO_SPACE; + } + return kr; +} + /* * Routine: vm_map_enter * @@ -1494,9 +1800,9 @@ static unsigned int vm_map_enter_restore_failures = 0; kern_return_t vm_map_enter( vm_map_t map, - vm_map_offset_t *address, /* IN/OUT */ + vm_map_offset_t *address, /* IN/OUT */ vm_map_size_t size, - vm_map_offset_t mask, + vm_map_offset_t mask, int flags, vm_object_t object, vm_object_offset_t offset, @@ -1508,19 +1814,65 @@ vm_map_enter( vm_map_entry_t entry, new_entry; vm_map_offset_t start, tmp_start, tmp_offset; vm_map_offset_t end, tmp_end; + vm_map_offset_t tmp2_start, tmp2_end; + vm_map_offset_t step; kern_return_t result = KERN_SUCCESS; vm_map_t zap_old_map = VM_MAP_NULL; vm_map_t zap_new_map = VM_MAP_NULL; boolean_t map_locked = FALSE; boolean_t pmap_empty = TRUE; boolean_t new_mapping_established = FALSE; + boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0); boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0); boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0); boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0); boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0); boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0); - char alias; + boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0); + boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0); + boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0); + boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0); + boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0); + unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT); + vm_tag_t alias, user_alias; vm_map_offset_t effective_min_offset, effective_max_offset; + kern_return_t kr; + boolean_t clear_map_aligned = FALSE; + vm_map_entry_t hole_entry; + + if (superpage_size) { + switch (superpage_size) { + /* + * Note that the current implementation only supports + * a single size for superpages, SUPERPAGE_SIZE, per + * architecture. 
As soon as more sizes are supposed + * to be supported, SUPERPAGE_SIZE has to be replaced + * with a lookup of the size depending on superpage_size. + */ +#ifdef __x86_64__ + case SUPERPAGE_SIZE_ANY: + /* handle it like 2 MB and round up to page size */ + size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1); + case SUPERPAGE_SIZE_2MB: + break; +#endif + default: + return KERN_INVALID_ARGUMENT; + } + mask = SUPERPAGE_SIZE-1; + if (size & (SUPERPAGE_SIZE-1)) + return KERN_INVALID_ARGUMENT; + inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */ + } + + + + if (resilient_codesign || resilient_media) { + if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) || + (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) { + return KERN_PROTECTION_FAILURE; + } + } if (is_submap) { if (purgable) { @@ -1547,9 +1899,10 @@ vm_map_enter( } effective_min_offset = map->min_offset; + if (flags & VM_FLAGS_BEYOND_MAX) { /* - * Allow an insertion beyond the map's official top boundary. + * Allow an insertion beyond the map's max offset. */ if (vm_map_is_64bit(map)) effective_max_offset = 0xFFFFFFFFFFFFF000ULL; @@ -1566,12 +1919,41 @@ vm_map_enter( } VM_GET_FLAGS_ALIAS(flags, alias); + if (map->pmap == kernel_pmap) { + user_alias = VM_KERN_MEMORY_NONE; + } else { + user_alias = alias; + } #define RETURN(value) { result = value; goto BailOut; } assert(page_aligned(*address)); assert(page_aligned(size)); + if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) { + /* + * In most cases, the caller rounds the size up to the + * map's page size. + * If we get a size that is explicitly not map-aligned here, + * we'll have to respect the caller's wish and mark the + * mapping as "not map-aligned" to avoid tripping the + * map alignment checks later. + */ + clear_map_aligned = TRUE; + } + if (!anywhere && + !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) { + /* + * We've been asked to map at a fixed address and that + * address is not aligned to the map's specific alignment. + * The caller should know what it's doing (i.e. most likely + * mapping some fragmented copy map, transferring memory from + * a VM map with a different alignment), so clear map_aligned + * for this new VM map entry and proceed. + */ + clear_map_aligned = TRUE; + } + /* * Only zero-fill objects are allowed to be purgable. * LP64todo - limit purgable objects to 32-bits for now @@ -1579,9 +1961,9 @@ vm_map_enter( if (purgable && (offset != 0 || (object != VM_OBJECT_NULL && - (object->size != size || + (object->vo_size != size || object->purgable == VM_PURGABLE_DENY)) - || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */ + || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */ return KERN_INVALID_ARGUMENT; if (!anywhere && overwrite) { @@ -1597,7 +1979,9 @@ vm_map_enter( zap_old_map = vm_map_create(PMAP_NULL, *address, *address + size, - TRUE); + map->hdr.entries_pageable); + vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map)); + vm_map_disable_hole_optimization(zap_old_map); } StartAgain: ; @@ -1607,6 +1991,22 @@ StartAgain: ; if (anywhere) { vm_map_lock(map); map_locked = TRUE; + + if (entry_for_jit) { + if (map->jit_entry_exists) { + result = KERN_INVALID_ARGUMENT; + goto BailOut; + } + /* + * Get a random start address. + */ + result = vm_map_random_address_for_size(map, address, size); + if (result != KERN_SUCCESS) { + goto BailOut; + } + start = *address; + } + /* * Calculate the first possible address. @@ -1623,15 +2023,86 @@ StartAgain: ; * address, we have to start after it. 
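The VM_FLAGS_MAP_JIT path above allows one randomized, executable-capable entry per map, with its base chosen by vm_map_random_address_for_size(). User space typically requests this through mmap(2) with the Darwin MAP_JIT flag (entitlement checks apply); a sketch:

#include <sys/mman.h>
#include <stddef.h>

void *
alloc_jit_region(size_t len)
{
    /* returns MAP_FAILED if the process may not create JIT regions */
    return mmap(NULL, len,
                PROT_READ | PROT_WRITE | PROT_EXEC,
                MAP_PRIVATE | MAP_ANON | MAP_JIT, -1, 0);
}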
*/ - assert(first_free_is_valid(map)); - if (start == effective_min_offset) { - if ((entry = map->first_free) != vm_map_to_entry(map)) - start = entry->vme_end; + if( map->disable_vmentry_reuse == TRUE) { + VM_MAP_HIGHEST_ENTRY(map, entry, start); } else { - vm_map_entry_t tmp_entry; - if (vm_map_lookup_entry(map, start, &tmp_entry)) - start = tmp_entry->vme_end; - entry = tmp_entry; + + if (map->holelistenabled) { + hole_entry = (vm_map_entry_t)map->holes_list; + + if (hole_entry == NULL) { + /* + * No more space in the map? + */ + result = KERN_NO_SPACE; + goto BailOut; + } else { + + boolean_t found_hole = FALSE; + + do { + if (hole_entry->vme_start >= start) { + start = hole_entry->vme_start; + found_hole = TRUE; + break; + } + + if (hole_entry->vme_end > start) { + found_hole = TRUE; + break; + } + hole_entry = hole_entry->vme_next; + + } while (hole_entry != (vm_map_entry_t) map->holes_list); + + if (found_hole == FALSE) { + result = KERN_NO_SPACE; + goto BailOut; + } + + entry = hole_entry; + + if (start == 0) + start += PAGE_SIZE_64; + } + } else { + assert(first_free_is_valid(map)); + + entry = map->first_free; + + if (entry == vm_map_to_entry(map)) { + entry = NULL; + } else { + if (entry->vme_next == vm_map_to_entry(map)){ + /* + * Hole at the end of the map. + */ + entry = NULL; + } else { + if (start < (entry->vme_next)->vme_start ) { + start = entry->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); + } else { + /* + * Need to do a lookup. + */ + entry = NULL; + } + } + } + + if (entry == NULL) { + vm_map_entry_t tmp_entry; + if (vm_map_lookup_entry(map, start, &tmp_entry)) { + assert(!entry_for_jit); + start = tmp_entry->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); + } + entry = tmp_entry; + } + } } /* @@ -1650,13 +2121,18 @@ StartAgain: ; */ end = ((start + mask) & ~mask); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (end < start) RETURN(KERN_NO_SPACE); start = end; + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_PAGE_MASK(map))); end += size; if ((end > effective_max_offset) || (end < start)) { if (map->wait_for_space) { + assert(!keep_map_locked); if (size <= (effective_max_offset - effective_min_offset)) { assert_wait((event_t)map, @@ -1670,30 +2146,60 @@ StartAgain: ; RETURN(KERN_NO_SPACE); } - /* - * If there are no more entries, we must win. - */ - next = entry->vme_next; - if (next == vm_map_to_entry(map)) - break; - /* - * If there is another entry, it must be - * after the end of the potential new region. - */ + if (map->holelistenabled) { + if (entry->vme_end >= end) + break; + } else { + /* + * If there are no more entries, we must win. + * + * OR + * + * If there is another entry, it must be + * after the end of the potential new region. + */ - if (next->vme_start >= end) - break; + if (next == vm_map_to_entry(map)) + break; + + if (next->vme_start >= end) + break; + } /* * Didn't fit -- move to the next entry. 
*/ entry = next; - start = entry->vme_end; + + if (map->holelistenabled) { + if (entry == (vm_map_entry_t) map->holes_list) { + /* + * Wrapped around + */ + result = KERN_NO_SPACE; + goto BailOut; + } + start = entry->vme_start; + } else { + start = entry->vme_end; + } + + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); + } + + if (map->holelistenabled) { + if (vm_map_lookup_entry(map, entry->vme_start, &entry)) { + panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start); + } } + *address = start; + assert(VM_MAP_PAGE_ALIGNED(*address, + VM_MAP_PAGE_MASK(map))); } else { /* * Verify that: @@ -1725,7 +2231,8 @@ StartAgain: ; * address range, saving them in our "zap_old_map". */ (void) vm_map_delete(map, start, end, - VM_MAP_REMOVE_SAVE_ENTRIES, + (VM_MAP_REMOVE_SAVE_ENTRIES | + VM_MAP_REMOVE_NO_MAP_ALIGN), zap_old_map); } @@ -1749,19 +2256,53 @@ StartAgain: ; } for (; entry->vme_start < end; entry = entry->vme_next) { + /* + * Check if the mapping's attributes + * match the existing map entry. + */ if (entry == vm_map_to_entry(map) || entry->vme_start != tmp_start || entry->is_sub_map != is_submap || - entry->object.vm_object != object || - entry->offset != tmp_offset || + VME_OFFSET(entry) != tmp_offset || entry->needs_copy != needs_copy || entry->protection != cur_protection || entry->max_protection != max_protection || entry->inheritance != inheritance || - entry->alias != alias) { + entry->iokit_acct != iokit_acct || + VME_ALIAS(entry) != alias) { /* not the same mapping ! */ RETURN(KERN_NO_SPACE); } + /* + * Check if the same object is being mapped. + */ + if (is_submap) { + if (VME_SUBMAP(entry) != + (vm_map_t) object) { + /* not the same submap */ + RETURN(KERN_NO_SPACE); + } + } else { + if (VME_OBJECT(entry) != object) { + /* not the same VM object... */ + vm_object_t obj2; + + obj2 = VME_OBJECT(entry); + if ((obj2 == VM_OBJECT_NULL || + obj2->internal) && + (object == VM_OBJECT_NULL || + object->internal)) { + /* + * ... but both are + * anonymous memory, + * so equivalent. + */ + } else { + RETURN(KERN_NO_SPACE); + } + } + } + tmp_offset += entry->vme_end - entry->vme_start; tmp_start += entry->vme_end - entry->vme_start; if (entry->vme_end >= end) { @@ -1802,11 +2343,36 @@ StartAgain: ; * semantics. */ - if (purgable) { + if (purgable || entry_for_jit) { if (object == VM_OBJECT_NULL) { + object = vm_object_allocate(size); object->copy_strategy = MEMORY_OBJECT_COPY_NONE; - object->purgable = VM_PURGABLE_NONVOLATILE; + object->true_share = TRUE; + if (purgable) { + task_t owner; + object->purgable = VM_PURGABLE_NONVOLATILE; + if (map->pmap == kernel_pmap) { + /* + * Purgeable mappings made in a kernel + * map are "owned" by the kernel itself + * rather than the current user task + * because they're likely to be used by + * more than this user task (see + * execargs_purgeable_allocate(), for + * example). 
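The ownership rule described above pairs with the user-visible purgeable API: a task allocates purgeable memory (so the backing object is charged to that task's ledger) and then toggles its volatility. A user-space sketch using the Mach interfaces (error handling elided):

#include <mach/mach.h>
#include <mach/mach_vm.h>

void
purgeable_demo(void)
{
    mach_vm_address_t addr = 0;
    mach_vm_size_t    len  = 1024 * 1024;
    int               state;

    mach_vm_allocate(mach_task_self(), &addr, len,
                     VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
    state = VM_PURGABLE_VOLATILE;  /* may be emptied under memory pressure */
    mach_vm_purgable_control(mach_task_self(), addr,
                             VM_PURGABLE_SET_STATE, &state);
}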
+ */ + owner = kernel_task; + } else { + owner = current_task(); + } + assert(object->vo_purgeable_owner == NULL); + assert(object->resident_page_count == 0); + assert(object->wired_page_count == 0); + vm_object_lock(object); + vm_purgeable_nonvolatile_enqueue(object, owner); + vm_object_unlock(object); + } offset = (vm_object_offset_t)0; } } else if ((is_submap == FALSE) && @@ -1815,19 +2381,37 @@ StartAgain: ; (entry->vme_end == start) && (!entry->is_shared) && (!entry->is_sub_map) && - (entry->alias == alias) && - (entry->inheritance == inheritance) && + (!entry->in_transition) && + (!entry->needs_wakeup) && + (entry->behavior == VM_BEHAVIOR_DEFAULT) && (entry->protection == cur_protection) && (entry->max_protection == max_protection) && - (entry->behavior == VM_BEHAVIOR_DEFAULT) && - (entry->in_transition == 0) && + (entry->inheritance == inheritance) && + ((user_alias == VM_MEMORY_REALLOC) || + (VME_ALIAS(entry) == alias)) && (entry->no_cache == no_cache) && - ((alias == VM_MEMORY_REALLOC) || - ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) && + (entry->permanent == permanent) && + (!entry->superpage_size && !superpage_size) && + /* + * No coalescing if not map-aligned, to avoid propagating + * that condition any further than needed: + */ + (!entry->map_aligned || !clear_map_aligned) && + (!entry->zero_wired_pages) && + (!entry->used_for_jit && !entry_for_jit) && + (entry->iokit_acct == iokit_acct) && + (!entry->vme_resilient_codesign) && + (!entry->vme_resilient_media) && + + ((entry->vme_end - entry->vme_start) + size <= + (user_alias == VM_MEMORY_REALLOC ? + ANON_CHUNK_SIZE : + NO_COALESCE_LIMIT)) && + (entry->wired_count == 0)) { /* implies user_wired_count == 0 */ - if (vm_object_coalesce(entry->object.vm_object, + if (vm_object_coalesce(VME_OBJECT(entry), VM_OBJECT_NULL, - entry->offset, + VME_OFFSET(entry), (vm_object_offset_t) 0, (vm_map_size_t)(entry->vme_end - entry->vme_start), (vm_map_size_t)(end - entry->vme_end))) { @@ -1838,122 +2422,288 @@ StartAgain: ; * new range. */ map->size += (end - entry->vme_end); + assert(entry->vme_start < end); + assert(VM_MAP_PAGE_ALIGNED(end, + VM_MAP_PAGE_MASK(map))); + if (__improbable(vm_debug_events)) + DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end); entry->vme_end = end; - UPDATE_FIRST_FREE(map, map->first_free); + if (map->holelistenabled) { + vm_map_store_update_first_free(map, entry, TRUE); + } else { + vm_map_store_update_first_free(map, map->first_free, TRUE); + } + new_mapping_established = TRUE; RETURN(KERN_SUCCESS); } } - /* - * Create a new entry - * LP64todo - for now, we can only allocate 4GB internal objects - * because the default pager can't page bigger ones. Remove this - * when it can. - * - * XXX FBDP - * The reserved "page zero" in each process's address space can - * be arbitrarily large. Splitting it into separate 4GB objects and - * therefore different VM map entries serves no purpose and just - * slows down operations on the VM map, so let's not split the - * allocation into 4GB chunks if the max protection is NONE. That - * memory should never be accessible, so it will never get to the - * default pager. 
- */
- tmp_start = start;
- if (object == VM_OBJECT_NULL &&
- size > (vm_map_size_t)VM_MAX_ADDRESS &&
- max_protection != VM_PROT_NONE)
- tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS;
- else
- tmp_end = end;
- do {
- new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
- object, offset, needs_copy,
- FALSE, FALSE,
- cur_protection, max_protection,
- VM_BEHAVIOR_DEFAULT,
- inheritance, 0, no_cache);
- new_entry->alias = alias;
- if (is_submap) {
- vm_map_t submap;
- boolean_t submap_is_64bit;
- boolean_t use_pmap;
-
- new_entry->is_sub_map = TRUE;
- submap = (vm_map_t) object;
- submap_is_64bit = vm_map_is_64bit(submap);
- use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
-#ifndef NO_NESTED_PMAP
- if (use_pmap && submap->pmap == NULL) {
- /* we need a sub pmap to nest... */
- submap->pmap = pmap_create(0, submap_is_64bit);
- if (submap->pmap == NULL) {
- /* let's proceed without nesting... */
- }
+ step = superpage_size ? SUPERPAGE_SIZE : (end - start);
+ new_entry = NULL;
+
+ for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
+ tmp2_end = tmp2_start + step;
+
+ tmp_start = tmp2_start;
+ if (object == VM_OBJECT_NULL &&
+ size > (vm_map_size_t)ANON_CHUNK_SIZE &&
+ max_protection != VM_PROT_NONE &&
+ superpage_size == 0)
+ tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
+ else
+ tmp_end = tmp2_end;
+ do {
+ new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
+ object, offset, needs_copy,
+ FALSE, FALSE,
+ cur_protection, max_protection,
+ VM_BEHAVIOR_DEFAULT,
+ (entry_for_jit)? VM_INHERIT_NONE: inheritance,
+ 0, no_cache,
+ permanent,
+ superpage_size,
+ clear_map_aligned,
+ is_submap);
+
+ assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
+ VME_ALIAS_SET(new_entry, alias);
+
+ if (entry_for_jit){
+ if (!(map->jit_entry_exists)){
+ new_entry->used_for_jit = TRUE;
+ map->jit_entry_exists = TRUE;
+ }
+ }
+
+ if (resilient_codesign &&
+ ! ((cur_protection | max_protection) &
+ (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
+ new_entry->vme_resilient_codesign = TRUE;
+ }
+
+ if (resilient_media &&
+ ! ((cur_protection | max_protection) &
+ (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
+ new_entry->vme_resilient_media = TRUE;
+ }
+
+ assert(!new_entry->iokit_acct);
+ if (!is_submap &&
+ object != VM_OBJECT_NULL &&
+ object->purgable != VM_PURGABLE_DENY) {
+ assert(new_entry->use_pmap);
+ assert(!new_entry->iokit_acct);
+ /*
+ * Turn off pmap accounting since
+ * purgeable objects have their
+ * own ledgers.
+ */
+ new_entry->use_pmap = FALSE;
+ } else if (!is_submap &&
+ iokit_acct &&
+ object != VM_OBJECT_NULL &&
+ object->internal) {
+ /* alternate accounting */
+ assert(!new_entry->iokit_acct);
+ assert(new_entry->use_pmap);
+ new_entry->iokit_acct = TRUE;
+ new_entry->use_pmap = FALSE;
+ DTRACE_VM4(
+ vm_map_iokit_mapped_region,
+ vm_map_t, map,
+ vm_map_offset_t, new_entry->vme_start,
+ vm_map_offset_t, new_entry->vme_end,
+ int, VME_ALIAS(new_entry));
+ vm_map_iokit_mapped_region(
+ map,
+ (new_entry->vme_end -
+ new_entry->vme_start));
+ } else if (!is_submap) {
+ assert(!new_entry->iokit_acct);
+ assert(new_entry->use_pmap);
+ }
+
+ if (is_submap) {
+ vm_map_t submap;
+ boolean_t submap_is_64bit;
+ boolean_t use_pmap;
+
+ assert(new_entry->is_sub_map);
+ assert(!new_entry->use_pmap);
+ assert(!new_entry->iokit_acct);
+ submap = (vm_map_t) object;
+ submap_is_64bit = vm_map_is_64bit(submap);
+ use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
+#ifndef NO_NESTED_PMAP
+ if (use_pmap && submap->pmap == NULL) {
+ ledger_t ledger = map->pmap->ledger;
+ /* we need a sub pmap to nest... 
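 *
 * (pmap_create() now takes the parent map's ledger as its first
 * argument, so the nested pmap's bookkeeping lands in the same
 * ledger as the map being populated.  The nesting sequence this
 * block performs, condensed:
 *
 *	submap->pmap = pmap_create(ledger, 0, submap_is_64bit);
 *	kr = pmap_nest(map->pmap, submap->pmap,
 *	    tmp_start, tmp_start, tmp_end - tmp_start);
 *
 * both calls appear verbatim just below.)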
*/ + submap->pmap = pmap_create(ledger, 0, + submap_is_64bit); + if (submap->pmap == NULL) { + /* let's proceed without nesting... */ + } + } + if (use_pmap && submap->pmap != NULL) { + kr = pmap_nest(map->pmap, + submap->pmap, + tmp_start, + tmp_start, + tmp_end - tmp_start); + if (kr != KERN_SUCCESS) { + printf("vm_map_enter: " + "pmap_nest(0x%llx,0x%llx) " + "error 0x%x\n", + (long long)tmp_start, + (long long)tmp_end, + kr); + } else { + /* we're now nested ! */ + new_entry->use_pmap = TRUE; + pmap_empty = FALSE; + } + } +#endif /* NO_NESTED_PMAP */ } - if (use_pmap && submap->pmap != NULL) { - kern_return_t kr; + entry = new_entry; + + if (superpage_size) { + vm_page_t pages, m; + vm_object_t sp_object; - kr = pmap_nest(map->pmap, - submap->pmap, - tmp_start, - tmp_start, - tmp_end - tmp_start); + VME_OFFSET_SET(entry, 0); + + /* allocate one superpage */ + kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0); if (kr != KERN_SUCCESS) { - printf("vm_map_enter: " - "pmap_nest(0x%llx,0x%llx) " - "error 0x%x\n", - (long long)tmp_start, - (long long)tmp_end, - kr); - } else { - /* we're now nested ! */ - new_entry->use_pmap = TRUE; - pmap_empty = FALSE; + /* deallocate whole range... */ + new_mapping_established = TRUE; + /* ... but only up to "tmp_end" */ + size -= end - tmp_end; + RETURN(kr); + } + + /* create one vm_object per superpage */ + sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start)); + sp_object->phys_contiguous = TRUE; + sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE; + VME_OBJECT_SET(entry, sp_object); + assert(entry->use_pmap); + + /* enter the base pages into the object */ + vm_object_lock(sp_object); + for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) { + m = pages; + pmap_zero_page(m->phys_page); + pages = NEXT_PAGE(m); + *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; + vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK); } + vm_object_unlock(sp_object); } -#endif /* NO_NESTED_PMAP */ + } while (tmp_end != tmp2_end && + (tmp_start = tmp_end) && + (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ? + tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end)); + } + + new_mapping_established = TRUE; + +BailOut: + assert(map_locked == TRUE); + + if (result == KERN_SUCCESS) { + vm_prot_t pager_prot; + memory_object_t pager; + +#if DEBUG + if (pmap_empty && + !(flags & VM_FLAGS_NO_PMAP_CHECK)) { + assert(vm_map_pmap_is_empty(map, + *address, + *address+size)); } - entry = new_entry; - } while (tmp_end != end && - (tmp_start = tmp_end) && - (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ? - tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end)); +#endif /* DEBUG */ - vm_map_unlock(map); - map_locked = FALSE; + /* + * For "named" VM objects, let the pager know that the + * memory object is being mapped. Some pagers need to keep + * track of this, to know when they can reclaim the memory + * object, for example. + * VM calls memory_object_map() for each mapping (specifying + * the protection of each mapping) and calls + * memory_object_last_unmap() when all the mappings are gone. + */ + pager_prot = max_protection; + if (needs_copy) { + /* + * Copy-On-Write mapping: won't modify + * the memory object. 
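 *
 * (Caller-side sketch, assuming the standard mach_vm_map() user
 * API; a COW mapping like this is requested with copy=TRUE, which
 * is why the pager can be told the mapping is effectively
 * read-only.  mem_entry is a hypothetical named-entry port:
 *
 *	boolean_t copy = TRUE;	-- writes never reach the pager
 *	kr = mach_vm_map(mach_task_self(), &addr, size, 0,
 *	    VM_FLAGS_ANYWHERE, mem_entry, 0, copy,
 *	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);)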
+ */ + pager_prot &= ~VM_PROT_WRITE; + } + if (!is_submap && + object != VM_OBJECT_NULL && + object->named && + object->pager != MEMORY_OBJECT_NULL) { + vm_object_lock(object); + pager = object->pager; + if (object->named && + pager != MEMORY_OBJECT_NULL) { + assert(object->pager_ready); + vm_object_mapping_wait(object, THREAD_UNINT); + vm_object_mapping_begin(object); + vm_object_unlock(object); - new_mapping_established = TRUE; + kr = memory_object_map(pager, pager_prot); + assert(kr == KERN_SUCCESS); - /* Wire down the new entry if the user - * requested all new map entries be wired. - */ - if (map->wiring_required) { - pmap_empty = FALSE; /* pmap won't be empty */ - result = vm_map_wire(map, start, end, - new_entry->protection, TRUE); - RETURN(result); + vm_object_lock(object); + vm_object_mapping_end(object); + } + vm_object_unlock(object); + } } - if ((object != VM_OBJECT_NULL) && - (vm_map_pmap_enter_enable) && - (!anywhere) && - (!needs_copy) && - (size < (128*1024))) { - pmap_empty = FALSE; /* pmap won't be empty */ - - if (override_nx(map, alias) && cur_protection) - cur_protection |= VM_PROT_EXECUTE; + assert(map_locked == TRUE); - vm_map_pmap_enter(map, start, end, - object, offset, cur_protection); + if (!keep_map_locked) { + vm_map_unlock(map); + map_locked = FALSE; } -BailOut: ; - if (result == KERN_SUCCESS && - pmap_empty && - !(flags & VM_FLAGS_NO_PMAP_CHECK)) { - assert(vm_map_pmap_is_empty(map, *address, *address+size)); + /* + * We can't hold the map lock if we enter this block. + */ + + if (result == KERN_SUCCESS) { + + /* Wire down the new entry if the user + * requested all new map entries be wired. + */ + if ((map->wiring_required)||(superpage_size)) { + assert(!keep_map_locked); + pmap_empty = FALSE; /* pmap won't be empty */ + kr = vm_map_wire(map, start, end, + new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK), + TRUE); + result = kr; + } + } if (result != KERN_SUCCESS) { @@ -1967,13 +2717,18 @@ BailOut: ; zap_new_map = vm_map_create(PMAP_NULL, *address, *address + size, - TRUE); + map->hdr.entries_pageable); + vm_map_set_page_shift(zap_new_map, + VM_MAP_PAGE_SHIFT(map)); + vm_map_disable_hole_optimization(zap_new_map); + if (!map_locked) { vm_map_lock(map); map_locked = TRUE; } (void) vm_map_delete(map, *address, *address+size, - VM_MAP_REMOVE_SAVE_ENTRIES, + (VM_MAP_REMOVE_SAVE_ENTRIES | + VM_MAP_REMOVE_NO_MAP_ALIGN), zap_new_map); } if (zap_old_map != VM_MAP_NULL && @@ -2014,10 +2769,10 @@ BailOut: ; entry_size = (entry2->vme_end - entry2->vme_start); - vm_map_entry_unlink(zap_old_map, + vm_map_store_entry_unlink(zap_old_map, entry2); zap_old_map->size -= entry_size; - vm_map_entry_link(map, entry1, entry2); + vm_map_store_entry_link(map, entry1, entry2); map->size += entry_size; entry1 = entry2; } @@ -2032,7 +2787,11 @@ BailOut: ; } } - if (map_locked) { + /* + * The caller is responsible for releasing the lock if it requested to + * keep the map locked. + */ + if (map_locked && !keep_map_locked) { vm_map_unlock(map); } @@ -2054,8 +2813,15 @@ BailOut: ; #undef RETURN } -kern_return_t -vm_map_enter_mem_object( + +/* + * Counters for the prefault optimization. 
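 *
 * (Both counters are bumped in the prefault loop further down:
 * OSIncrementAtomic64(&vm_prefault_nb_pages) for each page
 * entered, OSIncrementAtomic64(&vm_prefault_nb_bailout) when
 * pmap_enter_options() refuses and the loop gives up.  They are
 * plain globals, presumably meant for inspection from a debugger
 * or an externally wired-up sysctl.)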
+ */ +int64_t vm_prefault_nb_pages = 0; +int64_t vm_prefault_nb_bailout = 0; + +static kern_return_t +vm_map_enter_mem_object_helper( vm_map_t target_map, vm_map_offset_t *address, vm_map_size_t initial_size, @@ -2066,13 +2832,23 @@ vm_map_enter_mem_object( boolean_t copy, vm_prot_t cur_protection, vm_prot_t max_protection, - vm_inherit_t inheritance) + vm_inherit_t inheritance, + upl_page_list_ptr_t page_list, + unsigned int page_list_count) { vm_map_address_t map_addr; vm_map_size_t map_size; vm_object_t object; vm_object_size_t size; kern_return_t result; + boolean_t mask_cur_protection, mask_max_protection; + boolean_t try_prefault = (page_list_count != 0); + vm_map_offset_t offset_in_mapping = 0; + + mask_cur_protection = cur_protection & VM_PROT_IS_MASK; + mask_max_protection = max_protection & VM_PROT_IS_MASK; + cur_protection &= ~VM_PROT_IS_MASK; + max_protection &= ~VM_PROT_IS_MASK; /* * Check arguments for validity @@ -2081,13 +2857,19 @@ vm_map_enter_mem_object( (cur_protection & ~VM_PROT_ALL) || (max_protection & ~VM_PROT_ALL) || (inheritance > VM_INHERIT_LAST_VALID) || - initial_size == 0) + (try_prefault && (copy || !page_list)) || + initial_size == 0) { return KERN_INVALID_ARGUMENT; - - map_addr = vm_map_trunc_page(*address); - map_size = vm_map_round_page(initial_size); - size = vm_object_round_page(initial_size); + } + { + map_addr = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); + map_size = vm_map_round_page(initial_size, + VM_MAP_PAGE_MASK(target_map)); + } + size = vm_object_round_page(initial_size); + /* * Find the vm object (if any) corresponding to this port. */ @@ -2099,29 +2881,86 @@ vm_map_enter_mem_object( vm_named_entry_t named_entry; named_entry = (vm_named_entry_t) port->ip_kobject; + + if (flags & (VM_FLAGS_RETURN_DATA_ADDR | + VM_FLAGS_RETURN_4K_DATA_ADDR)) { + offset += named_entry->data_offset; + } + /* a few checks to make sure user is obeying rules */ if (size == 0) { if (offset >= named_entry->size) return KERN_INVALID_RIGHT; size = named_entry->size - offset; } + if (mask_max_protection) { + max_protection &= named_entry->protection; + } + if (mask_cur_protection) { + cur_protection &= named_entry->protection; + } if ((named_entry->protection & max_protection) != max_protection) return KERN_INVALID_RIGHT; if ((named_entry->protection & cur_protection) != cur_protection) return KERN_INVALID_RIGHT; - if (named_entry->size < (offset + size)) + if (offset + size < offset) { + /* overflow */ return KERN_INVALID_ARGUMENT; + } + if (named_entry->size < (offset + initial_size)) { + return KERN_INVALID_ARGUMENT; + } + + if (named_entry->is_copy) { + /* for a vm_map_copy, we can only map it whole */ + if ((size != named_entry->size) && + (vm_map_round_page(size, + VM_MAP_PAGE_MASK(target_map)) == + named_entry->size)) { + /* XXX FBDP use the rounded size... */ + size = vm_map_round_page( + size, + VM_MAP_PAGE_MASK(target_map)); + } + + if (!(flags & VM_FLAGS_ANYWHERE) && + (offset != 0 || + size != named_entry->size)) { + /* + * XXX for a mapping at a "fixed" address, + * we can't trim after mapping the whole + * memory entry, so reject a request for a + * partial mapping. + */ + return KERN_INVALID_ARGUMENT; + } + } /* the callers parameter offset is defined to be the */ /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; + if (! VM_MAP_PAGE_ALIGNED(size, + VM_MAP_PAGE_MASK(target_map))) { + /* + * Let's not map more than requested; + * vm_map_enter() will handle this "not map-aligned" + * case. 
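 *
 * (Concrete case, assuming a 16K map over 4K VM objects: an 8K
 * memory entry gives
 *
 *	vm_map_round_page(0x2000, 0x3FFF) == 0x4000
 *
 * so rounding map_size here would over-map by 8K; keeping the
 * 4K-rounded "size" instead leaves the partial-page handling to
 * vm_map_enter().)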
+ */ + map_size = size; + } + named_entry_lock(named_entry); if (named_entry->is_sub_map) { vm_map_t submap; + if (flags & (VM_FLAGS_RETURN_DATA_ADDR | + VM_FLAGS_RETURN_4K_DATA_ADDR)) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap."); + } + submap = named_entry->backing.map; vm_map_lock(submap); vm_map_reference(submap); @@ -2148,16 +2987,20 @@ vm_map_enter_mem_object( * once it's been set and if we race, we'll * just end up setting it twice, which is OK. */ - if (submap->mapped == FALSE) { + if (submap->mapped_in_other_pmaps == FALSE && + vm_map_pmap(submap) != PMAP_NULL && + vm_map_pmap(submap) != + vm_map_pmap(target_map)) { /* - * This submap has never been mapped. - * Set its "mapped" flag now that it - * has been mapped. - * This happens only for the first ever - * mapping of a "submap". + * This submap is being mapped in a map + * that uses a different pmap. + * Set its "mapped_in_other_pmaps" flag + * to indicate that we now need to + * remove mappings from all pmaps rather + * than just the submap's pmap. */ vm_map_lock(submap); - submap->mapped = TRUE; + submap->mapped_in_other_pmaps = TRUE; vm_map_unlock(submap); } *address = map_addr; @@ -2168,11 +3011,15 @@ vm_map_enter_mem_object( unsigned int access; vm_prot_t protections; unsigned int wimg_mode; - boolean_t cache_attr; protections = named_entry->protection & VM_PROT_ALL; access = GET_MAP_MEM(named_entry->protection); + if (flags & (VM_FLAGS_RETURN_DATA_ADDR| + VM_FLAGS_RETURN_4K_DATA_ADDR)) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap."); + } + object = vm_object_enter(named_entry->backing.pager, named_entry->size, named_entry->internal, @@ -2193,20 +3040,18 @@ vm_map_enter_mem_object( named_entry_unlock(named_entry); wimg_mode = object->wimg_bits; + if (access == MAP_MEM_IO) { wimg_mode = VM_WIMG_IO; } else if (access == MAP_MEM_COPYBACK) { wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_INNERWBACK) { + wimg_mode = VM_WIMG_INNERWBACK; } else if (access == MAP_MEM_WTHRU) { wimg_mode = VM_WIMG_WTHRU; } else if (access == MAP_MEM_WCOMB) { wimg_mode = VM_WIMG_WCOMB; } - if (wimg_mode == VM_WIMG_IO || - wimg_mode == VM_WIMG_WCOMB) - cache_attr = TRUE; - else - cache_attr = FALSE; /* wait for object (if any) to be ready */ if (!named_entry->internal) { @@ -2219,25 +3064,201 @@ vm_map_enter_mem_object( } } - if (object->wimg_bits != wimg_mode) { - vm_page_t p; + if (object->wimg_bits != wimg_mode) + vm_object_change_wimg_mode(object, wimg_mode); - vm_object_paging_wait(object, THREAD_UNINT); +#if VM_OBJECT_TRACKING_OP_TRUESHARE + if (!object->true_share && + vm_object_tracking_inited) { + void *bt[VM_OBJECT_TRACKING_BTDEPTH]; + int num = 0; - object->wimg_bits = wimg_mode; - queue_iterate(&object->memq, p, vm_page_t, listq) { - if (!p->fictitious) { - if (p->pmapped) - pmap_disconnect(p->phys_page); - if (cache_attr) - pmap_sync_page_attributes_phys(p->phys_page); - } - } + num = OSBacktrace(bt, + VM_OBJECT_TRACKING_BTDEPTH); + btlog_add_entry(vm_object_tracking_btlog, + object, + VM_OBJECT_TRACKING_OP_TRUESHARE, + bt, + num); } +#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ + object->true_share = TRUE; + if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; vm_object_unlock(object); + + } else if (named_entry->is_copy) { + kern_return_t kr; + vm_map_copy_t copy_map; + vm_map_entry_t copy_entry; + vm_map_offset_t copy_addr; + + if (flags & ~(VM_FLAGS_FIXED | + VM_FLAGS_ANYWHERE | + VM_FLAGS_OVERWRITE | + 
VM_FLAGS_RETURN_4K_DATA_ADDR | + VM_FLAGS_RETURN_DATA_ADDR)) { + named_entry_unlock(named_entry); + return KERN_INVALID_ARGUMENT; + } + + if (flags & (VM_FLAGS_RETURN_DATA_ADDR | + VM_FLAGS_RETURN_4K_DATA_ADDR)) { + offset_in_mapping = offset - vm_object_trunc_page(offset); + if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) + offset_in_mapping &= ~((signed)(0xFFF)); + offset = vm_object_trunc_page(offset); + map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset; + } + + copy_map = named_entry->backing.copy; + assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST); + if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) { + /* unsupported type; should not happen */ + printf("vm_map_enter_mem_object: " + "memory_entry->backing.copy " + "unsupported type 0x%x\n", + copy_map->type); + named_entry_unlock(named_entry); + return KERN_INVALID_ARGUMENT; + } + + /* reserve a contiguous range */ + kr = vm_map_enter(target_map, + &map_addr, + /* map whole mem entry, trim later: */ + named_entry->size, + mask, + flags & (VM_FLAGS_ANYWHERE | + VM_FLAGS_OVERWRITE | + VM_FLAGS_RETURN_4K_DATA_ADDR | + VM_FLAGS_RETURN_DATA_ADDR), + VM_OBJECT_NULL, + 0, + FALSE, /* copy */ + cur_protection, + max_protection, + inheritance); + if (kr != KERN_SUCCESS) { + named_entry_unlock(named_entry); + return kr; + } + + copy_addr = map_addr; + + for (copy_entry = vm_map_copy_first_entry(copy_map); + copy_entry != vm_map_copy_to_entry(copy_map); + copy_entry = copy_entry->vme_next) { + int remap_flags = 0; + vm_map_t copy_submap; + vm_object_t copy_object; + vm_map_size_t copy_size; + vm_object_offset_t copy_offset; + + copy_offset = VME_OFFSET(copy_entry); + copy_size = (copy_entry->vme_end - + copy_entry->vme_start); + + /* sanity check */ + if ((copy_addr + copy_size) > + (map_addr + + named_entry->size /* XXX full size */ )) { + /* over-mapping too much !? */ + kr = KERN_INVALID_ARGUMENT; + /* abort */ + break; + } + + /* take a reference on the object */ + if (copy_entry->is_sub_map) { + remap_flags |= VM_FLAGS_SUBMAP; + copy_submap = VME_SUBMAP(copy_entry); + vm_map_lock(copy_submap); + vm_map_reference(copy_submap); + vm_map_unlock(copy_submap); + copy_object = (vm_object_t) copy_submap; + } else { + copy_object = VME_OBJECT(copy_entry); + vm_object_reference(copy_object); + } + + /* over-map the object into destination */ + remap_flags |= flags; + remap_flags |= VM_FLAGS_FIXED; + remap_flags |= VM_FLAGS_OVERWRITE; + remap_flags &= ~VM_FLAGS_ANYWHERE; + kr = vm_map_enter(target_map, + ©_addr, + copy_size, + (vm_map_offset_t) 0, + remap_flags, + copy_object, + copy_offset, + copy, + cur_protection, + max_protection, + inheritance); + if (kr != KERN_SUCCESS) { + if (copy_entry->is_sub_map) { + vm_map_deallocate(copy_submap); + } else { + vm_object_deallocate(copy_object); + } + /* abort */ + break; + } + + /* next mapping */ + copy_addr += copy_size; + } + + if (kr == KERN_SUCCESS) { + if (flags & (VM_FLAGS_RETURN_DATA_ADDR | + VM_FLAGS_RETURN_4K_DATA_ADDR)) { + *address = map_addr + offset_in_mapping; + } else { + *address = map_addr; + } + + if (offset) { + /* + * Trim in front, from 0 to "offset". + */ + vm_map_remove(target_map, + map_addr, + map_addr + offset, + 0); + *address += offset; + } + if (offset + map_size < named_entry->size) { + /* + * Trim in back, from + * "offset + map_size" to + * "named_entry->size". + */ + vm_map_remove(target_map, + (map_addr + + offset + map_size), + (map_addr + + named_entry->size), + 0); + } + } + named_entry_unlock(named_entry); + + if (kr != KERN_SUCCESS) { + if (! 
(flags & VM_FLAGS_OVERWRITE)) { + /* deallocate the contiguous range */ + (void) vm_deallocate(target_map, + map_addr, + map_size); + } + } + + return kr; + } else { /* This is the case where we are going to map */ /* an already mapped object. If the object is */ @@ -2245,6 +3266,15 @@ vm_map_enter_mem_object( /* object cannot be mapped until it is ready */ /* we can therefore avoid the ready check */ /* in this case. */ + if (flags & (VM_FLAGS_RETURN_DATA_ADDR | + VM_FLAGS_RETURN_4K_DATA_ADDR)) { + offset_in_mapping = offset - vm_object_trunc_page(offset); + if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) + offset_in_mapping &= ~((signed)(0xFFF)); + offset = vm_object_trunc_page(offset); + map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset; + } + object = named_entry->backing.object; assert(object != VM_OBJECT_NULL); named_entry_unlock(named_entry); @@ -2259,7 +3289,11 @@ vm_map_enter_mem_object( * this case, the port isn't really a port at all, but * instead is just a raw memory object. */ - + if (flags & (VM_FLAGS_RETURN_DATA_ADDR | + VM_FLAGS_RETURN_4K_DATA_ADDR)) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object."); + } + object = vm_object_enter((memory_object_t)port, size, FALSE, FALSE, FALSE); if (object == VM_OBJECT_NULL) @@ -2272,19 +3306,66 @@ vm_map_enter_mem_object( " by a non-private kernel entity\n"); return KERN_INVALID_OBJECT; } - vm_object_lock(object); - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); + if (!object->pager_ready) { vm_object_lock(object); + + while (!object->pager_ready) { + vm_object_wait(object, + VM_OBJECT_EVENT_PAGER_READY, + THREAD_UNINT); + vm_object_lock(object); + } + vm_object_unlock(object); } - vm_object_unlock(object); } } else { return KERN_INVALID_OBJECT; } + if (object != VM_OBJECT_NULL && + object->named && + object->pager != MEMORY_OBJECT_NULL && + object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { + memory_object_t pager; + vm_prot_t pager_prot; + kern_return_t kr; + + /* + * For "named" VM objects, let the pager know that the + * memory object is being mapped. Some pagers need to keep + * track of this, to know when they can reclaim the memory + * object, for example. + * VM calls memory_object_map() for each mapping (specifying + * the protection of each mapping) and calls + * memory_object_last_unmap() when all the mappings are gone. + */ + pager_prot = max_protection; + if (copy) { + /* + * Copy-On-Write mapping: won't modify the + * memory object. 
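 *
 * (Pager-side sketch of the contract named here: a pager that
 * tracks its mappings treats
 *
 *	memory_object_map(pager, prot);		-- once per mapping
 *	memory_object_last_unmap(pager);	-- all mappings gone
 *
 * as a matched pair, so with VM_PROT_WRITE masked off it may
 * assume no dirty pages can come back through this mapping.)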
+ */ + pager_prot &= ~VM_PROT_WRITE; + } + vm_object_lock(object); + pager = object->pager; + if (object->named && + pager != MEMORY_OBJECT_NULL && + object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { + assert(object->pager_ready); + vm_object_mapping_wait(object, THREAD_UNINT); + vm_object_mapping_begin(object); + vm_object_unlock(object); + + kr = memory_object_map(pager, pager_prot); + assert(kr == KERN_SUCCESS); + + vm_object_lock(object); + vm_object_mapping_end(object); + } + vm_object_unlock(object); + } + /* * Perform the copy if requested */ @@ -2293,7 +3374,8 @@ vm_map_enter_mem_object( vm_object_t new_object; vm_object_offset_t new_offset; - result = vm_object_copy_strategically(object, offset, size, + result = vm_object_copy_strategically(object, offset, + map_size, &new_object, &new_offset, ©); @@ -2315,7 +3397,8 @@ vm_map_enter_mem_object( new_object = object; new_offset = offset; success = vm_object_copy_quickly(&new_object, - new_offset, size, + new_offset, + map_size, &src_needs_copy, ©); assert(success); @@ -2328,88 +3411,338 @@ vm_map_enter_mem_object( vm_object_deallocate(object); - if (result != KERN_SUCCESS) + if (result != KERN_SUCCESS) { return result; + } object = new_object; offset = new_offset; } - result = vm_map_enter(target_map, - &map_addr, map_size, - (vm_map_offset_t)mask, - flags, - object, offset, - copy, - cur_protection, max_protection, inheritance); + /* + * If users want to try to prefault pages, the mapping and prefault + * needs to be atomic. + */ + if (try_prefault) + flags |= VM_FLAGS_KEEP_MAP_LOCKED; + + { + result = vm_map_enter(target_map, + &map_addr, map_size, + (vm_map_offset_t)mask, + flags, + object, offset, + copy, + cur_protection, max_protection, + inheritance); + } if (result != KERN_SUCCESS) vm_object_deallocate(object); - *address = map_addr; - return result; -} - -#if VM_CPM - -#ifdef MACH_ASSERT -extern pmap_paddr_t avail_start, avail_end; -#endif -/* - * Allocate memory in the specified map, with the caveat that - * the memory is physically contiguous. This call may fail - * if the system can't find sufficient contiguous memory. - * This call may cause or lead to heart-stopping amounts of - * paging activity. - * - * Memory obtained from this call should be freed in the - * normal way, viz., via vm_deallocate. - */ -kern_return_t -vm_map_enter_cpm( - vm_map_t map, - vm_map_offset_t *addr, - vm_map_size_t size, - int flags) -{ - vm_object_t cpm_obj; - pmap_t pmap; - vm_page_t m, pages; - kern_return_t kr; - vm_map_offset_t va, start, end, offset; -#if MACH_ASSERT - vm_map_offset_t prev_addr; -#endif /* MACH_ASSERT */ + /* + * Try to prefault, and do not forget to release the vm map lock. + */ + if (result == KERN_SUCCESS && try_prefault) { + mach_vm_address_t va = map_addr; + kern_return_t kr = KERN_SUCCESS; + unsigned int i = 0; - boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + for (i = 0; i < page_list_count; ++i) { + if (UPL_VALID_PAGE(page_list, i)) { + /* + * If this function call failed, we should stop + * trying to optimize, other calls are likely + * going to fail too. + * + * We are not gonna report an error for such + * failure though. That's an optimization, not + * something critical. 
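 *
 * (Caller-side sketch for the whole prefault path, assuming a
 * kext that already holds a wired UPL; this loop is reached via
 * vm_map_enter_mem_object_prefault(), defined further down.
 * entry_port and upl are hypothetical:
 *
 *	upl_page_info_t *pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
 *	kr = vm_map_enter_mem_object_prefault(map, &addr, size, 0,
 *	    VM_FLAGS_ANYWHERE, entry_port, 0,
 *	    VM_PROT_READ, VM_PROT_READ,
 *	    pl, (unsigned int) atop(size));
 *
 * a bailout only means the skipped pages fault in lazily later.)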
+ */ + kr = pmap_enter_options(target_map->pmap, + va, UPL_PHYS_PAGE(page_list, i), + cur_protection, VM_PROT_NONE, + 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL); + if (kr != KERN_SUCCESS) { + OSIncrementAtomic64(&vm_prefault_nb_bailout); + break; + } + OSIncrementAtomic64(&vm_prefault_nb_pages); + } - if (!vm_allocate_cpm_enabled) - return KERN_FAILURE; + /* Next virtual address */ + va += PAGE_SIZE; + } + vm_map_unlock(target_map); + } - if (size == 0) { - *addr = 0; - return KERN_SUCCESS; + if (flags & (VM_FLAGS_RETURN_DATA_ADDR | + VM_FLAGS_RETURN_4K_DATA_ADDR)) { + *address = map_addr + offset_in_mapping; + } else { + *address = map_addr; } - if (anywhere) - *addr = vm_map_min(map); - else - *addr = vm_map_trunc_page(*addr); - size = vm_map_round_page(size); + return result; +} - /* - * LP64todo - cpm_allocate should probably allow - * allocations of >4GB, but not with the current - * algorithm, so just cast down the size for now. - */ - if (size > VM_MAX_ADDRESS) - return KERN_RESOURCE_SHORTAGE; - if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size), - &pages, 0, TRUE)) != KERN_SUCCESS) +kern_return_t +vm_map_enter_mem_object( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t initial_size, + vm_map_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags, + port, offset, copy, cur_protection, max_protection, + inheritance, NULL, 0); +} + +kern_return_t +vm_map_enter_mem_object_prefault( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t initial_size, + vm_map_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + vm_prot_t cur_protection, + vm_prot_t max_protection, + upl_page_list_ptr_t page_list, + unsigned int page_list_count) +{ + return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags, + port, offset, FALSE, cur_protection, max_protection, + VM_INHERIT_DEFAULT, page_list, page_list_count); +} + + +kern_return_t +vm_map_enter_mem_object_control( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t initial_size, + vm_map_offset_t mask, + int flags, + memory_object_control_t control, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_map_address_t map_addr; + vm_map_size_t map_size; + vm_object_t object; + vm_object_size_t size; + kern_return_t result; + memory_object_t pager; + vm_prot_t pager_prot; + kern_return_t kr; + + /* + * Check arguments for validity + */ + if ((target_map == VM_MAP_NULL) || + (cur_protection & ~VM_PROT_ALL) || + (max_protection & ~VM_PROT_ALL) || + (inheritance > VM_INHERIT_LAST_VALID) || + initial_size == 0) { + return KERN_INVALID_ARGUMENT; + } + + { + map_addr = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); + map_size = vm_map_round_page(initial_size, + VM_MAP_PAGE_MASK(target_map)); + } + size = vm_object_round_page(initial_size); + + object = memory_object_control_to_vm_object(control); + + if (object == VM_OBJECT_NULL) + return KERN_INVALID_OBJECT; + + if (object == kernel_object) { + printf("Warning: Attempt to map kernel object" + " by a non-private kernel entity\n"); + return KERN_INVALID_OBJECT; + } + + vm_object_lock(object); + object->ref_count++; + vm_object_res_reference(object); + + /* + * For "named" VM objects, let the pager know that the + * 
memory object is being mapped. Some pagers need to keep + * track of this, to know when they can reclaim the memory + * object, for example. + * VM calls memory_object_map() for each mapping (specifying + * the protection of each mapping) and calls + * memory_object_last_unmap() when all the mappings are gone. + */ + pager_prot = max_protection; + if (copy) { + pager_prot &= ~VM_PROT_WRITE; + } + pager = object->pager; + if (object->named && + pager != MEMORY_OBJECT_NULL && + object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { + assert(object->pager_ready); + vm_object_mapping_wait(object, THREAD_UNINT); + vm_object_mapping_begin(object); + vm_object_unlock(object); + + kr = memory_object_map(pager, pager_prot); + assert(kr == KERN_SUCCESS); + + vm_object_lock(object); + vm_object_mapping_end(object); + } + vm_object_unlock(object); + + /* + * Perform the copy if requested + */ + + if (copy) { + vm_object_t new_object; + vm_object_offset_t new_offset; + + result = vm_object_copy_strategically(object, offset, size, + &new_object, &new_offset, + ©); + + + if (result == KERN_MEMORY_RESTART_COPY) { + boolean_t success; + boolean_t src_needs_copy; + + /* + * XXX + * We currently ignore src_needs_copy. + * This really is the issue of how to make + * MEMORY_OBJECT_COPY_SYMMETRIC safe for + * non-kernel users to use. Solution forthcoming. + * In the meantime, since we don't allow non-kernel + * memory managers to specify symmetric copy, + * we won't run into problems here. + */ + new_object = object; + new_offset = offset; + success = vm_object_copy_quickly(&new_object, + new_offset, size, + &src_needs_copy, + ©); + assert(success); + result = KERN_SUCCESS; + } + /* + * Throw away the reference to the + * original object, as it won't be mapped. + */ + + vm_object_deallocate(object); + + if (result != KERN_SUCCESS) { + return result; + } + + object = new_object; + offset = new_offset; + } + + { + result = vm_map_enter(target_map, + &map_addr, map_size, + (vm_map_offset_t)mask, + flags, + object, offset, + copy, + cur_protection, max_protection, + inheritance); + } + if (result != KERN_SUCCESS) + vm_object_deallocate(object); + *address = map_addr; + + return result; +} + + +#if VM_CPM + +#ifdef MACH_ASSERT +extern pmap_paddr_t avail_start, avail_end; +#endif + +/* + * Allocate memory in the specified map, with the caveat that + * the memory is physically contiguous. This call may fail + * if the system can't find sufficient contiguous memory. + * This call may cause or lead to heart-stopping amounts of + * paging activity. + * + * Memory obtained from this call should be freed in the + * normal way, viz., via vm_deallocate. + */ +kern_return_t +vm_map_enter_cpm( + vm_map_t map, + vm_map_offset_t *addr, + vm_map_size_t size, + int flags) +{ + vm_object_t cpm_obj; + pmap_t pmap; + vm_page_t m, pages; + kern_return_t kr; + vm_map_offset_t va, start, end, offset; +#if MACH_ASSERT + vm_map_offset_t prev_addr = 0; +#endif /* MACH_ASSERT */ + + boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + vm_tag_t tag; + + VM_GET_FLAGS_ALIAS(flags, tag); + + if (size == 0) { + *addr = 0; + return KERN_SUCCESS; + } + if (anywhere) + *addr = vm_map_min(map); + else + *addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); + + /* + * LP64todo - cpm_allocate should probably allow + * allocations of >4GB, but not with the current + * algorithm, so just cast down the size for now. 
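 *
 * (Usage sketch; vm_allocate_cpm() is the usual wrapper, but a
 * direct call looks like
 *
 *	vm_map_offset_t addr = 0;
 *	kr = vm_map_enter_cpm(kernel_map, &addr,
 *	    16 * PAGE_SIZE, VM_FLAGS_ANYWHERE);
 *
 * the pages come back wired and physically contiguous, and are
 * released with vm_deallocate() as noted above.)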
+ */ + if (size > VM_MAX_ADDRESS) + return KERN_RESOURCE_SHORTAGE; + if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size), + &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) return kr; cpm_obj = vm_object_allocate((vm_object_size_t)size); assert(cpm_obj != VM_OBJECT_NULL); assert(cpm_obj->internal); - assert(cpm_obj->size == (vm_object_size_t)size); + assert(cpm_obj->vo_size == (vm_object_size_t)size); assert(cpm_obj->can_persist == FALSE); assert(cpm_obj->pager_created == FALSE); assert(cpm_obj->pageout == FALSE); @@ -2429,7 +3762,7 @@ vm_map_enter_cpm( assert(!m->wanted); assert(!m->pageout); assert(!m->tabled); - assert(m->wire_count); + assert(VM_PAGE_WIRED(m)); /* * ENCRYPTED SWAP: * "m" is not supposed to be pageable, so it @@ -2518,8 +3851,8 @@ vm_map_enter_cpm( type_of_fault = DBG_ZERO_FILL_FAULT; - vm_fault_enter(m, pmap, va, VM_PROT_ALL, - m->wire_count != 0, FALSE, FALSE, + vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE, + VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL, &type_of_fault); vm_object_unlock(cpm_obj); @@ -2534,8 +3867,8 @@ vm_map_enter_cpm( m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); vm_object_unlock(cpm_obj); if (m == VM_PAGE_NULL) - panic("vm_allocate_cpm: obj 0x%x off 0x%x no page", - cpm_obj, offset); + panic("vm_allocate_cpm: obj %p off 0x%llx no page", + cpm_obj, (uint64_t)offset); assert(m->tabled); assert(!m->busy); assert(!m->wanted); @@ -2544,15 +3877,15 @@ vm_map_enter_cpm( assert(!m->absent); assert(!m->error); assert(!m->cleaning); + assert(!m->laundry); assert(!m->precious); assert(!m->clustered); if (offset != 0) { if (m->phys_page != prev_addr + 1) { - printf("start 0x%x end 0x%x va 0x%x\n", - start, end, va); - printf("obj 0x%x off 0x%x\n", cpm_obj, offset); - printf("m 0x%x prev_address 0x%x\n", m, - prev_addr); + printf("start 0x%llx end 0x%llx va 0x%llx\n", + (uint64_t)start, (uint64_t)end, (uint64_t)va); + printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset); + printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr); panic("vm_allocate_cpm: pages not contig!"); } } @@ -2585,9 +3918,13 @@ vm_map_enter_cpm( } #endif /* VM_CPM */ +/* Not used without nested pmaps */ +#ifndef NO_NESTED_PMAP /* * Clip and unnest a portion of a nested submap mapping. */ + + static void vm_map_clip_unnest( vm_map_t map, @@ -2595,8 +3932,24 @@ vm_map_clip_unnest( vm_map_offset_t start_unnest, vm_map_offset_t end_unnest) { + vm_map_offset_t old_start_unnest = start_unnest; + vm_map_offset_t old_end_unnest = end_unnest; + assert(entry->is_sub_map); - assert(entry->object.sub_map != NULL); + assert(VME_SUBMAP(entry) != NULL); + assert(entry->use_pmap); + + /* + * Query the platform for the optimal unnest range. + * DRK: There's some duplication of effort here, since + * callers may have adjusted the range to some extent. This + * routine was introduced to support 1GiB subtree nesting + * for x86 platforms, which can also nest on 2MiB boundaries + * depending on size/alignment. 
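 *
 * (The callers below widen to the platform minimum before asking,
 * e.g. in vm_map_clip_start():
 *
 *	start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
 *	end_unnest = start_unnest + pmap_nesting_size_min;
 *
 * and log_unnest_badness() records the cases where this hook
 * still had to adjust the range.)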
+ */ + if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) { + log_unnest_badness(map, old_start_unnest, old_end_unnest); + } if (entry->vme_start > start_unnest || entry->vme_end < end_unnest) { @@ -2605,32 +3958,46 @@ vm_map_clip_unnest( (long long)start_unnest, (long long)end_unnest, (long long)entry->vme_start, (long long)entry->vme_end); } + if (start_unnest > entry->vme_start) { _vm_map_clip_start(&map->hdr, entry, start_unnest); - UPDATE_FIRST_FREE(map, map->first_free); + if (map->holelistenabled) { + vm_map_store_update_first_free(map, NULL, FALSE); + } else { + vm_map_store_update_first_free(map, map->first_free, FALSE); + } } if (entry->vme_end > end_unnest) { _vm_map_clip_end(&map->hdr, entry, end_unnest); - UPDATE_FIRST_FREE(map, map->first_free); + if (map->holelistenabled) { + vm_map_store_update_first_free(map, NULL, FALSE); + } else { + vm_map_store_update_first_free(map, map->first_free, FALSE); + } } pmap_unnest(map->pmap, entry->vme_start, entry->vme_end - entry->vme_start); - if ((map->mapped) && (map->ref_count)) { + if ((map->mapped_in_other_pmaps) && (map->ref_count)) { /* clean up parent map/maps */ vm_map_submap_pmap_clean( map, entry->vme_start, entry->vme_end, - entry->object.sub_map, - entry->offset); + VME_SUBMAP(entry), + VME_OFFSET(entry)); } entry->use_pmap = FALSE; + if ((map->pmap != kernel_pmap) && + (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) { + VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP); + } } +#endif /* NO_NESTED_PMAP */ /* * vm_map_clip_start: [ internal use only ] @@ -2639,14 +4006,15 @@ vm_map_clip_unnest( * the specified address; if necessary, * it splits the entry into two. */ -static void +void vm_map_clip_start( vm_map_t map, vm_map_entry_t entry, vm_map_offset_t startaddr) { #ifndef NO_NESTED_PMAP - if (entry->use_pmap && + if (entry->is_sub_map && + entry->use_pmap && startaddr >= entry->vme_start) { vm_map_offset_t start_unnest, end_unnest; @@ -2654,6 +4022,8 @@ vm_map_clip_start( * Make sure "startaddr" is no longer in a nested range * before we clip. Unnest only the minimum range the platform * can handle. + * vm_map_clip_unnest may perform additional adjustments to + * the unnest range. */ start_unnest = startaddr & ~(pmap_nesting_size_min - 1); end_unnest = start_unnest + pmap_nesting_size_min; @@ -2661,15 +4031,19 @@ vm_map_clip_start( } #endif /* NO_NESTED_PMAP */ if (startaddr > entry->vme_start) { - if (entry->object.vm_object && + if (VME_OBJECT(entry) && !entry->is_sub_map && - entry->object.vm_object->phys_contiguous) { + VME_OBJECT(entry)->phys_contiguous) { pmap_remove(map->pmap, (addr64_t)(entry->vme_start), (addr64_t)(entry->vme_end)); } _vm_map_clip_start(&map->hdr, entry, startaddr); - UPDATE_FIRST_FREE(map, map->first_free); + if (map->holelistenabled) { + vm_map_store_update_first_free(map, NULL, FALSE); + } else { + vm_map_store_update_first_free(map, map->first_free, FALSE); + } } } @@ -2688,7 +4062,7 @@ static void _vm_map_clip_start( register struct vm_map_header *map_header, register vm_map_entry_t entry, - register vm_map_offset_t start) + register vm_map_offset_t start) { register vm_map_entry_t new_entry; @@ -2700,19 +4074,26 @@ _vm_map_clip_start( * address. 
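 *
 * (Schematically, clipping entry [A, C) at B with A < B < C
 * leaves
 *
 *	new_entry = [A, B)	offset O	-- linked in before
 *	entry     = [B, C)	offset O + (B - A)
 *
 * and the new entry takes its own reference on the backing
 * object or submap, as the code below does.)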
*/ - new_entry = _vm_map_entry_create(map_header); + if (entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_HDR_PAGE_MASK(map_header))); + } + + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); new_entry->vme_end = start; - entry->offset += (start - entry->vme_start); + assert(new_entry->vme_start < new_entry->vme_end); + VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start)); + assert(start < entry->vme_end); entry->vme_start = start; - _vm_map_entry_link(map_header, entry->vme_prev, new_entry); + _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry); if (entry->is_sub_map) - vm_map_reference(new_entry->object.sub_map); + vm_map_reference(VME_SUBMAP(new_entry)); else - vm_object_reference(new_entry->object.vm_object); + vm_object_reference(VME_OBJECT(new_entry)); } @@ -2723,7 +4104,7 @@ _vm_map_clip_start( * the specified address; if necessary, * it splits the entry into two. */ -static void +void vm_map_clip_end( vm_map_t map, vm_map_entry_t entry, @@ -2737,13 +4118,15 @@ vm_map_clip_end( endaddr = entry->vme_end; } #ifndef NO_NESTED_PMAP - if (entry->use_pmap) { + if (entry->is_sub_map && entry->use_pmap) { vm_map_offset_t start_unnest, end_unnest; /* * Make sure the range between the start of this entry and * the new "endaddr" is no longer nested before we clip. * Unnest only the minimum range the platform can handle. + * vm_map_clip_unnest may perform additional adjustments to + * the unnest range. */ start_unnest = entry->vme_start; end_unnest = @@ -2753,15 +4136,19 @@ vm_map_clip_end( } #endif /* NO_NESTED_PMAP */ if (endaddr < entry->vme_end) { - if (entry->object.vm_object && + if (VME_OBJECT(entry) && !entry->is_sub_map && - entry->object.vm_object->phys_contiguous) { + VME_OBJECT(entry)->phys_contiguous) { pmap_remove(map->pmap, (addr64_t)(entry->vme_start), (addr64_t)(entry->vme_end)); } _vm_map_clip_end(&map->hdr, entry, endaddr); - UPDATE_FIRST_FREE(map, map->first_free); + if (map->holelistenabled) { + vm_map_store_update_first_free(map, NULL, FALSE); + } else { + vm_map_store_update_first_free(map, map->first_free, FALSE); + } } } @@ -2789,18 +4176,26 @@ _vm_map_clip_end( * AFTER the specified entry */ - new_entry = _vm_map_entry_create(map_header); + if (entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(end, + VM_MAP_HDR_PAGE_MASK(map_header))); + } + + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); + assert(entry->vme_start < end); new_entry->vme_start = entry->vme_end = end; - new_entry->offset += (end - entry->vme_start); + VME_OFFSET_SET(new_entry, + VME_OFFSET(new_entry) + (end - entry->vme_start)); + assert(new_entry->vme_start < new_entry->vme_end); - _vm_map_entry_link(map_header, entry, new_entry); + _vm_map_store_entry_link(map_header, entry, new_entry); if (entry->is_sub_map) - vm_map_reference(new_entry->object.sub_map); + vm_map_reference(VME_SUBMAP(new_entry)); else - vm_object_reference(new_entry->object.vm_object); + vm_object_reference(VME_OBJECT(new_entry)); } @@ -2901,15 +4296,15 @@ vm_map_range_check( */ kern_return_t vm_map_submap( - vm_map_t map, + vm_map_t map, vm_map_offset_t start, vm_map_offset_t end, - vm_map_t submap, + vm_map_t submap, vm_map_offset_t offset, #ifdef NO_NESTED_PMAP __unused #endif /* NO_NESTED_PMAP */ - boolean_t use_pmap) + boolean_t use_pmap) { vm_map_entry_t entry; register kern_return_t result = KERN_INVALID_ARGUMENT; @@ -2927,37 +4322,51 @@ 
vm_map_submap( return KERN_INVALID_ARGUMENT; } - assert(!entry->use_pmap); /* we don't want to unnest anything here */ vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); if ((entry->vme_start == start) && (entry->vme_end == end) && (!entry->is_sub_map) && - ((object = entry->object.vm_object) == vm_submap_object) && + ((object = VME_OBJECT(entry)) == vm_submap_object) && (object->resident_page_count == 0) && (object->copy == VM_OBJECT_NULL) && (object->shadow == VM_OBJECT_NULL) && (!object->pager_created)) { - entry->offset = (vm_object_offset_t)offset; - entry->object.vm_object = VM_OBJECT_NULL; + VME_OFFSET_SET(entry, (vm_object_offset_t)offset); + VME_OBJECT_SET(entry, VM_OBJECT_NULL); vm_object_deallocate(object); entry->is_sub_map = TRUE; - entry->object.sub_map = submap; + entry->use_pmap = FALSE; + VME_SUBMAP_SET(entry, submap); vm_map_reference(submap); - submap->mapped = TRUE; + if (submap->mapped_in_other_pmaps == FALSE && + vm_map_pmap(submap) != PMAP_NULL && + vm_map_pmap(submap) != vm_map_pmap(map)) { + /* + * This submap is being mapped in a map + * that uses a different pmap. + * Set its "mapped_in_other_pmaps" flag + * to indicate that we now need to + * remove mappings from all pmaps rather + * than just the submap's pmap. + */ + submap->mapped_in_other_pmaps = TRUE; + } #ifndef NO_NESTED_PMAP if (use_pmap) { /* nest if platform code will allow */ if(submap->pmap == NULL) { - submap->pmap = pmap_create((vm_map_size_t) 0, FALSE); + ledger_t ledger = map->pmap->ledger; + submap->pmap = pmap_create(ledger, + (vm_map_size_t) 0, FALSE); if(submap->pmap == PMAP_NULL) { vm_map_unlock(map); return(KERN_NO_SPACE); } } result = pmap_nest(map->pmap, - (entry->object.sub_map)->pmap, + (VME_SUBMAP(entry))->pmap, (addr64_t)start, (addr64_t)start, (uint64_t)(end - start)); @@ -2975,6 +4384,7 @@ vm_map_submap( return(result); } + /* * vm_map_protect: * @@ -2998,7 +4408,7 @@ vm_map_protect( XPR(XPR_VM_MAP, "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d", - (integer_t)map, start, end, new_prot, set_max); + map, start, end, new_prot, set_max); vm_map_lock(map); @@ -3011,14 +4421,24 @@ vm_map_protect( return(KERN_INVALID_ADDRESS); } - /* - * Lookup the entry. If it doesn't start in a valid - * entry, return an error. - */ - if (! vm_map_lookup_entry(map, start, &entry)) { - vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); - } + while(1) { + /* + * Lookup the entry. If it doesn't start in a valid + * entry, return an error. + */ + if (! vm_map_lookup_entry(map, start, &entry)) { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + + if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */ + start = SUPERPAGE_ROUND_DOWN(start); + continue; + } + break; + } + if (entry->superpage_size) + end = SUPERPAGE_ROUND_UP(end); /* * Make a first pass to check for protection and address @@ -3052,6 +4472,7 @@ vm_map_protect( } } + prev = current->vme_end; current = current->vme_next; } @@ -3079,7 +4500,10 @@ vm_map_protect( vm_map_clip_end(map, current, end); - assert(!current->use_pmap); /* clipping did unnest if needed */ + if (current->is_sub_map) { + /* clipping did unnest if needed */ + assert(!current->use_pmap); + } old_prot = current->protection; @@ -3089,6 +4513,18 @@ vm_map_protect( /* will include write. 
Caller must be prepared */ /* for loss of shared memory communication in the */ /* target area after taking this step */ + + if (current->is_sub_map == FALSE && + VME_OBJECT(current) == VM_OBJECT_NULL) { + VME_OBJECT_SET(current, + vm_object_allocate( + (vm_map_size_t) + (current->vme_end - + current->vme_start))); + VME_OFFSET_SET(current, 0); + assert(current->use_pmap); + } + assert(current->wired_count == 0); current->needs_copy = TRUE; current->max_protection |= VM_PROT_WRITE; } @@ -3119,11 +4555,12 @@ vm_map_protect( prot = current->protection & ~VM_PROT_WRITE; - if (override_nx(map, current->alias) && prot) + if (override_nx(map, VME_ALIAS(current)) && prot) prot |= VM_PROT_EXECUTE; + if (current->is_sub_map && current->use_pmap) { - pmap_protect(current->object.sub_map->pmap, + pmap_protect(VME_SUBMAP(current)->pmap, current->vme_start, current->vme_end, prot); @@ -3199,7 +4636,10 @@ vm_map_inherit( while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { vm_map_clip_end(map, entry, end); - assert(!entry->use_pmap); /* clip did unnest if needed */ + if (entry->is_sub_map) { + /* clip did unnest if needed */ + assert(!entry->use_pmap); + } entry->inheritance = new_inheritance; @@ -3224,6 +4664,7 @@ add_wire_counts( vm_map_size_t size; if (user_wire) { + unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count; /* * We're wiring memory at the request of the user. Check if this is the first time the user is wiring @@ -3242,7 +4683,8 @@ add_wire_counts( */ if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) || - size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit) + size + ptoa_64(total_wire_count) > vm_global_user_wire_limit || + size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount) return KERN_RESOURCE_SHORTAGE; /* @@ -3338,12 +4780,14 @@ vm_map_wire_nested( register vm_map_t map, register vm_map_offset_t start, register vm_map_offset_t end, - register vm_prot_t access_type, + register vm_prot_t caller_prot, boolean_t user_wire, pmap_t map_pmap, - vm_map_offset_t pmap_addr) + vm_map_offset_t pmap_addr, + ppnum_t *physpage_p) { register vm_map_entry_t entry; + register vm_prot_t access_type; struct vm_map_entry *first_entry, tmp_entry; vm_map_t real_map; register vm_map_offset_t s,e; @@ -3354,6 +4798,23 @@ vm_map_wire_nested( thread_t cur_thread; unsigned int last_timestamp; vm_map_size_t size; + boolean_t wire_and_extract; + + access_type = (caller_prot & VM_PROT_ALL); + + wire_and_extract = FALSE; + if (physpage_p != NULL) { + /* + * The caller wants the physical page number of the + * wired page. We return only one physical page number + * so this works for only one page at a time. + */ + if ((end - start) != PAGE_SIZE) { + return KERN_INVALID_ARGUMENT; + } + wire_and_extract = TRUE; + *physpage_p = 0; + } vm_map_lock(map); if(map_pmap == NULL) @@ -3363,6 +4824,8 @@ vm_map_wire_nested( VM_MAP_RANGE_CHECK(map, start, end); assert(page_aligned(start)); assert(page_aligned(end)); + assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))); if (start == end) { /* We wired what the caller asked for, zero pages */ vm_map_unlock(map); @@ -3454,9 +4917,6 @@ vm_map_wire_nested( * Worse that can happen is, it may not exist anymore. */ if (!vm_map_lookup_entry(map, s, &first_entry)) { - if (!user_wire) - panic("vm_map_wire: re-lookup failed"); - /* * User: undo everything upto the previous * entry. 
let vm_map_unwire worry about @@ -3476,12 +4936,23 @@ vm_map_wire_nested( vm_map_offset_t local_end; pmap_t pmap; + if (wire_and_extract) { + /* + * Wiring would result in copy-on-write + * which would not be compatible with + * the sharing we have with the original + * provider of this memory. + */ + rc = KERN_INVALID_ARGUMENT; + goto done; + } + vm_map_clip_start(map, entry, s); vm_map_clip_end(map, entry, end); - sub_start = entry->offset; + sub_start = VME_OFFSET(entry); sub_end = entry->vme_end; - sub_end += entry->offset - entry->vme_start; + sub_end += VME_OFFSET(entry) - entry->vme_start; local_end = entry->vme_end; if(map_pmap == NULL) { @@ -3494,7 +4965,7 @@ vm_map_wire_nested( vm_map_t lookup_map; if(entry->use_pmap) { - pmap = entry->object.sub_map->pmap; + pmap = VME_SUBMAP(entry)->pmap; /* ppc implementation requires that */ /* submaps pmap address ranges line */ /* up with parent map */ @@ -3538,15 +5009,16 @@ vm_map_wire_nested( &real_map)) { vm_map_unlock_read(lookup_map); + assert(map_pmap == NULL); vm_map_unwire(map, start, s, user_wire); return(KERN_FAILURE); } + vm_object_unlock(object); if(real_map != lookup_map) vm_map_unlock(real_map); vm_map_unlock_read(lookup_map); vm_map_lock(map); - vm_object_unlock(object); /* we unlocked, so must re-lookup */ if (!vm_map_lookup_entry(map, @@ -3585,10 +5057,11 @@ vm_map_wire_nested( entry->in_transition = TRUE; vm_map_unlock(map); - rc = vm_map_wire_nested(entry->object.sub_map, + rc = vm_map_wire_nested(VME_SUBMAP(entry), sub_start, sub_end, - access_type, - user_wire, pmap, pmap_addr); + caller_prot, + user_wire, pmap, pmap_addr, + NULL); vm_map_lock(map); /* @@ -3634,6 +5107,24 @@ vm_map_wire_nested( * the appropriate wire reference count. */ if (entry->wired_count) { + + if ((entry->protection & access_type) != access_type) { + /* found a protection problem */ + + /* + * XXX FBDP + * We should always return an error + * in this case but since we didn't + * enforce it before, let's do + * it only for the new "wire_and_extract" + * code path for now... + */ + if (wire_and_extract) { + rc = KERN_PROTECTION_FAILURE; + goto done; + } + } + /* * entry is already wired down, get our reference * after clipping to our range. @@ -3644,6 +5135,56 @@ vm_map_wire_nested( if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) goto done; + if (wire_and_extract) { + vm_object_t object; + vm_object_offset_t offset; + vm_page_t m; + + /* + * We don't have to "wire" the page again + * bit we still have to "extract" its + * physical page number, after some sanity + * checks. + */ + assert((entry->vme_end - entry->vme_start) + == PAGE_SIZE); + assert(!entry->needs_copy); + assert(!entry->is_sub_map); + assert(VME_OBJECT(entry)); + if (((entry->vme_end - entry->vme_start) + != PAGE_SIZE) || + entry->needs_copy || + entry->is_sub_map || + VME_OBJECT(entry) == VM_OBJECT_NULL) { + rc = KERN_INVALID_ARGUMENT; + goto done; + } + + object = VME_OBJECT(entry); + offset = VME_OFFSET(entry); + /* need exclusive lock to update m->dirty */ + if (entry->protection & VM_PROT_WRITE) { + vm_object_lock(object); + } else { + vm_object_lock_shared(object); + } + m = vm_page_lookup(object, offset); + assert(m != VM_PAGE_NULL); + assert(m->wire_count); + if (m != VM_PAGE_NULL && m->wire_count) { + *physpage_p = m->phys_page; + if (entry->protection & VM_PROT_WRITE) { + vm_object_lock_assert_exclusive( + m->object); + m->dirty = TRUE; + } + } else { + /* not already wired !? 
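 *
 * -- reaching this branch reports "no page" by leaving
 * *physpage_p zero.  Caller-side sketch for the extract path;
 * the wrappers appear later in this diff, the calling context
 * here is an assumption:
 *
 *	ppnum_t pn = 0;
 *	kr = vm_map_wire_and_extract(map, uaddr,
 *	    VM_PROT_READ | VM_PROT_WRITE, TRUE, &pn);
 *	if (kr == KERN_SUCCESS && pn != 0)
 *		paddr = ptoa_64(pn);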
*/ + *physpage_p = 0; + } + vm_object_unlock(object); + } + /* map was not unlocked: no need to relookup */ entry = entry->vme_next; s = entry->vme_start; @@ -3668,12 +5209,29 @@ vm_map_wire_nested( * This is aggressive, but once it's wired we can't move it. */ if (entry->needs_copy) { - vm_object_shadow(&entry->object.vm_object, - &entry->offset, size); + if (wire_and_extract) { + /* + * We're supposed to share with the original + * provider so should not be "needs_copy" + */ + rc = KERN_INVALID_ARGUMENT; + goto done; + } + + VME_OBJECT_SHADOW(entry, size); entry->needs_copy = FALSE; - } else if (entry->object.vm_object == VM_OBJECT_NULL) { - entry->object.vm_object = vm_object_allocate(size); - entry->offset = (vm_object_offset_t)0; + } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) { + if (wire_and_extract) { + /* + * We're supposed to share with the original + * provider so should already have an object. + */ + rc = KERN_INVALID_ARGUMENT; + goto done; + } + VME_OBJECT_SET(entry, vm_object_allocate(size)); + VME_OFFSET_SET(entry, (vm_object_offset_t)0); + assert(entry->use_pmap); } vm_map_clip_start(map, entry, s); @@ -3736,11 +5294,13 @@ vm_map_wire_nested( if(map_pmap) rc = vm_fault_wire(map, - &tmp_entry, map_pmap, pmap_addr); + &tmp_entry, caller_prot, map_pmap, pmap_addr, + physpage_p); else rc = vm_fault_wire(map, - &tmp_entry, map->pmap, - tmp_entry.vme_start); + &tmp_entry, caller_prot, map->pmap, + tmp_entry.vme_start, + physpage_p); if (!user_wire && cur_thread != THREAD_NULL) thread_interrupt_level(interruptible_state); @@ -3798,42 +5358,98 @@ done: if (rc != KERN_SUCCESS) { /* undo what has been wired so far */ - vm_map_unwire(map, start, s, user_wire); + vm_map_unwire_nested(map, start, s, user_wire, + map_pmap, pmap_addr); + if (physpage_p) { + *physpage_p = 0; + } } return rc; } +kern_return_t +vm_map_wire_external( + register vm_map_t map, + register vm_map_offset_t start, + register vm_map_offset_t end, + register vm_prot_t caller_prot, + boolean_t user_wire) +{ + kern_return_t kret; + + caller_prot &= ~VM_PROT_MEMORY_TAG_MASK; + caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt()); + kret = vm_map_wire_nested(map, start, end, caller_prot, + user_wire, (pmap_t)NULL, 0, NULL); + return kret; +} + kern_return_t vm_map_wire( register vm_map_t map, register vm_map_offset_t start, register vm_map_offset_t end, - register vm_prot_t access_type, + register vm_prot_t caller_prot, boolean_t user_wire) { + kern_return_t kret; + + kret = vm_map_wire_nested(map, start, end, caller_prot, + user_wire, (pmap_t)NULL, 0, NULL); + return kret; +} + +kern_return_t +vm_map_wire_and_extract_external( + vm_map_t map, + vm_map_offset_t start, + vm_prot_t caller_prot, + boolean_t user_wire, + ppnum_t *physpage_p) +{ + kern_return_t kret; + + caller_prot &= ~VM_PROT_MEMORY_TAG_MASK; + caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt()); + kret = vm_map_wire_nested(map, + start, + start+VM_MAP_PAGE_SIZE(map), + caller_prot, + user_wire, + (pmap_t)NULL, + 0, + physpage_p); + if (kret != KERN_SUCCESS && + physpage_p != NULL) { + *physpage_p = 0; + } + return kret; +} +kern_return_t +vm_map_wire_and_extract( + vm_map_t map, + vm_map_offset_t start, + vm_prot_t caller_prot, + boolean_t user_wire, + ppnum_t *physpage_p) +{ kern_return_t kret; -#ifdef ppc - /* - * the calls to mapping_prealloc and mapping_relpre - * (along with the VM_MAP_RANGE_CHECK to insure a - * resonable range was passed in) are - * currently necessary because - * we haven't enabled kernel pre-emption - * and/or the pmap_enter 
cannot purge and re-use - * existing mappings - */ - VM_MAP_RANGE_CHECK(map, start, end); - mapping_prealloc(end - start); -#endif - kret = vm_map_wire_nested(map, start, end, access_type, - user_wire, (pmap_t)NULL, 0); -#ifdef ppc - mapping_relpre(); -#endif + kret = vm_map_wire_nested(map, + start, + start+VM_MAP_PAGE_SIZE(map), + caller_prot, + user_wire, + (pmap_t)NULL, + 0, + physpage_p); + if (kret != KERN_SUCCESS && + physpage_p != NULL) { + *physpage_p = 0; + } return kret; } @@ -3873,6 +5489,8 @@ vm_map_unwire_nested( VM_MAP_RANGE_CHECK(map, start, end); assert(page_aligned(start)); assert(page_aligned(end)); + assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))); if (start == end) { /* We unwired what the caller asked for: zero pages */ @@ -3896,6 +5514,12 @@ vm_map_unwire_nested( return(KERN_INVALID_ADDRESS); } + if (entry->superpage_size) { + /* superpages are always wired */ + vm_map_unlock(map); + return KERN_INVALID_ADDRESS; + } + need_wakeup = FALSE; while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { if (entry->in_transition) { @@ -3950,13 +5574,13 @@ vm_map_unwire_nested( vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); - sub_start = entry->offset; + sub_start = VME_OFFSET(entry); sub_end = entry->vme_end - entry->vme_start; - sub_end += entry->offset; + sub_end += VME_OFFSET(entry); local_end = entry->vme_end; if(map_pmap == NULL) { if(entry->use_pmap) { - pmap = entry->object.sub_map->pmap; + pmap = VME_SUBMAP(entry)->pmap; pmap_addr = sub_start; } else { pmap = map->pmap; @@ -4002,7 +5626,7 @@ vm_map_unwire_nested( * guarantees existance of the entry. */ vm_map_unlock(map); - vm_map_unwire_nested(entry->object.sub_map, + vm_map_unwire_nested(VME_SUBMAP(entry), sub_start, sub_end, user_wire, pmap, pmap_addr); vm_map_lock(map); @@ -4040,7 +5664,7 @@ vm_map_unwire_nested( continue; } else { vm_map_unlock(map); - vm_map_unwire_nested(entry->object.sub_map, + vm_map_unwire_nested(VME_SUBMAP(entry), sub_start, sub_end, user_wire, map_pmap, pmap_addr); vm_map_lock(map); @@ -4101,6 +5725,10 @@ vm_map_unwire_nested( continue; } + if(entry->zero_wired_pages) { + entry->zero_wired_pages = FALSE; + } + entry->in_transition = TRUE; tmp_entry = *entry; /* see comment in vm_map_wire() */ @@ -4201,18 +5829,23 @@ vm_map_entry_delete( e = entry->vme_end; assert(page_aligned(s)); assert(page_aligned(e)); + if (entry->map_aligned == TRUE) { + assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map))); + } assert(entry->wired_count == 0); assert(entry->user_wired_count == 0); + assert(!entry->permanent); if (entry->is_sub_map) { object = NULL; - submap = entry->object.sub_map; + submap = VME_SUBMAP(entry); } else { submap = NULL; - object = entry->object.vm_object; + object = VME_OBJECT(entry); } - vm_map_entry_unlink(map, entry); + vm_map_store_entry_unlink(map, entry); map->size -= e - s; vm_map_entry_dispose(map, entry); @@ -4244,6 +5877,8 @@ vm_map_submap_pmap_clean( submap_end = offset + (end - start); submap_start = offset; + + vm_map_lock_read(sub_map); if(vm_map_lookup_entry(sub_map, offset, &entry)) { remove_size = (entry->vme_end - entry->vme_start); @@ -4260,19 +5895,22 @@ vm_map_submap_pmap_clean( sub_map, start, start + remove_size, - entry->object.sub_map, - entry->offset); + VME_SUBMAP(entry), + VME_OFFSET(entry)); } else { - if((map->mapped) && (map->ref_count) - && (entry->object.vm_object != NULL)) { - 
vm_object_pmap_protect( - entry->object.vm_object, - entry->offset, + if((map->mapped_in_other_pmaps) && (map->ref_count) + && (VME_OBJECT(entry) != NULL)) { + vm_object_pmap_protect_options( + VME_OBJECT(entry), + (VME_OFFSET(entry) + + offset - + entry->vme_start), remove_size, PMAP_NULL, entry->vme_start, - VM_PROT_NONE); + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE); } else { pmap_remove(map->pmap, (addr64_t)start, @@ -4294,18 +5932,19 @@ vm_map_submap_pmap_clean( sub_map, (start + entry->vme_start) - offset, ((start + entry->vme_start) - offset) + remove_size, - entry->object.sub_map, - entry->offset); + VME_SUBMAP(entry), + VME_OFFSET(entry)); } else { - if((map->mapped) && (map->ref_count) - && (entry->object.vm_object != NULL)) { - vm_object_pmap_protect( - entry->object.vm_object, - entry->offset, + if((map->mapped_in_other_pmaps) && (map->ref_count) + && (VME_OBJECT(entry) != NULL)) { + vm_object_pmap_protect_options( + VME_OBJECT(entry), + VME_OFFSET(entry), remove_size, PMAP_NULL, entry->vme_start, - VM_PROT_NONE); + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE); } else { pmap_remove(map->pmap, (addr64_t)((start + entry->vme_start) @@ -4315,7 +5954,8 @@ vm_map_submap_pmap_clean( } } entry = entry->vme_next; - } + } + vm_map_unlock_read(sub_map); return; } @@ -4362,28 +6002,79 @@ vm_map_delete( */ flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE; - /* - * Find the start of the region, and clip it - */ - if (vm_map_lookup_entry(map, start, &first_entry)) { - entry = first_entry; - if (start == entry->vme_start) { + while(1) { + /* + * Find the start of the region, and clip it + */ + if (vm_map_lookup_entry(map, start, &first_entry)) { + entry = first_entry; + if (map == kalloc_map && + (entry->vme_start != start || + entry->vme_end != end)) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "mismatched entry %p [0x%llx:0x%llx]\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)entry->vme_start, + (uint64_t)entry->vme_end); + } + if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ start = SUPERPAGE_ROUND_DOWN(start); + start = SUPERPAGE_ROUND_DOWN(start); + continue; + } + if (start == entry->vme_start) { + /* + * No need to clip. We don't want to cause + * any unnecessary unnesting in this case... + */ + } else { + if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && + entry->map_aligned && + !VM_MAP_PAGE_ALIGNED( + start, + VM_MAP_PAGE_MASK(map))) { + /* + * The entry will no longer be + * map-aligned after clipping + * and the caller said it's OK. + */ + entry->map_aligned = FALSE; + } + if (map == kalloc_map) { + panic("vm_map_delete(%p,0x%llx,0x%llx):" + " clipping %p at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)start); + } + vm_map_clip_start(map, entry, start); + } + /* - * No need to clip. We don't want to cause - * any unnecessary unnesting in this case... + * Fix the lookup hint now, rather than each + * time through the loop. */ + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } else { - vm_map_clip_start(map, entry, start); + if (map->pmap == kernel_pmap && + map->ref_count != 0) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "no map entry at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + (uint64_t)start); + } + entry = first_entry->vme_next; } - - /* - * Fix the lookup hint now, rather than each - * time through the loop. 
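
The delete path above widens a request that starts or ends inside a superpage entry to cover the whole superpage, since superpages are always wired and cannot be partially unmapped. A small sketch of that widening, assuming the 2 MB superpage size used on x86 (the exact constant is pmap-specific):

    #include <stdint.h>

    #define SUPERPAGE_SIZE (2ull * 1024 * 1024)   /* assumed x86 value */
    #define SUPERPAGE_MASK (SUPERPAGE_SIZE - 1)

    /* a deletion overlapping a superpage entry grows to cover the
     * whole superpage, as in the lookup loop above */
    static void
    extend_to_superpage(uint64_t *start, uint64_t *end)
    {
        if (*start & SUPERPAGE_MASK)
            *start &= ~SUPERPAGE_MASK;                     /* ROUND_DOWN */
        if (*end & SUPERPAGE_MASK)
            *end = (*end + SUPERPAGE_MASK) & ~SUPERPAGE_MASK; /* ROUND_UP */
    }
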
- */ - SAVE_HINT_MAP_WRITE(map, entry->vme_prev); - } else { - entry = first_entry->vme_next; + break; } + if (entry->superpage_size) + end = SUPERPAGE_ROUND_UP(end); need_wakeup = FALSE; /* @@ -4414,6 +6105,25 @@ vm_map_delete( * vm_map_simplify_entry(). We need to * re-clip its start. */ + if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && + entry->map_aligned && + !VM_MAP_PAGE_ALIGNED(s, + VM_MAP_PAGE_MASK(map))) { + /* + * The entry will no longer be map-aligned + * after clipping and the caller said it's OK. + */ + entry->map_aligned = FALSE; + } + if (map == kalloc_map) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "clipping %p at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)s); + } vm_map_clip_start(map, entry, s); } if (entry->vme_end <= end) { @@ -4422,8 +6132,35 @@ vm_map_delete( * to clip and possibly cause an unnecessary unnesting. */ } else { + if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && + entry->map_aligned && + !VM_MAP_PAGE_ALIGNED(end, + VM_MAP_PAGE_MASK(map))) { + /* + * The entry will no longer be map-aligned + * after clipping and the caller said it's OK. + */ + entry->map_aligned = FALSE; + } + if (map == kalloc_map) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "clipping %p at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)end); + } vm_map_clip_end(map, entry, end); } + + if (entry->permanent) { + panic("attempt to remove permanent VM map entry " + "%p [0x%llx:0x%llx]\n", + entry, (uint64_t) s, (uint64_t) end); + } + + if (entry->in_transition) { wait_result_t wait_result; @@ -4451,7 +6188,6 @@ vm_map_delete( * We do not clear the needs_wakeup flag, * since we cannot tell if we were the only one. */ - vm_map_unlock(map); return KERN_ABORTED; } @@ -4460,8 +6196,6 @@ vm_map_delete( * may not exist anymore. Look it up again. */ if (!vm_map_lookup_entry(map, s, &first_entry)) { - assert((map != kernel_map) && - (!entry->is_sub_map)); /* * User: use the next entry */ @@ -4481,15 +6215,19 @@ vm_map_delete( user_wire = entry->user_wired_count > 0; /* - * Remove a kernel wiring if requested or if - * there are user wirings. + * Remove a kernel wiring if requested */ - if ((flags & VM_MAP_REMOVE_KUNWIRE) || - (entry->user_wired_count > 0)) + if (flags & VM_MAP_REMOVE_KUNWIRE) { entry->wired_count--; - - /* remove all user wire references */ - entry->user_wired_count = 0; + } + + /* + * Remove all user wirings for proper accounting + */ + if (entry->user_wired_count > 0) { + while (entry->user_wired_count) + subtract_wire_counts(map, entry, user_wire); + } if (entry->wired_count != 0) { assert(map != kernel_map); @@ -4516,7 +6254,6 @@ vm_map_delete( * cannot tell if we were the * only one. 
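
Both this wait-for-in-transition path and the wiring code earlier follow the same discipline: snapshot map->timestamp before dropping the map lock, and after relocking re-run vm_map_lookup_entry() unless the timestamp is unchanged. A minimal model of that version check; the real map keeps the counter in its header and bumps it on every modification:

    #include <stdbool.h>

    /* toy model of the map-version discipline used in these paths */
    struct toy_map { unsigned timestamp; };  /* bumped on each change */

    static unsigned
    save_version(const struct toy_map *m)
    {
        return m->timestamp;        /* taken while the lock is held */
    }

    static bool
    lookup_still_valid(const struct toy_map *m, unsigned saved)
    {
        /* unchanged: clipped entry pointers may be reused; changed:
         * the caller must vm_map_lookup_entry() again from "s" */
        return m->timestamp == saved;
    }
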
*/ - vm_map_unlock(map); return KERN_ABORTED; } @@ -4565,8 +6302,8 @@ vm_map_delete( vm_map_offset_t pmap_addr; - sub_map = tmp_entry.object.sub_map; - sub_start = tmp_entry.offset; + sub_map = VME_SUBMAP(&tmp_entry); + sub_start = VME_OFFSET(&tmp_entry); sub_end = sub_start + (tmp_entry.vme_end - tmp_entry.vme_start); if (tmp_entry.use_pmap) { @@ -4582,8 +6319,17 @@ vm_map_delete( pmap, pmap_addr); } else { + if (VME_OBJECT(&tmp_entry) == kernel_object) { + pmap_protect_options( + map->pmap, + tmp_entry.vme_start, + tmp_entry.vme_end, + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE, + NULL); + } vm_fault_unwire(map, &tmp_entry, - tmp_entry.object.vm_object == kernel_object, + VME_OBJECT(&tmp_entry) == kernel_object, map->pmap, tmp_entry.vme_start); } @@ -4648,49 +6394,119 @@ vm_map_delete( } else if (entry->is_sub_map) { if (entry->use_pmap) { #ifndef NO_NESTED_PMAP - pmap_unnest(map->pmap, - (addr64_t)entry->vme_start, - entry->vme_end - entry->vme_start); + int pmap_flags; + + if (flags & VM_MAP_REMOVE_NO_UNNESTING) { + /* + * This is the final cleanup of the + * address space being terminated. + * No new mappings are expected and + * we don't really need to unnest the + * shared region (and lose the "global" + * pmap mappings, if applicable). + * + * Tell the pmap layer that we're + * "clean" wrt nesting. + */ + pmap_flags = PMAP_UNNEST_CLEAN; + } else { + /* + * We're unmapping part of the nested + * shared region, so we can't keep the + * nested pmap. + */ + pmap_flags = 0; + } + pmap_unnest_options( + map->pmap, + (addr64_t)entry->vme_start, + entry->vme_end - entry->vme_start, + pmap_flags); #endif /* NO_NESTED_PMAP */ - if ((map->mapped) && (map->ref_count)) { + if ((map->mapped_in_other_pmaps) && (map->ref_count)) { /* clean up parent map/maps */ vm_map_submap_pmap_clean( map, entry->vme_start, entry->vme_end, - entry->object.sub_map, - entry->offset); + VME_SUBMAP(entry), + VME_OFFSET(entry)); } } else { vm_map_submap_pmap_clean( map, entry->vme_start, entry->vme_end, - entry->object.sub_map, - entry->offset); + VME_SUBMAP(entry), + VME_OFFSET(entry)); } - } else if (entry->object.vm_object != kernel_object) { - object = entry->object.vm_object; - if((map->mapped) && (map->ref_count)) { - vm_object_pmap_protect( - object, entry->offset, + } else if (VME_OBJECT(entry) != kernel_object && + VME_OBJECT(entry) != compressor_object) { + object = VME_OBJECT(entry); + if ((map->mapped_in_other_pmaps) && (map->ref_count)) { + vm_object_pmap_protect_options( + object, VME_OFFSET(entry), entry->vme_end - entry->vme_start, PMAP_NULL, entry->vme_start, - VM_PROT_NONE); - } else { - pmap_remove(map->pmap, - (addr64_t)entry->vme_start, - (addr64_t)entry->vme_end); + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE); + } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) || + (map->pmap == kernel_pmap)) { + /* Remove translations associated + * with this range unless the entry + * does not have an object, or + * it's the kernel map or a descendant + * since the platform could potentially + * create "backdoor" mappings invisible + * to the VM. It is expected that + * objectless, non-kernel ranges + * do not have such VM invisible + * translations. 
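
The branches above pick between two ways of tearing down translations: going through the VM object (vm_object_pmap_protect_options), which reaches every pmap the pages may be mapped in, and going through the map's own pmap (pmap_remove_options), which is cheaper but only safe when no other pmap can see the range. A sketch of the decision, with a toy map type standing in for the fields the code consults:

    #include <stdbool.h>

    struct toy_map { bool mapped_in_other_pmaps; int ref_count; };

    enum cleanup { CLEAN_VIA_OBJECT, CLEAN_VIA_PMAP };

    /* mirrors the branch above: object-based cleanup reaches every
     * pmap the object is mapped in; pmap-based cleanup only touches
     * this map's own pmap */
    static enum cleanup
    choose_cleanup(const struct toy_map *m, bool has_object)
    {
        if (m->mapped_in_other_pmaps && m->ref_count && has_object)
            return CLEAN_VIA_OBJECT;  /* vm_object_pmap_protect_options */
        return CLEAN_VIA_PMAP;        /* pmap_remove_options */
    }
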
+ */ + pmap_remove_options(map->pmap, + (addr64_t)entry->vme_start, + (addr64_t)entry->vme_end, + PMAP_OPTIONS_REMOVE); } } + if (entry->iokit_acct) { + /* alternate accounting */ + DTRACE_VM4(vm_map_iokit_unmapped_region, + vm_map_t, map, + vm_map_offset_t, entry->vme_start, + vm_map_offset_t, entry->vme_end, + int, VME_ALIAS(entry)); + vm_map_iokit_unmapped_region(map, + (entry->vme_end - + entry->vme_start)); + entry->iokit_acct = FALSE; + } + /* * All pmap mappings for this map entry must have been * cleared by now. */ +#if DEBUG assert(vm_map_pmap_is_empty(map, entry->vme_start, entry->vme_end)); +#endif /* DEBUG */ next = entry->vme_next; + + if (map->pmap == kernel_pmap && + map->ref_count != 0 && + entry->vme_end < end && + (next == vm_map_to_entry(map) || + next->vme_start != entry->vme_end)) { + panic("vm_map_delete(%p,0x%llx,0x%llx): " + "hole after %p at 0x%llx\n", + map, + (uint64_t)start, + (uint64_t)end, + entry, + (uint64_t)entry->vme_end); + } + s = next->vme_start; last_timestamp = map->timestamp; @@ -4703,9 +6519,9 @@ vm_map_delete( * these entries. */ /* unlink the entry from "map" ... */ - vm_map_entry_unlink(map, entry); + vm_map_store_entry_unlink(map, entry); /* ... and add it to the end of the "zap_map" */ - vm_map_entry_link(zap_map, + vm_map_store_entry_link(zap_map, vm_map_last_entry(zap_map), entry); entry_size = entry->vme_end - entry->vme_start; @@ -4778,6 +6594,15 @@ vm_map_remove( vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); + /* + * For the zone_map, the kernel controls the allocation/freeing of memory. + * Any free to the zone_map should be within the bounds of the map and + * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a + * free to the zone_map into a no-op, there is a problem and we should + * panic. + */ + if ((map == zone_map) && (start == end)) + panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start); result = vm_map_delete(map, start, end, flags, VM_MAP_NULL); vm_map_unlock(map); @@ -4796,10 +6621,6 @@ void vm_map_copy_discard( vm_map_copy_t copy) { - TR_DECL("vm_map_copy_discard"); - -/* tr3("enter: copy 0x%x type %d", copy, copy->type);*/ - if (copy == VM_MAP_COPY_NULL) return; @@ -4810,7 +6631,11 @@ vm_map_copy_discard( vm_map_entry_t entry = vm_map_copy_first_entry(copy); vm_map_copy_entry_unlink(copy, entry); - vm_object_deallocate(entry->object.vm_object); + if (entry->is_sub_map) { + vm_map_deallocate(VME_SUBMAP(entry)); + } else { + vm_object_deallocate(VME_OBJECT(entry)); + } vm_map_copy_entry_dispose(copy, entry); } break; @@ -4824,7 +6649,10 @@ vm_map_copy_discard( * allocated by a single call to kalloc(), i.e. the * vm_map_copy_t was not allocated out of the zone. */ - kfree(copy, copy->cpy_kalloc_size); + if (copy->size > msg_ool_size_small || copy->offset) + panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld", + (long long)copy->size, (long long)copy->offset); + kfree(copy, copy->size + cpy_kdata_hdr_sz); return; } zfree(vm_map_copy_zone, copy); @@ -4862,6 +6690,7 @@ vm_map_copy_copy( */ new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE; *new_copy = *copy; if (copy->type == VM_MAP_COPY_ENTRY_LIST) { @@ -4909,7 +6738,8 @@ vm_map_overwrite_submap_recurse( * splitting entries in strange ways. 
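
Throughout this patch the one-argument vm_map_round_page(addr) calls become two-argument calls parameterized by VM_MAP_PAGE_MASK(map), because a map may now use a page size different from the kernel's. The arithmetic is ordinary mask rounding; a runnable sketch with assumed 4 KB and 16 KB masks:

    #include <assert.h>
    #include <stdint.h>

    /* two-argument rounding, parameterized by the map's page mask */
    static uint64_t trunc_pg(uint64_t a, uint64_t m) { return a & ~m; }
    static uint64_t round_pg(uint64_t a, uint64_t m) { return (a + m) & ~m; }

    int
    main(void)
    {
        uint64_t m4k = 4096 - 1, m16k = 16384 - 1;  /* assumed sizes */
        assert(round_pg(0x5001, m4k)  == 0x6000);
        assert(round_pg(0x5001, m16k) == 0x8000);
        assert(trunc_pg(0x5001, m16k) == 0x4000);
        return 0;
    }
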
*/ - dst_end = vm_map_round_page(dst_addr + dst_size); + dst_end = vm_map_round_page(dst_addr + dst_size, + VM_MAP_PAGE_MASK(dst_map)); vm_map_lock(dst_map); start_pass_1: @@ -4918,8 +6748,14 @@ start_pass_1: return(KERN_INVALID_ADDRESS); } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); - assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */ + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(dst_addr, + VM_MAP_PAGE_MASK(dst_map))); + if (tmp_entry->is_sub_map) { + /* clipping did unnest if needed */ + assert(!tmp_entry->use_pmap); + } for (entry = tmp_entry;;) { vm_map_entry_t next; @@ -4941,19 +6777,19 @@ start_pass_1: } encountered_sub_map = TRUE; - sub_start = entry->offset; + sub_start = VME_OFFSET(entry); if(entry->vme_end < dst_end) sub_end = entry->vme_end; else sub_end = dst_end; sub_end -= entry->vme_start; - sub_end += entry->offset; + sub_end += VME_OFFSET(entry); local_end = entry->vme_end; vm_map_unlock(dst_map); result = vm_map_overwrite_submap_recurse( - entry->object.sub_map, + VME_SUBMAP(entry), sub_start, sub_end - sub_start); @@ -5011,9 +6847,9 @@ start_pass_1: /* * Check for permanent objects in the destination. */ - if ((entry->object.vm_object != VM_OBJECT_NULL) && - ((!entry->object.vm_object->internal) || - (entry->object.vm_object->true_share))) { + if ((VME_OBJECT(entry) != VM_OBJECT_NULL) && + ((!VME_OBJECT(entry)->internal) || + (VME_OBJECT(entry)->true_share))) { if(encountered_sub_map) { vm_map_unlock(dst_map); return(KERN_FAILURE); @@ -5084,7 +6920,8 @@ vm_map_copy_overwrite_nested( vm_map_address_t dst_addr, vm_map_copy_t copy, boolean_t interruptible, - pmap_t pmap) + pmap_t pmap, + boolean_t discard_on_success) { vm_map_offset_t dst_end; vm_map_entry_t tmp_entry; @@ -5113,7 +6950,7 @@ vm_map_copy_overwrite_nested( if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { return(vm_map_copyout_kernel_buffer( dst_map, &dst_addr, - copy, TRUE)); + copy, TRUE, discard_on_success)); } /* @@ -5124,7 +6961,8 @@ vm_map_copy_overwrite_nested( assert(copy->type == VM_MAP_COPY_ENTRY_LIST); if (copy->size == 0) { - vm_map_copy_discard(copy); + if (discard_on_success) + vm_map_copy_discard(copy); return(KERN_SUCCESS); } @@ -5135,12 +6973,16 @@ vm_map_copy_overwrite_nested( * splitting entries in strange ways. */ - if (!page_aligned(copy->size) || - !page_aligned (copy->offset) || - !page_aligned (dst_addr)) + if (!VM_MAP_PAGE_ALIGNED(copy->size, + VM_MAP_PAGE_MASK(dst_map)) || + !VM_MAP_PAGE_ALIGNED(copy->offset, + VM_MAP_PAGE_MASK(dst_map)) || + !VM_MAP_PAGE_ALIGNED(dst_addr, + VM_MAP_PAGE_MASK(dst_map))) { aligned = FALSE; - dst_end = vm_map_round_page(dst_addr + copy->size); + dst_end = vm_map_round_page(dst_addr + copy->size, + VM_MAP_PAGE_MASK(dst_map)); } else { dst_end = dst_addr + copy->size; } @@ -5161,7 +7003,10 @@ start_pass_1: vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(dst_addr, + VM_MAP_PAGE_MASK(dst_map))); for (entry = tmp_entry;;) { vm_map_entry_t next = entry->vme_next; @@ -5188,18 +7033,18 @@ start_pass_1: /* there is no need for the follow- */ /* ing check. 
*/ encountered_sub_map = TRUE; - sub_start = entry->offset; + sub_start = VME_OFFSET(entry); if(entry->vme_end < dst_end) sub_end = entry->vme_end; else sub_end = dst_end; sub_end -= entry->vme_start; - sub_end += entry->offset; + sub_end += VME_OFFSET(entry); vm_map_unlock(dst_map); kr = vm_map_overwrite_submap_recurse( - entry->object.sub_map, + VME_SUBMAP(entry), sub_start, sub_end - sub_start); if(kr != KERN_SUCCESS) @@ -5256,9 +7101,9 @@ start_pass_1: /* * Check for permanent objects in the destination. */ - if ((entry->object.vm_object != VM_OBJECT_NULL) && - ((!entry->object.vm_object->internal) || - (entry->object.vm_object->true_share))) { + if ((VME_OBJECT(entry) != VM_OBJECT_NULL) && + ((!VME_OBJECT(entry)->internal) || + (VME_OBJECT(entry)->true_share))) { contains_permanent_objects = TRUE; } @@ -5308,7 +7153,7 @@ start_overwrite: vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL; int nentries; int remaining_entries = 0; - int new_offset = 0; + vm_map_offset_t new_offset = 0; for (entry = tmp_entry; copy_size == 0;) { vm_map_entry_t next; @@ -5363,11 +7208,11 @@ start_overwrite: assert(!entry->use_pmap); entry->is_sub_map = FALSE; vm_map_deallocate( - entry->object.sub_map); - entry->object.sub_map = NULL; + VME_SUBMAP(entry)); + VME_SUBMAP_SET(entry, NULL); entry->is_shared = FALSE; entry->needs_copy = FALSE; - entry->offset = 0; + VME_OFFSET_SET(entry, 0); /* * XXX FBDP * We should propagate the protections @@ -5393,14 +7238,14 @@ start_overwrite: entry->vme_start - base_addr; break; } - sub_start = entry->offset; + sub_start = VME_OFFSET(entry); if(entry->vme_end < dst_end) sub_end = entry->vme_end; else sub_end = dst_end; sub_end -= entry->vme_start; - sub_end += entry->offset; + sub_end += VME_OFFSET(entry); local_end = entry->vme_end; vm_map_unlock(dst_map); copy_size = sub_end - sub_start; @@ -5453,24 +7298,27 @@ start_overwrite: if((entry->use_pmap) && (pmap == NULL)) { kr = vm_map_copy_overwrite_nested( - entry->object.sub_map, + VME_SUBMAP(entry), sub_start, copy, interruptible, - entry->object.sub_map->pmap); + VME_SUBMAP(entry)->pmap, + TRUE); } else if (pmap != NULL) { kr = vm_map_copy_overwrite_nested( - entry->object.sub_map, + VME_SUBMAP(entry), sub_start, copy, - interruptible, pmap); + interruptible, pmap, + TRUE); } else { kr = vm_map_copy_overwrite_nested( - entry->object.sub_map, + VME_SUBMAP(entry), sub_start, copy, interruptible, - dst_map->pmap); + dst_map->pmap, + TRUE); } if(kr != KERN_SUCCESS) { if(next_copy != NULL) { @@ -5491,12 +7339,19 @@ start_overwrite: /* destroyed after successful copy_overwrite */ copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE; vm_map_copy_first_entry(copy) = vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); copy->type = VM_MAP_COPY_ENTRY_LIST; copy->offset = new_offset; + /* + * XXX FBDP + * this does not seem to deal with + * the VM map store (R&B tree) + */ + total_size -= copy_size; copy_size = 0; /* put back remainder of copy in container */ @@ -5614,8 +7469,13 @@ start_overwrite: * bits of the region in this case ! 
*/ /* ALWAYS UNLOCKS THE dst_map MAP */ - if ((kr = vm_map_copy_overwrite_unaligned( dst_map, - tmp_entry, copy, base_addr)) != KERN_SUCCESS) { + kr = vm_map_copy_overwrite_unaligned( + dst_map, + tmp_entry, + copy, + base_addr, + discard_on_success); + if (kr != KERN_SUCCESS) { if(next_copy != NULL) { copy->cpy_hdr.nentries += remaining_entries; @@ -5655,7 +7515,10 @@ start_overwrite: break; } } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr)); + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(base_addr, + VM_MAP_PAGE_MASK(dst_map))); entry = tmp_entry; } /* while */ @@ -5663,7 +7526,8 @@ start_overwrite: /* * Throw away the vm_map_copy object */ - vm_map_copy_discard(copy); + if (discard_on_success) + vm_map_copy_discard(copy); return(KERN_SUCCESS); }/* vm_map_copy_overwrite */ @@ -5675,8 +7539,241 @@ vm_map_copy_overwrite( vm_map_copy_t copy, boolean_t interruptible) { - return vm_map_copy_overwrite_nested( - dst_map, dst_addr, copy, interruptible, (pmap_t) NULL); + vm_map_size_t head_size, tail_size; + vm_map_copy_t head_copy, tail_copy; + vm_map_offset_t head_addr, tail_addr; + vm_map_entry_t entry; + kern_return_t kr; + + head_size = 0; + tail_size = 0; + head_copy = NULL; + tail_copy = NULL; + head_addr = 0; + tail_addr = 0; + + if (interruptible || + copy == VM_MAP_COPY_NULL || + copy->type != VM_MAP_COPY_ENTRY_LIST) { + /* + * We can't split the "copy" map if we're interruptible + * or if we don't have a "copy" map... + */ + blunt_copy: + return vm_map_copy_overwrite_nested(dst_map, + dst_addr, + copy, + interruptible, + (pmap_t) NULL, + TRUE); + } + + if (copy->size < 3 * PAGE_SIZE) { + /* + * Too small to bother with optimizing... + */ + goto blunt_copy; + } + + if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) != + (copy->offset & VM_MAP_PAGE_MASK(dst_map))) { + /* + * Incompatible mis-alignment of source and destination... + */ + goto blunt_copy; + } + + /* + * Proper alignment or identical mis-alignment at the beginning. + * Let's try and do a small unaligned copy first (if needed) + * and then an aligned copy for the rest. + */ + if (!page_aligned(dst_addr)) { + head_addr = dst_addr; + head_size = (VM_MAP_PAGE_SIZE(dst_map) - + (copy->offset & VM_MAP_PAGE_MASK(dst_map))); + } + if (!page_aligned(copy->offset + copy->size)) { + /* + * Mis-alignment at the end. + * Do an aligned copy up to the last page and + * then an unaligned copy for the remaining bytes. + */ + tail_size = ((copy->offset + copy->size) & + VM_MAP_PAGE_MASK(dst_map)); + tail_addr = dst_addr + copy->size - tail_size; + } + + if (head_size + tail_size == copy->size) { + /* + * It's all unaligned, no optimization possible... + */ + goto blunt_copy; + } + + /* + * Can't optimize if there are any submaps in the + * destination due to the way we free the "copy" map + * progressively in vm_map_copy_overwrite_nested() + * in that case. + */ + vm_map_lock_read(dst_map); + if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) { + vm_map_unlock_read(dst_map); + goto blunt_copy; + } + for (; + (entry != vm_map_copy_to_entry(copy) && + entry->vme_start < dst_addr + copy->size); + entry = entry->vme_next) { + if (entry->is_sub_map) { + vm_map_unlock_read(dst_map); + goto blunt_copy; + } + } + vm_map_unlock_read(dst_map); + + if (head_size) { + /* + * Unaligned copy of the first "head_size" bytes, to reach + * a page boundary. + */ + + /* + * Extract "head_copy" out of "copy". 
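
The head/tail split being set up here turns one misaligned overwrite into at most three: an unaligned head up to the first page boundary, a page-aligned middle, and an unaligned tail. The patch only attempts this when source and destination share the same misalignment and the copy spans at least three pages. A runnable sketch of the size computation, assuming a 4 KB map page mask:

    #include <stdint.h>
    #include <stdio.h>

    #define PGMASK 4095ull              /* assumed 4 KB map page mask */

    /* Split an overwrite of "size" bytes at dst_addr, where the copy
     * object's data starts at "offset".  Preconditions checked by the
     * patch: (dst_addr & PGMASK) == (offset & PGMASK), size >= 3 pages. */
    static void
    split_overwrite(uint64_t dst_addr, uint64_t offset, uint64_t size)
    {
        uint64_t head = 0, tail = 0;

        if (dst_addr & PGMASK)                  /* unaligned start */
            head = (PGMASK + 1) - (offset & PGMASK);
        if ((offset + size) & PGMASK)           /* unaligned end */
            tail = (offset + size) & PGMASK;

        if (head + tail == size) {              /* no aligned middle left */
            puts("blunt copy");
            return;
        }
        printf("head=%llu middle=%llu tail=%llu\n",
            (unsigned long long)head,
            (unsigned long long)(size - head - tail),
            (unsigned long long)tail);
    }
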
+ */ + head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE; + vm_map_copy_first_entry(head_copy) = + vm_map_copy_to_entry(head_copy); + vm_map_copy_last_entry(head_copy) = + vm_map_copy_to_entry(head_copy); + head_copy->type = VM_MAP_COPY_ENTRY_LIST; + head_copy->cpy_hdr.nentries = 0; + head_copy->cpy_hdr.entries_pageable = + copy->cpy_hdr.entries_pageable; + vm_map_store_init(&head_copy->cpy_hdr); + + head_copy->offset = copy->offset; + head_copy->size = head_size; + + copy->offset += head_size; + copy->size -= head_size; + + entry = vm_map_copy_first_entry(copy); + vm_map_copy_clip_end(copy, entry, copy->offset); + vm_map_copy_entry_unlink(copy, entry); + vm_map_copy_entry_link(head_copy, + vm_map_copy_to_entry(head_copy), + entry); + + /* + * Do the unaligned copy. + */ + kr = vm_map_copy_overwrite_nested(dst_map, + head_addr, + head_copy, + interruptible, + (pmap_t) NULL, + FALSE); + if (kr != KERN_SUCCESS) + goto done; + } + + if (tail_size) { + /* + * Extract "tail_copy" out of "copy". + */ + tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE; + vm_map_copy_first_entry(tail_copy) = + vm_map_copy_to_entry(tail_copy); + vm_map_copy_last_entry(tail_copy) = + vm_map_copy_to_entry(tail_copy); + tail_copy->type = VM_MAP_COPY_ENTRY_LIST; + tail_copy->cpy_hdr.nentries = 0; + tail_copy->cpy_hdr.entries_pageable = + copy->cpy_hdr.entries_pageable; + vm_map_store_init(&tail_copy->cpy_hdr); + + tail_copy->offset = copy->offset + copy->size - tail_size; + tail_copy->size = tail_size; + + copy->size -= tail_size; + + entry = vm_map_copy_last_entry(copy); + vm_map_copy_clip_start(copy, entry, tail_copy->offset); + entry = vm_map_copy_last_entry(copy); + vm_map_copy_entry_unlink(copy, entry); + vm_map_copy_entry_link(tail_copy, + vm_map_copy_last_entry(tail_copy), + entry); + } + + /* + * Copy most (or possibly all) of the data. + */ + kr = vm_map_copy_overwrite_nested(dst_map, + dst_addr + head_size, + copy, + interruptible, + (pmap_t) NULL, + FALSE); + if (kr != KERN_SUCCESS) { + goto done; + } + + if (tail_size) { + kr = vm_map_copy_overwrite_nested(dst_map, + tail_addr, + tail_copy, + interruptible, + (pmap_t) NULL, + FALSE); + } + +done: + assert(copy->type == VM_MAP_COPY_ENTRY_LIST); + if (kr == KERN_SUCCESS) { + /* + * Discard all the copy maps. + */ + if (head_copy) { + vm_map_copy_discard(head_copy); + head_copy = NULL; + } + vm_map_copy_discard(copy); + if (tail_copy) { + vm_map_copy_discard(tail_copy); + tail_copy = NULL; + } + } else { + /* + * Re-assemble the original copy map. 
+ */ + if (head_copy) { + entry = vm_map_copy_first_entry(head_copy); + vm_map_copy_entry_unlink(head_copy, entry); + vm_map_copy_entry_link(copy, + vm_map_copy_to_entry(copy), + entry); + copy->offset -= head_size; + copy->size += head_size; + vm_map_copy_discard(head_copy); + head_copy = NULL; + } + if (tail_copy) { + entry = vm_map_copy_last_entry(tail_copy); + vm_map_copy_entry_unlink(tail_copy, entry); + vm_map_copy_entry_link(copy, + vm_map_copy_last_entry(copy), + entry); + copy->size += tail_size; + vm_map_copy_discard(tail_copy); + tail_copy = NULL; + } + } + return kr; } @@ -5708,9 +7805,11 @@ vm_map_copy_overwrite_unaligned( vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, - vm_map_offset_t start) + vm_map_offset_t start, + boolean_t discard_on_success) { - vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy); + vm_map_entry_t copy_entry; + vm_map_entry_t copy_entry_next; vm_map_version_t version; vm_object_t dst_object; vm_object_offset_t dst_offset; @@ -5723,6 +7822,9 @@ vm_map_copy_overwrite_unaligned( amount_left; kern_return_t kr = KERN_SUCCESS; + + copy_entry = vm_map_copy_first_entry(copy); + vm_map_lock_write_to_read(dst_map); src_offset = copy->offset - vm_object_trunc_page(copy->offset); @@ -5776,14 +7878,13 @@ vm_map_copy_overwrite_unaligned( vm_map_lock_read(dst_map); goto RetryLookup; } - vm_object_shadow(&entry->object.vm_object, - &entry->offset, - (vm_map_size_t)(entry->vme_end - - entry->vme_start)); + VME_OBJECT_SHADOW(entry, + (vm_map_size_t)(entry->vme_end + - entry->vme_start)); entry->needs_copy = FALSE; vm_map_lock_write_to_read(dst_map); } - dst_object = entry->object.vm_object; + dst_object = VME_OBJECT(entry); /* * unlike with the virtual (aligned) copy we're going * to fault on it therefore we need a target object. @@ -5795,8 +7896,9 @@ vm_map_copy_overwrite_unaligned( } dst_object = vm_object_allocate((vm_map_size_t) entry->vme_end - entry->vme_start); - entry->object.vm_object = dst_object; - entry->offset = 0; + VME_OBJECT(entry) = dst_object; + VME_OFFSET_SET(entry, 0); + assert(entry->use_pmap); vm_map_lock_write_to_read(dst_map); } /* @@ -5805,15 +7907,15 @@ vm_map_copy_overwrite_unaligned( */ vm_object_reference(dst_object); version.main_timestamp = dst_map->timestamp; - entry_offset = entry->offset; + entry_offset = VME_OFFSET(entry); entry_end = entry->vme_end; vm_map_unlock_read(dst_map); /* * Copy as much as possible in one pass */ kr = vm_fault_copy( - copy_entry->object.vm_object, - copy_entry->offset + src_offset, + VME_OBJECT(copy_entry), + VME_OFFSET(copy_entry) + src_offset, ©_size, dst_object, entry_offset + dst_offset, @@ -5840,17 +7942,25 @@ vm_map_copy_overwrite_unaligned( /* * all done with this copy entry, dispose. 
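
The disposal step above is the tail of a lockstep walk: vm_fault_copy() may transfer less than requested, the loop advances the source offset by the amount actually copied, and a source entry is retired (when discard_on_success is set) once exhausted; owing bytes with no source entries left is KERN_INVALID_ADDRESS. The same bookkeeping over plain buffers, as a runnable model:

    #include <string.h>

    struct chunk { const char *base; size_t len; };  /* source entries */

    /* model of the lockstep walk: each inner copy may be shorter than
     * asked; an exhausted source entry is retired and the walk goes
     * on; running out of source with bytes still owed is an error */
    static int
    overwrite_from_chunks(struct chunk *src, size_t nsrc,
                          char *dst, size_t amount_left)
    {
        size_t si = 0, src_off = 0;

        while (amount_left > 0 && si < nsrc) {
            size_t n = src[si].len - src_off;    /* what this entry has */
            if (n > amount_left)
                n = amount_left;                 /* what we still need */
            memcpy(dst, src[si].base + src_off, n);
            dst += n;
            src_off += n;
            amount_left -= n;
            if (src_off == src[si].len) {        /* entry exhausted: */
                si++;                            /* dispose, advance */
                src_off = 0;
            }
        }
        return amount_left ? -1 : 0;   /* -1 ~ KERN_INVALID_ADDRESS */
    }
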
*/ - vm_map_copy_entry_unlink(copy, copy_entry); - vm_object_deallocate(copy_entry->object.vm_object); - vm_map_copy_entry_dispose(copy, copy_entry); + copy_entry_next = copy_entry->vme_next; + + if (discard_on_success) { + vm_map_copy_entry_unlink(copy, copy_entry); + assert(!copy_entry->is_sub_map); + vm_object_deallocate(VME_OBJECT(copy_entry)); + vm_map_copy_entry_dispose(copy, copy_entry); + } - if ((copy_entry = vm_map_copy_first_entry(copy)) - == vm_map_copy_to_entry(copy) && amount_left) { + if (copy_entry_next == vm_map_copy_to_entry(copy) && + amount_left) { /* * not finished copying but run out of source */ return KERN_INVALID_ADDRESS; } + + copy_entry = copy_entry_next; + src_offset = 0; } @@ -5912,6 +8022,10 @@ vm_map_copy_overwrite_unaligned( * to the above pass and make sure that no wiring is involved. */ +int vm_map_copy_overwrite_aligned_src_not_internal = 0; +int vm_map_copy_overwrite_aligned_src_not_symmetric = 0; +int vm_map_copy_overwrite_aligned_src_large = 0; + static kern_return_t vm_map_copy_overwrite_aligned( vm_map_t dst_map, @@ -5932,7 +8046,10 @@ vm_map_copy_overwrite_aligned( copy_size = (copy_entry->vme_end - copy_entry->vme_start); entry = tmp_entry; - assert(!entry->use_pmap); /* unnested when clipped earlier */ + if (entry->is_sub_map) { + /* unnested when clipped earlier */ + assert(!entry->use_pmap); + } if (entry == vm_map_to_entry(dst_map)) { vm_map_unlock(dst_map); return KERN_INVALID_ADDRESS; @@ -5966,6 +8083,12 @@ vm_map_copy_overwrite_aligned( */ if (copy_size < size) { + if (entry->map_aligned && + !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size, + VM_MAP_PAGE_MASK(dst_map))) { + /* no longer map-aligned */ + entry->map_aligned = FALSE; + } vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size); size = copy_size; } @@ -5990,21 +8113,21 @@ vm_map_copy_overwrite_aligned( * installing the source data. */ - object = entry->object.vm_object; + object = VME_OBJECT(entry); if ((!entry->is_shared && ((object == VM_OBJECT_NULL) || (object->internal && !object->true_share))) || entry->needs_copy) { - vm_object_t old_object = entry->object.vm_object; - vm_object_offset_t old_offset = entry->offset; + vm_object_t old_object = VME_OBJECT(entry); + vm_object_offset_t old_offset = VME_OFFSET(entry); vm_object_offset_t offset; /* * Ensure that the source and destination aren't * identical */ - if (old_object == copy_entry->object.vm_object && - old_offset == copy_entry->offset) { + if (old_object == VME_OBJECT(copy_entry) && + old_offset == VME_OFFSET(copy_entry)) { vm_map_copy_entry_unlink(copy, copy_entry); vm_map_copy_entry_dispose(copy, copy_entry); @@ -6016,6 +8139,86 @@ vm_map_copy_overwrite_aligned( continue; } +#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */ +#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */ + if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL && + VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE && + copy_size <= __TRADEOFF1_COPY_SIZE) { + /* + * Virtual vs. Physical copy tradeoff #1. + * + * Copying only a few pages out of a large + * object: do a physical copy instead of + * a virtual copy, to avoid possibly keeping + * the entire large object alive because of + * those few copy-on-write pages. 
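
The thresholds behind this comment come straight from the hunk: a copy of at most 128 KB out of an object of at least 64 MB is done physically, because a virtual copy would pin the whole large object just to keep a few copy-on-write pages resolvable. As a standalone predicate:

    #include <stdbool.h>
    #include <stdint.h>

    #define TRADEOFF1_OBJ_SIZE  (64ull * 1024 * 1024)  /* 64 MB */
    #define TRADEOFF1_COPY_SIZE (128ull * 1024)        /* 128 KB */

    /* true: copy the bytes now (physical copy); false: share the
     * pages copy-on-write (virtual copy) */
    static bool
    prefer_physical_copy(uint64_t src_object_size, uint64_t copy_size)
    {
        return src_object_size >= TRADEOFF1_OBJ_SIZE &&
               copy_size       <= TRADEOFF1_COPY_SIZE;
    }
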
+ */ + vm_map_copy_overwrite_aligned_src_large++; + goto slow_copy; + } + + if ((dst_map->pmap != kernel_pmap) && + (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) && + (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) { + vm_object_t new_object, new_shadow; + + /* + * We're about to map something over a mapping + * established by malloc()... + */ + new_object = VME_OBJECT(copy_entry); + if (new_object != VM_OBJECT_NULL) { + vm_object_lock_shared(new_object); + } + while (new_object != VM_OBJECT_NULL && + !new_object->true_share && + new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && + new_object->internal) { + new_shadow = new_object->shadow; + if (new_shadow == VM_OBJECT_NULL) { + break; + } + vm_object_lock_shared(new_shadow); + vm_object_unlock(new_object); + new_object = new_shadow; + } + if (new_object != VM_OBJECT_NULL) { + if (!new_object->internal) { + /* + * The new mapping is backed + * by an external object. We + * don't want malloc'ed memory + * to be replaced with such a + * non-anonymous mapping, so + * let's go off the optimized + * path... + */ + vm_map_copy_overwrite_aligned_src_not_internal++; + vm_object_unlock(new_object); + goto slow_copy; + } + if (new_object->true_share || + new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { + /* + * Same if there's a "true_share" + * object in the shadow chain, or + * an object with a non-default + * (SYMMETRIC) copy strategy. + */ + vm_map_copy_overwrite_aligned_src_not_symmetric++; + vm_object_unlock(new_object); + goto slow_copy; + } + vm_object_unlock(new_object); + } + /* + * The new mapping is still backed by + * anonymous (internal) memory, so it's + * OK to substitute it for the original + * malloc() mapping. + */ + } + if (old_object != VM_OBJECT_NULL) { if(entry->is_sub_map) { if(entry->use_pmap) { @@ -6024,59 +8227,53 @@ vm_map_copy_overwrite_aligned( (addr64_t)entry->vme_start, entry->vme_end - entry->vme_start); #endif /* NO_NESTED_PMAP */ - if(dst_map->mapped) { + if(dst_map->mapped_in_other_pmaps) { /* clean up parent */ /* map/maps */ vm_map_submap_pmap_clean( dst_map, entry->vme_start, entry->vme_end, - entry->object.sub_map, - entry->offset); + VME_SUBMAP(entry), + VME_OFFSET(entry)); } } else { vm_map_submap_pmap_clean( dst_map, entry->vme_start, entry->vme_end, - entry->object.sub_map, - entry->offset); + VME_SUBMAP(entry), + VME_OFFSET(entry)); } - vm_map_deallocate( - entry->object.sub_map); + vm_map_deallocate(VME_SUBMAP(entry)); } else { - if(dst_map->mapped) { - vm_object_pmap_protect( - entry->object.vm_object, - entry->offset, + if(dst_map->mapped_in_other_pmaps) { + vm_object_pmap_protect_options( + VME_OBJECT(entry), + VME_OFFSET(entry), entry->vme_end - entry->vme_start, PMAP_NULL, entry->vme_start, - VM_PROT_NONE); + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE); } else { - pmap_remove(dst_map->pmap, - (addr64_t)(entry->vme_start), - (addr64_t)(entry->vme_end)); + pmap_remove_options( + dst_map->pmap, + (addr64_t)(entry->vme_start), + (addr64_t)(entry->vme_end), + PMAP_OPTIONS_REMOVE); } vm_object_deallocate(old_object); } } entry->is_sub_map = FALSE; - entry->object = copy_entry->object; - object = entry->object.vm_object; + VME_OBJECT_SET(entry, VME_OBJECT(copy_entry)); + object = VME_OBJECT(entry); entry->needs_copy = copy_entry->needs_copy; entry->wired_count = 0; entry->user_wired_count = 0; - offset = entry->offset = copy_entry->offset; - /* - * XXX FBDP - * We should propagate the submap entry's protections - * here instead of forcing VM_PROT_ALL. 
- * Or better yet, we should inherit the protection - * of the copy_entry. - */ - entry->protection = VM_PROT_ALL; - entry->max_protection = VM_PROT_ALL; + offset = VME_OFFSET(copy_entry); + VME_OFFSET_SET(entry, offset); vm_map_copy_entry_unlink(copy, copy_entry); vm_map_copy_entry_dispose(copy, copy_entry); @@ -6102,16 +8299,49 @@ vm_map_copy_overwrite_aligned( tmp_entry = tmp_entry->vme_next; } else { vm_map_version_t version; - vm_object_t dst_object = entry->object.vm_object; - vm_object_offset_t dst_offset = entry->offset; + vm_object_t dst_object; + vm_object_offset_t dst_offset; kern_return_t r; + slow_copy: + if (entry->needs_copy) { + VME_OBJECT_SHADOW(entry, + (entry->vme_end - + entry->vme_start)); + entry->needs_copy = FALSE; + } + + dst_object = VME_OBJECT(entry); + dst_offset = VME_OFFSET(entry); + /* * Take an object reference, and record * the map version information so that the * map can be safely unlocked. */ + if (dst_object == VM_OBJECT_NULL) { + /* + * We would usually have just taken the + * optimized path above if the destination + * object has not been allocated yet. But we + * now disable that optimization if the copy + * entry's object is not backed by anonymous + * memory to avoid replacing malloc'ed + * (i.e. re-usable) anonymous memory with a + * not-so-anonymous mapping. + * So we have to handle this case here and + * allocate a new VM object for this map entry. + */ + dst_object = vm_object_allocate( + entry->vme_end - entry->vme_start); + dst_offset = 0; + VME_OBJECT_SET(entry, dst_object); + VME_OFFSET_SET(entry, dst_offset); + assert(entry->use_pmap); + + } + vm_object_reference(dst_object); /* account for unlock bumping up timestamp */ @@ -6125,8 +8355,8 @@ vm_map_copy_overwrite_aligned( copy_size = size; r = vm_fault_copy( - copy_entry->object.vm_object, - copy_entry->offset, + VME_OBJECT(copy_entry), + VME_OFFSET(copy_entry), ©_size, dst_object, dst_offset, @@ -6155,7 +8385,7 @@ vm_map_copy_overwrite_aligned( vm_map_copy_clip_end(copy, copy_entry, copy_entry->vme_start + copy_size); vm_map_copy_entry_unlink(copy, copy_entry); - vm_object_deallocate(copy_entry->object.vm_object); + vm_object_deallocate(VME_OBJECT(copy_entry)); vm_map_copy_entry_dispose(copy, copy_entry); } @@ -6168,9 +8398,17 @@ vm_map_copy_overwrite_aligned( start += copy_size; vm_map_lock(dst_map); - if (version.main_timestamp == dst_map->timestamp) { + if (version.main_timestamp == dst_map->timestamp && + copy_size != 0) { /* We can safely use saved tmp_entry value */ + if (tmp_entry->map_aligned && + !VM_MAP_PAGE_ALIGNED( + start, + VM_MAP_PAGE_MASK(dst_map))) { + /* no longer map-aligned */ + tmp_entry->map_aligned = FALSE; + } vm_map_clip_end(dst_map, tmp_entry, start); tmp_entry = tmp_entry->vme_next; } else { @@ -6180,6 +8418,13 @@ vm_map_copy_overwrite_aligned( vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } + if (tmp_entry->map_aligned && + !VM_MAP_PAGE_ALIGNED( + start, + VM_MAP_PAGE_MASK(dst_map))) { + /* no longer map-aligned */ + tmp_entry->map_aligned = FALSE; + } vm_map_clip_start(dst_map, tmp_entry, start); } } @@ -6208,30 +8453,35 @@ vm_map_copyin_kernel_buffer( { kern_return_t kr; vm_map_copy_t copy; - vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len; + vm_size_t kalloc_size; - copy = (vm_map_copy_t) kalloc(kalloc_size); - if (copy == VM_MAP_COPY_NULL) { + if (len > msg_ool_size_small) + return KERN_INVALID_ARGUMENT; + + kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len); + + copy = (vm_map_copy_t)kalloc(kalloc_size); + if (copy == VM_MAP_COPY_NULL) 
return KERN_RESOURCE_SHORTAGE; - } copy->type = VM_MAP_COPY_KERNEL_BUFFER; copy->size = len; copy->offset = 0; - copy->cpy_kdata = (void *) (copy + 1); - copy->cpy_kalloc_size = kalloc_size; - kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len); + kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len); if (kr != KERN_SUCCESS) { kfree(copy, kalloc_size); return kr; } if (src_destroy) { - (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr), - vm_map_round_page(src_addr + len), - VM_MAP_REMOVE_INTERRUPTIBLE | - VM_MAP_REMOVE_WAIT_FOR_KWIRE | - (src_map == kernel_map) ? - VM_MAP_REMOVE_KUNWIRE : 0); + (void) vm_map_remove( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + vm_map_round_page(src_addr + len, + VM_MAP_PAGE_MASK(src_map)), + (VM_MAP_REMOVE_INTERRUPTIBLE | + VM_MAP_REMOVE_WAIT_FOR_KWIRE | + (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)); } *copy_result = copy; return KERN_SUCCESS; @@ -6254,11 +8504,19 @@ vm_map_copyout_kernel_buffer( vm_map_t map, vm_map_address_t *addr, /* IN/OUT */ vm_map_copy_t copy, - boolean_t overwrite) + boolean_t overwrite, + boolean_t consume_on_success) { kern_return_t kr = KERN_SUCCESS; thread_t thread = current_thread(); + /* + * check for corrupted vm_map_copy structure + */ + if (copy->size > msg_ool_size_small || copy->offset) + panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld", + (long long)copy->size, (long long)copy->offset); + if (!overwrite) { /* @@ -6267,7 +8525,8 @@ vm_map_copyout_kernel_buffer( *addr = 0; kr = vm_map_enter(map, addr, - vm_map_round_page(copy->size), + vm_map_round_page(copy->size, + VM_MAP_PAGE_MASK(map)), (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, VM_OBJECT_NULL, @@ -6289,7 +8548,8 @@ vm_map_copyout_kernel_buffer( * If the target map is the current map, just do * the copy. */ - if (copyout(copy->cpy_kdata, *addr, copy->size)) { + assert((vm_size_t) copy->size == copy->size); + if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { kr = KERN_INVALID_ADDRESS; } } @@ -6304,7 +8564,8 @@ vm_map_copyout_kernel_buffer( vm_map_reference(map); oldmap = vm_map_switch(map); - if (copyout(copy->cpy_kdata, *addr, copy->size)) { + assert((vm_size_t) copy->size == copy->size); + if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { vm_map_copyout_kernel_buffer_failures++; kr = KERN_INVALID_ADDRESS; } @@ -6319,16 +8580,22 @@ vm_map_copyout_kernel_buffer( /* * Deallocate the space we allocated in the target map. 
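
The failure handling described here is the classic allocate/fill/undo shape: vm_map_enter() claims the space, copyout() fills it, and a failed copyout() must be followed by vm_map_remove() of the freshly allocated range so the target map is left unchanged. A user-space analogue using the public Mach calls, with memcpy standing in for copyout():

    #include <mach/mach.h>
    #include <mach/mach_vm.h>
    #include <stdint.h>
    #include <string.h>

    /* user-space analogue of vm_map_copyout_kernel_buffer() */
    static kern_return_t
    place_buffer(const void *src, mach_vm_size_t len,
                 mach_vm_address_t *out)
    {
        mach_vm_address_t addr = 0;
        kern_return_t kr;

        kr = mach_vm_allocate(mach_task_self(), &addr, len,
                              VM_FLAGS_ANYWHERE);
        if (kr != KERN_SUCCESS)
            return kr;                        /* e.g. KERN_NO_SPACE */
        /* if this fill step could fail, we would undo the allocation
         * with mach_vm_deallocate(), just as the kernel removes the
         * fresh range after a failed copyout() */
        memcpy((void *)(uintptr_t)addr, src, (size_t)len);
        *out = addr;
        return KERN_SUCCESS;
    }
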
*/ - (void) vm_map_remove(map, - vm_map_trunc_page(*addr), - vm_map_round_page(*addr + - vm_map_round_page(copy->size)), - VM_MAP_NO_FLAGS); + (void) vm_map_remove( + map, + vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page((*addr + + vm_map_round_page(copy->size, + VM_MAP_PAGE_MASK(map))), + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS); *addr = 0; } } else { /* copy was successful, dicard the copy structure */ - kfree(copy, copy->cpy_kalloc_size); + if (consume_on_success) { + kfree(copy, copy->size + cpy_kdata_hdr_sz); + } } return kr; @@ -6347,21 +8614,87 @@ vm_map_copyout_kernel_buffer( */ #define vm_map_copy_insert(map, where, copy) \ MACRO_BEGIN \ - vm_map_t VMCI_map; \ - vm_map_entry_t VMCI_where; \ - vm_map_copy_t VMCI_copy; \ - VMCI_map = (map); \ - VMCI_where = (where); \ - VMCI_copy = (copy); \ - ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\ - ->vme_next = (VMCI_where->vme_next); \ - ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \ - ->vme_prev = VMCI_where; \ - VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \ - UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \ - zfree(vm_map_copy_zone, VMCI_copy); \ + vm_map_store_copy_insert(map, where, copy); \ + zfree(vm_map_copy_zone, copy); \ MACRO_END +void +vm_map_copy_remap( + vm_map_t map, + vm_map_entry_t where, + vm_map_copy_t copy, + vm_map_offset_t adjustment, + vm_prot_t cur_prot, + vm_prot_t max_prot, + vm_inherit_t inheritance) +{ + vm_map_entry_t copy_entry, new_entry; + + for (copy_entry = vm_map_copy_first_entry(copy); + copy_entry != vm_map_copy_to_entry(copy); + copy_entry = copy_entry->vme_next) { + /* get a new VM map entry for the map */ + new_entry = vm_map_entry_create(map, + !map->hdr.entries_pageable); + /* copy the "copy entry" to the new entry */ + vm_map_entry_copy(new_entry, copy_entry); + /* adjust "start" and "end" */ + new_entry->vme_start += adjustment; + new_entry->vme_end += adjustment; + /* clear some attributes */ + new_entry->inheritance = inheritance; + new_entry->protection = cur_prot; + new_entry->max_protection = max_prot; + new_entry->behavior = VM_BEHAVIOR_DEFAULT; + /* take an extra reference on the entry's "object" */ + if (new_entry->is_sub_map) { + assert(!new_entry->use_pmap); /* not nested */ + vm_map_lock(VME_SUBMAP(new_entry)); + vm_map_reference(VME_SUBMAP(new_entry)); + vm_map_unlock(VME_SUBMAP(new_entry)); + } else { + vm_object_reference(VME_OBJECT(new_entry)); + } + /* insert the new entry in the map */ + vm_map_store_entry_link(map, where, new_entry); + /* continue inserting the "copy entries" after the new entry */ + where = new_entry; + } +} + + +boolean_t +vm_map_copy_validate_size( + vm_map_t dst_map, + vm_map_copy_t copy, + vm_map_size_t size) +{ + if (copy == VM_MAP_COPY_NULL) + return FALSE; + switch (copy->type) { + case VM_MAP_COPY_OBJECT: + case VM_MAP_COPY_KERNEL_BUFFER: + if (size == copy->size) + return TRUE; + break; + case VM_MAP_COPY_ENTRY_LIST: + /* + * potential page-size rounding prevents us from exactly + * validating this flavor of vm_map_copy, but we can at least + * assert that it's within a range. + */ + if (copy->size >= size && + copy->size <= vm_map_round_page(size, + VM_MAP_PAGE_MASK(dst_map))) + return TRUE; + break; + default: + break; + } + return FALSE; +} + + /* * Routine: vm_map_copyout * @@ -6372,19 +8705,37 @@ MACRO_END * If successful, consumes the copy object. * Otherwise, the caller is responsible for it. 
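
The ownership rule in this comment is what the new consume_on_success parameter generalizes: on success the copy object has been consumed, on failure it is still the caller's to discard. A hedged caller-side sketch of the base contract, using only the routines declared in this file (kernel context assumed, not standalone):

    /* caller-side shape implied by the contract above (sketch only) */
    static kern_return_t
    copyout_and_own(vm_map_t dst_map, vm_map_copy_t copy,
                    vm_map_address_t *dst_addr)
    {
        kern_return_t kr;

        kr = vm_map_copyout(dst_map, dst_addr, copy);
        if (kr != KERN_SUCCESS) {
            vm_map_copy_discard(copy);  /* failure: copy is still ours */
        }
        /* success: vm_map_copyout() consumed "copy"; do not touch it */
        return kr;
    }
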
*/ + kern_return_t vm_map_copyout( vm_map_t dst_map, vm_map_address_t *dst_addr, /* OUT */ vm_map_copy_t copy) +{ + return vm_map_copyout_internal(dst_map, dst_addr, copy, + TRUE, /* consume_on_success */ + VM_PROT_DEFAULT, + VM_PROT_ALL, + VM_INHERIT_DEFAULT); +} + +kern_return_t +vm_map_copyout_internal( + vm_map_t dst_map, + vm_map_address_t *dst_addr, /* OUT */ + vm_map_copy_t copy, + boolean_t consume_on_success, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) { vm_map_size_t size; vm_map_size_t adjustment; vm_map_offset_t start; vm_object_offset_t vm_copy_start; vm_map_entry_t last; - register vm_map_entry_t entry; + vm_map_entry_t hole_entry; /* * Check for null copy object. @@ -6406,8 +8757,10 @@ vm_map_copyout( vm_object_offset_t offset; offset = vm_object_trunc_page(copy->offset); - size = vm_map_round_page(copy->size + - (vm_map_size_t)(copy->offset - offset)); + size = vm_map_round_page((copy->size + + (vm_map_size_t)(copy->offset - + offset)), + VM_MAP_PAGE_MASK(dst_map)); *dst_addr = 0; kr = vm_map_enter(dst_map, dst_addr, size, (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, @@ -6418,7 +8771,8 @@ vm_map_copyout( return(kr); /* Account for non-pagealigned copy object */ *dst_addr += (vm_map_offset_t)(copy->offset - offset); - zfree(vm_map_copy_zone, copy); + if (consume_on_success) + zfree(vm_map_copy_zone, copy); return(KERN_SUCCESS); } @@ -6428,24 +8782,51 @@ vm_map_copyout( */ if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { - return(vm_map_copyout_kernel_buffer(dst_map, dst_addr, - copy, FALSE)); + return vm_map_copyout_kernel_buffer(dst_map, dst_addr, + copy, FALSE, + consume_on_success); } + /* * Find space for the data */ - vm_copy_start = vm_object_trunc_page(copy->offset); - size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size) + vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset, + VM_MAP_COPY_PAGE_MASK(copy)); + size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size, + VM_MAP_COPY_PAGE_MASK(copy)) - vm_copy_start; + StartAgain: ; vm_map_lock(dst_map); - assert(first_free_is_valid(dst_map)); - start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ? - vm_map_min(dst_map) : last->vme_end; + if( dst_map->disable_vmentry_reuse == TRUE) { + VM_MAP_HIGHEST_ENTRY(dst_map, entry, start); + last = entry; + } else { + if (dst_map->holelistenabled) { + hole_entry = (vm_map_entry_t)dst_map->holes_list; + + if (hole_entry == NULL) { + /* + * No more space in the map? + */ + vm_map_unlock(dst_map); + return(KERN_NO_SPACE); + } + + last = hole_entry; + start = last->vme_start; + } else { + assert(first_free_is_valid(dst_map)); + start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ? + vm_map_min(dst_map) : last->vme_end; + } + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(dst_map)); + } while (TRUE) { vm_map_entry_t next = last->vme_next; @@ -6465,12 +8846,64 @@ StartAgain: ; return(KERN_NO_SPACE); } - if ((next == vm_map_to_entry(dst_map)) || - (next->vme_start >= end)) - break; + if (dst_map->holelistenabled) { + if (last->vme_end >= end) + break; + } else { + /* + * If there are no more entries, we must win. + * + * OR + * + * If there is another entry, it must be + * after the end of the potential new region. 
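
Whether it walks the new hole list (holelistenabled) or the entry list from first_free, the search above is a first-fit scan: round each candidate start up to the map's page boundary, check that the region fits before the next obstacle, and fail with KERN_NO_SPACE once the scan wraps or overflows. A runnable model over a sorted array of free ranges:

    #include <stdint.h>

    struct hole { uint64_t start, end; };   /* [start, end) free ranges */

    /* first-fit over a sorted hole list, as in the "holelistenabled"
     * branch above; returns 0 when nothing fits (KERN_NO_SPACE) */
    static uint64_t
    find_space(const struct hole *holes, int nholes,
               uint64_t size, uint64_t page_mask)
    {
        for (int i = 0; i < nholes; i++) {
            uint64_t start = (holes[i].start + page_mask) & ~page_mask;
            uint64_t end   = start + size;
            if (end > start && end <= holes[i].end)
                return start;               /* fits, and did not wrap */
        }
        return 0;
    }
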
+ */ + + if (next == vm_map_to_entry(dst_map)) + break; + + if (next->vme_start >= end) + break; + } last = next; - start = last->vme_end; + + if (dst_map->holelistenabled) { + if (last == (vm_map_entry_t) dst_map->holes_list) { + /* + * Wrapped around + */ + vm_map_unlock(dst_map); + return(KERN_NO_SPACE); + } + start = last->vme_start; + } else { + start = last->vme_end; + } + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(dst_map)); + } + + if (dst_map->holelistenabled) { + if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) { + panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start); + } + } + + + adjustment = start - vm_copy_start; + if (! consume_on_success) { + /* + * We're not allowed to consume "copy", so we'll have to + * copy its map entries into the destination map below. + * No need to re-allocate map entries from the correct + * (pageable or not) zone, since we'll get new map entries + * during the transfer. + * We'll also adjust the map entries's "start" and "end" + * during the transfer, to keep "copy"'s entries consistent + * with its "offset". + */ + goto after_adjustments; } /* @@ -6490,32 +8923,32 @@ StartAgain: ; /* * Find the zone that the copies were allocated from */ - old_zone = (copy->cpy_hdr.entries_pageable) - ? vm_map_entry_zone - : vm_map_kentry_zone; + entry = vm_map_copy_first_entry(copy); /* * Reinitialize the copy so that vm_map_copy_entry_link * will work. */ - copy->cpy_hdr.nentries = 0; + vm_map_store_copy_reset(copy, entry); copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable; - vm_map_copy_first_entry(copy) = - vm_map_copy_last_entry(copy) = - vm_map_copy_to_entry(copy); /* * Copy each entry. */ while (entry != vm_map_copy_to_entry(copy)) { - new = vm_map_copy_entry_create(copy); + new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); vm_map_entry_copy_full(new, entry); - new->use_pmap = FALSE; /* clr address space specifics */ + assert(!new->iokit_acct); + if (new->is_sub_map) { + /* clr address space specifics */ + new->use_pmap = FALSE; + } vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new); next = entry->vme_next; + old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone; zfree(old_zone, entry); entry = next; } @@ -6526,13 +8959,29 @@ StartAgain: ; * reset the region attributes. */ - adjustment = start - vm_copy_start; for (entry = vm_map_copy_first_entry(copy); entry != vm_map_copy_to_entry(copy); entry = entry->vme_next) { + if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) { + /* + * We're injecting this copy entry into a map that + * has the standard page alignment, so clear + * "map_aligned" (which might have been inherited + * from the original map entry). 
+ */ + entry->map_aligned = FALSE; + } + entry->vme_start += adjustment; entry->vme_end += adjustment; + if (entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, + VM_MAP_PAGE_MASK(dst_map))); + assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, + VM_MAP_PAGE_MASK(dst_map))); + } + entry->inheritance = VM_INHERIT_DEFAULT; entry->protection = VM_PROT_DEFAULT; entry->max_protection = VM_PROT_ALL; @@ -6549,8 +8998,8 @@ StartAgain: ; vm_prot_t prot; int type_of_fault; - object = entry->object.vm_object; - offset = entry->offset; + object = VME_OBJECT(entry); + offset = VME_OFFSET(entry); va = entry->vme_start; pmap_pageable(dst_map->pmap, @@ -6581,7 +9030,7 @@ StartAgain: ; vm_object_lock(object); m = vm_page_lookup(object, offset); - if (m == VM_PAGE_NULL || m->wire_count == 0 || + if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) || m->absent) panic("vm_map_copyout: wiring %p", m); @@ -6597,14 +9046,21 @@ StartAgain: ; prot = entry->protection; - if (override_nx(dst_map, entry->alias) && prot) + if (override_nx(dst_map, VME_ALIAS(entry)) && + prot) prot |= VM_PROT_EXECUTE; type_of_fault = DBG_CACHE_HIT_FAULT; - vm_fault_enter(m, dst_map->pmap, va, prot, - m->wire_count != 0, FALSE, FALSE, - &type_of_fault); + vm_fault_enter(m, dst_map->pmap, va, prot, prot, + VM_PAGE_WIRED(m), FALSE, FALSE, + FALSE, VME_ALIAS(entry), + ((entry->iokit_acct || + (!entry->is_sub_map && + !entry->use_pmap)) + ? PMAP_OPTIONS_ALT_ACCT + : 0), + NULL, &type_of_fault); vm_object_unlock(object); @@ -6614,6 +9070,8 @@ StartAgain: ; } } +after_adjustments: + /* * Correct the page alignment for the result */ @@ -6624,7 +9082,11 @@ StartAgain: ; * Update the hints and the map size */ - SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); + if (consume_on_success) { + SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); + } else { + SAVE_HINT_MAP_WRITE(dst_map, last); + } dst_map->size += size; @@ -6632,7 +9094,13 @@ StartAgain: ; * Link in the copy */ - vm_map_copy_insert(dst_map, last, copy); + if (consume_on_success) { + vm_map_copy_insert(dst_map, last, copy); + } else { + vm_map_copy_remap(dst_map, last, copy, adjustment, + cur_protection, max_protection, + inheritance); + } vm_map_unlock(dst_map); @@ -6701,13 +9169,35 @@ vm_map_copyin_common( __unused boolean_t src_volatile, vm_map_copy_t *copy_result, /* OUT */ boolean_t use_maxprot) +{ + int flags; + + flags = 0; + if (src_destroy) { + flags |= VM_MAP_COPYIN_SRC_DESTROY; + } + if (use_maxprot) { + flags |= VM_MAP_COPYIN_USE_MAXPROT; + } + return vm_map_copyin_internal(src_map, + src_addr, + len, + flags, + copy_result); +} +kern_return_t +vm_map_copyin_internal( + vm_map_t src_map, + vm_map_address_t src_addr, + vm_map_size_t len, + int flags, + vm_map_copy_t *copy_result) /* OUT */ { vm_map_entry_t tmp_entry; /* Result of last map lookup -- * in multi-level lookup, this * entry contains the actual * vm_object/offset. */ - register vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */ vm_map_offset_t src_start; /* Start of current entry -- @@ -6720,9 +9210,18 @@ vm_map_copyin_common( boolean_t map_share=FALSE; submap_map_t *parent_maps = NULL; - register vm_map_copy_t copy; /* Resulting copy */ - vm_map_address_t copy_addr; + vm_map_address_t copy_addr; + vm_map_size_t copy_size; + boolean_t src_destroy; + boolean_t use_maxprot; + + if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) { + return KERN_INVALID_ARGUMENT; + } + + src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE; + use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? 
TRUE : FALSE; /* * Check for copies of zero bytes. @@ -6746,17 +9245,21 @@ vm_map_copyin_common( * setting up VM (and taking C-O-W faults) dominates the copy costs * for small regions. */ - if ((len < msg_ool_size_small) && !use_maxprot) + if ((len < msg_ool_size_small) && + !use_maxprot && + !(flags & VM_MAP_COPYIN_ENTRY_LIST)) return vm_map_copyin_kernel_buffer(src_map, src_addr, len, src_destroy, copy_result); /* * Compute (page aligned) start and end of region */ - src_start = vm_map_trunc_page(src_addr); - src_end = vm_map_round_page(src_end); + src_start = vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)); + src_end = vm_map_round_page(src_end, + VM_MAP_PAGE_MASK(src_map)); - XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0); + XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0); /* * Allocate a header element for the list. @@ -6766,16 +9269,31 @@ vm_map_copyin_common( */ copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE; vm_map_copy_first_entry(copy) = vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); copy->type = VM_MAP_COPY_ENTRY_LIST; copy->cpy_hdr.nentries = 0; copy->cpy_hdr.entries_pageable = TRUE; +#if 00 + copy->cpy_hdr.page_shift = src_map->hdr.page_shift; +#else + /* + * The copy entries can be broken down for a variety of reasons, + * so we can't guarantee that they will remain map-aligned... + * Will need to adjust the first copy_entry's "vme_start" and + * the last copy_entry's "vme_end" to be rounded to PAGE_MASK + * rather than the original map's alignment. + */ + copy->cpy_hdr.page_shift = PAGE_SHIFT; +#endif + + vm_map_store_init( &(copy->cpy_hdr) ); copy->offset = src_addr; copy->size = len; - new_entry = vm_map_copy_entry_create(copy); + new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); #define RETURN(x) \ MACRO_BEGIN \ @@ -6804,11 +9322,28 @@ vm_map_copyin_common( vm_map_lock(src_map); - if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) + /* + * Lookup the original "src_addr" rather than the truncated + * "src_start", in case "src_start" falls in a non-map-aligned + * map entry *before* the map entry that contains "src_addr"... + */ + if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) RETURN(KERN_INVALID_ADDRESS); if(!tmp_entry->is_sub_map) { + /* + * ... but clip to the map-rounded "src_start" rather than + * "src_addr" to preserve map-alignment. We'll adjust the + * first copy entry at the end, if needed. + */ vm_map_clip_start(src_map, tmp_entry, src_start); } + if (src_start < tmp_entry->vme_start) { + /* + * Move "src_start" up to the start of the + * first map entry to copy. + */ + src_start = tmp_entry->vme_start; + } /* set for later submap fix-up */ copy_addr = src_start; @@ -6858,9 +9393,9 @@ vm_map_copyin_common( ptr->base_len = submap_len; src_start -= tmp_entry->vme_start; - src_start += tmp_entry->offset; + src_start += VME_OFFSET(tmp_entry); src_end = src_start + submap_len; - src_map = tmp_entry->object.sub_map; + src_map = VME_SUBMAP(tmp_entry); vm_map_lock(src_map); /* keep an outstanding reference for all maps in */ /* the parents tree except the base map */ @@ -6876,8 +9411,8 @@ vm_map_copyin_common( } /* we are now in the lowest level submap... 
*/ - if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) && - (tmp_entry->object.vm_object->phys_contiguous)) { + if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) && + (VME_OBJECT(tmp_entry)->phys_contiguous)) { /* This is not supported for now. In future */ /* we will need to detect the phys_contig */ /* condition and then upgrade copy_slowly */ @@ -6897,7 +9432,7 @@ vm_map_copyin_common( version.main_timestamp = src_map->timestamp; vm_map_unlock(src_map); - new_entry = vm_map_copy_entry_create(copy); + new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); vm_map_lock(src_map); if ((version.main_timestamp + 1) != src_map->timestamp) { @@ -6926,12 +9461,15 @@ vm_map_copyin_common( vm_map_clip_end(src_map, src_entry, src_end); src_size = src_entry->vme_end - src_start; - src_object = src_entry->object.vm_object; - src_offset = src_entry->offset; + src_object = VME_OBJECT(src_entry); + src_offset = VME_OFFSET(src_entry); was_wired = (src_entry->wired_count != 0); vm_map_entry_copy(new_entry, src_entry); - new_entry->use_pmap = FALSE; /* clr address space specifics */ + if (new_entry->is_sub_map) { + /* clr address space specifics */ + new_entry->use_pmap = FALSE; + } /* * Attempt non-blocking copy-on-write optimizations. @@ -6962,12 +9500,12 @@ vm_map_copyin_common( RestartCopy: XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n", - src_object, new_entry, new_entry->object.vm_object, + src_object, new_entry, VME_OBJECT(new_entry), was_wired, 0); if ((src_object == VM_OBJECT_NULL || (!was_wired && !map_share && !tmp_entry->is_shared)) && vm_object_copy_quickly( - &new_entry->object.vm_object, + &VME_OBJECT(new_entry), src_offset, src_size, &src_needs_copy, @@ -6984,7 +9522,8 @@ vm_map_copyin_common( prot = src_entry->protection & ~VM_PROT_WRITE; - if (override_nx(src_map, src_entry->alias) && prot) + if (override_nx(src_map, VME_ALIAS(src_entry)) + && prot) prot |= VM_PROT_EXECUTE; vm_object_pmap_protect( @@ -6997,6 +9536,7 @@ vm_map_copyin_common( src_entry->vme_start, prot); + assert(tmp_entry->wired_count == 0); tmp_entry->needs_copy = TRUE; } @@ -7037,8 +9577,8 @@ vm_map_copyin_common( src_offset, src_size, THREAD_UNINT, - &new_entry->object.vm_object); - new_entry->offset = 0; + &VME_OBJECT(new_entry)); + VME_OFFSET_SET(new_entry, 0); new_entry->needs_copy = FALSE; } @@ -7055,17 +9595,26 @@ vm_map_copyin_common( if (new_object == VM_OBJECT_NULL) goto CopySlowly; - new_entry->object.vm_object = new_object; + VME_OBJECT_SET(new_entry, new_object); + assert(new_entry->wired_count == 0); new_entry->needs_copy = TRUE; + assert(!new_entry->iokit_acct); + assert(new_object->purgable == VM_PURGABLE_DENY); + new_entry->use_pmap = TRUE; result = KERN_SUCCESS; } else { + vm_object_offset_t new_offset; + new_offset = VME_OFFSET(new_entry); result = vm_object_copy_strategically(src_object, src_offset, src_size, - &new_entry->object.vm_object, - &new_entry->offset, + &VME_OBJECT(new_entry), + &new_offset, &new_entry_needs_copy); + if (new_offset != VME_OFFSET(new_entry)) { + VME_OFFSET_SET(new_entry, new_offset); + } new_entry->needs_copy = new_entry_needs_copy; } @@ -7105,6 +9654,12 @@ vm_map_copyin_common( */ if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) { + if (result != KERN_MEMORY_RESTART_COPY) { + vm_object_deallocate(VME_OBJECT(new_entry)); + VME_OBJECT_SET(new_entry, VM_OBJECT_NULL); + assert(!new_entry->iokit_acct); + new_entry->use_pmap = TRUE; + } RETURN(KERN_INVALID_ADDRESS); }
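The hunks above pick between vm_object_copy_quickly(), vm_object_copy_slowly() and vm_object_copy_strategically() depending on wiring and sharing of the source entry. A rough user-space model of that decision order (the struct and helper below are illustrative stand-ins, not kernel API):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative mirror of the source-entry state consulted above. */
struct copyin_src {
        bool has_object;   /* VME_OBJECT(src_entry) != VM_OBJECT_NULL */
        bool was_wired;    /* wired_count != 0: pages must be copied now */
        bool map_share;    /* copying out of a nested submap */
        bool is_shared;    /* entry is already shared */
};

/* Cheapest path first: symmetric copy-on-write; wiring or sharing
 * forces an immediate physical copy instead. */
static const char *
choose_copy_path(const struct copyin_src *s)
{
        if (s->has_object &&
            (s->was_wired || s->map_share || s->is_shared))
                return "slow: physical copy (vm_object_copy_slowly)";
        return "quick: copy-on-write (vm_object_copy_quickly)";
}

int
main(void)
{
        struct copyin_src wired = { true, true, false, false };
        struct copyin_src cold  = { true, false, false, false };
        printf("%s\n%s\n", choose_copy_path(&wired), choose_copy_path(&cold));
        return 0;
}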
@@ -7116,11 +9671,15 @@ vm_map_copyin_common( ((src_entry->max_protection & VM_PROT_READ) == 0)) goto VerificationFailed; - if (src_entry->vme_end < new_entry->vme_end) - src_size = (new_entry->vme_end = src_entry->vme_end) - src_start; + if (src_entry->vme_end < new_entry->vme_end) { + assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end, + VM_MAP_COPY_PAGE_MASK(copy))); + new_entry->vme_end = src_entry->vme_end; + src_size = new_entry->vme_end - src_start; + } - if ((src_entry->object.vm_object != src_object) || - (src_entry->offset != src_offset) ) { + if ((VME_OBJECT(src_entry) != src_object) || + (VME_OFFSET(src_entry) != src_offset) ) { /* * Verification failed. @@ -7130,7 +9689,7 @@ vm_map_copyin_common( VerificationFailed: ; - vm_object_deallocate(new_entry->object.vm_object); + vm_object_deallocate(VME_OBJECT(new_entry)); tmp_entry = src_entry; continue; } @@ -7165,36 +9724,60 @@ vm_map_copyin_common( src_start = new_entry->vme_end; new_entry = VM_MAP_ENTRY_NULL; while ((src_start >= src_end) && (src_end != 0)) { - if (src_map != base_map) { - submap_map_t *ptr; - - ptr = parent_maps; - assert(ptr != NULL); - parent_maps = parent_maps->next; - - /* fix up the damage we did in that submap */ - vm_map_simplify_range(src_map, - src_base, - src_end); - - vm_map_unlock(src_map); - vm_map_deallocate(src_map); - vm_map_lock(ptr->parent_map); - src_map = ptr->parent_map; - src_base = ptr->base_start; - src_start = ptr->base_start + ptr->base_len; - src_end = ptr->base_end; - if ((src_end > src_start) && - !vm_map_lookup_entry( - src_map, src_start, &tmp_entry)) - RETURN(KERN_INVALID_ADDRESS); - kfree(ptr, sizeof(submap_map_t)); - if(parent_maps == NULL) - map_share = FALSE; - src_entry = tmp_entry->vme_prev; - } else + submap_map_t *ptr; + + if (src_map == base_map) { + /* back to the top */ break; + } + + ptr = parent_maps; + assert(ptr != NULL); + parent_maps = parent_maps->next; + + /* fix up the damage we did in that submap */ + vm_map_simplify_range(src_map, + src_base, + src_end); + + vm_map_unlock(src_map); + vm_map_deallocate(src_map); + vm_map_lock(ptr->parent_map); + src_map = ptr->parent_map; + src_base = ptr->base_start; + src_start = ptr->base_start + ptr->base_len; + src_end = ptr->base_end; + if (!vm_map_lookup_entry(src_map, + src_start, + &tmp_entry) && + (src_end > src_start)) { + RETURN(KERN_INVALID_ADDRESS); + } + kfree(ptr, sizeof(submap_map_t)); + if (parent_maps == NULL) + map_share = FALSE; + src_entry = tmp_entry->vme_prev; + } + + if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) && + (src_start >= src_addr + len) && + (src_addr + len != 0)) { + /* + * Stop copying now, even though we haven't reached + * "src_end". We'll adjust the end of the last copy + * entry at the end, if needed. + * + * If src_map's alignment is different from the + * system's page-alignment, there could be + * extra non-map-aligned map entries between + * the original (non-rounded) "src_addr + len" + * and the rounded "src_end". + * We do not want to copy those map entries since + * they're not part of the copied range. + */ + break; } + if ((src_start >= src_end) && (src_end != 0)) break; /* @@ -7203,9 +9786,10 @@ vm_map_copyin_common( */ tmp_entry = src_entry->vme_next; - if ((tmp_entry->vme_start != src_start) || - (tmp_entry == vm_map_to_entry(src_map))) + if ((tmp_entry->vme_start != src_start) || + (tmp_entry == vm_map_to_entry(src_map))) { RETURN(KERN_INVALID_ADDRESS); + } }
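The rewritten loop above flattens the old nested if/else: one submap_map_t frame is popped per level as the copy climbs back out of nested submaps, and scanning resumes just past the submapped range. A minimal stand-alone model of that stack discipline (only the three base_* fields of submap_map_t are mirrored; everything else is invented for illustration):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Mirrors the submap_map_t fields used by the unwinding loop above. */
struct submap_frame {
        struct submap_frame *next;
        unsigned long base_start; /* range the submap covers in its parent */
        unsigned long base_len;
        unsigned long base_end;
};

static struct submap_frame *parent_frames;

static void
push_frame(unsigned long start, unsigned long len, unsigned long end)
{
        struct submap_frame *f = malloc(sizeof(*f));
        assert(f != NULL);
        f->base_start = start;
        f->base_len = len;
        f->base_end = end;
        f->next = parent_frames;
        parent_frames = f;
}

/* Pop one level; the parent scan resumes at base_start + base_len,
 * just as src_start is reset above before the frame is kfree()d. */
static unsigned long
pop_frame(void)
{
        struct submap_frame *f = parent_frames;
        unsigned long resume;

        assert(f != NULL);
        resume = f->base_start + f->base_len;
        parent_frames = f->next;
        free(f);
        return resume;
}

int
main(void)
{
        push_frame(0x10000, 0x4000, 0x20000);  /* outer submap range */
        push_frame(0x11000, 0x1000, 0x12000);  /* inner submap range */
        printf("resume inner parent at 0x%lx\n", pop_frame());
        printf("resume outer parent at 0x%lx\n", pop_frame());
        return 0;
}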
/* @@ -7213,54 +9797,268 @@ vm_map_copyin_common( * copy was successful. */ if (src_destroy) { - (void) vm_map_delete(src_map, - vm_map_trunc_page(src_addr), - src_end, - (src_map == kernel_map) ? - VM_MAP_REMOVE_KUNWIRE : - VM_MAP_NO_FLAGS, - VM_MAP_NULL); + (void) vm_map_delete( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + src_end, + ((src_map == kernel_map) ? + VM_MAP_REMOVE_KUNWIRE : + VM_MAP_NO_FLAGS), + VM_MAP_NULL); } else { /* fix up the damage we did in the base map */ - vm_map_simplify_range(src_map, - vm_map_trunc_page(src_addr), - vm_map_round_page(src_end)); + vm_map_simplify_range( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + vm_map_round_page(src_end, + VM_MAP_PAGE_MASK(src_map))); } vm_map_unlock(src_map); + if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) { + vm_map_offset_t original_start, original_offset, original_end; + + assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK); + + /* adjust alignment of first copy_entry's "vme_start" */ + tmp_entry = vm_map_copy_first_entry(copy); + if (tmp_entry != vm_map_copy_to_entry(copy)) { + vm_map_offset_t adjustment; + + original_start = tmp_entry->vme_start; + original_offset = VME_OFFSET(tmp_entry); + + /* map-align the start of the first copy entry... */ + adjustment = (tmp_entry->vme_start - + vm_map_trunc_page( + tmp_entry->vme_start, + VM_MAP_PAGE_MASK(src_map))); + tmp_entry->vme_start -= adjustment; + VME_OFFSET_SET(tmp_entry, + VME_OFFSET(tmp_entry) - adjustment); + copy_addr -= adjustment; + assert(tmp_entry->vme_start < tmp_entry->vme_end); + /* ... adjust for mis-aligned start of copy range */ + adjustment = + (vm_map_trunc_page(copy->offset, + PAGE_MASK) - + vm_map_trunc_page(copy->offset, + VM_MAP_PAGE_MASK(src_map))); + if (adjustment) { + assert(page_aligned(adjustment)); + assert(adjustment < VM_MAP_PAGE_SIZE(src_map)); + tmp_entry->vme_start += adjustment; + VME_OFFSET_SET(tmp_entry, + (VME_OFFSET(tmp_entry) + + adjustment)); + copy_addr += adjustment; + assert(tmp_entry->vme_start < tmp_entry->vme_end); + } + + /* + * Assert that the adjustments haven't exposed + * more than was originally copied... + */ + assert(tmp_entry->vme_start >= original_start); + assert(VME_OFFSET(tmp_entry) >= original_offset); + /* + * ... and that it did not adjust outside of + * a single 16K page. + */ + assert(vm_map_trunc_page(tmp_entry->vme_start, + VM_MAP_PAGE_MASK(src_map)) == + vm_map_trunc_page(original_start, + VM_MAP_PAGE_MASK(src_map))); + } + + /* adjust alignment of last copy_entry's "vme_end" */ + tmp_entry = vm_map_copy_last_entry(copy); + if (tmp_entry != vm_map_copy_to_entry(copy)) { + vm_map_offset_t adjustment; + + original_end = tmp_entry->vme_end; + + /* map-align the end of the last copy entry... */ + tmp_entry->vme_end = + vm_map_round_page(tmp_entry->vme_end, + VM_MAP_PAGE_MASK(src_map)); + /* ... adjust for mis-aligned end of copy range */ + adjustment = + (vm_map_round_page((copy->offset + + copy->size), + VM_MAP_PAGE_MASK(src_map)) - + vm_map_round_page((copy->offset + + copy->size), + PAGE_MASK)); + if (adjustment) { + assert(page_aligned(adjustment)); + assert(adjustment < VM_MAP_PAGE_SIZE(src_map)); + tmp_entry->vme_end -= adjustment; + assert(tmp_entry->vme_start < tmp_entry->vme_end); + } + + /* + * Assert that the adjustments haven't exposed + * more than was originally copied... + */ + assert(tmp_entry->vme_end <= original_end); + /* + * ... and that it did not adjust outside of + * a single 16K page. + */ + assert(vm_map_round_page(tmp_entry->vme_end, + VM_MAP_PAGE_MASK(src_map)) == + vm_map_round_page(original_end, + VM_MAP_PAGE_MASK(src_map))); + } + } + 
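The block just above re-aligns the first and last copy entries when the source map's page size (say 16K) is larger than the kernel's PAGE_MASK. The underlying trunc/round arithmetic, in a self-contained form (the macros below are simplified stand-ins for vm_map_trunc_page()/vm_map_round_page()):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins: the mask is page_size - 1, as in the kernel. */
#define TRUNC_PAGE(x, mask) ((uint64_t)(x) & ~(uint64_t)(mask))
#define ROUND_PAGE(x, mask) (((uint64_t)(x) + (uint64_t)(mask)) & ~(uint64_t)(mask))

int
main(void)
{
        uint64_t addr    = 0x12345678;
        uint64_t mask4k  = 0x0fff;   /* 4K page mask  */
        uint64_t mask16k = 0x3fff;   /* 16K page mask */

        /* The first-entry adjustment above: the distance down to the
         * enclosing 16K boundary, which is what gets subtracted from
         * vme_start and from the entry's offset. */
        uint64_t adjustment = addr - TRUNC_PAGE(addr, mask16k);
        assert(adjustment < mask16k + 1);

        /* The asserts above check that re-aligning never leaves the
         * original 16K page: truncating before and after must agree. */
        assert(TRUNC_PAGE(addr - adjustment, mask16k) ==
               TRUNC_PAGE(addr, mask16k));

        printf("4K-trunc 0x%llx, 16K-trunc 0x%llx, adjustment 0x%llx\n",
               (unsigned long long)TRUNC_PAGE(addr, mask4k),
               (unsigned long long)TRUNC_PAGE(addr, mask16k),
               (unsigned long long)adjustment);
        return 0;
}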
/* Fix-up start and end points in copy. This is necessary */ /* when the various entries in the copy object were picked */ /* up from different sub-maps */ tmp_entry = vm_map_copy_first_entry(copy); + copy_size = 0; /* compute actual size */ while (tmp_entry != vm_map_copy_to_entry(copy)) { + assert(VM_MAP_PAGE_ALIGNED( + copy_addr + (tmp_entry->vme_end - + tmp_entry->vme_start), + VM_MAP_COPY_PAGE_MASK(copy))); + assert(VM_MAP_PAGE_ALIGNED( + copy_addr, + VM_MAP_COPY_PAGE_MASK(copy))); + + /* + * The copy_entries will be injected directly into the + * destination map and might not be "map aligned" there... + */ + tmp_entry->map_aligned = FALSE; + tmp_entry->vme_end = copy_addr + (tmp_entry->vme_end - tmp_entry->vme_start); tmp_entry->vme_start = copy_addr; + assert(tmp_entry->vme_start < tmp_entry->vme_end); copy_addr += tmp_entry->vme_end - tmp_entry->vme_start; + copy_size += tmp_entry->vme_end - tmp_entry->vme_start; tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next; } + if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT && + copy_size < copy->size) { + /* + * The actual size of the VM map copy is smaller than what + * was requested by the caller. This must be because some + * PAGE_SIZE-sized pages are missing at the end of the last + * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range. + * The caller might not have been aware of those missing + * pages and might not want to be aware of it, which is + * fine as long as they don't try to access (and crash on) + * those missing pages. + * Let's adjust the size of the "copy", to avoid failing + * in vm_map_copyout() or vm_map_copy_overwrite(). + */ + assert(vm_map_round_page(copy_size, + VM_MAP_PAGE_MASK(src_map)) == + vm_map_round_page(copy->size, + VM_MAP_PAGE_MASK(src_map))); + copy->size = copy_size; + } + *copy_result = copy; return(KERN_SUCCESS); #undef RETURN } -/* - * vm_map_copyin_object: - * - * Create a copy object from an object. - * Our caller donates an object reference. - */ - kern_return_t -vm_map_copyin_object( - vm_object_t object, - vm_object_offset_t offset, /* offset of region in object */ - vm_object_size_t size, /* size of region in object */ - vm_map_copy_t *copy_result) /* OUT */ +vm_map_copy_extract( + vm_map_t src_map, + vm_map_address_t src_addr, + vm_map_size_t len, + vm_map_copy_t *copy_result, /* OUT */ + vm_prot_t *cur_prot, /* OUT */ + vm_prot_t *max_prot) +{ + vm_map_offset_t src_start, src_end; + vm_map_copy_t copy; + kern_return_t kr; + + /* + * Check for copies of zero bytes. + */ + + if (len == 0) { + *copy_result = VM_MAP_COPY_NULL; + return(KERN_SUCCESS); + } + + /* + * Check that the end address doesn't overflow + */ + src_end = src_addr + len; + if (src_end < src_addr) + return KERN_INVALID_ADDRESS; + + /* + * Compute (page aligned) start and end of region + */ + src_start = vm_map_trunc_page(src_addr, PAGE_MASK); + src_end = vm_map_round_page(src_end, PAGE_MASK); + + /* + * Allocate a header element for the list. + * + * Use the start and end in the header to + * remember the endpoints prior to rounding. 
+ */ + + copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE; + vm_map_copy_first_entry(copy) = + vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); + copy->type = VM_MAP_COPY_ENTRY_LIST; + copy->cpy_hdr.nentries = 0; + copy->cpy_hdr.entries_pageable = TRUE; + + vm_map_store_init(&copy->cpy_hdr); + + copy->offset = 0; + copy->size = len; + + kr = vm_map_remap_extract(src_map, + src_addr, + len, + FALSE, /* copy */ + &copy->cpy_hdr, + cur_prot, + max_prot, + VM_INHERIT_SHARE, + TRUE); /* pageable */ + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return kr; + } + + *copy_result = copy; + return KERN_SUCCESS; +} + +/* + * vm_map_copyin_object: + * + * Create a copy object from an object. + * Our caller donates an object reference. + */ + +kern_return_t +vm_map_copyin_object( + vm_object_t object, + vm_object_offset_t offset, /* offset of region in object */ + vm_object_size_t size, /* size of region in object */ + vm_map_copy_t *copy_result) /* OUT */ { vm_map_copy_t copy; /* Resulting copy */ @@ -7270,6 +10068,7 @@ vm_map_copyin_object( */ copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE; copy->type = VM_MAP_COPY_OBJECT; copy->cpy_object = object; copy->offset = offset; @@ -7298,7 +10097,7 @@ vm_map_fork_share( * make a new shadow and share it. */ - object = old_entry->object.vm_object; + object = VME_OBJECT(old_entry); if (old_entry->is_sub_map) { assert(old_entry->wired_count == 0); #ifndef NO_NESTED_PMAP @@ -7306,7 +10105,7 @@ vm_map_fork_share( kern_return_t result; result = pmap_nest(new_map->pmap, - (old_entry->object.sub_map)->pmap, + (VME_SUBMAP(old_entry))->pmap, (addr64_t)old_entry->vme_start, (addr64_t)old_entry->vme_start, (uint64_t)(old_entry->vme_end - old_entry->vme_start)); @@ -7317,8 +10116,9 @@ vm_map_fork_share( } else if (object == VM_OBJECT_NULL) { object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end - old_entry->vme_start)); - old_entry->offset = 0; - old_entry->object.vm_object = object; + VME_OFFSET_SET(old_entry, 0); + VME_OBJECT_SET(old_entry, object); + old_entry->use_pmap = TRUE; assert(!old_entry->needs_copy); } else if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { @@ -7335,7 +10135,7 @@ vm_map_fork_share( object->shadowed || /* case 2 */ (!object->true_share && /* case 3 */ !old_entry->is_shared && - (object->size > + (object->vo_size > (vm_map_size_t)(old_entry->vme_end - old_entry->vme_start)))) { @@ -7415,11 +10215,9 @@ vm_map_fork_share( * (This is a preemptive version of * case 2.) 
*/ - - vm_object_shadow(&old_entry->object.vm_object, - &old_entry->offset, - (vm_map_size_t) (old_entry->vme_end - - old_entry->vme_start)); + VME_OBJECT_SHADOW(old_entry, + (vm_map_size_t) (old_entry->vme_end - + old_entry->vme_start)); /* * If we're making a shadow for other than @@ -7433,13 +10231,13 @@ vm_map_fork_share( prot = old_entry->protection & ~VM_PROT_WRITE; - if (override_nx(old_map, old_entry->alias) && prot) + if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) prot |= VM_PROT_EXECUTE; - if (old_map->mapped) { + if (old_map->mapped_in_other_pmaps) { vm_object_pmap_protect( - old_entry->object.vm_object, - old_entry->offset, + VME_OBJECT(old_entry), + VME_OFFSET(old_entry), (old_entry->vme_end - old_entry->vme_start), PMAP_NULL, @@ -7454,8 +10252,9 @@ vm_map_fork_share( } old_entry->needs_copy = FALSE; - object = old_entry->object.vm_object; + object = VME_OBJECT(old_entry); } + /* * If object was using a symmetric copy strategy, @@ -7467,9 +10266,9 @@ vm_map_fork_share( */ if(old_entry->is_sub_map) { - vm_map_lock(old_entry->object.sub_map); - vm_map_reference(old_entry->object.sub_map); - vm_map_unlock(old_entry->object.sub_map); + vm_map_lock(VME_SUBMAP(old_entry)); + vm_map_reference(VME_SUBMAP(old_entry)); + vm_map_unlock(VME_SUBMAP(old_entry)); } else { vm_object_lock(object); vm_object_reference_locked(object); @@ -7484,7 +10283,8 @@ vm_map_fork_share( * Mark both entries as shared. */ - new_entry = vm_map_entry_create(new_map); + new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel + * map or descendants */ vm_map_entry_copy(new_entry, old_entry); old_entry->is_shared = TRUE; new_entry->is_shared = TRUE; @@ -7495,7 +10295,7 @@ vm_map_fork_share( * map. */ - vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry); + vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry); /* * Update the physical map @@ -7596,6 +10396,7 @@ vm_map_fork_copy( */ vm_map_t vm_map_fork( + ledger_t ledger, vm_map_t old_map) { pmap_t new_pmap; @@ -7605,16 +10406,17 @@ vm_map_fork( vm_map_entry_t new_entry; boolean_t src_needs_copy; boolean_t new_entry_needs_copy; + boolean_t pmap_is64bit; -#ifdef __i386__ - new_pmap = pmap_create((vm_map_size_t) 0, - old_map->pmap->pm_task_map != TASK_MAP_32BIT); - if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED) - pmap_set_4GB_pagezero(new_pmap); + pmap_is64bit = +#if defined(__i386__) || defined(__x86_64__) + old_map->pmap->pm_task_map != TASK_MAP_32BIT; #else - new_pmap = pmap_create((vm_map_size_t) 0, 0); +#error Unknown architecture. 
#endif + new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit); + vm_map_reference_swap(old_map); vm_map_lock(old_map); @@ -7622,7 +10424,8 @@ vm_map_fork( old_map->min_offset, old_map->max_offset, old_map->hdr.entries_pageable); - + /* inherit the parent map's page size */ + vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map)); for ( old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map); @@ -7650,19 +10453,21 @@ vm_map_fork( if(old_entry->is_sub_map) break; if ((old_entry->wired_count != 0) || - ((old_entry->object.vm_object != NULL) && - (old_entry->object.vm_object->true_share))) { + ((VME_OBJECT(old_entry) != NULL) && + (VME_OBJECT(old_entry)->true_share))) { goto slow_vm_map_fork_copy; } - new_entry = vm_map_entry_create(new_map); + new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */ vm_map_entry_copy(new_entry, old_entry); - /* clear address space specifics */ - new_entry->use_pmap = FALSE; + if (new_entry->is_sub_map) { + /* clear address space specifics */ + new_entry->use_pmap = FALSE; + } if (! vm_object_copy_quickly( - &new_entry->object.vm_object, - old_entry->offset, + &VME_OBJECT(new_entry), + VME_OFFSET(old_entry), (old_entry->vme_end - old_entry->vme_start), &src_needs_copy, @@ -7680,21 +10485,23 @@ vm_map_fork( prot = old_entry->protection & ~VM_PROT_WRITE; - if (override_nx(old_map, old_entry->alias) && prot) + if (override_nx(old_map, VME_ALIAS(old_entry)) + && prot) prot |= VM_PROT_EXECUTE; vm_object_pmap_protect( - old_entry->object.vm_object, - old_entry->offset, + VME_OBJECT(old_entry), + VME_OFFSET(old_entry), (old_entry->vme_end - old_entry->vme_start), ((old_entry->is_shared - || old_map->mapped) + || old_map->mapped_in_other_pmaps) ? PMAP_NULL : old_map->pmap), old_entry->vme_start, prot); + assert(old_entry->wired_count == 0); old_entry->needs_copy = TRUE; } new_entry->needs_copy = new_entry_needs_copy; @@ -7704,7 +10511,7 @@ vm_map_fork( * of the map. */ - vm_map_entry_link(new_map, vm_map_last_entry(new_map), + vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry); new_size += entry_size; break; @@ -7718,6 +10525,7 @@ vm_map_fork( old_entry = old_entry->vme_next; } + new_map->size = new_size; vm_map_unlock(old_map); vm_map_deallocate(old_map); @@ -7741,12 +10549,20 @@ vm_map_exec( { SHARED_REGION_TRACE_DEBUG( ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n", - current_task(), new_map, task, fsroot, cpu)); + (void *)VM_KERNEL_ADDRPERM(current_task()), + (void *)VM_KERNEL_ADDRPERM(new_map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu)); (void) vm_commpage_enter(new_map, task); (void) vm_shared_region_enter(new_map, task, fsroot, cpu); SHARED_REGION_TRACE_DEBUG( ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n", - current_task(), new_map, task, fsroot, cpu)); + (void *)VM_KERNEL_ADDRPERM(current_task()), + (void *)VM_KERNEL_ADDRPERM(new_map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu)); return KERN_SUCCESS; } @@ -7796,9 +10612,24 @@ vm_map_lookup_locked( vm_map_offset_t old_start = 0; vm_map_offset_t old_end = 0; register vm_prot_t prot; + boolean_t mask_protections; + boolean_t force_copy; + vm_prot_t original_fault_type; + + /* + * VM_PROT_MASK means that the caller wants us to use "fault_type" + * as a mask against the mapping's actual protections, not as an + * absolute value. + */ + mask_protections = (fault_type & VM_PROT_IS_MASK) ? 
TRUE : FALSE; + force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE; + fault_type &= VM_PROT_ALL; + original_fault_type = fault_type; *real_map = map; -RetryLookup: ; + +RetryLookup: + fault_type = original_fault_type; /* * If the map has an interesting hint, try it before calling @@ -7850,19 +10681,18 @@ submap_recurse: if ((*real_map != map) && (*real_map != cow_sub_map_parent)) vm_map_unlock(*real_map); - *real_map = entry->object.sub_map; + *real_map = VME_SUBMAP(entry); } if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) { if (!mapped_needs_copy) { if (vm_map_lock_read_to_write(map)) { vm_map_lock_read(map); - /* XXX FBDP: entry still valid ? */ - if(*real_map == entry->object.sub_map) - *real_map = map; + *real_map = map; goto RetryLookup; } - vm_map_lock_read(entry->object.sub_map); + vm_map_lock_read(VME_SUBMAP(entry)); + *var_map = VME_SUBMAP(entry); cow_sub_map_parent = map; /* reset base to map before cow object */ /* this is the map which will accept */ @@ -7872,13 +10702,15 @@ submap_recurse: cow_parent_vaddr = vaddr; mapped_needs_copy = TRUE; } else { - vm_map_lock_read(entry->object.sub_map); + vm_map_lock_read(VME_SUBMAP(entry)); + *var_map = VME_SUBMAP(entry); if((cow_sub_map_parent != map) && (*real_map != map)) vm_map_unlock(map); } } else { - vm_map_lock_read(entry->object.sub_map); + vm_map_lock_read(VME_SUBMAP(entry)); + *var_map = VME_SUBMAP(entry); /* leave map locked if it is a target */ /* cow sub_map above otherwise, just */ /* follow the maps down to the object */ @@ -7888,11 +10720,10 @@ submap_recurse: vm_map_unlock_read(map); } - /* XXX FBDP: map has been unlocked, what protects "entry" !? */ - *var_map = map = entry->object.sub_map; + map = *var_map; /* calculate the offset in the submap for vaddr */ - local_vaddr = (local_vaddr - entry->vme_start) + entry->offset; + local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry); RetrySubMap: if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) { @@ -7917,13 +10748,13 @@ submap_recurse: /* ultimately be clipped in the top map will only need */ /* to be as big as the portion of the underlying entry */ /* which is mapped */ - start_delta = submap_entry->vme_start > entry->offset ? - submap_entry->vme_start - entry->offset : 0; + start_delta = submap_entry->vme_start > VME_OFFSET(entry) ? + submap_entry->vme_start - VME_OFFSET(entry) : 0; end_delta = - (entry->offset + start_delta + (old_end - old_start)) <= + (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <= submap_entry->vme_end ? 
- 0 : (VME_OFFSET(entry) + (old_end - old_start)) - submap_entry->vme_end; @@ -7952,15 +10783,15 @@ submap_recurse: } - sub_object = submap_entry->object.vm_object; + sub_object = VME_OBJECT(submap_entry); if (sub_object == VM_OBJECT_NULL) { sub_object = vm_object_allocate( (vm_map_size_t) (submap_entry->vme_end - submap_entry->vme_start)); - submap_entry->object.vm_object = sub_object; - submap_entry->offset = 0; + VME_OBJECT_SET(submap_entry, sub_object); + VME_OFFSET_SET(submap_entry, 0); } local_start = local_vaddr - (cow_parent_vaddr - old_start); @@ -7968,8 +10799,10 @@ submap_recurse: (old_end - cow_parent_vaddr); vm_map_clip_start(map, submap_entry, local_start); vm_map_clip_end(map, submap_entry, local_end); - /* unnesting was done in vm_map_clip_start/end() */ - assert(!submap_entry->use_pmap); + if (submap_entry->is_sub_map) { + /* unnesting was done when clipping */ + assert(!submap_entry->use_pmap); + } /* This is the COW case, lets connect */ /* an entry in our space to the underlying */ @@ -7978,11 +10811,11 @@ submap_recurse: if(submap_entry->wired_count != 0 || - (sub_object->copy_strategy != - MEMORY_OBJECT_COPY_SYMMETRIC)) { + (sub_object->copy_strategy == + MEMORY_OBJECT_COPY_NONE)) { vm_object_lock(sub_object); vm_object_copy_slowly(sub_object, - submap_entry->offset, + VME_OFFSET(submap_entry), (submap_entry->vme_end - submap_entry->vme_start), FALSE, @@ -7994,20 +10827,23 @@ submap_recurse: copy_object = sub_object; vm_object_reference(copy_object); sub_object->shadowed = TRUE; + assert(submap_entry->wired_count == 0); submap_entry->needs_copy = TRUE; prot = submap_entry->protection & ~VM_PROT_WRITE; - if (override_nx(map, submap_entry->alias) && prot) + if (override_nx(old_map, + VME_ALIAS(submap_entry)) + && prot) prot |= VM_PROT_EXECUTE; vm_object_pmap_protect( sub_object, - submap_entry->offset, + VME_OFFSET(submap_entry), submap_entry->vme_end - submap_entry->vme_start, (submap_entry->is_shared - || map->mapped) ? + || map->mapped_in_other_pmaps) ? PMAP_NULL : map->pmap, submap_entry->vme_start, prot); @@ -8018,7 +10854,7 @@ submap_recurse: */ copy_offset = (local_vaddr - submap_entry->vme_start + - submap_entry->offset); + VME_OFFSET(submap_entry)); /* This works differently than the */ /* normal submap case. We go back */
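The copy_offset computation just above translates a virtual address inside the submap entry into an offset in the backing object. The same arithmetic in isolation (plain integers stand in for vm_map_offset_t, and the two-field entry layout is illustrative only):

#include <assert.h>
#include <stdint.h>

/* Minimal stand-in for the two fields the lookup path uses here. */
struct fake_entry {
        uint64_t vme_start;  /* where the entry begins in the map */
        uint64_t offset;     /* VME_OFFSET(): offset into the backing object */
};

/* A vaddr inside [vme_start, vme_end) maps to this object offset. */
static uint64_t
object_offset(const struct fake_entry *e, uint64_t vaddr)
{
        assert(vaddr >= e->vme_start);
        return (vaddr - e->vme_start) + e->offset;
}

int
main(void)
{
        struct fake_entry e = { .vme_start = 0x100000, .offset = 0x4000 };
        /* 0x1000 bytes into the mapping lands 0x5000 into the object. */
        assert(object_offset(&e, 0x101000) == 0x5000);
        return 0;
}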
@@ -8072,25 +10908,30 @@ submap_recurse: vm_map_clip_start(map, entry, local_start); vm_map_clip_end(map, entry, local_end); - /* unnesting was done in vm_map_clip_start/end() */ - assert(!entry->use_pmap); + if (entry->is_sub_map) { + /* unnesting was done when clipping */ + assert(!entry->use_pmap); + } /* substitute copy object for */ /* shared map entry */ - vm_map_deallocate(entry->object.sub_map); + vm_map_deallocate(VME_SUBMAP(entry)); + assert(!entry->iokit_acct); entry->is_sub_map = FALSE; - entry->object.vm_object = copy_object; + entry->use_pmap = TRUE; + VME_OBJECT_SET(entry, copy_object); /* propagate the submap entry's protections */ entry->protection |= submap_entry->protection; entry->max_protection |= submap_entry->max_protection; if(copied_slowly) { - entry->offset = 0; + VME_OFFSET_SET(entry, local_start - old_start); entry->needs_copy = FALSE; entry->is_shared = FALSE; } else { - entry->offset = copy_offset; + VME_OFFSET_SET(entry, copy_offset); + assert(entry->wired_count == 0); entry->needs_copy = TRUE; if(entry->inheritance == VM_INHERIT_SHARE) entry->inheritance = VM_INHERIT_COPY; @@ -8119,14 +10960,21 @@ submap_recurse: prot = entry->protection; - if (override_nx(map, entry->alias) && prot) { + if (override_nx(old_map, VME_ALIAS(entry)) && prot) { /* * HACK -- if not a stack, then allow execution */ prot |= VM_PROT_EXECUTE; } + if (mask_protections) { + fault_type &= prot; + if (fault_type == VM_PROT_NONE) { + goto protection_failure; + } + } if ((fault_type & (prot)) != fault_type) { + protection_failure: if (*real_map != map) { vm_map_unlock(*real_map); } @@ -8161,7 +11009,7 @@ submap_recurse: * demote the permissions allowed. */ - if ((fault_type & VM_PROT_WRITE) || *wired) { + if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) { /* * Make a new object, and place it in the * object chain. Note that no new references @@ -8173,12 +11021,11 @@ submap_recurse: vm_map_lock_read(map); goto RetryLookup; } - vm_object_shadow(&entry->object.vm_object, - &entry->offset, - (vm_map_size_t) (entry->vme_end - - entry->vme_start)); + VME_OBJECT_SHADOW(entry, + (vm_map_size_t) (entry->vme_end - + entry->vme_start)); - entry->object.vm_object->shadowed = TRUE; + VME_OBJECT(entry)->shadowed = TRUE; entry->needs_copy = FALSE; vm_map_lock_write_to_read(map); } @@ -8195,16 +11042,18 @@ submap_recurse: /* * Create an object if necessary. */ - if (entry->object.vm_object == VM_OBJECT_NULL) { + if (VME_OBJECT(entry) == VM_OBJECT_NULL) { if (vm_map_lock_read_to_write(map)) { vm_map_lock_read(map); goto RetryLookup; } - entry->object.vm_object = vm_object_allocate( - (vm_map_size_t)(entry->vme_end - entry->vme_start)); - entry->offset = 0; + VME_OBJECT_SET(entry, + vm_object_allocate( + (vm_map_size_t)(entry->vme_end - + entry->vme_start))); + VME_OFFSET_SET(entry, 0); vm_map_lock_write_to_read(map); } @@ -8214,19 +11063,35 @@ submap_recurse: * return the protection. */ - *offset = (vaddr - entry->vme_start) + entry->offset; - *object = entry->object.vm_object; + *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry); + *object = VME_OBJECT(entry); *out_prot = prot; if (fault_info) { fault_info->interruptible = THREAD_UNINT; /* for now... */ /* ... 
the caller will change "interruptible" if needed */ fault_info->cluster_size = 0; - fault_info->user_tag = entry->alias; + fault_info->user_tag = VME_ALIAS(entry); + fault_info->pmap_options = 0; + if (entry->iokit_acct || + (!entry->is_sub_map && !entry->use_pmap)) { + fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT; + } fault_info->behavior = entry->behavior; - fault_info->lo_offset = entry->offset; - fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; + fault_info->lo_offset = VME_OFFSET(entry); + fault_info->hi_offset = + (entry->vme_end - entry->vme_start) + VME_OFFSET(entry); fault_info->no_cache = entry->no_cache; + fault_info->stealth = FALSE; + fault_info->io_sync = FALSE; + if (entry->used_for_jit || + entry->vme_resilient_codesign) { + fault_info->cs_bypass = TRUE; + } else { + fault_info->cs_bypass = FALSE; + } + fault_info->mark_zf_absent = FALSE; + fault_info->batch_pmap_op = FALSE; } /* @@ -8298,6 +11163,7 @@ vm_map_region_recurse_64( vm_region_submap_info_64_t submap_info, /* IN/OUT */ mach_msg_type_number_t *count) /* IN/OUT */ { + mach_msg_type_number_t original_count; vm_region_extended_info_data_t extended; vm_map_entry_t tmp_entry; vm_map_offset_t user_address; @@ -8307,20 +11173,26 @@ vm_map_region_recurse_64( * "curr_entry" is the VM map entry preceding or including the * address we're looking for. * "curr_map" is the map or sub-map containing "curr_entry". + * "curr_address" is the equivalent of the top map's "user_address" + * in the current map. * "curr_offset" is the cumulated offset of "curr_map" in the * target task's address space. * "curr_depth" is the depth of "curr_map" in the chain of * sub-maps. - * "curr_max_offset" is the maximum offset we should take into - * account in the current map. It may be smaller than the current - * map's "max_offset" because we might not have mapped it all in - * the upper level map. + * + * "curr_max_below" and "curr_max_above" limit the range (around + * "curr_address") we should take into account in the current (sub)map. + * They limit the range to what's visible through the map entries + * we've traversed from the top map to the current map. 
+ */ vm_map_entry_t curr_entry; + vm_map_address_t curr_address; vm_map_offset_t curr_offset; vm_map_t curr_map; unsigned int curr_depth; - vm_map_offset_t curr_max_offset; + vm_map_offset_t curr_max_below, curr_max_above; + vm_map_offset_t curr_skip; /* * "next_" is the same as "curr_" but for the VM region immediately @@ -8330,9 +11202,11 @@ vm_map_region_recurse_64( */ vm_map_entry_t next_entry; vm_map_offset_t next_offset; + vm_map_offset_t next_address; vm_map_t next_map; unsigned int next_depth; - vm_map_offset_t next_max_offset; + vm_map_offset_t next_max_below, next_max_above; + vm_map_offset_t next_skip; boolean_t look_for_pages; vm_region_submap_short_info_64_t short_info; @@ -8342,71 +11216,97 @@ vm_map_region_recurse_64( return KERN_INVALID_ARGUMENT; } - if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) { - if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { - /* - * "info" structure is not big enough and - * would overflow - */ - return KERN_INVALID_ARGUMENT; - } else { - look_for_pages = FALSE; - *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; - short_info = (vm_region_submap_short_info_64_t) submap_info; - submap_info = NULL; - } + + if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { + /* + * "info" structure is not big enough and + * would overflow + */ + return KERN_INVALID_ARGUMENT; + } + + original_count = *count; + + if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) { + *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; + look_for_pages = FALSE; + short_info = (vm_region_submap_short_info_64_t) submap_info; + submap_info = NULL; } else { look_for_pages = TRUE; - *count = VM_REGION_SUBMAP_INFO_COUNT_64; + *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64; short_info = NULL; + + if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) { + *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64; + } } - - + user_address = *address; user_max_depth = *nesting_depth; + if (not_in_kdp) { + vm_map_lock_read(map); + } + +recurse_again: curr_entry = NULL; curr_map = map; + curr_address = user_address; curr_offset = 0; + curr_skip = 0; curr_depth = 0; - curr_max_offset = curr_map->max_offset; + curr_max_above = ((vm_map_offset_t) -1) - curr_address; + curr_max_below = curr_address; next_entry = NULL; next_map = NULL; + next_address = 0; next_offset = 0; + next_skip = 0; next_depth = 0; - next_max_offset = curr_max_offset; - - if (not_in_kdp) { - vm_map_lock_read(curr_map); - } + next_max_above = (vm_map_offset_t) -1; + next_max_below = (vm_map_offset_t) -1; for (;;) { if (vm_map_lookup_entry(curr_map, - user_address - curr_offset, + curr_address, &tmp_entry)) { /* tmp_entry contains the address we're looking for */ curr_entry = tmp_entry; } else { + vm_map_offset_t skip; /* * The address is not mapped. "tmp_entry" is the * map entry preceding the address. We want the next * one, if it exists. 
*/ curr_entry = tmp_entry->vme_next; + if (curr_entry == vm_map_to_entry(curr_map) || - curr_entry->vme_start >= curr_max_offset) { + (curr_entry->vme_start >= + curr_address + curr_max_above)) { /* no next entry at this level: stop looking */ if (not_in_kdp) { vm_map_unlock_read(curr_map); } curr_entry = NULL; curr_map = NULL; + curr_skip = 0; curr_offset = 0; curr_depth = 0; - curr_max_offset = 0; + curr_max_above = 0; + curr_max_below = 0; break; } + + /* adjust current address and offset */ + skip = curr_entry->vme_start - curr_address; + curr_address = curr_entry->vme_start; + curr_skip += skip; + curr_offset += skip; + curr_max_above -= skip; + curr_max_below = 0; } /* @@ -8417,7 +11317,8 @@ vm_map_region_recurse_64( tmp_entry = curr_entry->vme_next; if (tmp_entry == vm_map_to_entry(curr_map)) { /* no next entry at this level */ - } else if (tmp_entry->vme_start >= curr_max_offset) { + } else if (tmp_entry->vme_start >= + curr_address + curr_max_above) { /* * tmp_entry is beyond the scope of what we mapped of * this submap in the upper level: ignore it. @@ -8438,11 +11339,33 @@ vm_map_region_recurse_64( } next_entry = tmp_entry; next_map = curr_map; - next_offset = curr_offset; next_depth = curr_depth; - next_max_offset = curr_max_offset; + next_address = next_entry->vme_start; + next_skip = curr_skip; + next_skip += (next_address - curr_address); + next_offset = curr_offset; + next_offset += (next_address - curr_address); + next_max_above = MIN(next_max_above, curr_max_above); + next_max_above = MIN(next_max_above, + next_entry->vme_end - next_address); + next_max_below = MIN(next_max_below, curr_max_below); + next_max_below = MIN(next_max_below, + next_address - next_entry->vme_start); } + /* + * "curr_max_{above,below}" allow us to keep track of the + * portion of the submap that is actually mapped at this level: + * the rest of that submap is irrelevant to us, since it's not + * mapped here. + * The relevant portion of the map starts at + * "VME_OFFSET(curr_entry)" up to the size of "curr_entry". + */ + curr_max_above = MIN(curr_max_above, + curr_entry->vme_end - curr_address); + curr_max_below = MIN(curr_max_below, + curr_address - curr_entry->vme_start); + if (!curr_entry->is_sub_map || curr_depth >= user_max_depth) { /* @@ -8463,40 +11386,31 @@ vm_map_region_recurse_64( * later. */ if (not_in_kdp) { - vm_map_lock_read(curr_entry->object.sub_map); + vm_map_lock_read(VME_SUBMAP(curr_entry)); } if (curr_map == next_map) { /* keep "next_map" locked in case we need it */ } else { /* release this map */ - vm_map_unlock_read(curr_map); + if (not_in_kdp) + vm_map_unlock_read(curr_map); } /* * Adjust the offset. "curr_entry" maps the submap * at relative address "curr_entry->vme_start" in the - * curr_map but skips the first "curr_entry->offset" + * curr_map but skips the first "VME_OFFSET(curr_entry)" * bytes of the submap. * "curr_offset" always represents the offset of a virtual * address in the curr_map relative to the absolute address * space (i.e. the top-level VM map). */ curr_offset += - (curr_entry->vme_start - curr_entry->offset); + (VME_OFFSET(curr_entry) - curr_entry->vme_start); + curr_address = user_address + curr_offset; /* switch to the submap */ - curr_map = curr_entry->object.sub_map; + curr_map = VME_SUBMAP(curr_entry); curr_depth++; - /* - * "curr_max_offset" allows us to keep track of the - * portion of the submap that is actually mapped at this level: - * the rest of that submap is irrelevant to us, since it's not - * mapped here. 
- * The relevant portion of the map starts at - * "curr_entry->offset" up to the size of "curr_entry". - */ - curr_max_offset = - curr_entry->vme_end - curr_entry->vme_start + - curr_entry->offset; curr_entry = NULL; } @@ -8509,9 +11423,12 @@ vm_map_region_recurse_64( /* ... gather info about the next VM region */ curr_entry = next_entry; curr_map = next_map; /* still locked ... */ + curr_address = next_address; + curr_skip = next_skip; curr_offset = next_offset; curr_depth = next_depth; - curr_max_offset = next_max_offset; + curr_max_above = next_max_above; + curr_max_below = next_max_below; } else { /* we won't need "next_entry" after all */ if (next_entry != NULL) { @@ -8524,52 +11441,80 @@ vm_map_region_recurse_64( next_entry = NULL; next_map = NULL; next_offset = 0; + next_skip = 0; next_depth = 0; - next_max_offset = 0; + next_max_below = -1; + next_max_above = -1; + + if (curr_entry->is_sub_map && + curr_depth < user_max_depth) { + /* + * We're not as deep as we could be: we must have + * gone back up after not finding anything mapped + * below the original top-level map entry's. + * Let's move "curr_address" forward and recurse again. + */ + user_address = curr_address; + goto recurse_again; + } *nesting_depth = curr_depth; - *size = curr_entry->vme_end - curr_entry->vme_start; - *address = curr_entry->vme_start + curr_offset; + *size = curr_max_above + curr_max_below; + *address = user_address + curr_skip - curr_max_below; + +// LP64todo: all the current tools are 32bit, obviously never worked for 64b +// so probably should be a real 32b ID vs. ptr. +// Current users just check for equality +#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p)) if (look_for_pages) { - submap_info->user_tag = curr_entry->alias; - submap_info->offset = curr_entry->offset; + submap_info->user_tag = VME_ALIAS(curr_entry); + submap_info->offset = VME_OFFSET(curr_entry); submap_info->protection = curr_entry->protection; submap_info->inheritance = curr_entry->inheritance; submap_info->max_protection = curr_entry->max_protection; submap_info->behavior = curr_entry->behavior; submap_info->user_wired_count = curr_entry->user_wired_count; submap_info->is_submap = curr_entry->is_sub_map; - submap_info->object_id = (uint32_t) curr_entry->object.vm_object; + submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry)); } else { - short_info->user_tag = curr_entry->alias; - short_info->offset = curr_entry->offset; + short_info->user_tag = VME_ALIAS(curr_entry); + short_info->offset = VME_OFFSET(curr_entry); short_info->protection = curr_entry->protection; short_info->inheritance = curr_entry->inheritance; short_info->max_protection = curr_entry->max_protection; short_info->behavior = curr_entry->behavior; short_info->user_wired_count = curr_entry->user_wired_count; short_info->is_submap = curr_entry->is_sub_map; - short_info->object_id = (uint32_t) curr_entry->object.vm_object; + short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry)); } extended.pages_resident = 0; extended.pages_swapped_out = 0; extended.pages_shared_now_private = 0; extended.pages_dirtied = 0; + extended.pages_reusable = 0; extended.external_pager = 0; extended.shadow_depth = 0; + extended.share_mode = SM_EMPTY; + extended.ref_count = 0; if (not_in_kdp) { if (!curr_entry->is_sub_map) { + vm_map_offset_t range_start, range_end; + range_start = MAX((curr_address - curr_max_below), + curr_entry->vme_start); + range_end = MIN((curr_address + curr_max_above), + curr_entry->vme_end); 
vm_map_region_walk(curr_map, - curr_entry->vme_start, + range_start, curr_entry, - curr_entry->offset, - (curr_entry->vme_end - - curr_entry->vme_start), + (VME_OFFSET(curr_entry) + + (range_start - + curr_entry->vme_start)), + range_end - range_start, &extended, - look_for_pages); + look_for_pages, VM_REGION_EXTENDED_INFO_COUNT); if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) { @@ -8581,8 +11526,7 @@ vm_map_region_recurse_64( } else { extended.share_mode = SM_PRIVATE; } - extended.ref_count = - curr_entry->object.sub_map->ref_count; + extended.ref_count = VME_SUBMAP(curr_entry)->ref_count; } } @@ -8596,6 +11540,10 @@ vm_map_region_recurse_64( submap_info->shadow_depth = extended.shadow_depth; submap_info->share_mode = extended.share_mode; submap_info->ref_count = extended.ref_count; + + if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) { + submap_info->pages_reusable = extended.pages_reusable; + } } else { short_info->external_pager = extended.external_pager; short_info->shadow_depth = extended.shadow_depth; @@ -8666,7 +11614,7 @@ vm_map_region( start = entry->vme_start; - basic->offset = (uint32_t)entry->offset; + basic->offset = (uint32_t)VME_OFFSET(entry); basic->protection = entry->protection; basic->inheritance = entry->inheritance; basic->max_protection = entry->max_protection; @@ -8711,7 +11659,7 @@ vm_map_region( start = entry->vme_start; - basic->offset = entry->offset; + basic->offset = VME_OFFSET(entry); basic->protection = entry->protection; basic->inheritance = entry->inheritance; basic->max_protection = entry->max_protection; @@ -8732,14 +11680,18 @@ vm_map_region( return(KERN_SUCCESS); } case VM_REGION_EXTENDED_INFO: - { - vm_region_extended_info_t extended; - if (*count < VM_REGION_EXTENDED_INFO_COUNT) return(KERN_INVALID_ARGUMENT); + /*fallthru*/ + case VM_REGION_EXTENDED_INFO__legacy: + if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) + return KERN_INVALID_ARGUMENT; + + { + vm_region_extended_info_t extended; + mach_msg_type_number_t original_count; extended = (vm_region_extended_info_t) info; - *count = VM_REGION_EXTENDED_INFO_COUNT; vm_map_lock_read(map); @@ -8755,7 +11707,7 @@ vm_map_region( start = entry->vme_start; extended->protection = entry->protection; - extended->user_tag = entry->alias; + extended->user_tag = VME_ALIAS(entry); extended->pages_resident = 0; extended->pages_swapped_out = 0; extended->pages_shared_now_private = 0; @@ -8763,7 +11715,15 @@ vm_map_region( extended->external_pager = 0; extended->shadow_depth = 0; - vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE); + original_count = *count; + if (flavor == VM_REGION_EXTENDED_INFO__legacy) { + *count = VM_REGION_EXTENDED_INFO_COUNT__legacy; + } else { + extended->pages_reusable = 0; + *count = VM_REGION_EXTENDED_INFO_COUNT; + } + + vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count); if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) extended->share_mode = SM_PRIVATE; @@ -8818,7 +11778,11 @@ vm_map_region( } } -#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define OBJ_RESIDENT_COUNT(obj, entry_size) \ + MIN((entry_size), \ + ((obj)->all_reusable ? 
\ + (obj)->wired_page_count : \ + (obj)->resident_page_count - (obj)->reusable_page_count)) void vm_map_region_top_walk( @@ -8826,7 +11790,7 @@ vm_map_region_top_walk( vm_region_top_info_t top) { - if (entry->object.vm_object == 0 || entry->is_sub_map) { + if (VME_OBJECT(entry) == 0 || entry->is_sub_map) { top->share_mode = SM_EMPTY; top->ref_count = 0; top->obj_id = 0; @@ -8838,20 +11802,23 @@ vm_map_region_top_walk( int ref_count; uint32_t entry_size; - entry_size = (entry->vme_end - entry->vme_start) / PAGE_SIZE; + entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64); - obj = entry->object.vm_object; + obj = VME_OBJECT(entry); vm_object_lock(obj); if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) ref_count--; + assert(obj->reusable_page_count <= obj->resident_page_count); if (obj->shadow) { if (ref_count == 1) - top->private_pages_resident = min(obj->resident_page_count, entry_size); + top->private_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); else - top->shared_pages_resident = min(obj->resident_page_count, entry_size); + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); top->ref_count = ref_count; top->share_mode = SM_COW; @@ -8863,26 +11830,38 @@ vm_map_region_top_walk( if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) ref_count--; - top->shared_pages_resident += min(obj->resident_page_count, entry_size); + assert(obj->reusable_page_count <= obj->resident_page_count); + top->shared_pages_resident += + OBJ_RESIDENT_COUNT(obj, entry_size); top->ref_count += ref_count - 1; } } else { - if (entry->needs_copy) { + if (entry->superpage_size) { + top->share_mode = SM_LARGE_PAGE; + top->shared_pages_resident = 0; + top->private_pages_resident = entry_size; + } else if (entry->needs_copy) { top->share_mode = SM_COW; - top->shared_pages_resident = min(obj->resident_page_count, entry_size); + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); } else { if (ref_count == 1 || (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { top->share_mode = SM_PRIVATE; - top->private_pages_resident = min(obj->resident_page_count, entry_size); + top->private_pages_resident = + OBJ_RESIDENT_COUNT(obj, + entry_size); } else { top->share_mode = SM_SHARED; - top->shared_pages_resident = min(obj->resident_page_count, entry_size); + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, + entry_size); } } top->ref_count = ref_count; } - top->obj_id = (int)obj; + /* XXX K64: obj_id will be truncated */ + top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj); vm_object_unlock(obj); } @@ -8896,7 +11875,8 @@ vm_map_region_walk( vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended, - boolean_t look_for_pages) + boolean_t look_for_pages, + mach_msg_type_number_t count) { register struct vm_object *obj, *tmp_obj; register vm_map_offset_t last_offset; @@ -8905,15 +11885,27 @@ vm_map_region_walk( struct vm_object *shadow_object; int shadow_depth; - if ((entry->object.vm_object == 0) || + if ((VME_OBJECT(entry) == 0) || (entry->is_sub_map) || - (entry->object.vm_object->phys_contiguous)) { + (VME_OBJECT(entry)->phys_contiguous && + !entry->superpage_size)) { extended->share_mode = SM_EMPTY; extended->ref_count = 0; return; } + + if (entry->superpage_size) { + extended->shadow_depth = 0; + extended->share_mode = SM_LARGE_PAGE; + extended->ref_count = 1; + extended->external_pager = 0; + extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT); + extended->shadow_depth = 
0; + return; + } + { - obj = entry->object.vm_object; + obj = VME_OBJECT(entry); vm_object_lock(obj); @@ -8923,30 +11915,39 @@ vm_map_region_walk( if (look_for_pages) { for (last_offset = offset + range; offset < last_offset; - offset += PAGE_SIZE_64, va += PAGE_SIZE) - vm_map_region_look_for_page(map, va, obj, - offset, ref_count, - 0, extended); - } - - shadow_object = obj->shadow; - shadow_depth = 0; - if (shadow_object != VM_OBJECT_NULL) { - vm_object_lock(shadow_object); - for (; - shadow_object != VM_OBJECT_NULL; - shadow_depth++) { - vm_object_t next_shadow; - - next_shadow = shadow_object->shadow; - if (next_shadow) { - vm_object_lock(next_shadow); + offset += PAGE_SIZE_64, va += PAGE_SIZE) { + vm_map_region_look_for_page(map, va, obj, + offset, ref_count, + 0, extended, count); + } + } else { + shadow_object = obj->shadow; + shadow_depth = 0; + + if ( !(obj->pager_trusted) && !(obj->internal)) + extended->external_pager = 1; + + if (shadow_object != VM_OBJECT_NULL) { + vm_object_lock(shadow_object); + for (; + shadow_object != VM_OBJECT_NULL; + shadow_depth++) { + vm_object_t next_shadow; + + if ( !(shadow_object->pager_trusted) && + !(shadow_object->internal)) + extended->external_pager = 1; + + next_shadow = shadow_object->shadow; + if (next_shadow) { + vm_object_lock(next_shadow); + } + vm_object_unlock(shadow_object); + shadow_object = next_shadow; } - vm_object_unlock(shadow_object); - shadow_object = next_shadow; } + extended->shadow_depth = shadow_depth; } - extended->shadow_depth = shadow_depth; if (extended->shadow_depth || entry->needs_copy) extended->share_mode = SM_COW; @@ -8981,7 +11982,7 @@ vm_map_region_walk( register vm_map_entry_t last; int my_refs; - obj = entry->object.vm_object; + obj = VME_OBJECT(entry); last = vm_map_to_entry(map); my_refs = 0; @@ -9010,15 +12011,14 @@ vm_map_region_look_for_page( vm_object_offset_t offset, int max_refcnt, int depth, - vm_region_extended_info_t extended) + vm_region_extended_info_t extended, + mach_msg_type_number_t count) { register vm_page_t p; register vm_object_t shadow; register int ref_count; vm_object_t caller_object; -#if MACH_PAGEMAP kern_return_t kr; -#endif shadow = object->shadow; caller_object = object; @@ -9032,11 +12032,16 @@ vm_map_region_look_for_page( if (shadow && (max_refcnt == 1)) extended->pages_shared_now_private++; - if (!p->fictitious && + if (!p->fictitious && (p->dirty || pmap_is_modified(p->phys_page))) extended->pages_dirtied++; + else if (count >= VM_REGION_EXTENDED_INFO_COUNT) { + if (p->reusable || p->object->all_reusable) { + extended->pages_reusable++; + } + } - extended->pages_resident++; + extended->pages_resident++; if(object != caller_object) vm_object_unlock(object); @@ -9054,36 +12059,49 @@ vm_map_region_look_for_page( return; } - } else if (object->internal && - object->alive && - !object->terminating && - object->pager_ready) { - - memory_object_t pager; + } else +#endif /* MACH_PAGEMAP */ + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (VM_COMPRESSOR_PAGER_STATE_GET(object, + offset) + == VM_EXTERNAL_STATE_EXISTS) { + /* the pager has that page */ + extended->pages_swapped_out++; + if (object != caller_object) + vm_object_unlock(object); + return; + } + } else { + memory_object_t pager; - vm_object_paging_begin(object); - pager = object->pager; - vm_object_unlock(object); + vm_object_paging_begin(object); + pager = object->pager; + 
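At this point the legacy path has pinned the pager, and the probe resumes just below. The change in this hunk is that with the compressor active the code can ask VM_COMPRESSOR_PAGER_STATE_GET() directly whether a swapped copy of the page exists, instead of poking the pager with a zero-length memory_object_data_request() and checking for KERN_SUCCESS. A hedged user-space sketch of the two probes (every function below is a mock-up, not kernel API):

#include <stdbool.h>
#include <stdio.h>

enum pager_kind { PAGER_COMPRESSOR, PAGER_EXTERNAL };

/* Mock state: in the kernel this is VM_COMPRESSOR_PAGER_STATE_GET()
 * for the compressor, or a zero-length data request for a pager. */
static bool compressor_has_page(unsigned long offset) { return offset == 0x4000; }
static bool pager_has_page(unsigned long offset)      { return offset == 0x8000; }

/* Count a page as swapped out if whichever backing store owns it
 * reports that it exists, mirroring the pages_swapped_out++ paths. */
static bool
page_swapped_out(enum pager_kind kind, unsigned long offset)
{
        if (kind == PAGER_COMPRESSOR)
                return compressor_has_page(offset);
        return pager_has_page(offset);
}

int
main(void)
{
        printf("compressor 0x4000: %d\n", page_swapped_out(PAGER_COMPRESSOR, 0x4000));
        printf("external   0x4000: %d\n", page_swapped_out(PAGER_EXTERNAL, 0x4000));
        return 0;
}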
vm_object_unlock(object); - kr = memory_object_data_request( - pager, - offset + object->paging_offset, - 0, /* just poke the pager */ - VM_PROT_READ, - NULL); + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); - vm_object_lock(object); - vm_object_paging_end(object); + vm_object_lock(object); + vm_object_paging_end(object); - if (kr == KERN_SUCCESS) { - /* the pager has that page */ - extended->pages_swapped_out++; - if (object != caller_object) - vm_object_unlock(object); - return; + if (kr == KERN_SUCCESS) { + /* the pager has that page */ + extended->pages_swapped_out++; + if (object != caller_object) + vm_object_unlock(object); + return; + } } } -#endif /* MACH_PAGEMAP */ if (shadow) { vm_object_lock(shadow); @@ -9100,7 +12118,7 @@ vm_map_region_look_for_page( if(object != caller_object) vm_object_unlock(object); - offset = offset + object->shadow_offset; + offset = offset + object->vo_shadow_offset; object = shadow; shadow = object->shadow; continue; @@ -9120,7 +12138,7 @@ vm_map_region_count_obj_refs( register vm_object_t chk_obj; register vm_object_t tmp_obj; - if (entry->object.vm_object == 0) + if (VME_OBJECT(entry) == 0) return(0); if (entry->is_sub_map) @@ -9128,7 +12146,7 @@ vm_map_region_count_obj_refs( else { ref_count = 0; - chk_obj = entry->object.vm_object; + chk_obj = VME_OBJECT(entry); vm_object_lock(chk_obj); while (chk_obj) { @@ -9176,39 +12194,58 @@ vm_map_simplify_entry( (prev_entry->vme_end == this_entry->vme_start) && (prev_entry->is_sub_map == this_entry->is_sub_map) && - - (prev_entry->object.vm_object == this_entry->object.vm_object) && - ((prev_entry->offset + (prev_entry->vme_end - + (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) && + ((VME_OFFSET(prev_entry) + (prev_entry->vme_end - prev_entry->vme_start)) - == this_entry->offset) && + == VME_OFFSET(this_entry)) && - (prev_entry->inheritance == this_entry->inheritance) && + (prev_entry->behavior == this_entry->behavior) && + (prev_entry->needs_copy == this_entry->needs_copy) && (prev_entry->protection == this_entry->protection) && (prev_entry->max_protection == this_entry->max_protection) && - (prev_entry->behavior == this_entry->behavior) && - (prev_entry->alias == this_entry->alias) && + (prev_entry->inheritance == this_entry->inheritance) && + (prev_entry->use_pmap == this_entry->use_pmap) && + (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) && (prev_entry->no_cache == this_entry->no_cache) && + (prev_entry->permanent == this_entry->permanent) && + (prev_entry->map_aligned == this_entry->map_aligned) && + (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) && + (prev_entry->used_for_jit == this_entry->used_for_jit) && + /* from_reserved_zone: OK if that field doesn't match */ + (prev_entry->iokit_acct == this_entry->iokit_acct) && + (prev_entry->vme_resilient_codesign == + this_entry->vme_resilient_codesign) && + (prev_entry->vme_resilient_media == + this_entry->vme_resilient_media) && + (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->user_wired_count == this_entry->user_wired_count) && - (prev_entry->needs_copy == this_entry->needs_copy) && - - (prev_entry->use_pmap == FALSE) && - (this_entry->use_pmap == FALSE) && (prev_entry->in_transition == FALSE) && (this_entry->in_transition == FALSE) && (prev_entry->needs_wakeup == FALSE) && (this_entry->needs_wakeup == FALSE) && (prev_entry->is_shared == FALSE) && - (this_entry->is_shared == FALSE) + (this_entry->is_shared == FALSE) && + 
(prev_entry->superpage_size == FALSE) && + (this_entry->superpage_size == FALSE) ) { - _vm_map_entry_unlink(&map->hdr, prev_entry); + vm_map_store_entry_unlink(map, prev_entry); + assert(prev_entry->vme_start < this_entry->vme_end); + if (prev_entry->map_aligned) + assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start, + VM_MAP_PAGE_MASK(map))); this_entry->vme_start = prev_entry->vme_start; - this_entry->offset = prev_entry->offset; + VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry)); + + if (map->holelistenabled) { + vm_map_store_update_first_free(map, this_entry, TRUE); + } + if (prev_entry->is_sub_map) { - vm_map_deallocate(prev_entry->object.sub_map); + vm_map_deallocate(VME_SUBMAP(prev_entry)); } else { - vm_object_deallocate(prev_entry->object.vm_object); + vm_object_deallocate(VME_OBJECT(prev_entry)); } vm_map_entry_dispose(map, prev_entry); SAVE_HINT_MAP_WRITE(map, this_entry); @@ -9249,8 +12286,10 @@ vm_map_simplify_range( return; } - start = vm_map_trunc_page(start); - end = vm_map_round_page(end); + start = vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (!vm_map_lookup_entry(map, start, &entry)) { /* "start" is not mapped and "entry" ends before "start" */ @@ -9332,15 +12371,15 @@ vm_map_machine_attribute( vm_map_offset_t sub_end; sub_start = (start - entry->vme_start) - + entry->offset; + + VME_OFFSET(entry); sub_end = sub_start + sub_size; vm_map_machine_attribute( - entry->object.sub_map, + VME_SUBMAP(entry), sub_start, sub_end, attribute, value); } else { - if(entry->object.vm_object) { + if (VME_OBJECT(entry)) { vm_page_t m; vm_object_t object; vm_object_t base_object; @@ -9350,9 +12389,9 @@ vm_map_machine_attribute( vm_map_size_t range; range = sub_size; offset = (start - entry->vme_start) - + entry->offset; + + VME_OFFSET(entry); base_offset = offset; - object = entry->object.vm_object; + object = VME_OBJECT(entry); base_object = object; last_object = NULL; @@ -9370,7 +12409,7 @@ vm_map_machine_attribute( attribute, value); } else if (object->shadow) { - offset = offset + object->shadow_offset; + offset = offset + object->vo_shadow_offset; last_object = object; object = object->shadow; vm_object_lock(last_object->shadow); @@ -9424,348 +12463,710 @@ vm_map_behavior_set( XPR(XPR_VM_MAP, "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", - (integer_t)map, start, end, new_behavior, 0); + map, start, end, new_behavior, 0); + + if (start > end || + start < vm_map_min(map) || + end > vm_map_max(map)) { + return KERN_NO_SPACE; + } switch (new_behavior) { + + /* + * This first block of behaviors all set a persistent state on the specified + * memory range. All we have to do here is to record the desired behavior + * in the vm_map_entry_t's. + */ + case VM_BEHAVIOR_DEFAULT: case VM_BEHAVIOR_RANDOM: case VM_BEHAVIOR_SEQUENTIAL: case VM_BEHAVIOR_RSEQNTL: + case VM_BEHAVIOR_ZERO_WIRED_PAGES: + vm_map_lock(map); + + /* + * The entire address range must be valid for the map. + * Note that vm_map_range_check() does a + * vm_map_lookup_entry() internally and returns the + * entry containing the start of the address range if + * the entire range is valid. 
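 *
 * For illustration, user space drives these persistent behaviors
 * through the Mach vm_behavior_set() routine (madvise() maps onto
 * the same behavior values); a minimal sketch, with hypothetical
 * "addr"/"size":
 *
 *	kern_return_t kr;
 *
 *	kr = vm_behavior_set(mach_task_self(),
 *			     (vm_address_t)addr, (vm_size_t)size,
 *			     VM_BEHAVIOR_SEQUENTIAL);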
+ */ + if (vm_map_range_check(map, start, end, &temp_entry)) { + entry = temp_entry; + vm_map_clip_start(map, entry, start); + } + else { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + + while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { + vm_map_clip_end(map, entry, end); + if (entry->is_sub_map) { + assert(!entry->use_pmap); + } + + if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) { + entry->zero_wired_pages = TRUE; + } else { + entry->behavior = new_behavior; + } + entry = entry->vme_next; + } + + vm_map_unlock(map); break; + + /* + * The rest of these are different from the above in that they cause + * an immediate action to take place as opposed to setting a behavior that + * affects future actions. + */ + case VM_BEHAVIOR_WILLNEED: + return vm_map_willneed(map, start, end); + case VM_BEHAVIOR_DONTNEED: - new_behavior = VM_BEHAVIOR_DEFAULT; - break; - default: - return(KERN_INVALID_ARGUMENT); - } + return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS); - vm_map_lock(map); + case VM_BEHAVIOR_FREE: + return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS); - /* - * The entire address range must be valid for the map. - * Note that vm_map_range_check() does a - * vm_map_lookup_entry() internally and returns the - * entry containing the start of the address range if - * the entire range is valid. - */ - if (vm_map_range_check(map, start, end, &temp_entry)) { - entry = temp_entry; - vm_map_clip_start(map, entry, start); - } - else { - vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); - } + case VM_BEHAVIOR_REUSABLE: + return vm_map_reusable_pages(map, start, end); - while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { - vm_map_clip_end(map, entry, end); - assert(!entry->use_pmap); + case VM_BEHAVIOR_REUSE: + return vm_map_reuse_pages(map, start, end); - entry->behavior = new_behavior; + case VM_BEHAVIOR_CAN_REUSE: + return vm_map_can_reuse(map, start, end); - entry = entry->vme_next; +#if MACH_ASSERT + case VM_BEHAVIOR_PAGEOUT: + return vm_map_pageout(map, start, end); +#endif /* MACH_ASSERT */ + + default: + return(KERN_INVALID_ARGUMENT); } - vm_map_unlock(map); return(KERN_SUCCESS); } -#include -#if MACH_KDB -#include -#include - -#define printf db_printf - /* - * Forward declarations for internal functions. + * Internals for madvise(MADV_WILLNEED) system call. + * + * The present implementation is to do a read-ahead if the mapping corresponds + * to a mapped regular file. If it's an anonymous mapping, then we do nothing + * and basically ignore the "advice" (which we are always free to do). */ -extern void vm_map_links_print( - struct vm_map_links *links); -extern void vm_map_header_print( - struct vm_map_header *header); -extern void vm_map_entry_print( - vm_map_entry_t entry); +static kern_return_t +vm_map_willneed( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end +) +{ + vm_map_entry_t entry; + vm_object_t object; + memory_object_t pager; + struct vm_object_fault_info fault_info; + kern_return_t kr; + vm_object_size_t len; + vm_object_offset_t offset; -extern void vm_follow_entry( - vm_map_entry_t entry); + /* + * Fill in static values in fault_info. Several fields get ignored by the code + * we call, but we'll fill them in anyway since uninitialized fields are bad + * when it comes to future backwards compatibility. 
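 *
 * A user-space sketch of what ends up here, for a file mapping
 * (hypothetical "fd"/"len"; assumes <sys/mman.h>):
 *
 *	void *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *
 *	if (buf != MAP_FAILED)
 *		(void) madvise(buf, len, MADV_WILLNEED);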
+ */ -extern void vm_follow_map( - vm_map_t map); + fault_info.interruptible = THREAD_UNINT; /* ignored value */ + fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; + fault_info.no_cache = FALSE; /* ignored value */ + fault_info.stealth = TRUE; + fault_info.io_sync = FALSE; + fault_info.cs_bypass = FALSE; + fault_info.mark_zf_absent = FALSE; + fault_info.batch_pmap_op = FALSE; -/* - * vm_map_links_print: [ debug ] - */ -void -vm_map_links_print( - struct vm_map_links *links) -{ - iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n", - links->prev, - links->next, - (unsigned long long)links->start, - (unsigned long long)links->end); -} + /* + * The MADV_WILLNEED operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ -/* - * vm_map_header_print: [ debug ] - */ -void -vm_map_header_print( - struct vm_map_header *header) -{ - vm_map_links_print(&header->links); - iprintf("nentries = %08X, %sentries_pageable\n", - header->nentries, - (header->entries_pageable ? "" : "!")); -} + vm_map_lock_read(map); -/* - * vm_follow_entry: [ debug ] - */ -void -vm_follow_entry( - vm_map_entry_t entry) -{ - int shadows; + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ + + if (! vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + return KERN_INVALID_ADDRESS; + } + + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && start < end; ) { + + /* + * The first time through, the start address could be anywhere + * within the vm_map_entry we found. So adjust the offset to + * correspond. After that, the offset will always be zero to + * correspond to the beginning of the current vm_map_entry. + */ + offset = (start - entry->vme_start) + VME_OFFSET(entry); - iprintf("map entry %08X\n", entry); + /* + * Set the length so we don't go beyond the end of the + * map_entry or beyond the end of the range we were given. + * This range could span also multiple map entries all of which + * map different files, so make sure we only do the right amount + * of I/O for each object. Note that it's possible for there + * to be multiple map entries all referring to the same object + * but with different page permissions, but it's not worth + * trying to optimize that case. + */ + len = MIN(entry->vme_end - start, end - start); - db_indent += 2; + if ((vm_size_t) len != len) { + /* 32-bit overflow */ + len = (vm_size_t) (0 - PAGE_SIZE); + } + fault_info.cluster_size = (vm_size_t) len; + fault_info.lo_offset = offset; + fault_info.hi_offset = offset + len; + fault_info.user_tag = VME_ALIAS(entry); + fault_info.pmap_options = 0; + if (entry->iokit_acct || + (!entry->is_sub_map && !entry->use_pmap)) { + fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT; + } - shadows = vm_follow_object(entry->object.vm_object); - iprintf("Total objects : %d\n",shadows); + /* + * If there's no read permission to this mapping, then just + * skip it. + */ + if ((entry->protection & VM_PROT_READ) == 0) { + entry = entry->vme_next; + start = entry->vme_start; + continue; + } - db_indent -= 2; -} + /* + * Find the file object backing this map entry. If there is + * none, then we simply ignore the "will need" advice for this + * entry and go on to the next one. 
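 *
 * Worked example of the offset/length clamping above: an entry
 * covering [0x1000, 0x5000) with VME_OFFSET 0 and a request of
 * start = 0x3000, end = 0x9000 gives offset = 0x2000 and
 * len = MIN(0x5000 - 0x3000, 0x9000 - 0x3000) = 0x2000, so each
 * backing object is asked only for its own slice of the range.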
+ */ + if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) { + entry = entry->vme_next; + start = entry->vme_start; + continue; + } -/* - * vm_map_entry_print: [ debug ] - */ -void -vm_map_entry_print( - register vm_map_entry_t entry) -{ - static const char *inheritance_name[4] = - { "share", "copy", "none", "?"}; - static const char *behavior_name[4] = - { "dflt", "rand", "seqtl", "rseqntl" }; - - iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next); + /* + * The data_request() could take a long time, so let's + * release the map lock to avoid blocking other threads. + */ + vm_map_unlock_read(map); + + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); - db_indent += 2; + /* + * Get the data from the object asynchronously. + * + * Note that memory_object_data_request() places limits on the + * amount of I/O it will do. Regardless of the len we + * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it + * silently truncates the len to that size. This isn't + * necessarily bad since madvise shouldn't really be used to + * page in unlimited amounts of data. Other Unix variants + * limit the willneed case as well. If this turns out to be an + * issue for developers, then we can always adjust the policy + * here and still be backwards compatible since this is all + * just "advice". + */ + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* ignored */ + VM_PROT_READ, + (memory_object_fault_info_t)&fault_info); - vm_map_links_print(&entry->links); + vm_object_lock(object); + vm_object_paging_end(object); + vm_object_unlock(object); - iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n", - (unsigned long long)entry->vme_start, - (unsigned long long)entry->vme_end, - entry->protection, - entry->max_protection, - inheritance_name[(entry->inheritance & 0x3)]); + /* + * If we couldn't do the I/O for some reason, just give up on + * the madvise. We still return success to the user since + * madvise isn't supposed to fail when the advice can't be + * taken. + */ + if (kr != KERN_SUCCESS) { + return KERN_SUCCESS; + } - iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n", - behavior_name[(entry->behavior & 0x3)], - entry->wired_count, - entry->user_wired_count); - iprintf("%sin_transition, %sneeds_wakeup\n", - (entry->in_transition ? "" : "!"), - (entry->needs_wakeup ? "" : "!")); + start += len; + if (start >= end) { + /* done */ + return KERN_SUCCESS; + } - if (entry->is_sub_map) { - iprintf("submap = %08X - offset = %016llX\n", - entry->object.sub_map, - (unsigned long long)entry->offset); - } else { - iprintf("object = %08X offset = %016llX - ", - entry->object.vm_object, - (unsigned long long)entry->offset); - printf("%sis_shared, %sneeds_copy\n", - (entry->is_shared ? "" : "!"), - (entry->needs_copy ? "" : "!")); + /* look up next entry */ + vm_map_lock_read(map); + if (! vm_map_lookup_entry(map, start, &entry)) { + /* + * There's a new hole in the address range. 
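 *
 * A hole like this is the only failure the caller can observe:
 * madvise(MADV_WILLNEED) errors reflect a bad argument or range
 * (EINVAL/ENOMEM), never a failed read-ahead. Hypothetical sketch:
 *
 *	if (madvise(buf, len, MADV_WILLNEED) == -1)
 *		perror("madvise");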
+ */ + vm_map_unlock_read(map); + return KERN_INVALID_ADDRESS; + } } - db_indent -= 2; + vm_map_unlock_read(map); + return KERN_SUCCESS; } -/* - * vm_follow_map: [ debug ] - */ -void -vm_follow_map( - vm_map_t map) +static boolean_t +vm_map_entry_is_reusable( + vm_map_entry_t entry) { - register vm_map_entry_t entry; + /* Only user map entries */ + + vm_object_t object; - iprintf("task map %08X\n", map); + if (entry->is_sub_map) { + return FALSE; + } - db_indent += 2; + switch (VME_ALIAS(entry)) { + case VM_MEMORY_MALLOC: + case VM_MEMORY_MALLOC_SMALL: + case VM_MEMORY_MALLOC_LARGE: + case VM_MEMORY_REALLOC: + case VM_MEMORY_MALLOC_TINY: + case VM_MEMORY_MALLOC_LARGE_REUSABLE: + case VM_MEMORY_MALLOC_LARGE_REUSED: + /* + * This is a malloc() memory region: check if it's still + * in its original state and can be re-used for more + * malloc() allocations. + */ + break; + default: + /* + * Not a malloc() memory region: let the caller decide if + * it's re-usable. + */ + return TRUE; + } - for (entry = vm_map_first_entry(map); - entry && entry != vm_map_to_entry(map); - entry = entry->vme_next) { - vm_follow_entry(entry); + if (entry->is_shared || + entry->is_sub_map || + entry->in_transition || + entry->protection != VM_PROT_DEFAULT || + entry->max_protection != VM_PROT_ALL || + entry->inheritance != VM_INHERIT_DEFAULT || + entry->no_cache || + entry->permanent || + entry->superpage_size != FALSE || + entry->zero_wired_pages || + entry->wired_count != 0 || + entry->user_wired_count != 0) { + return FALSE; } - db_indent -= 2; + object = VME_OBJECT(entry); + if (object == VM_OBJECT_NULL) { + return TRUE; + } + if ( +#if 0 + /* + * Let's proceed even if the VM object is potentially + * shared. + * We check for this later when processing the actual + * VM pages, so the contents will be safe if shared. + * + * But we can still mark this memory region as "reusable" to + * acknowledge that the caller did let us know that the memory + * could be re-used and should not be penalized for holding + * on to it. This allows its "resident size" to not include + * the reusable range. + */ + object->ref_count == 1 && +#endif + object->wired_page_count == 0 && + object->copy == VM_OBJECT_NULL && + object->shadow == VM_OBJECT_NULL && + object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && + object->internal && + !object->true_share && + object->wimg_bits == VM_WIMG_USE_DEFAULT && + !object->code_signed) { + return TRUE; + } + return FALSE; + + } -/* - * vm_map_print: [ debug ] - */ -void -vm_map_print( - db_addr_t inmap) +static kern_return_t +vm_map_reuse_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) { - register vm_map_entry_t entry; - vm_map_t map; -#if TASK_SWAPPER - char *swstate; -#endif /* TASK_SWAPPER */ + vm_map_entry_t entry; + vm_object_t object; + vm_object_offset_t start_offset, end_offset; - map = (vm_map_t)(long) - inmap; /* Make sure we have the right type */ + /* + * The MADV_REUSE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ - iprintf("task map %08X\n", map); + vm_map_lock_read(map); + assert(map->pmap != kernel_pmap); /* protect alias access */ - db_indent += 2; + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. 
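 *
 * This entry point pairs with vm_map_reusable_pages() below; user
 * space drives both through Darwin's madvise() extensions. A minimal
 * sketch for a malloc-style region (hypothetical "buf"/"len"; assumes
 * <sys/mman.h>):
 *
 *	(void) madvise(buf, len, MADV_FREE_REUSABLE);
 *	... contents no longer needed; pages may be reclaimed ...
 *	(void) madvise(buf, len, MADV_FREE_REUSE);
 *	... region is about to be reused; pages are billed again ...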
+ */ - vm_map_header_print(&map->hdr); + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_failure++; + return KERN_INVALID_ADDRESS; + } - iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n", - map->pmap, - map->size, - map->ref_count, - map->hint, - map->first_free); + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_failure++; + return KERN_INVALID_ADDRESS; + } - iprintf("%swait_for_space, %swiring_required, timestamp = %d\n", - (map->wait_for_space ? "" : "!"), - (map->wiring_required ? "" : "!"), - map->timestamp); + /* + * The first time through, the start address could be anywhere + * within the vm_map_entry we found. So adjust the offset to + * correspond. + */ + if (entry->vme_start < start) { + start_offset = start - entry->vme_start; + } else { + start_offset = 0; + } + end_offset = MIN(end, entry->vme_end) - entry->vme_start; + start_offset += VME_OFFSET(entry); + end_offset += VME_OFFSET(entry); -#if TASK_SWAPPER - switch (map->sw_state) { - case MAP_SW_IN: - swstate = "SW_IN"; - break; - case MAP_SW_OUT: - swstate = "SW_OUT"; - break; - default: - swstate = "????"; - break; - } - iprintf("res = %d, sw_state = %s\n", map->res_count, swstate); -#endif /* TASK_SWAPPER */ + assert(!entry->is_sub_map); + object = VME_OBJECT(entry); + if (object != VM_OBJECT_NULL) { + vm_object_lock(object); + vm_object_reuse_pages(object, start_offset, end_offset, + TRUE); + vm_object_unlock(object); + } - for (entry = vm_map_first_entry(map); - entry && entry != vm_map_to_entry(map); - entry = entry->vme_next) { - vm_map_entry_print(entry); + if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) { + /* + * XXX + * We do not hold the VM map exclusively here. + * The "alias" field is not that critical, so it's + * safe to update it here, as long as it is the only + * one that can be modified while holding the VM map + * "shared". + */ + VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED); + } } - - db_indent -= 2; + + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_success++; + return KERN_SUCCESS; } -/* - * Routine: vm_map_copy_print - * Purpose: - * Pretty-print a copy object for ddb. - */ -void -vm_map_copy_print( - db_addr_t incopy) +static kern_return_t +vm_map_reusable_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) { - vm_map_copy_t copy; - vm_map_entry_t entry; + vm_map_entry_t entry; + vm_object_t object; + vm_object_offset_t start_offset, end_offset; + vm_map_offset_t pmap_offset; - copy = (vm_map_copy_t)(long) - incopy; /* Make sure we have the right type */ - - printf("copy object 0x%x\n", copy); + /* + * The MADV_REUSABLE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ - db_indent += 2; + vm_map_lock_read(map); + assert(map->pmap != kernel_pmap); /* protect alias access */ - iprintf("type=%d", copy->type); - switch (copy->type) { - case VM_MAP_COPY_ENTRY_LIST: - printf("[entry_list]"); - break; - - case VM_MAP_COPY_OBJECT: - printf("[object]"); - break; - - case VM_MAP_COPY_KERNEL_BUFFER: - printf("[kernel_buffer]"); - break; + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. 
Otherwise, we're required to return + * an error. + */ - default: - printf("[bad type]"); - break; + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_failure++; + return KERN_INVALID_ADDRESS; } - printf(", offset=0x%llx", (unsigned long long)copy->offset); - printf(", size=0x%x\n", copy->size); - switch (copy->type) { - case VM_MAP_COPY_ENTRY_LIST: - vm_map_header_print(©->cpy_hdr); - for (entry = vm_map_copy_first_entry(copy); - entry && entry != vm_map_copy_to_entry(copy); - entry = entry->vme_next) { - vm_map_entry_print(entry); + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + int kill_pages = 0; + + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_failure++; + return KERN_INVALID_ADDRESS; } - break; - case VM_MAP_COPY_OBJECT: - iprintf("object=0x%x\n", copy->cpy_object); - break; + /* + * The first time through, the start address could be anywhere + * within the vm_map_entry we found. So adjust the offset to + * correspond. + */ + if (entry->vme_start < start) { + start_offset = start - entry->vme_start; + pmap_offset = start; + } else { + start_offset = 0; + pmap_offset = entry->vme_start; + } + end_offset = MIN(end, entry->vme_end) - entry->vme_start; + start_offset += VME_OFFSET(entry); + end_offset += VME_OFFSET(entry); - case VM_MAP_COPY_KERNEL_BUFFER: - iprintf("kernel buffer=0x%x", copy->cpy_kdata); - printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size); - break; + assert(!entry->is_sub_map); + object = VME_OBJECT(entry); + if (object == VM_OBJECT_NULL) + continue; - } - db_indent -=2; + vm_object_lock(object); + if (object->ref_count == 1 && + !object->shadow && + /* + * "iokit_acct" entries are billed for their virtual size + * (rather than for their resident pages only), so they + * wouldn't benefit from making pages reusable, and it + * would be hard to keep track of pages that are both + * "iokit_acct" and "reusable" in the pmap stats and ledgers. + */ + !(entry->iokit_acct || + (!entry->is_sub_map && !entry->use_pmap))) + kill_pages = 1; + else + kill_pages = -1; + if (kill_pages != -1) { + vm_object_deactivate_pages(object, + start_offset, + end_offset - start_offset, + kill_pages, + TRUE /*reusable_pages*/, + map->pmap, + pmap_offset); + } else { + vm_page_stats_reusable.reusable_pages_shared++; + } + vm_object_unlock(object); + + if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE || + VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) { + /* + * XXX + * We do not hold the VM map exclusively here. + * The "alias" field is not that critical, so it's + * safe to update it here, as long as it is the only + * one that can be modified while holding the VM map + * "shared". 
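 *
 * The net effect is visible in the task's accounting: pages marked
 * reusable are reported apart from the rest of the resident
 * footprint. A sketch using task_info() (assumes <mach/mach.h> and
 * <stdio.h>):
 *
 *	task_vm_info_data_t vmi;
 *	mach_msg_type_number_t cnt = TASK_VM_INFO_COUNT;
 *
 *	if (task_info(mach_task_self(), TASK_VM_INFO,
 *		      (task_info_t)&vmi, &cnt) == KERN_SUCCESS)
 *		printf("reusable: %llu bytes\n",
 *		       (unsigned long long)vmi.reusable);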
+ */ + VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE); + } + } + + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_success++; + return KERN_SUCCESS; } -/* - * db_vm_map_total_size(map) [ debug ] - * - * return the total virtual size (in bytes) of the map - */ -vm_map_size_t -db_vm_map_total_size( - db_addr_t inmap) -{ - vm_map_entry_t entry; - vm_map_size_t total; - vm_map_t map; - map = (vm_map_t)(long) - inmap; /* Make sure we have the right type */ +static kern_return_t +vm_map_can_reuse( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_entry_t entry; - total = 0; - for (entry = vm_map_first_entry(map); - entry != vm_map_to_entry(map); - entry = entry->vme_next) { - total += entry->vme_end - entry->vme_start; - } + /* + * The MADV_REUSABLE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ - return total; -} + vm_map_lock_read(map); + assert(map->pmap != kernel_pmap); /* protect alias access */ -#endif /* MACH_KDB */ + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ -/* + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_failure++; + return KERN_INVALID_ADDRESS; + } + } + + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_success++; + return KERN_SUCCESS; +} + + +#if MACH_ASSERT +static kern_return_t +vm_map_pageout( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_entry_t entry; + + /* + * The MADV_PAGEOUT operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ + + vm_map_lock_read(map); + + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ + + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + return KERN_INVALID_ADDRESS; + } + + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + vm_object_t object; + + /* + * Sanity check on the VM map entry. + */ + if (entry->is_sub_map) { + vm_map_t submap; + vm_map_offset_t submap_start; + vm_map_offset_t submap_end; + vm_map_entry_t submap_entry; + + submap = VME_SUBMAP(entry); + submap_start = VME_OFFSET(entry); + submap_end = submap_start + (entry->vme_end - + entry->vme_start); + + vm_map_lock_read(submap); + + if (! 
vm_map_range_check(submap, + submap_start, + submap_end, + &submap_entry)) { + vm_map_unlock_read(submap); + vm_map_unlock_read(map); + return KERN_INVALID_ADDRESS; + } + + object = VME_OBJECT(submap_entry); + if (submap_entry->is_sub_map || + object == VM_OBJECT_NULL || + !object->internal) { + vm_map_unlock_read(submap); + continue; + } + + vm_object_pageout(object); + + vm_map_unlock_read(submap); + submap = VM_MAP_NULL; + submap_entry = VM_MAP_ENTRY_NULL; + continue; + } + + object = VME_OBJECT(entry); + if (entry->is_sub_map || + object == VM_OBJECT_NULL || + !object->internal) { + continue; + } + + vm_object_pageout(object); + } + + vm_map_unlock_read(map); + return KERN_SUCCESS; +} +#endif /* MACH_ASSERT */ + + +/* * Routine: vm_map_entry_insert * * Description: This routine inserts a new vm_entry in a locked map. @@ -9786,23 +13187,45 @@ vm_map_entry_insert( vm_behavior_t behavior, vm_inherit_t inheritance, unsigned wired_count, - boolean_t no_cache) + boolean_t no_cache, + boolean_t permanent, + unsigned int superpage_size, + boolean_t clear_map_aligned, + boolean_t is_submap) { vm_map_entry_t new_entry; assert(insp_entry != (vm_map_entry_t)0); - new_entry = vm_map_entry_create(map); + new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable); + + if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) { + new_entry->map_aligned = TRUE; + } else { + new_entry->map_aligned = FALSE; + } + if (clear_map_aligned && + (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) || + ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) { + new_entry->map_aligned = FALSE; + } new_entry->vme_start = start; new_entry->vme_end = end; assert(page_aligned(new_entry->vme_start)); assert(page_aligned(new_entry->vme_end)); + if (new_entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, + VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, + VM_MAP_PAGE_MASK(map))); + } + assert(new_entry->vme_start < new_entry->vme_end); - new_entry->object.vm_object = object; - new_entry->offset = offset; + VME_OBJECT_SET(new_entry, object); + VME_OFFSET_SET(new_entry, offset); new_entry->is_shared = is_shared; - new_entry->is_sub_map = FALSE; + new_entry->is_sub_map = is_submap; new_entry->needs_copy = needs_copy; new_entry->in_transition = in_transition; new_entry->needs_wakeup = FALSE; @@ -9812,15 +13235,37 @@ vm_map_entry_insert( new_entry->behavior = behavior; new_entry->wired_count = wired_count; new_entry->user_wired_count = 0; - new_entry->use_pmap = FALSE; - new_entry->alias = 0; + if (is_submap) { + /* + * submap: "use_pmap" means "nested". + * default: false. + */ + new_entry->use_pmap = FALSE; + } else { + /* + * object: "use_pmap" means "use pmap accounting" for footprint. + * default: true. + */ + new_entry->use_pmap = TRUE; + } + VME_ALIAS_SET(new_entry, 0); + new_entry->zero_wired_pages = FALSE; new_entry->no_cache = no_cache; + new_entry->permanent = permanent; + if (superpage_size) + new_entry->superpage_size = TRUE; + else + new_entry->superpage_size = FALSE; + new_entry->used_for_jit = FALSE; + new_entry->iokit_acct = FALSE; + new_entry->vme_resilient_codesign = FALSE; + new_entry->vme_resilient_media = FALSE; /* * Insert the new entry into the list. 
*/ - vm_map_entry_link(map, insp_entry, new_entry); + vm_map_store_entry_link(map, insp_entry, new_entry); map->size += end - start; /* @@ -9864,7 +13309,8 @@ vm_map_remap_extract( boolean_t new_entry_needs_copy; assert(map != VM_MAP_NULL); - assert(size != 0 && size == vm_map_round_page(size)); + assert(size != 0); + assert(size == vm_map_round_page(size, PAGE_MASK)); assert(inheritance == VM_INHERIT_NONE || inheritance == VM_INHERIT_COPY || inheritance == VM_INHERIT_SHARE); @@ -9872,8 +13318,9 @@ vm_map_remap_extract( /* * Compute start and end of region. */ - src_start = vm_map_trunc_page(addr); - src_end = vm_map_round_page(src_start + size); + src_start = vm_map_trunc_page(addr, PAGE_MASK); + src_end = vm_map_round_page(src_start + size, PAGE_MASK); + /* * Initialize map_header. @@ -9882,6 +13329,9 @@ vm_map_remap_extract( map_header->links.prev = (struct vm_map_entry *)&map_header->links; map_header->nentries = 0; map_header->entries_pageable = pageable; + map_header->page_shift = PAGE_SHIFT; + + vm_map_store_init( map_header ); *cur_protection = VM_PROT_ALL; *max_protection = VM_PROT_ALL; @@ -9912,11 +13362,6 @@ vm_map_remap_extract( break; } - if(src_entry->is_sub_map) { - result = KERN_INVALID_ADDRESS; - break; - } - tmp_size = size - mapped_size; if (src_end > src_entry->vme_end) tmp_size -= (src_end - src_entry->vme_end); @@ -9925,15 +13370,33 @@ vm_map_remap_extract( src_entry->vme_start); if(src_entry->is_sub_map) { - vm_map_reference(src_entry->object.sub_map); + vm_map_reference(VME_SUBMAP(src_entry)); object = VM_OBJECT_NULL; } else { - object = src_entry->object.vm_object; + object = VME_OBJECT(src_entry); + if (src_entry->iokit_acct) { + /* + * This entry uses "IOKit accounting". + */ + } else if (object != VM_OBJECT_NULL && + object->purgable != VM_PURGABLE_DENY) { + /* + * Purgeable objects have their own accounting: + * no pmap accounting for them. + */ + assert(!src_entry->use_pmap); + } else { + /* + * Not IOKit or purgeable: + * must be accounted by pmap stats. 
+ */ + assert(src_entry->use_pmap); + } if (object == VM_OBJECT_NULL) { object = vm_object_allocate(entry_size); - src_entry->offset = 0; - src_entry->object.vm_object = object; + VME_OFFSET_SET(src_entry, 0); + VME_OBJECT_SET(src_entry, object); } else if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { /* @@ -9945,11 +13408,9 @@ vm_map_remap_extract( } else if (src_entry->needs_copy || object->shadowed || (object->internal && !object->true_share && !src_entry->is_shared && - object->size > entry_size)) { + object->vo_size > entry_size)) { - vm_object_shadow(&src_entry->object.vm_object, - &src_entry->offset, - entry_size); + VME_OBJECT_SHADOW(src_entry, entry_size); if (!src_entry->needs_copy && (src_entry->protection & VM_PROT_WRITE)) { @@ -9957,13 +13418,15 @@ vm_map_remap_extract( prot = src_entry->protection & ~VM_PROT_WRITE; - if (override_nx(map, src_entry->alias) && prot) + if (override_nx(map, + VME_ALIAS(src_entry)) + && prot) prot |= VM_PROT_EXECUTE; - if(map->mapped) { + if(map->mapped_in_other_pmaps) { vm_object_pmap_protect( - src_entry->object.vm_object, - src_entry->offset, + VME_OBJECT(src_entry), + VME_OFFSET(src_entry), entry_size, PMAP_NULL, src_entry->vme_start, @@ -9976,7 +13439,7 @@ vm_map_remap_extract( } } - object = src_entry->object.vm_object; + object = VME_OBJECT(src_entry); src_entry->needs_copy = FALSE; } @@ -9991,22 +13454,37 @@ vm_map_remap_extract( vm_object_unlock(object); } - offset = src_entry->offset + (src_start - src_entry->vme_start); + offset = (VME_OFFSET(src_entry) + + (src_start - src_entry->vme_start)); - new_entry = _vm_map_entry_create(map_header); + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy(new_entry, src_entry); - new_entry->use_pmap = FALSE; /* clr address space specifics */ + if (new_entry->is_sub_map) { + /* clr address space specifics */ + new_entry->use_pmap = FALSE; + } + + new_entry->map_aligned = FALSE; new_entry->vme_start = map_address; new_entry->vme_end = map_address + tmp_size; + assert(new_entry->vme_start < new_entry->vme_end); new_entry->inheritance = inheritance; - new_entry->offset = offset; + VME_OFFSET_SET(new_entry, offset); /* * The new region has to be copied now if required. */ RestartCopy: if (!copy) { + /* + * Cannot allow an entry describing a JIT + * region to be shared across address spaces. + */ + if (src_entry->used_for_jit == TRUE) { + result = KERN_INVALID_ARGUMENT; + break; + } src_entry->is_shared = TRUE; new_entry->is_shared = TRUE; if (!(new_entry->is_sub_map)) @@ -10014,11 +13492,12 @@ vm_map_remap_extract( } else if (src_entry->is_sub_map) { /* make this a COW sub_map if not already */ + assert(new_entry->wired_count == 0); new_entry->needs_copy = TRUE; object = VM_OBJECT_NULL; } else if (src_entry->wired_count == 0 && - vm_object_copy_quickly(&new_entry->object.vm_object, - new_entry->offset, + vm_object_copy_quickly(&VME_OBJECT(new_entry), + VME_OFFSET(new_entry), (new_entry->vme_end - new_entry->vme_start), &src_needs_copy, @@ -10035,18 +13514,21 @@ vm_map_remap_extract( prot = src_entry->protection & ~VM_PROT_WRITE; - if (override_nx(map, src_entry->alias) && prot) + if (override_nx(map, + VME_ALIAS(src_entry)) + && prot) prot |= VM_PROT_EXECUTE; vm_object_pmap_protect(object, offset, entry_size, ((src_entry->is_shared - || map->mapped) ? + || map->mapped_in_other_pmaps) ? 
PMAP_NULL : map->pmap), src_entry->vme_start, prot); + assert(src_entry->wired_count == 0); src_entry->needs_copy = TRUE; } /* @@ -10077,18 +13559,24 @@ vm_map_remap_extract( offset, entry_size, THREAD_UNINT, - &new_entry->object.vm_object); + &VME_OBJECT(new_entry)); - new_entry->offset = 0; + VME_OFFSET_SET(new_entry, 0); new_entry->needs_copy = FALSE; } else { + vm_object_offset_t new_offset; + + new_offset = VME_OFFSET(new_entry); result = vm_object_copy_strategically( object, offset, entry_size, - &new_entry->object.vm_object, - &new_entry->offset, + &VME_OBJECT(new_entry), + &new_offset, &new_entry_needs_copy); + if (new_offset != VME_OFFSET(new_entry)) { + VME_OFFSET_SET(new_entry, new_offset); + } new_entry->needs_copy = new_entry_needs_copy; } @@ -10117,8 +13605,7 @@ vm_map_remap_extract( * Retry the lookup and verify that the * same object/offset are still present. */ - vm_object_deallocate(new_entry-> - object.vm_object); + vm_object_deallocate(VME_OBJECT(new_entry)); _vm_map_entry_dispose(map_header, new_entry); if (result == KERN_MEMORY_RESTART_COPY) result = KERN_SUCCESS; @@ -10131,12 +13618,14 @@ vm_map_remap_extract( } } - _vm_map_entry_link(map_header, + _vm_map_store_entry_link(map_header, map_header->links.prev, new_entry); - *cur_protection &= src_entry->protection; - *max_protection &= src_entry->max_protection; - + /*Protections for submap mapping are irrelevant here*/ + if( !src_entry->is_sub_map ) { + *cur_protection &= src_entry->protection; + *max_protection &= src_entry->max_protection; + } map_address += tmp_size; mapped_size += tmp_size; src_start += tmp_size; @@ -10152,8 +13641,12 @@ vm_map_remap_extract( src_entry != (struct vm_map_entry *)&map_header->links; src_entry = new_entry) { new_entry = src_entry->vme_next; - _vm_map_entry_unlink(map_header, src_entry); - vm_object_deallocate(src_entry->object.vm_object); + _vm_map_store_entry_unlink(map_header, src_entry); + if (src_entry->is_sub_map) { + vm_map_deallocate(VME_SUBMAP(src_entry)); + } else { + vm_object_deallocate(VME_OBJECT(src_entry)); + } _vm_map_entry_dispose(map_header, src_entry); } } @@ -10177,7 +13670,7 @@ vm_map_remap( vm_map_address_t *address, vm_map_size_t size, vm_map_offset_t mask, - boolean_t anywhere, + int flags, vm_map_t src_map, vm_map_offset_t memory_address, boolean_t copy, @@ -10190,6 +13683,7 @@ vm_map_remap( vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL; vm_map_entry_t new_entry; struct vm_map_header map_header; + vm_map_offset_t offset_in_mapping; if (target_map == VM_MAP_NULL) return KERN_INVALID_ARGUMENT; @@ -10205,15 +13699,39 @@ vm_map_remap( return KERN_INVALID_ARGUMENT; } - size = vm_map_round_page(size); + /* + * If the user is requesting that we return the address of the + * first byte of the data (rather than the base of the page), + * then we use different rounding semantics: specifically, + * we assume that (memory_address, size) describes a region + * all of whose pages we must cover, rather than a base to be truncated + * down and a size to be added to that base. So we figure out + * the highest page that the requested region includes and make + * sure that the size will cover it. + * + * The key example we're worried about it is of the form: + * + * memory_address = 0x1ff0, size = 0x20 + * + * With the old semantics, we round down the memory_address to 0x1000 + * and round up the size to 0x1000, resulting in our covering *only* + * page 0x1000. With the new semantics, we'd realize that the region covers + * 0x1ff0-0x2010, and compute a size of 0x2000. 
Thus, we cover both page + * 0x1000 and page 0x2000 in the region we remap. + */ + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK); + size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK); + } else { + size = vm_map_round_page(size, PAGE_MASK); + } result = vm_map_remap_extract(src_map, memory_address, size, copy, &map_header, cur_protection, max_protection, inheritance, - target_map->hdr. - entries_pageable); + target_map->hdr.entries_pageable); if (result != KERN_SUCCESS) { return result; @@ -10223,31 +13741,51 @@ vm_map_remap( * Allocate/check a range of free virtual address * space for the target */ - *address = vm_map_trunc_page(*address); + *address = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); vm_map_lock(target_map); result = vm_map_remap_range_allocate(target_map, address, size, - mask, anywhere, &insp_entry); + mask, flags, &insp_entry); for (entry = map_header.links.next; entry != (struct vm_map_entry *)&map_header.links; entry = new_entry) { new_entry = entry->vme_next; - _vm_map_entry_unlink(&map_header, entry); + _vm_map_store_entry_unlink(&map_header, entry); if (result == KERN_SUCCESS) { + if (flags & VM_FLAGS_RESILIENT_CODESIGN) { + /* no codesigning -> read-only access */ + assert(!entry->used_for_jit); + entry->max_protection = VM_PROT_READ; + entry->protection = VM_PROT_READ; + entry->vme_resilient_codesign = TRUE; + } entry->vme_start += *address; entry->vme_end += *address; - vm_map_entry_link(target_map, insp_entry, entry); + assert(!entry->map_aligned); + vm_map_store_entry_link(target_map, insp_entry, entry); insp_entry = entry; } else { if (!entry->is_sub_map) { - vm_object_deallocate(entry->object.vm_object); + vm_object_deallocate(VME_OBJECT(entry)); } else { - vm_map_deallocate(entry->object.sub_map); + vm_map_deallocate(VME_SUBMAP(entry)); } _vm_map_entry_dispose(&map_header, entry); } } + if (flags & VM_FLAGS_RESILIENT_CODESIGN) { + *cur_protection = VM_PROT_READ; + *max_protection = VM_PROT_READ; + } + + if( target_map->disable_vmentry_reuse == TRUE) { + if( target_map->highest_entry_end < insp_entry->vme_end ){ + target_map->highest_entry_end = insp_entry->vme_end; + } + } + if (result == KERN_SUCCESS) { target_map->size += size; SAVE_HINT_MAP_WRITE(target_map, insp_entry); @@ -10256,7 +13794,17 @@ vm_map_remap( if (result == KERN_SUCCESS && target_map->wiring_required) result = vm_map_wire(target_map, *address, - *address + size, *cur_protection, TRUE); + *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK), + TRUE); + + /* + * If requested, return the address of the data pointed to by the + * request, rather than the base of the resulting page. + */ + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + *address += offset_in_mapping; + } + return result; } @@ -10277,18 +13825,20 @@ vm_map_remap_range_allocate( vm_map_address_t *address, /* IN/OUT */ vm_map_size_t size, vm_map_offset_t mask, - boolean_t anywhere, + int flags, vm_map_entry_t *map_entry) /* OUT */ { - register vm_map_entry_t entry; - register vm_map_offset_t start; - register vm_map_offset_t end; + vm_map_entry_t entry; + vm_map_offset_t start; + vm_map_offset_t end; + kern_return_t kr; + vm_map_entry_t hole_entry; StartAgain: ; start = *address; - if (anywhere) + if (flags & VM_FLAGS_ANYWHERE) { /* * Calculate the first possible address. @@ -10305,15 +13855,57 @@ StartAgain: ; * address, we have to start after it. 
*/ - assert(first_free_is_valid(map)); - if (start == map->min_offset) { - if ((entry = map->first_free) != vm_map_to_entry(map)) - start = entry->vme_end; + if( map->disable_vmentry_reuse == TRUE) { + VM_MAP_HIGHEST_ENTRY(map, entry, start); } else { - vm_map_entry_t tmp_entry; - if (vm_map_lookup_entry(map, start, &tmp_entry)) - start = tmp_entry->vme_end; - entry = tmp_entry; + + if (map->holelistenabled) { + hole_entry = (vm_map_entry_t)map->holes_list; + + if (hole_entry == NULL) { + /* + * No more space in the map? + */ + return(KERN_NO_SPACE); + } else { + + boolean_t found_hole = FALSE; + + do { + if (hole_entry->vme_start >= start) { + start = hole_entry->vme_start; + found_hole = TRUE; + break; + } + + if (hole_entry->vme_end > start) { + found_hole = TRUE; + break; + } + hole_entry = hole_entry->vme_next; + + } while (hole_entry != (vm_map_entry_t) map->holes_list); + + if (found_hole == FALSE) { + return (KERN_NO_SPACE); + } + + entry = hole_entry; + } + } else { + assert(first_free_is_valid(map)); + if (start == map->min_offset) { + if ((entry = map->first_free) != vm_map_to_entry(map)) + start = entry->vme_end; + } else { + vm_map_entry_t tmp_entry; + if (vm_map_lookup_entry(map, start, &tmp_entry)) + start = tmp_entry->vme_end; + entry = tmp_entry; + } + } + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); } /* @@ -10332,6 +13924,8 @@ StartAgain: ; */ end = ((start + mask) & ~mask); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (end < start) return(KERN_NO_SPACE); start = end; @@ -10352,30 +13946,56 @@ StartAgain: ; return(KERN_NO_SPACE); } - /* - * If there are no more entries, we must win. - */ - next = entry->vme_next; - if (next == vm_map_to_entry(map)) - break; - /* - * If there is another entry, it must be - * after the end of the potential new region. - */ + if (map->holelistenabled) { + if (entry->vme_end >= end) + break; + } else { + /* + * If there are no more entries, we must win. + * + * OR + * + * If there is another entry, it must be + * after the end of the potential new region. + */ - if (next->vme_start >= end) - break; + if (next == vm_map_to_entry(map)) + break; + + if (next->vme_start >= end) + break; + } /* * Didn't fit -- move to the next entry. */ entry = next; - start = entry->vme_end; + + if (map->holelistenabled) { + if (entry == (vm_map_entry_t) map->holes_list) { + /* + * Wrapped around + */ + return(KERN_NO_SPACE); + } + start = entry->vme_start; + } else { + start = entry->vme_end; + } + } + + if (map->holelistenabled) { + + if (vm_map_lookup_entry(map, entry->vme_start, &entry)) { + panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start); + } } + *address = start; + } else { vm_map_entry_t temp_entry; @@ -10401,6 +14021,40 @@ StartAgain: ; return(KERN_INVALID_ADDRESS); } + /* + * If we're asked to overwrite whatever was mapped in that + * range, first deallocate that range. + */ + if (flags & VM_FLAGS_OVERWRITE) { + vm_map_t zap_map; + + /* + * We use a "zap_map" to avoid having to unlock + * the "map" in vm_map_delete(), which would compromise + * the atomicity of the "deallocate" and then "remap" + * combination. 
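 *
 * From the caller's perspective this is what makes a fixed-address
 * remap over an existing mapping atomic; a hedged user-space sketch
 * (hypothetical "dst"/"src"/"size"; assumes <mach/mach.h>):
 *
 *	mach_vm_address_t target = dst;
 *	vm_prot_t cur, max;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_remap(mach_task_self(), &target, size, 0,
 *			   VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
 *			   mach_task_self(), src, FALSE,
 *			   &cur, &max, VM_INHERIT_DEFAULT);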
+ */ + zap_map = vm_map_create(PMAP_NULL, + start, + end, + map->hdr.entries_pageable); + if (zap_map == VM_MAP_NULL) { + return KERN_RESOURCE_SHORTAGE; + } + vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map)); + vm_map_disable_hole_optimization(zap_map); + + kr = vm_map_delete(map, start, end, + (VM_MAP_REMOVE_SAVE_ENTRIES | + VM_MAP_REMOVE_NO_MAP_ALIGN), + zap_map); + if (kr == KERN_SUCCESS) { + vm_map_destroy(zap_map, + VM_MAP_REMOVE_NO_PMAP_CLEANUP); + zap_map = VM_MAP_NULL; + } + } + /* * ... the starting address isn't allocated */ @@ -10607,6 +14261,7 @@ vm_map_purgable_control( vm_map_entry_t entry; vm_object_t object; kern_return_t kr; + boolean_t was_nonvolatile; /* * Vet all the input parameters and current type and state of the @@ -10616,22 +14271,28 @@ vm_map_purgable_control( return(KERN_INVALID_ARGUMENT); if (control != VM_PURGABLE_SET_STATE && - control != VM_PURGABLE_GET_STATE) + control != VM_PURGABLE_GET_STATE && + control != VM_PURGABLE_PURGE_ALL) return(KERN_INVALID_ARGUMENT); + if (control == VM_PURGABLE_PURGE_ALL) { + vm_purgeable_object_purge_all(); + return KERN_SUCCESS; + } + if (control == VM_PURGABLE_SET_STATE && - (((*state & ~(VM_PURGABLE_STATE_MASK|VM_VOLATILE_ORDER_MASK|VM_PURGABLE_ORDERING_MASK|VM_PURGABLE_BEHAVIOR_MASK|VM_VOLATILE_GROUP_MASK)) != 0) || + (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) return(KERN_INVALID_ARGUMENT); - vm_map_lock(map); + vm_map_lock_read(map); if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) { /* * Must pass a valid non-submap address. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return(KERN_INVALID_ADDRESS); } @@ -10639,140 +14300,244 @@ vm_map_purgable_control( /* * Can't apply purgable controls to something you can't write. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return(KERN_PROTECTION_FAILURE); } - object = entry->object.vm_object; - if (object == VM_OBJECT_NULL) { + object = VME_OBJECT(entry); + if (object == VM_OBJECT_NULL || + object->purgable == VM_PURGABLE_DENY) { /* - * Object must already be present or it can't be purgable. + * Object must already be present and be purgeable. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return KERN_INVALID_ARGUMENT; } vm_object_lock(object); - if (entry->offset != 0 || - entry->vme_end - entry->vme_start != object->size) { +#if 00 + if (VME_OFFSET(entry) != 0 || + entry->vme_end - entry->vme_start != object->vo_size) { /* * Can only apply purgable controls to the whole (existing) * object at once. 
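 *
 * User space reaches this path through vm_purgable_control(); a
 * sketch that volatilizes and later re-pins a purgeable region
 * (hypothetical "addr", created with VM_FLAGS_PURGABLE; assumes
 * <mach/mach.h>):
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kern_return_t kr;
 *
 *	kr = vm_purgable_control(mach_task_self(), (vm_address_t)addr,
 *				 VM_PURGABLE_SET_STATE, &state);
 *	...
 *	state = VM_PURGABLE_NONVOLATILE;
 *	kr = vm_purgable_control(mach_task_self(), (vm_address_t)addr,
 *				 VM_PURGABLE_SET_STATE, &state);
 *	if (kr == KERN_SUCCESS &&
 *	    (state & VM_PURGABLE_STATE_MASK) == VM_PURGABLE_EMPTY)
 *		... contents were purged and must be regenerated ...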
*/ - vm_map_unlock(map); + vm_map_unlock_read(map); vm_object_unlock(object); return KERN_INVALID_ARGUMENT; } - - vm_map_unlock(map); +#endif + + assert(!entry->is_sub_map); + assert(!entry->use_pmap); /* purgeable has its own accounting */ + + vm_map_unlock_read(map); + + was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE); kr = vm_object_purgable_control(object, control, state); + if (was_nonvolatile && + object->purgable != VM_PURGABLE_NONVOLATILE && + map->pmap == kernel_pmap) { +#if DEBUG + object->vo_purgeable_volatilizer = kernel_task; +#endif /* DEBUG */ + } + vm_object_unlock(object); return kr; } kern_return_t -vm_map_page_info( +vm_map_page_query_internal( vm_map_t target_map, vm_map_offset_t offset, int *disposition, int *ref_count) { - vm_map_entry_t map_entry; - vm_object_t object; - vm_page_t m; - kern_return_t kr; - kern_return_t retval = KERN_SUCCESS; - boolean_t top_object = TRUE; - - *disposition = 0; - *ref_count = 0; + kern_return_t kr; + vm_page_info_basic_data_t info; + mach_msg_type_number_t count; + + count = VM_PAGE_INFO_BASIC_COUNT; + kr = vm_map_page_info(target_map, + offset, + VM_PAGE_INFO_BASIC, + (vm_page_info_t) &info, + &count); + if (kr == KERN_SUCCESS) { + *disposition = info.disposition; + *ref_count = info.ref_count; + } else { + *disposition = 0; + *ref_count = 0; + } - vm_map_lock_read(target_map); + return kr; +} + +kern_return_t +vm_map_page_info( + vm_map_t map, + vm_map_offset_t offset, + vm_page_info_flavor_t flavor, + vm_page_info_t info, + mach_msg_type_number_t *count) +{ + vm_map_entry_t map_entry; + vm_object_t object; + vm_page_t m; + kern_return_t kr; + kern_return_t retval = KERN_SUCCESS; + boolean_t top_object; + int disposition; + int ref_count; + vm_page_info_basic_t basic_info; + int depth; + vm_map_offset_t offset_in_page; -restart_page_query: - if (!vm_map_lookup_entry(target_map, offset, &map_entry)) { - vm_map_unlock_read(target_map); - return KERN_FAILURE; + switch (flavor) { + case VM_PAGE_INFO_BASIC: + if (*count != VM_PAGE_INFO_BASIC_COUNT) { + /* + * The "vm_page_info_basic_data" structure was not + * properly padded, so allow the size to be off by + * one to maintain backwards binary compatibility... + */ + if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) + return KERN_INVALID_ARGUMENT; + } + break; + default: + return KERN_INVALID_ARGUMENT; } - offset -= map_entry->vme_start; /* adjust to offset within entry */ - offset += map_entry->offset; /* adjust to target object offset */ - if (map_entry->object.vm_object != VM_OBJECT_NULL) { - if (!map_entry->is_sub_map) { - object = map_entry->object.vm_object; - } else { - vm_map_t sub_map; + disposition = 0; + ref_count = 0; + top_object = TRUE; + depth = 0; - sub_map = map_entry->object.sub_map; - vm_map_lock_read(sub_map); - vm_map_unlock_read(target_map); + retval = KERN_SUCCESS; + offset_in_page = offset & PAGE_MASK; + offset = vm_map_trunc_page(offset, PAGE_MASK); - target_map = sub_map; - goto restart_page_query; - } - } else { - vm_map_unlock_read(target_map); - return KERN_SUCCESS; + vm_map_lock_read(map); + + /* + * First, find the map entry covering "offset", going down + * submaps if necessary. 
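 *
 * The wrapper above is what backs the user-visible
 * mach_vm_page_query() call; a minimal sketch (hypothetical "addr";
 * assumes <mach/mach.h>):
 *
 *	integer_t disposition = 0, ref_count = 0;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_page_query(mach_task_self(),
 *				(mach_vm_offset_t)addr,
 *				&disposition, &ref_count);
 *	if (kr == KERN_SUCCESS &&
 *	    (disposition & VM_PAGE_QUERY_PAGE_PRESENT))
 *		... the page is resident ...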
+ */ + for (;;) { + if (!vm_map_lookup_entry(map, offset, &map_entry)) { + vm_map_unlock_read(map); + return KERN_INVALID_ADDRESS; + } + /* compute offset from this map entry's start */ + offset -= map_entry->vme_start; + /* compute offset into this map entry's object (or submap) */ + offset += VME_OFFSET(map_entry); + + if (map_entry->is_sub_map) { + vm_map_t sub_map; + + sub_map = VME_SUBMAP(map_entry); + vm_map_lock_read(sub_map); + vm_map_unlock_read(map); + + map = sub_map; + + ref_count = MAX(ref_count, map->ref_count); + continue; + } + break; + } + + object = VME_OBJECT(map_entry); + if (object == VM_OBJECT_NULL) { + /* no object -> no page */ + vm_map_unlock_read(map); + goto done; } + vm_object_lock(object); - vm_map_unlock_read(target_map); + vm_map_unlock_read(map); + + /* + * Go down the VM object shadow chain until we find the page + * we're looking for. + */ + for (;;) { + ref_count = MAX(ref_count, object->ref_count); - while (TRUE) { m = vm_page_lookup(object, offset); if (m != VM_PAGE_NULL) { - *disposition |= VM_PAGE_QUERY_PAGE_PRESENT; + disposition |= VM_PAGE_QUERY_PAGE_PRESENT; break; } else { #if MACH_PAGEMAP if (object->existence_map) { - if (vm_external_state_get(object->existence_map, offset) - == VM_EXTERNAL_STATE_EXISTS) { + if (vm_external_state_get(object->existence_map, + offset) == + VM_EXTERNAL_STATE_EXISTS) { /* * this page has been paged out */ - *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; break; } } else #endif - if (object->internal && - object->alive && - !object->terminating && - object->pager_ready) { - - memory_object_t pager; - - vm_object_paging_begin(object); - pager = object->pager; - vm_object_unlock(object); - - kr = memory_object_data_request( - pager, - offset + object->paging_offset, - 0, /* just poke the pager */ - VM_PROT_READ, - NULL); + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (VM_COMPRESSOR_PAGER_STATE_GET( + object, + offset) + == VM_EXTERNAL_STATE_EXISTS) { + /* the pager has that page */ + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + break; + } + } else { + memory_object_t pager; - vm_object_lock(object); - vm_object_paging_end(object); + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); - if (kr == KERN_SUCCESS) { /* - * the pager has this page + * Ask the default pager if + * it has this page. */ - *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; - break; + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); + + vm_object_lock(object); + vm_object_paging_end(object); + + if (kr == KERN_SUCCESS) { + /* the default pager has it */ + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + break; + } } } + if (object->shadow != VM_OBJECT_NULL) { vm_object_t shadow; - offset += object->shadow_offset; + offset += object->vo_shadow_offset; shadow = object->shadow; vm_object_lock(shadow); @@ -10780,12 +14545,13 @@ restart_page_query: object = shadow; top_object = FALSE; + depth++; } else { - if (!object->internal) - break; - - retval = KERN_FAILURE; - goto page_query_done; +// if (!object->internal) +// break; +// retval = KERN_FAILURE; +// goto done_with_object; + break; } } } @@ -10799,29 +14565,51 @@ restart_page_query: /* but this would under count as only faulted-in mappings would */ /* show up. 
*/ - *ref_count = object->ref_count; - if (top_object == TRUE && object->shadow) - *disposition |= VM_PAGE_QUERY_PAGE_COPIED; + disposition |= VM_PAGE_QUERY_PAGE_COPIED; + + if (! object->internal) + disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL; if (m == VM_PAGE_NULL) - goto page_query_done; + goto done_with_object; if (m->fictitious) { - *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; - goto page_query_done; + disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; + goto done_with_object; } if (m->dirty || pmap_is_modified(m->phys_page)) - *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; + disposition |= VM_PAGE_QUERY_PAGE_DIRTY; if (m->reference || pmap_is_referenced(m->phys_page)) - *disposition |= VM_PAGE_QUERY_PAGE_REF; + disposition |= VM_PAGE_QUERY_PAGE_REF; if (m->speculative) - *disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE; + disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE; -page_query_done: + if (m->cs_validated) + disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED; + if (m->cs_tainted) + disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED; + if (m->cs_nx) + disposition |= VM_PAGE_QUERY_PAGE_CS_NX; + +done_with_object: vm_object_unlock(object); +done: + + switch (flavor) { + case VM_PAGE_INFO_BASIC: + basic_info = (vm_page_info_basic_t) info; + basic_info->disposition = disposition; + basic_info->ref_count = ref_count; + basic_info->object_id = (vm_object_id_t) (uintptr_t) + VM_KERNEL_ADDRPERM(object); + basic_info->offset = + (memory_object_offset_t) offset + offset_in_page; + basic_info->depth = depth; + break; + } return retval; } @@ -10878,9 +14666,9 @@ vm_map_msync( vm_map_size_t amount_left; vm_object_offset_t offset; boolean_t do_sync_req; - boolean_t modifiable; boolean_t had_hole = FALSE; memory_object_t pager; + vm_map_offset_t pmap_offset; if ((sync_flags & VM_SYNC_ASYNCHRONOUS) && (sync_flags & VM_SYNC_SYNCHRONOUS)) @@ -10889,8 +14677,12 @@ vm_map_msync( /* * align address and size on page boundaries */ - size = vm_map_round_page(address + size) - vm_map_trunc_page(address); - address = vm_map_trunc_page(address); + size = (vm_map_round_page(address + size, + VM_MAP_PAGE_MASK(map)) - + vm_map_trunc_page(address, + VM_MAP_PAGE_MASK(map))); + address = vm_map_trunc_page(address, + VM_MAP_PAGE_MASK(map)); if (map == VM_MAP_NULL) return(KERN_INVALID_TASK); @@ -10907,7 +14699,8 @@ vm_map_msync( vm_map_lock(map); if (!vm_map_lookup_entry(map, - vm_map_trunc_page(address), &entry)) { + address, + &entry)) { vm_map_size_t skip; @@ -10947,6 +14740,7 @@ vm_map_msync( } offset = address - entry->vme_start; + pmap_offset = address; /* * do we have more to flush than is contained in this @@ -10965,8 +14759,8 @@ vm_map_msync( vm_map_t local_map; vm_map_offset_t local_offset; - local_map = entry->object.sub_map; - local_offset = entry->offset; + local_map = VME_SUBMAP(entry); + local_offset = VME_OFFSET(entry); vm_map_unlock(map); if (vm_map_msync( local_map, @@ -10977,7 +14771,7 @@ vm_map_msync( } continue; } - object = entry->object.vm_object; + object = VME_OBJECT(entry); /* * We can't sync this object if the object has not been @@ -10987,24 +14781,29 @@ vm_map_msync( vm_map_unlock(map); continue; } - offset += entry->offset; - modifiable = (entry->protection & VM_PROT_WRITE) - != VM_PROT_NONE; + offset += VME_OFFSET(entry); vm_object_lock(object); if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { - boolean_t kill_pages = 0; + int kill_pages = 0; + boolean_t reusable_pages = FALSE; if (sync_flags & VM_SYNC_KILLPAGES) { - if (object->ref_count == 1 && !entry->needs_copy && !object->shadow) + if 
(object->ref_count == 1 && !object->shadow) kill_pages = 1; else kill_pages = -1; } if (kill_pages != -1) - vm_object_deactivate_pages(object, offset, - (vm_object_size_t)flush_size, kill_pages); + vm_object_deactivate_pages( + object, + offset, + (vm_object_size_t) flush_size, + kill_pages, + reusable_pages, + map->pmap, + pmap_offset); vm_object_unlock(object); vm_map_unlock(map); continue; @@ -11032,15 +14831,14 @@ vm_map_msync( offset, flush_size, sync_flags & VM_SYNC_INVALIDATE, - (modifiable && - (sync_flags & VM_SYNC_SYNCHRONOUS || - sync_flags & VM_SYNC_ASYNCHRONOUS)), + ((sync_flags & VM_SYNC_SYNCHRONOUS) || + (sync_flags & VM_SYNC_ASYNCHRONOUS)), sync_flags & VM_SYNC_SYNCHRONOUS); /* * only send a m_o_s if we returned pages or if the entry * is writable (ie dirty pages may have already been sent back) */ - if (!do_sync_req && !modifiable) { + if (!do_sync_req) { if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) { /* * clear out the clustering and read-ahead hints @@ -11079,6 +14877,8 @@ vm_map_msync( if (pager == MEMORY_OBJECT_NULL) { vm_object_unlock(object); vm_object_deallocate(object); + msync_req_free(new_msr); + new_msr = NULL; continue; } @@ -11175,7 +14975,7 @@ convert_port_entry_to_map( == IKOT_NAMED_ENTRY)) { named_entry = (vm_named_entry_t)port->ip_kobject; - if (!(mutex_try(&(named_entry)->Lock))) { + if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { ip_unlock(port); try_failed_count++; @@ -11183,7 +14983,7 @@ convert_port_entry_to_map( continue; } named_entry->ref_count++; - mutex_unlock(&(named_entry)->Lock); + lck_mtx_unlock(&(named_entry)->Lock); ip_unlock(port); if ((named_entry->is_sub_map) && (named_entry->protection @@ -11222,45 +15022,35 @@ vm_object_t convert_port_entry_to_object( ipc_port_t port) { - vm_object_t object; + vm_object_t object = VM_OBJECT_NULL; vm_named_entry_t named_entry; - uint32_t try_failed_count = 0; - - if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { - while(TRUE) { - ip_lock(port); - if(ip_active(port) && (ip_kotype(port) - == IKOT_NAMED_ENTRY)) { - named_entry = - (vm_named_entry_t)port->ip_kobject; - if (!(mutex_try(&(named_entry)->Lock))) { - ip_unlock(port); - - try_failed_count++; - mutex_pause(try_failed_count); - continue; - } - named_entry->ref_count++; - mutex_unlock(&(named_entry)->Lock); + uint32_t try_failed_count = 0; + + if (IP_VALID(port) && + (ip_kotype(port) == IKOT_NAMED_ENTRY)) { + try_again: + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_NAMED_ENTRY)) { + named_entry = (vm_named_entry_t)port->ip_kobject; + if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { ip_unlock(port); - if ((!named_entry->is_sub_map) && - (!named_entry->is_pager) && - (named_entry->protection - & VM_PROT_WRITE)) { - object = named_entry->backing.object; - } else { - mach_destroy_memory_entry(port); - return (vm_object_t)NULL; - } - vm_object_reference(named_entry->backing.object); - mach_destroy_memory_entry(port); - break; + try_failed_count++; + mutex_pause(try_failed_count); + goto try_again; } - else - return (vm_object_t)NULL; + named_entry->ref_count++; + lck_mtx_unlock(&(named_entry)->Lock); + ip_unlock(port); + if (!(named_entry->is_sub_map) && + !(named_entry->is_pager) && + !(named_entry->is_copy) && + (named_entry->protection & VM_PROT_WRITE)) { + object = named_entry->backing.object; + vm_object_reference(object); + } + mach_destroy_memory_entry(port); } - } else { - return (vm_object_t)NULL; } return object; @@ -11292,14 +15082,14 @@ vm_map_reference( if (map == VM_MAP_NULL) 
return;
- mutex_lock(&map->s_lock);
+ lck_mtx_lock(&map->s_lock);
#if TASK_SWAPPER
assert(map->res_count > 0);
assert(map->ref_count >= map->res_count);
map->res_count++;
#endif
map->ref_count++;
- mutex_unlock(&map->s_lock);
+ lck_mtx_unlock(&map->s_lock);
}
/*
@@ -11318,15 +15108,15 @@ vm_map_deallocate(
if (map == VM_MAP_NULL)
return;
- mutex_lock(&map->s_lock);
+ lck_mtx_lock(&map->s_lock);
ref = --map->ref_count;
if (ref > 0) {
vm_map_res_deallocate(map);
- mutex_unlock(&map->s_lock);
+ lck_mtx_unlock(&map->s_lock);
return;
}
assert(map->ref_count == 0);
- mutex_unlock(&map->s_lock);
+ lck_mtx_unlock(&map->s_lock);
#if TASK_SWAPPER
/*
@@ -11352,6 +15142,15 @@ vm_map_disable_NX(vm_map_t map)
pmap_disable_NX(map->pmap);
}
+void
+vm_map_disallow_data_exec(vm_map_t map)
+{
+ if (map == NULL)
+ return;
+
+ map->map_disallow_data_exec = TRUE;
+}
+
/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
* more descriptive.
*/
@@ -11369,11 +15168,17 @@ vm_map_set_64bit(vm_map_t map)
}
vm_map_offset_t
-vm_compute_max_offset(unsigned is64)
+vm_compute_max_offset(boolean_t is64)
{
return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
}
+uint64_t
+vm_map_get_max_aslr_slide_pages(vm_map_t map)
+{
+ return (1 << (vm_map_is_64bit(map) ? 16 : 8));
+}
+
boolean_t
vm_map_is_64bit(
vm_map_t map)
@@ -11382,8 +15187,9 @@ vm_map_is_64bit(
}
boolean_t
-vm_map_has_4GB_pagezero(
- vm_map_t map)
+vm_map_has_hard_pagezero(
+ vm_map_t map,
+ vm_map_offset_t pagezero_size)
{
/*
* XXX FBDP
@@ -11395,21 +15201,41 @@ vm_map_has_4GB_pagezero(
* VM map is being torn down, and when a new map is created via
* load_machfile()/execve().
*/
- return (map->min_offset >= 0x100000000ULL);
+ return (map->min_offset >= pagezero_size);
}
-void
-vm_map_set_4GB_pagezero(vm_map_t map)
+/*
+ * Raise a VM map's maximum offset.
+ */
+kern_return_t
+vm_map_raise_max_offset(
+ vm_map_t map,
+ vm_map_offset_t new_max_offset)
{
- pmap_set_4GB_pagezero(map->pmap);
-}
+ kern_return_t ret;
-void
-vm_map_clear_4GB_pagezero(vm_map_t map)
-{
- pmap_clear_4GB_pagezero(map->pmap);
+ vm_map_lock(map);
+ ret = KERN_INVALID_ADDRESS;
+
+ if (new_max_offset >= map->max_offset) {
+ if (!vm_map_is_64bit(map)) {
+ if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
+ map->max_offset = new_max_offset;
+ ret = KERN_SUCCESS;
+ }
+ } else {
+ if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
+ map->max_offset = new_max_offset;
+ ret = KERN_SUCCESS;
+ }
+ }
+ }
+
+ vm_map_unlock(map);
+ return ret;
}
+
/*
* Raise a VM map's minimum offset.
* To strictly enforce "page zero" reservation.
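The hunk above gives the exec path a per-map bound on ASLR: vm_map_get_max_aslr_slide_pages() allows 2^16 pages of slide for 64-bit maps and 2^8 pages for 32-bit ones. A minimal sketch of how a caller could turn that bound into a byte offset, assuming the kernel's random() PRNG and the VM_MAP_PAGE_SHIFT() macro used elsewhere in this diff; pick_aslr_slide() is a hypothetical helper, not a function this diff adds:

static vm_map_offset_t
pick_aslr_slide(vm_map_t map)
{
	uint64_t	max_pages;
	vm_map_offset_t	slide;

	/* 2^16 pages of slide for 64-bit maps, 2^8 for 32-bit ones */
	max_pages = vm_map_get_max_aslr_slide_pages(map);

	/* pick a uniform page index below the bound */
	slide = (vm_map_offset_t)(random() % max_pages);

	/* scale the page count to a byte offset using the map's page size */
	slide <<= VM_MAP_PAGE_SHIFT(map);

	return slide;
}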
@@ -11421,7 +15247,8 @@ vm_map_raise_min_offset(
{
vm_map_entry_t first_entry;
- new_min_offset = vm_map_round_page(new_min_offset);
+ new_min_offset = vm_map_round_page(new_min_offset,
+ VM_MAP_PAGE_MASK(map));
vm_map_lock(map);
@@ -11434,6 +15261,11 @@ vm_map_raise_min_offset(
vm_map_unlock(map);
return KERN_INVALID_ADDRESS;
}
+ if (new_min_offset >= map->max_offset) {
+ /* can't go beyond the end of the address space */
+ vm_map_unlock(map);
+ return KERN_INVALID_ADDRESS;
+ }
first_entry = vm_map_first_entry(map);
if (first_entry != vm_map_to_entry(map) &&
@@ -11448,6 +15280,10 @@ vm_map_raise_min_offset(
map->min_offset = new_min_offset;
+ assert(map->holes_list);
+ map->holes_list->start = new_min_offset;
+ assert(new_min_offset < map->holes_list->end);
+
vm_map_unlock(map);
return KERN_SUCCESS;
@@ -11466,3 +15302,790 @@ vm_map_set_user_wire_limit(vm_map_t map,
{
map->user_wire_limit = limit;
}
+
+
+void vm_map_switch_protect(vm_map_t map,
+ boolean_t val)
+{
+ vm_map_lock(map);
+ map->switch_protect=val;
+ vm_map_unlock(map);
+}
+
+/*
+ * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
+ * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
+ * bump both counters.
+ */
+void
+vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
+{
+ pmap_t pmap = vm_map_pmap(map);
+
+ ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
+ ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
+}
+
+void
+vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
+{
+ pmap_t pmap = vm_map_pmap(map);
+
+ ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
+ ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
+}
+
+/* Add (generate) code signature for memory range */
+#if CONFIG_DYNAMIC_CODE_SIGNING
+kern_return_t vm_map_sign(vm_map_t map,
+ vm_map_offset_t start,
+ vm_map_offset_t end)
+{
+ vm_map_entry_t entry;
+ vm_page_t m;
+ vm_object_t object;
+
+ /*
+ * Vet all the input parameters and current type and state of the
+ * underlying object. Return with an error if anything is amiss.
+ */
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ vm_map_lock_read(map);
+
+ if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
+ /*
+ * Must pass a valid non-submap address.
+ */
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+
+ if((entry->vme_start > start) || (entry->vme_end < end)) {
+ /*
+ * Map entry doesn't cover the requested range. Not handling
+ * this situation currently.
+ */
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ object = VME_OBJECT(entry);
+ if (object == VM_OBJECT_NULL) {
+ /*
+ * Object must already be present or we can't sign.
+ */
+ vm_map_unlock_read(map);
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ vm_object_lock(object);
+ vm_map_unlock_read(map);
+
+ while(start < end) {
+ uint32_t refmod;
+
+ m = vm_page_lookup(object,
+ start - entry->vme_start + VME_OFFSET(entry));
+ if (m==VM_PAGE_NULL) {
+ /* should we try to fault a page here? we can probably
+ * demand it exists and is locked for this request */
+ vm_object_unlock(object);
+ return KERN_FAILURE;
+ }
+ /* deal with special page status */
+ if (m->busy ||
+ (m->unusual && (m->error || m->restart || m->private || m->absent))) {
+ vm_object_unlock(object);
+ return KERN_FAILURE;
+ }
+
+ /* Page is OK...
now "validate" it */ + /* This is the place where we'll call out to create a code + * directory, later */ + m->cs_validated = TRUE; + + /* The page is now "clean" for codesigning purposes. That means + * we don't consider it as modified (wpmapped) anymore. But + * we'll disconnect the page so we note any future modification + * attempts. */ + m->wpmapped = FALSE; + refmod = pmap_disconnect(m->phys_page); + + /* Pull the dirty status from the pmap, since we cleared the + * wpmapped bit */ + if ((refmod & VM_MEM_MODIFIED) && !m->dirty) { + SET_PAGE_DIRTY(m, FALSE); + } + + /* On to the next page */ + start += PAGE_SIZE; + } + vm_object_unlock(object); + + return KERN_SUCCESS; +} +#endif + +kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed) +{ + vm_map_entry_t entry = VM_MAP_ENTRY_NULL; + vm_map_entry_t next_entry; + kern_return_t kr = KERN_SUCCESS; + vm_map_t zap_map; + + vm_map_lock(map); + + /* + * We use a "zap_map" to avoid having to unlock + * the "map" in vm_map_delete(). + */ + zap_map = vm_map_create(PMAP_NULL, + map->min_offset, + map->max_offset, + map->hdr.entries_pageable); + + if (zap_map == VM_MAP_NULL) { + return KERN_RESOURCE_SHORTAGE; + } + + vm_map_set_page_shift(zap_map, + VM_MAP_PAGE_SHIFT(map)); + vm_map_disable_hole_optimization(zap_map); + + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = next_entry) { + next_entry = entry->vme_next; + + if (VME_OBJECT(entry) && + !entry->is_sub_map && + (VME_OBJECT(entry)->internal == TRUE) && + (VME_OBJECT(entry)->ref_count == 1)) { + + *reclaimed_resident += VME_OBJECT(entry)->resident_page_count; + *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager); + + (void)vm_map_delete(map, + entry->vme_start, + entry->vme_end, + VM_MAP_REMOVE_SAVE_ENTRIES, + zap_map); + } + } + + vm_map_unlock(map); + + /* + * Get rid of the "zap_maps" and all the map entries that + * they may still contain. 
+ */ + if (zap_map != VM_MAP_NULL) { + vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); + zap_map = VM_MAP_NULL; + } + + return kr; +} + +#if CONFIG_FREEZE + +kern_return_t vm_map_freeze_walk( + vm_map_t map, + unsigned int *purgeable_count, + unsigned int *wired_count, + unsigned int *clean_count, + unsigned int *dirty_count, + unsigned int dirty_budget, + boolean_t *has_shared) +{ + vm_map_entry_t entry; + + vm_map_lock_read(map); + + *purgeable_count = *wired_count = *clean_count = *dirty_count = 0; + *has_shared = FALSE; + + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = entry->vme_next) { + unsigned int purgeable, clean, dirty, wired; + boolean_t shared; + + if ((VME_OBJECT(entry) == 0) || + (entry->is_sub_map) || + (VME_OBJECT(entry)->phys_contiguous)) { + continue; + } + + default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, VME_OBJECT(entry), NULL); + + *purgeable_count += purgeable; + *wired_count += wired; + *clean_count += clean; + *dirty_count += dirty; + + if (shared) { + *has_shared = TRUE; + } + + /* Adjust pageout budget and finish up if reached */ + if (dirty_budget) { + dirty_budget -= dirty; + if (dirty_budget == 0) { + break; + } + } + } + + vm_map_unlock_read(map); + + return KERN_SUCCESS; +} + +int c_freezer_swapout_count; +int c_freezer_compression_count = 0; +AbsoluteTime c_freezer_last_yield_ts = 0; + +kern_return_t vm_map_freeze( + vm_map_t map, + unsigned int *purgeable_count, + unsigned int *wired_count, + unsigned int *clean_count, + unsigned int *dirty_count, + unsigned int dirty_budget, + boolean_t *has_shared) +{ + vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL; + kern_return_t kr = KERN_SUCCESS; + boolean_t default_freezer_active = TRUE; + + *purgeable_count = *wired_count = *clean_count = *dirty_count = 0; + *has_shared = FALSE; + + /* + * We need the exclusive lock here so that we can + * block any page faults or lookups while we are + * in the middle of freezing this vm map. + */ + vm_map_lock(map); + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + default_freezer_active = FALSE; + + if (vm_compressor_low_on_space() || vm_swap_low_on_space()) { + kr = KERN_NO_SPACE; + goto done; + } + } + assert(default_freezer_active == FALSE); + + if (default_freezer_active) { + if (map->default_freezer_handle == NULL) { + map->default_freezer_handle = default_freezer_handle_allocate(); + } + + if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) { + /* + * Can happen if default_freezer_handle passed in is NULL + * Or, a table has already been allocated and associated + * with this handle, i.e. the map is already frozen. 
+ */ + goto done; + } + } + c_freezer_compression_count = 0; + clock_get_uptime(&c_freezer_last_yield_ts); + + for (entry2 = vm_map_first_entry(map); + entry2 != vm_map_to_entry(map); + entry2 = entry2->vme_next) { + + vm_object_t src_object = VME_OBJECT(entry2); + + if (VME_OBJECT(entry2) && + !entry2->is_sub_map && + !VME_OBJECT(entry2)->phys_contiguous) { + /* If eligible, scan the entry, moving eligible pages over to our parent object */ + if (default_freezer_active) { + unsigned int purgeable, clean, dirty, wired; + boolean_t shared; + + default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, + src_object, map->default_freezer_handle); + + *purgeable_count += purgeable; + *wired_count += wired; + *clean_count += clean; + *dirty_count += dirty; + + /* Adjust pageout budget and finish up if reached */ + if (dirty_budget) { + dirty_budget -= dirty; + if (dirty_budget == 0) { + break; + } + } + + if (shared) { + *has_shared = TRUE; + } + } else { + if (VME_OBJECT(entry2)->internal == TRUE) { + + if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { + /* + * Pages belonging to this object could be swapped to disk. + * Make sure it's not a shared object because we could end + * up just bringing it back in again. + */ + if (VME_OBJECT(entry2)->ref_count > 1) { + continue; + } + } + vm_object_compressed_freezer_pageout(VME_OBJECT(entry2)); + } + + if (vm_compressor_low_on_space() || vm_swap_low_on_space()) { + kr = KERN_NO_SPACE; + break; + } + } + } + } + + if (default_freezer_active) { + /* Finally, throw out the pages to swap */ + default_freezer_pageout(map->default_freezer_handle); + } + +done: + vm_map_unlock(map); + + if (!default_freezer_active) { + vm_object_compressed_freezer_done(); + } + if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { + /* + * reset the counter tracking the # of swapped c_segs + * because we are now done with this freeze session and task. + */ + c_freezer_swapout_count = 0; + } + return kr; +} + +kern_return_t +vm_map_thaw( + vm_map_t map) +{ + kern_return_t kr = KERN_SUCCESS; + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + /* + * We will on-demand thaw in the presence of the compressed pager. + */ + return kr; + } + + vm_map_lock(map); + + if (map->default_freezer_handle == NULL) { + /* + * This map is not in a frozen state. + */ + kr = KERN_FAILURE; + goto out; + } + + kr = default_freezer_unpack(map->default_freezer_handle); +out: + vm_map_unlock(map); + + return kr; +} +#endif + +/* + * vm_map_entry_should_cow_for_true_share: + * + * Determines if the map entry should be clipped and setup for copy-on-write + * to avoid applying "true_share" to a large VM object when only a subset is + * targeted. + * + * For now, we target only the map entries created for the Objective C + * Garbage Collector, which initially have the following properties: + * - alias == VM_MEMORY_MALLOC + * - wired_count == 0 + * - !needs_copy + * and a VM object with: + * - internal + * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC + * - !true_share + * - vo_size == ANON_CHUNK_SIZE + * + * Only non-kernel map entries. + */ +boolean_t +vm_map_entry_should_cow_for_true_share( + vm_map_entry_t entry) +{ + vm_object_t object; + + if (entry->is_sub_map) { + /* entry does not point at a VM object */ + return FALSE; + } + + if (entry->needs_copy) { + /* already set for copy_on_write: done! 
*/ + return FALSE; + } + + if (VME_ALIAS(entry) != VM_MEMORY_MALLOC && + VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) { + /* not a malloc heap or Obj-C Garbage Collector heap */ + return FALSE; + } + + if (entry->wired_count) { + /* wired: can't change the map entry... */ + vm_counters.should_cow_but_wired++; + return FALSE; + } + + object = VME_OBJECT(entry); + + if (object == VM_OBJECT_NULL) { + /* no object yet... */ + return FALSE; + } + + if (!object->internal) { + /* not an internal object */ + return FALSE; + } + + if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { + /* not the default copy strategy */ + return FALSE; + } + + if (object->true_share) { + /* already true_share: too late to avoid it */ + return FALSE; + } + + if (VME_ALIAS(entry) == VM_MEMORY_MALLOC && + object->vo_size != ANON_CHUNK_SIZE) { + /* ... not an object created for the ObjC Garbage Collector */ + return FALSE; + } + + if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL && + object->vo_size != 2048 * 4096) { + /* ... not a "MALLOC_SMALL" heap */ + return FALSE; + } + + /* + * All the criteria match: we have a large object being targeted for "true_share". + * To limit the adverse side-effects linked with "true_share", tell the caller to + * try and avoid setting up the entire object for "true_share" by clipping the + * targeted range and setting it up for copy-on-write. + */ + return TRUE; +} + +vm_map_offset_t +vm_map_round_page_mask( + vm_map_offset_t offset, + vm_map_offset_t mask) +{ + return VM_MAP_ROUND_PAGE(offset, mask); +} + +vm_map_offset_t +vm_map_trunc_page_mask( + vm_map_offset_t offset, + vm_map_offset_t mask) +{ + return VM_MAP_TRUNC_PAGE(offset, mask); +} + +boolean_t +vm_map_page_aligned( + vm_map_offset_t offset, + vm_map_offset_t mask) +{ + return ((offset) & mask) == 0; +} + +int +vm_map_page_shift( + vm_map_t map) +{ + return VM_MAP_PAGE_SHIFT(map); +} + +int +vm_map_page_size( + vm_map_t map) +{ + return VM_MAP_PAGE_SIZE(map); +} + +vm_map_offset_t +vm_map_page_mask( + vm_map_t map) +{ + return VM_MAP_PAGE_MASK(map); +} + +kern_return_t +vm_map_set_page_shift( + vm_map_t map, + int pageshift) +{ + if (map->hdr.nentries != 0) { + /* too late to change page size */ + return KERN_FAILURE; + } + + map->hdr.page_shift = pageshift; + + return KERN_SUCCESS; +} + +int +vm_map_purge( + vm_map_t map) +{ + int num_object_purged; + vm_map_entry_t entry; + vm_map_offset_t next_address; + vm_object_t object; + int state; + kern_return_t kr; + + num_object_purged = 0; + + vm_map_lock_read(map); + entry = vm_map_first_entry(map); + while (entry != vm_map_to_entry(map)) { + if (entry->is_sub_map) { + goto next; + } + if (! 
(entry->protection & VM_PROT_WRITE)) { + goto next; + } + object = VME_OBJECT(entry); + if (object == VM_OBJECT_NULL) { + goto next; + } + if (object->purgable != VM_PURGABLE_VOLATILE) { + goto next; + } + + vm_object_lock(object); +#if 00 + if (VME_OFFSET(entry) != 0 || + (entry->vme_end - entry->vme_start) != object->vo_size) { + vm_object_unlock(object); + goto next; + } +#endif + next_address = entry->vme_end; + vm_map_unlock_read(map); + state = VM_PURGABLE_EMPTY; + kr = vm_object_purgable_control(object, + VM_PURGABLE_SET_STATE, + &state); + if (kr == KERN_SUCCESS) { + num_object_purged++; + } + vm_object_unlock(object); + + vm_map_lock_read(map); + if (vm_map_lookup_entry(map, next_address, &entry)) { + continue; + } + next: + entry = entry->vme_next; + } + vm_map_unlock_read(map); + + return num_object_purged; +} + +kern_return_t +vm_map_query_volatile( + vm_map_t map, + mach_vm_size_t *volatile_virtual_size_p, + mach_vm_size_t *volatile_resident_size_p, + mach_vm_size_t *volatile_compressed_size_p, + mach_vm_size_t *volatile_pmap_size_p, + mach_vm_size_t *volatile_compressed_pmap_size_p) +{ + mach_vm_size_t volatile_virtual_size; + mach_vm_size_t volatile_resident_count; + mach_vm_size_t volatile_compressed_count; + mach_vm_size_t volatile_pmap_count; + mach_vm_size_t volatile_compressed_pmap_count; + mach_vm_size_t resident_count; + vm_map_entry_t entry; + vm_object_t object; + + /* map should be locked by caller */ + + volatile_virtual_size = 0; + volatile_resident_count = 0; + volatile_compressed_count = 0; + volatile_pmap_count = 0; + volatile_compressed_pmap_count = 0; + + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = entry->vme_next) { + mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes; + + if (entry->is_sub_map) { + continue; + } + if (! (entry->protection & VM_PROT_WRITE)) { + continue; + } + object = VME_OBJECT(entry); + if (object == VM_OBJECT_NULL) { + continue; + } + if (object->purgable != VM_PURGABLE_VOLATILE && + object->purgable != VM_PURGABLE_EMPTY) { + continue; + } + if (VME_OFFSET(entry)) { + /* + * If the map entry has been split and the object now + * appears several times in the VM map, we don't want + * to count the object's resident_page_count more than + * once. We count it only for the first one, starting + * at offset 0 and ignore the other VM map entries. 
+ */ + continue; + } + resident_count = object->resident_page_count; + if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) { + resident_count = 0; + } else { + resident_count -= (VME_OFFSET(entry) / PAGE_SIZE); + } + + volatile_virtual_size += entry->vme_end - entry->vme_start; + volatile_resident_count += resident_count; + if (object->pager) { + volatile_compressed_count += + vm_compressor_pager_get_count(object->pager); + } + pmap_compressed_bytes = 0; + pmap_resident_bytes = + pmap_query_resident(map->pmap, + entry->vme_start, + entry->vme_end, + &pmap_compressed_bytes); + volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE); + volatile_compressed_pmap_count += (pmap_compressed_bytes + / PAGE_SIZE); + } + + /* map is still locked on return */ + + *volatile_virtual_size_p = volatile_virtual_size; + *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE; + *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE; + *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE; + *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE; + + return KERN_SUCCESS; +} + +void +vm_map_sizes(vm_map_t map, + vm_map_size_t * psize, + vm_map_size_t * pfree, + vm_map_size_t * plargest_free) +{ + vm_map_entry_t entry; + vm_map_offset_t prev; + vm_map_size_t free, total_free, largest_free; + boolean_t end; + + total_free = largest_free = 0; + + vm_map_lock_read(map); + if (psize) *psize = map->max_offset - map->min_offset; + + prev = map->min_offset; + for (entry = vm_map_first_entry(map);; entry = entry->vme_next) + { + end = (entry == vm_map_to_entry(map)); + + if (end) free = entry->vme_end - prev; + else free = entry->vme_start - prev; + + total_free += free; + if (free > largest_free) largest_free = free; + + if (end) break; + prev = entry->vme_end; + } + vm_map_unlock_read(map); + if (pfree) *pfree = total_free; + if (plargest_free) *plargest_free = largest_free; +} + +#if VM_SCAN_FOR_SHADOW_CHAIN +int vm_map_shadow_max(vm_map_t map); +int vm_map_shadow_max( + vm_map_t map) +{ + int shadows, shadows_max; + vm_map_entry_t entry; + vm_object_t object, next_object; + + if (map == NULL) + return 0; + + shadows_max = 0; + + vm_map_lock_read(map); + + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = entry->vme_next) { + if (entry->is_sub_map) { + continue; + } + object = VME_OBJECT(entry); + if (object == NULL) { + continue; + } + vm_object_lock_shared(object); + for (shadows = 0; + object->shadow != NULL; + shadows++, object = next_object) { + next_object = object->shadow; + vm_object_lock_shared(next_object); + vm_object_unlock(object); + } + vm_object_unlock(object); + if (shadows > shadows_max) { + shadows_max = shadows; + } + } + + vm_map_unlock_read(map); + + return shadows_max; +} +#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
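For reference, vm_map_sizes() earlier in this hunk tolerates NULL out-parameters, so a caller can ask for any subset of the three statistics. A minimal usage sketch, with report_map_space() as a hypothetical debugging helper rather than anything this diff adds:

static void
report_map_space(vm_map_t map)
{
	vm_map_size_t	size, free_space, largest_hole;

	/* one read-locked walk of the entry list fills in all three */
	vm_map_sizes(map, &size, &free_space, &largest_hole);

	printf("map %p: size 0x%llx, free 0x%llx, largest hole 0x%llx\n",
	       map,
	       (uint64_t)size,
	       (uint64_t)free_space,
	       (uint64_t)largest_hole);
}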