X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/5d5c5d0d5b79ade9a973d55186ffda2638ba2b6e..b0d623f7f2ae71ed96e60569f61f9a9a27016e80:/osfmk/vm/vm_map.c diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index 42ef2f13b..d48a044fa 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -1,31 +1,29 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_OSREFERENCE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the - * License may not be used to create, or enable the creation or - * redistribution of, unlawful or unlicensed copies of an Apple operating - * system, or to circumvent, violate, or enable the circumvention or - * violation of, any terms of an Apple operating system software license - * agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and * limitations under the License. - * - * @APPLE_LICENSE_OSREFERENCE_HEADER_END@ + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -78,6 +76,7 @@ #include #include #include +#include #include #include @@ -90,183 +89,206 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include #include -#include -#include // for vm_map_commpage64 and vm_map_remove_compage64 +#include +#include #ifdef ppc #include #endif /* ppc */ #include +#include /* Internal prototypes */ - + static void vm_map_simplify_range( vm_map_t map, vm_map_offset_t start, vm_map_offset_t end); /* forward */ static boolean_t vm_map_range_check( - vm_map_t map, - vm_map_offset_t start, - vm_map_offset_t end, - vm_map_entry_t *entry); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_map_entry_t *entry); static vm_map_entry_t _vm_map_entry_create( - struct vm_map_header *map_header); + struct vm_map_header *map_header); static void _vm_map_entry_dispose( - struct vm_map_header *map_header, - vm_map_entry_t entry); + struct vm_map_header *map_header, + vm_map_entry_t entry); static void vm_map_pmap_enter( - vm_map_t map, - vm_map_offset_t addr, - vm_map_offset_t end_addr, - vm_object_t object, - vm_object_offset_t offset, - vm_prot_t protection); + vm_map_t map, + vm_map_offset_t addr, + vm_map_offset_t end_addr, + vm_object_t object, + vm_object_offset_t offset, + vm_prot_t protection); static void _vm_map_clip_end( - struct vm_map_header *map_header, - vm_map_entry_t entry, - vm_map_offset_t end); + struct vm_map_header *map_header, + vm_map_entry_t entry, + vm_map_offset_t end); static void _vm_map_clip_start( - struct vm_map_header *map_header, - vm_map_entry_t entry, - vm_map_offset_t start); + struct vm_map_header *map_header, + vm_map_entry_t entry, + vm_map_offset_t start); static void vm_map_entry_delete( - vm_map_t map, - vm_map_entry_t entry); + vm_map_t map, + vm_map_entry_t entry); static kern_return_t vm_map_delete( - vm_map_t map, - vm_map_offset_t start, - vm_map_offset_t end, - int flags, - vm_map_t zap_map); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + int flags, + vm_map_t zap_map); static kern_return_t vm_map_copy_overwrite_unaligned( - vm_map_t dst_map, - vm_map_entry_t entry, - vm_map_copy_t copy, - vm_map_address_t start); + vm_map_t dst_map, + vm_map_entry_t entry, + vm_map_copy_t copy, + vm_map_address_t start); static kern_return_t vm_map_copy_overwrite_aligned( - vm_map_t dst_map, - vm_map_entry_t tmp_entry, - vm_map_copy_t copy, - vm_map_offset_t start, - pmap_t pmap); + vm_map_t dst_map, + vm_map_entry_t tmp_entry, + vm_map_copy_t copy, + vm_map_offset_t start, + pmap_t pmap); static kern_return_t vm_map_copyin_kernel_buffer( - vm_map_t src_map, - vm_map_address_t src_addr, - vm_map_size_t len, - boolean_t src_destroy, - vm_map_copy_t *copy_result); /* OUT */ + vm_map_t src_map, + vm_map_address_t src_addr, + vm_map_size_t len, + boolean_t src_destroy, + vm_map_copy_t *copy_result); /* OUT */ static kern_return_t vm_map_copyout_kernel_buffer( - vm_map_t map, - vm_map_address_t *addr, /* IN/OUT */ - vm_map_copy_t copy, - boolean_t overwrite); + vm_map_t map, + vm_map_address_t *addr, /* IN/OUT */ + vm_map_copy_t copy, + boolean_t overwrite); static void vm_map_fork_share( - vm_map_t old_map, - vm_map_entry_t old_entry, - vm_map_t new_map); + vm_map_t old_map, + vm_map_entry_t old_entry, + vm_map_t new_map); static boolean_t vm_map_fork_copy( - vm_map_t old_map, - vm_map_entry_t *old_entry_p, - vm_map_t new_map); + vm_map_t old_map, + vm_map_entry_t *old_entry_p, + vm_map_t new_map); void vm_map_region_top_walk( - vm_map_entry_t entry, - vm_region_top_info_t top); + vm_map_entry_t entry, + vm_region_top_info_t top); void vm_map_region_walk( - vm_map_t map, - vm_map_offset_t va, - vm_map_entry_t entry, - vm_object_offset_t offset, - vm_object_size_t range, - vm_region_extended_info_t extended); + vm_map_t map, + vm_map_offset_t va, + vm_map_entry_t entry, + vm_object_offset_t offset, + vm_object_size_t range, + vm_region_extended_info_t extended, + boolean_t look_for_pages); static kern_return_t vm_map_wire_nested( - vm_map_t map, - vm_map_offset_t start, - vm_map_offset_t end, - vm_prot_t access_type, - boolean_t user_wire, - pmap_t map_pmap, - vm_map_offset_t pmap_addr); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_prot_t access_type, + boolean_t user_wire, + pmap_t map_pmap, + vm_map_offset_t pmap_addr); static kern_return_t vm_map_unwire_nested( - vm_map_t map, - vm_map_offset_t start, - vm_map_offset_t end, - boolean_t user_wire, - pmap_t map_pmap, - vm_map_offset_t pmap_addr); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + boolean_t user_wire, + pmap_t map_pmap, + vm_map_offset_t pmap_addr); static kern_return_t vm_map_overwrite_submap_recurse( - vm_map_t dst_map, - vm_map_offset_t dst_addr, - vm_map_size_t dst_size); + vm_map_t dst_map, + vm_map_offset_t dst_addr, + vm_map_size_t dst_size); static kern_return_t vm_map_copy_overwrite_nested( - vm_map_t dst_map, - vm_map_offset_t dst_addr, - vm_map_copy_t copy, - boolean_t interruptible, - pmap_t pmap); + vm_map_t dst_map, + vm_map_offset_t dst_addr, + vm_map_copy_t copy, + boolean_t interruptible, + pmap_t pmap); static kern_return_t vm_map_remap_extract( - vm_map_t map, - vm_map_offset_t addr, - vm_map_size_t size, - boolean_t copy, - struct vm_map_header *map_header, - vm_prot_t *cur_protection, - vm_prot_t *max_protection, - vm_inherit_t inheritance, - boolean_t pageable); + vm_map_t map, + vm_map_offset_t addr, + vm_map_size_t size, + boolean_t copy, + struct vm_map_header *map_header, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance, + boolean_t pageable); static kern_return_t vm_map_remap_range_allocate( - vm_map_t map, - vm_map_address_t *address, - vm_map_size_t size, - vm_map_offset_t mask, - boolean_t anywhere, - vm_map_entry_t *map_entry); + vm_map_t map, + vm_map_address_t *address, + vm_map_size_t size, + vm_map_offset_t mask, + boolean_t anywhere, + vm_map_entry_t *map_entry); static void vm_map_region_look_for_page( - vm_map_t map, - vm_map_offset_t va, - vm_object_t object, - vm_object_offset_t offset, - int max_refcnt, - int depth, - vm_region_extended_info_t extended); + vm_map_t map, + vm_map_offset_t va, + vm_object_t object, + vm_object_offset_t offset, + int max_refcnt, + int depth, + vm_region_extended_info_t extended); static int vm_map_region_count_obj_refs( - vm_map_entry_t entry, - vm_object_t object); + vm_map_entry_t entry, + vm_object_t object); + + +static kern_return_t vm_map_willneed( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_reuse_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_reusable_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_can_reuse( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); /* * Macros to copy a vm_map_entry. We must be careful to correctly @@ -279,16 +301,70 @@ static int vm_map_region_count_obj_refs( */ #define vm_map_entry_copy(NEW,OLD) \ MACRO_BEGIN \ - *(NEW) = *(OLD); \ - (NEW)->is_shared = FALSE; \ - (NEW)->needs_wakeup = FALSE; \ - (NEW)->in_transition = FALSE; \ - (NEW)->wired_count = 0; \ - (NEW)->user_wired_count = 0; \ + *(NEW) = *(OLD); \ + (NEW)->is_shared = FALSE; \ + (NEW)->needs_wakeup = FALSE; \ + (NEW)->in_transition = FALSE; \ + (NEW)->wired_count = 0; \ + (NEW)->user_wired_count = 0; \ + (NEW)->permanent = FALSE; \ MACRO_END #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD)) +/* + * Decide if we want to allow processes to execute from their data or stack areas. + * override_nx() returns true if we do. Data/stack execution can be enabled independently + * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec + * or allow_stack_exec to enable data execution for that type of data area for that particular + * ABI (or both by or'ing the flags together). These are initialized in the architecture + * specific pmap files since the default behavior varies according to architecture. The + * main reason it varies is because of the need to provide binary compatibility with old + * applications that were written before these restrictions came into being. In the old + * days, an app could execute anything it could read, but this has slowly been tightened + * up over time. The default behavior is: + * + * 32-bit PPC apps may execute from both stack and data areas + * 32-bit Intel apps may exeucte from data areas but not stack + * 64-bit PPC/Intel apps may not execute from either data or stack + * + * An application on any architecture may override these defaults by explicitly + * adding PROT_EXEC permission to the page in question with the mprotect(2) + * system call. This code here just determines what happens when an app tries to + * execute from a page that lacks execute permission. + * + * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the + * default behavior for both 32 and 64 bit apps on a system-wide basis. + */ + +extern int allow_data_exec, allow_stack_exec; + +int +override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */ +{ + int current_abi; + + /* + * Determine if the app is running in 32 or 64 bit mode. + */ + + if (vm_map_is_64bit(map)) + current_abi = VM_ABI_64; + else + current_abi = VM_ABI_32; + + /* + * Determine if we should allow the execution based on whether it's a + * stack or data area and the current architecture. + */ + + if (user_tag == VM_MEMORY_STACK) + return allow_stack_exec & current_abi; + + return allow_data_exec & current_abi; +} + + /* * Virtual memory maps provide for the mapping, protection, * and sharing of virtual memory objects. In addition, @@ -346,51 +422,33 @@ static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ vm_object_t vm_submap_object; -/* - * vm_map_init: - * - * Initialize the vm_map module. Must be called before - * any other vm_map routines. - * - * Map and entry structures are allocated from zones -- we must - * initialize those zones. - * - * There are three zones of interest: - * - * vm_map_zone: used to allocate maps. - * vm_map_entry_zone: used to allocate map entries. - * vm_map_kentry_zone: used to allocate map entries for the kernel. - * - * The kernel allocates map entries from a special zone that is initially - * "crammed" with memory. It would be difficult (perhaps impossible) for - * the kernel to allocate more memory to a entry zone when it became - * empty since the very act of allocating memory implies the creation - * of a new entry. - */ - static void *map_data; -static vm_map_size_t map_data_size; +static vm_size_t map_data_size; static void *kentry_data; -static vm_map_size_t kentry_data_size; +static vm_size_t kentry_data_size; static int kentry_count = 2048; /* to init kentry_data_size */ -#define NO_COALESCE_LIMIT (1024 * 128) +#define NO_COALESCE_LIMIT ((1024 * 128) - 1) -/* - * Threshold for aggressive (eager) page map entering for vm copyout - * operations. Any copyout larger will NOT be aggressively entered. - */ -static vm_map_size_t vm_map_aggressive_enter_max; /* set by bootstrap */ /* Skip acquiring locks if we're in the midst of a kernel core dump */ -extern unsigned int not_in_kdp; +unsigned int not_in_kdp = 1; -#ifdef __i386__ +#if CONFIG_CODE_DECRYPTION +/* + * vm_map_apple_protected: + * This remaps the requested part of the object with an object backed by + * the decrypting pager. + * crypt_info contains entry points and session data for the crypt module. + * The crypt_info block will be copied by vm_map_apple_protected. The data structures + * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called. + */ kern_return_t vm_map_apple_protected( vm_map_t map, vm_map_offset_t start, - vm_map_offset_t end) + vm_map_offset_t end, + struct pager_crypt_info *crypt_info) { boolean_t map_locked; kern_return_t kr; @@ -406,7 +464,7 @@ vm_map_apple_protected( if (!vm_map_lookup_entry(map, start, &map_entry) || - map_entry->vme_end != end || + map_entry->vme_end < end || map_entry->is_sub_map) { /* that memory is not properly mapped */ kr = KERN_INVALID_ARGUMENT; @@ -419,6 +477,12 @@ vm_map_apple_protected( goto done; } + /* make sure protected object stays alive while map is unlocked */ + vm_object_reference(protected_object); + + vm_map_unlock_read(map); + map_locked = FALSE; + /* * Lookup (and create if necessary) the protected memory object * matching that VM object. @@ -426,34 +490,31 @@ vm_map_apple_protected( * to guarantee that it doesn't go away before we get a chance to map * it. */ - - protected_mem_obj = apple_protect_pager_setup(protected_object); + protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info); + + /* release extra ref on protected object */ + vm_object_deallocate(protected_object); + if (protected_mem_obj == NULL) { kr = KERN_FAILURE; goto done; } - vm_map_unlock_read(map); - map_locked = FALSE; - /* map this memory object in place of the current one */ map_addr = start; - kr = mach_vm_map(map, - &map_addr, - end - start, - (mach_vm_offset_t) 0, - VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, - (ipc_port_t) protected_mem_obj, - map_entry->offset + (start - map_entry->vme_start), - TRUE, - map_entry->protection, - map_entry->max_protection, - map_entry->inheritance); + kr = vm_map_enter_mem_object(map, + &map_addr, + end - start, + (mach_vm_offset_t) 0, + VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, + (ipc_port_t) protected_mem_obj, + (map_entry->offset + + (start - map_entry->vme_start)), + TRUE, + map_entry->protection, + map_entry->max_protection, + map_entry->inheritance); assert(map_addr == start); - if (kr == KERN_SUCCESS) { - /* let the pager know that this mem_obj is mapped */ - apple_protect_pager_map(protected_mem_obj); - } /* * Release the reference obtained by apple_protect_pager_setup(). * The mapping (if it succeeded) is now holding a reference on the @@ -467,26 +528,52 @@ done: } return kr; } -#endif /* __i386__ */ +#endif /* CONFIG_CODE_DECRYPTION */ +lck_grp_t vm_map_lck_grp; +lck_grp_attr_t vm_map_lck_grp_attr; +lck_attr_t vm_map_lck_attr; + + +/* + * vm_map_init: + * + * Initialize the vm_map module. Must be called before + * any other vm_map routines. + * + * Map and entry structures are allocated from zones -- we must + * initialize those zones. + * + * There are three zones of interest: + * + * vm_map_zone: used to allocate maps. + * vm_map_entry_zone: used to allocate map entries. + * vm_map_kentry_zone: used to allocate map entries for the kernel. + * + * The kernel allocates map entries from a special zone that is initially + * "crammed" with memory. It would be difficult (perhaps impossible) for + * the kernel to allocate more memory to a entry zone when it became + * empty since the very act of allocating memory implies the creation + * of a new entry. + */ void vm_map_init( void) { - vm_map_zone = zinit((vm_map_size_t) sizeof(struct vm_map), 40*1024, - PAGE_SIZE, "maps"); + vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024, + PAGE_SIZE, "maps"); vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), - 1024*1024, PAGE_SIZE*5, - "non-kernel map entries"); + 1024*1024, PAGE_SIZE*5, + "non-kernel map entries"); vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), - kentry_data_size, kentry_data_size, - "kernel map entries"); + kentry_data_size, kentry_data_size, + "kernel map entries"); vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy), - 16*1024, PAGE_SIZE, "map copies"); + 16*1024, PAGE_SIZE, "map copies"); /* * Cram the map and kentry zones with initial data. @@ -495,15 +582,20 @@ vm_map_init( zone_change(vm_map_zone, Z_COLLECT, FALSE); zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE); zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE); + zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE); zcram(vm_map_zone, map_data, map_data_size); zcram(vm_map_kentry_zone, kentry_data, kentry_data_size); + + lck_grp_attr_setdefault(&vm_map_lck_grp_attr); + lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr); + lck_attr_setdefault(&vm_map_lck_attr); } void vm_map_steal_memory( void) { - map_data_size = vm_map_round_page(10 * sizeof(struct vm_map)); + map_data_size = round_page(10 * sizeof(struct _vm_map)); map_data = pmap_steal_memory(map_data_size); #if 0 @@ -519,7 +611,7 @@ vm_map_steal_memory( kentry_data_size = - vm_map_round_page(kentry_count * sizeof(struct vm_map_entry)); + round_page(kentry_count * sizeof(struct vm_map_entry)); kentry_data = pmap_steal_memory(kentry_data_size); } @@ -537,6 +629,7 @@ vm_map_create( vm_map_offset_t max, boolean_t pageable) { + static int color_seed = 0; register vm_map_t result; result = (vm_map_t) zalloc(vm_map_zone); @@ -549,6 +642,8 @@ vm_map_create( result->hdr.entries_pageable = pageable; result->size = 0; + result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */ + result->user_wire_size = 0; result->ref_count = 1; #if TASK_SWAPPER result->res_count = 1; @@ -561,11 +656,13 @@ vm_map_create( result->no_zero_fill = FALSE; result->mapped = FALSE; result->wait_for_space = FALSE; + result->switch_protect = FALSE; result->first_free = vm_map_to_entry(result); result->hint = vm_map_to_entry(result); + result->color_rr = (color_seed++) & vm_color_mask; vm_map_lock_init(result); - mutex_init(&result->s_lock, 0); - + lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr); + return(result); } @@ -576,10 +673,10 @@ vm_map_create( * given map (or map copy). No fields are filled. */ #define vm_map_entry_create(map) \ - _vm_map_entry_create(&(map)->hdr) + _vm_map_entry_create(&(map)->hdr) #define vm_map_copy_entry_create(copy) \ - _vm_map_entry_create(&(copy)->cpy_hdr) + _vm_map_entry_create(&(copy)->cpy_hdr) static vm_map_entry_t _vm_map_entry_create( @@ -589,9 +686,9 @@ _vm_map_entry_create( register vm_map_entry_t entry; if (map_header->entries_pageable) - zone = vm_map_entry_zone; + zone = vm_map_entry_zone; else - zone = vm_map_kentry_zone; + zone = vm_map_kentry_zone; entry = (vm_map_entry_t) zalloc(zone); if (entry == VM_MAP_ENTRY_NULL) @@ -604,15 +701,19 @@ _vm_map_entry_create( * vm_map_entry_dispose: [ internal use only ] * * Inverse of vm_map_entry_create. + * + * write map lock held so no need to + * do anything special to insure correctness + * of the stores */ #define vm_map_entry_dispose(map, entry) \ -MACRO_BEGIN \ + MACRO_BEGIN \ if((entry) == (map)->first_free) \ (map)->first_free = vm_map_to_entry(map); \ if((entry) == (map)->hint) \ (map)->hint = vm_map_to_entry(map); \ _vm_map_entry_dispose(&(map)->hdr, (entry)); \ -MACRO_END + MACRO_END #define vm_map_copy_entry_dispose(map, entry) \ _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry)) @@ -625,9 +726,9 @@ _vm_map_entry_dispose( register zone_t zone; if (map_header->entries_pageable) - zone = vm_map_entry_zone; + zone = vm_map_entry_zone; else - zone = vm_map_kentry_zone; + zone = vm_map_kentry_zone; zfree(zone, entry); } @@ -643,7 +744,7 @@ first_free_is_valid( if (!first_free_check) return TRUE; - + entry = vm_map_to_entry(map); next = entry->vme_next; while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) || @@ -655,7 +756,7 @@ first_free_is_valid( break; } if (map->first_free != entry) { - printf("Bad first_free for map 0x%x: 0x%x should be 0x%x\n", + printf("Bad first_free for map %p: %p should be %p\n", map, map->first_free, entry); return FALSE; } @@ -671,7 +772,7 @@ first_free_is_valid( * The map should be locked. */ #define UPDATE_FIRST_FREE(map, new_first_free) \ -MACRO_BEGIN \ + MACRO_BEGIN \ vm_map_t UFF_map; \ vm_map_entry_t UFF_first_free; \ vm_map_entry_t UFF_next_entry; \ @@ -690,7 +791,7 @@ MACRO_BEGIN \ } \ UFF_map->first_free = UFF_first_free; \ assert(first_free_is_valid(UFF_map)); \ -MACRO_END + MACRO_END /* * vm_map_entry_{un,}link: @@ -698,14 +799,14 @@ MACRO_END * Insert/remove entries from maps (or map copies). */ #define vm_map_entry_link(map, after_where, entry) \ -MACRO_BEGIN \ + MACRO_BEGIN \ vm_map_t VMEL_map; \ vm_map_entry_t VMEL_entry; \ VMEL_map = (map); \ VMEL_entry = (entry); \ _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \ UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \ -MACRO_END + MACRO_END #define vm_map_copy_entry_link(copy, after_where, entry) \ @@ -720,7 +821,7 @@ MACRO_END MACRO_END #define vm_map_entry_unlink(map, entry) \ -MACRO_BEGIN \ + MACRO_BEGIN \ vm_map_t VMEU_map; \ vm_map_entry_t VMEU_entry; \ vm_map_entry_t VMEU_first_free; \ @@ -732,7 +833,7 @@ MACRO_BEGIN \ VMEU_first_free = VMEU_map->first_free; \ _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \ UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \ -MACRO_END + MACRO_END #define vm_map_copy_entry_unlink(copy, entry) \ _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry)) @@ -760,10 +861,10 @@ void vm_map_res_reference(register vm_map_t map) assert(map->res_count >= 0); assert(map->ref_count >= map->res_count); if (map->res_count == 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); vm_map_lock(map); vm_map_swapin(map); - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); ++map->res_count; vm_map_unlock(map); } else @@ -781,12 +882,12 @@ void vm_map_res_reference(register vm_map_t map) void vm_map_reference_swap(register vm_map_t map) { assert(map != VM_MAP_NULL); - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); assert(map->res_count >= 0); assert(map->ref_count >= map->res_count); map->ref_count++; vm_map_res_reference(map); - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); } /* @@ -803,11 +904,11 @@ void vm_map_res_deallocate(register vm_map_t map) { assert(map->res_count > 0); if (--map->res_count == 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); vm_map_lock(map); vm_map_swapout(map); vm_map_unlock(map); - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); } assert(map->ref_count >= map->res_count); } @@ -820,22 +921,35 @@ void vm_map_res_deallocate(register vm_map_t map) */ void vm_map_destroy( - register vm_map_t map) + vm_map_t map, + int flags) { vm_map_lock(map); - (void) vm_map_delete(map, map->min_offset, - map->max_offset, VM_MAP_NO_FLAGS, - VM_MAP_NULL); + + /* clean up regular map entries */ + (void) vm_map_delete(map, map->min_offset, map->max_offset, + flags, VM_MAP_NULL); + /* clean up leftover special mappings (commpage, etc...) */ +#ifdef __ppc__ + /* + * PPC51: ppc64 is limited to 51-bit addresses. + * Memory beyond this 51-bit limit is mapped specially at the + * pmap level, so do not interfere. + * On PPC64, the commpage is mapped beyond the addressable range + * via a special pmap hack, so ask pmap to clean it explicitly... + */ + if (map->pmap) { + pmap_unmap_sharedpage(map->pmap); + } + /* ... and do not let regular pmap cleanup apply here */ + flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP; +#endif /* __ppc__ */ + (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL, + flags, VM_MAP_NULL); vm_map_unlock(map); - - if (map->hdr.nentries!=0) - vm_map_remove_commpage(map); - -// assert(map->hdr.nentries==0); -// if(map->hdr.nentries) { /* (BRINGUP) */ -// panic("vm_map_destroy: hdr.nentries is not 0 (%d) in map %08X\n", map->hdr.nentries, map); -// } + assert(map->hdr.nentries == 0); + if(map->pmap) pmap_destroy(map->pmap); @@ -895,7 +1009,7 @@ int vm_map_swap_enable = 1; void vm_map_swapin (vm_map_t map) { register vm_map_entry_t entry; - + if (!vm_map_swap_enable) /* debug */ return; @@ -940,9 +1054,9 @@ void vm_map_swapin (vm_map_t map) if (entry->object.vm_object != VM_OBJECT_NULL) { if (entry->is_sub_map) { vm_map_t lmap = entry->object.sub_map; - mutex_lock(&lmap->s_lock); + lck_mtx_lock(&lmap->s_lock); vm_map_res_reference(lmap); - mutex_unlock(&lmap->s_lock); + lck_mtx_unlock(&lmap->s_lock); } else { vm_object_t object = entry->object.vm_object; vm_object_lock(object); @@ -970,12 +1084,12 @@ void vm_map_swapout(vm_map_t map) * If we raced with a swapin and lost, the residence count * will have been incremented to 1, and we simply return. */ - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); if (map->res_count != 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); return; } - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); /* * There are no intermediate states of a map going out or @@ -1001,9 +1115,9 @@ void vm_map_swapout(vm_map_t map) if (entry->object.vm_object != VM_OBJECT_NULL) { if (entry->is_sub_map) { vm_map_t lmap = entry->object.sub_map; - mutex_lock(&lmap->s_lock); + lck_mtx_lock(&lmap->s_lock); vm_map_res_deallocate(lmap); - mutex_unlock(&lmap->s_lock); + lck_mtx_unlock(&lmap->s_lock); } else { vm_object_t object = entry->object.vm_object; vm_object_lock(object); @@ -1036,9 +1150,9 @@ void vm_map_swapout(vm_map_t map) * and someone else wins and stores their 'hint' */ #define SAVE_HINT_MAP_READ(map,value) \ -MACRO_BEGIN \ - OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \ -MACRO_END + MACRO_BEGIN \ + OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \ + MACRO_END /* @@ -1051,9 +1165,9 @@ MACRO_END * to just do an assignment */ #define SAVE_HINT_MAP_WRITE(map,value) \ -MACRO_BEGIN \ - (map)->hint = (value); \ -MACRO_END + MACRO_BEGIN \ + (map)->hint = (value); \ + MACRO_END /* * vm_map_lookup_entry: [ internal use only ] @@ -1084,7 +1198,7 @@ vm_map_lookup_entry( cur = cur->vme_next; if (address >= cur->vme_start) { - /* + /* * Go from hint to end of list. * * But first, make a quick check to see if @@ -1102,7 +1216,7 @@ vm_map_lookup_entry( } } else { - /* + /* * Go from start to hint, *inclusively* */ last = cur->vme_next; @@ -1116,7 +1230,7 @@ vm_map_lookup_entry( while (cur != last) { if (cur->vme_end > address) { if (address >= cur->vme_start) { - /* + /* * Save this lookup for future * hints, and return */ @@ -1167,6 +1281,11 @@ vm_map_find_space( return KERN_INVALID_ARGUMENT; } + if (flags & VM_FLAGS_GUARD_AFTER) { + /* account for the back guard page in the size */ + size += PAGE_SIZE_64; + } + new_entry = vm_map_entry_create(map); /* @@ -1196,7 +1315,12 @@ vm_map_find_space( * wrap around the address. */ + if (flags & VM_FLAGS_GUARD_BEFORE) { + /* reserve space for the front guard page */ + start += PAGE_SIZE_64; + } end = ((start + mask) & ~mask); + if (end < start) { vm_map_entry_dispose(map, new_entry); vm_map_unlock(map); @@ -1245,6 +1369,10 @@ vm_map_find_space( * the map should be locked. */ + if (flags & VM_FLAGS_GUARD_BEFORE) { + /* go back for the front guard page */ + start -= PAGE_SIZE_64; + } *address = start; new_entry->vme_start = start; @@ -1269,6 +1397,12 @@ vm_map_find_space( new_entry->in_transition = FALSE; new_entry->needs_wakeup = FALSE; + new_entry->no_cache = FALSE; + new_entry->permanent = FALSE; + new_entry->superpage_size = 0; + + new_entry->alias = 0; + new_entry->zero_wired_pages = FALSE; VM_GET_FLAGS_ALIAS(flags, new_entry->alias); @@ -1315,7 +1449,8 @@ vm_map_pmap_enter( vm_object_offset_t offset, vm_prot_t protection) { - unsigned int cache_attr; + int type_of_fault; + kern_return_t kr; if(map->pmap == 0) return; @@ -1324,7 +1459,6 @@ vm_map_pmap_enter( register vm_page_t m; vm_object_lock(object); - vm_object_paging_begin(object); m = vm_page_lookup(object, offset); /* @@ -1333,42 +1467,22 @@ vm_map_pmap_enter( * enter an encrypted page in the page table. */ if (m == VM_PAGE_NULL || m->busy || m->encrypted || - (m->unusual && ( m->error || m->restart || m->absent || - protection & m->page_lock))) { - - vm_object_paging_end(object); + m->fictitious || + (m->unusual && ( m->error || m->restart || m->absent))) { vm_object_unlock(object); return; } - assert(!m->fictitious); /* XXX is this possible ??? */ - if (vm_map_pmap_enter_print) { printf("vm_map_pmap_enter:"); - printf("map: %x, addr: %llx, object: %x, offset: %llx\n", - map, (unsigned long long)addr, object, (unsigned long long)offset); - } - m->busy = TRUE; - - if (m->no_isync == TRUE) { - pmap_sync_page_data_phys(m->phys_page); - m->no_isync = FALSE; + printf("map: %p, addr: %llx, object: %p, offset: %llx\n", + map, (unsigned long long)addr, object, (unsigned long long)offset); } + type_of_fault = DBG_CACHE_HIT_FAULT; + kr = vm_fault_enter(m, map->pmap, addr, protection, + VM_PAGE_WIRED(m), FALSE, FALSE, + &type_of_fault); - cache_attr = ((unsigned int)object->wimg_bits) & VM_WIMG_MASK; - vm_object_unlock(object); - - PMAP_ENTER(map->pmap, addr, m, - protection, cache_attr, FALSE); - - vm_object_lock(object); - - PAGE_WAKEUP_DONE(m); - vm_page_lock_queues(); - if (!m->active && !m->inactive) - vm_page_activate(m); - vm_page_unlock_queues(); - vm_object_paging_end(object); vm_object_unlock(object); offset += PAGE_SIZE_64; @@ -1385,12 +1499,16 @@ boolean_t vm_map_pmap_is_empty( vm_map_offset_t start, vm_map_offset_t end) { +#ifdef MACHINE_PMAP_IS_EMPTY + return pmap_is_empty(map->pmap, start, end); +#else /* MACHINE_PMAP_IS_EMPTY */ vm_map_offset_t offset; ppnum_t phys_page; if (map->pmap == NULL) { return TRUE; } + for (offset = start; offset < end; offset += PAGE_SIZE) { @@ -1398,11 +1516,13 @@ boolean_t vm_map_pmap_is_empty( if (phys_page) { kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): " "page %d at 0x%llx\n", - map, start, end, phys_page, offset); + map, (long long)start, (long long)end, + phys_page, (long long)offset); return FALSE; } } return TRUE; +#endif /* MACHINE_PMAP_IS_EMPTY */ } /* @@ -1421,9 +1541,9 @@ static unsigned int vm_map_enter_restore_failures = 0; kern_return_t vm_map_enter( vm_map_t map, - vm_map_offset_t *address, /* IN/OUT */ + vm_map_offset_t *address, /* IN/OUT */ vm_map_size_t size, - vm_map_offset_t mask, + vm_map_offset_t mask, int flags, vm_object_t object, vm_object_offset_t offset, @@ -1433,8 +1553,10 @@ vm_map_enter( vm_inherit_t inheritance) { vm_map_entry_t entry, new_entry; - vm_map_offset_t start, tmp_start; + vm_map_offset_t start, tmp_start, tmp_offset; vm_map_offset_t end, tmp_end; + vm_map_offset_t tmp2_start, tmp2_end; + vm_map_offset_t step; kern_return_t result = KERN_SUCCESS; vm_map_t zap_old_map = VM_MAP_NULL; vm_map_t zap_new_map = VM_MAP_NULL; @@ -1444,15 +1566,98 @@ vm_map_enter( boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0); boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0); boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0); + boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0); + boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0); + boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0); + unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT); char alias; + vm_map_offset_t effective_min_offset, effective_max_offset; + kern_return_t kr; - if (size == 0) { + if (superpage_size) { + switch (superpage_size) { + /* + * Note that the current implementation only supports + * a single size for superpages, SUPERPAGE_SIZE, per + * architecture. As soon as more sizes are supposed + * to be supported, SUPERPAGE_SIZE has to be replaced + * with a lookup of the size depending on superpage_size. + */ +#ifdef __x86_64__ + case SUPERPAGE_SIZE_2MB: + break; +#endif + default: + return KERN_INVALID_ARGUMENT; + } + mask = SUPERPAGE_SIZE-1; + if (size & (SUPERPAGE_SIZE-1)) + return KERN_INVALID_ARGUMENT; + inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */ + } + +#if CONFIG_EMBEDDED + if (cur_protection & VM_PROT_WRITE) { + if (cur_protection & VM_PROT_EXECUTE) { + printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); + cur_protection &= ~VM_PROT_EXECUTE; + } + } +#endif /* CONFIG_EMBEDDED */ + + if (is_submap) { + if (purgable) { + /* submaps can not be purgeable */ + return KERN_INVALID_ARGUMENT; + } + if (object == VM_OBJECT_NULL) { + /* submaps can not be created lazily */ + return KERN_INVALID_ARGUMENT; + } + } + if (flags & VM_FLAGS_ALREADY) { + /* + * VM_FLAGS_ALREADY says that it's OK if the same mapping + * is already present. For it to be meaningul, the requested + * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and + * we shouldn't try and remove what was mapped there first + * (!VM_FLAGS_OVERWRITE). + */ + if ((flags & VM_FLAGS_ANYWHERE) || + (flags & VM_FLAGS_OVERWRITE)) { + return KERN_INVALID_ARGUMENT; + } + } + + if (flags & VM_FLAGS_BELOW_MIN) { + /* + * Allow an insertion below the map's min offset. + */ + effective_min_offset = 0ULL; + } else { + effective_min_offset = map->min_offset; + } + + if (flags & VM_FLAGS_BEYOND_MAX) { + /* + * Allow an insertion beyond the map's max offset. + */ + if (vm_map_is_64bit(map)) + effective_max_offset = 0xFFFFFFFFFFFFF000ULL; + else + effective_max_offset = 0x00000000FFFFF000ULL; + } else { + effective_max_offset = map->max_offset; + } + + if (size == 0 || + (offset & PAGE_MASK_64) != 0) { *address = 0; return KERN_INVALID_ARGUMENT; } VM_GET_FLAGS_ALIAS(flags, alias); - + #define RETURN(value) { result = value; goto BailOut; } assert(page_aligned(*address)); @@ -1466,8 +1671,8 @@ vm_map_enter( (offset != 0 || (object != VM_OBJECT_NULL && (object->size != size || - object->purgable == VM_OBJECT_NONPURGABLE)) - || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */ + object->purgable == VM_PURGABLE_DENY)) + || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */ return KERN_INVALID_ARGUMENT; if (!anywhere && overwrite) { @@ -1483,10 +1688,10 @@ vm_map_enter( zap_old_map = vm_map_create(PMAP_NULL, *address, *address + size, - TRUE); + map->hdr.entries_pageable); } - StartAgain: ; +StartAgain: ; start = *address; @@ -1498,9 +1703,9 @@ vm_map_enter( * Calculate the first possible address. */ - if (start < map->min_offset) - start = map->min_offset; - if (start > map->max_offset) + if (start < effective_min_offset) + start = effective_min_offset; + if (start > effective_max_offset) RETURN(KERN_NO_SPACE); /* @@ -1510,7 +1715,7 @@ vm_map_enter( */ assert(first_free_is_valid(map)); - if (start == map->min_offset) { + if (start == effective_min_offset) { if ((entry = map->first_free) != vm_map_to_entry(map)) start = entry->vme_end; } else { @@ -1529,7 +1734,7 @@ vm_map_enter( while (TRUE) { register vm_map_entry_t next; - /* + /* * Find the end of the proposed new region. * Be sure we didn't go beyond the end, or * wrap around the address. @@ -1541,10 +1746,10 @@ vm_map_enter( start = end; end += size; - if ((end > map->max_offset) || (end < start)) { + if ((end > effective_max_offset) || (end < start)) { if (map->wait_for_space) { - if (size <= (map->max_offset - - map->min_offset)) { + if (size <= (effective_max_offset - + effective_min_offset)) { assert_wait((event_t)map, THREAD_ABORTSAFE); vm_map_unlock(map); @@ -1581,8 +1786,6 @@ vm_map_enter( } *address = start; } else { - vm_map_entry_t temp_entry; - /* * Verify that: * the address doesn't itself violate @@ -1600,8 +1803,8 @@ vm_map_enter( end = start + size; - if ((start < map->min_offset) || - (end > map->max_offset) || + if ((start < effective_min_offset) || + (end > effective_max_offset) || (start >= end)) { RETURN(KERN_INVALID_ADDRESS); } @@ -1621,13 +1824,81 @@ vm_map_enter( * ... the starting address isn't allocated */ - if (vm_map_lookup_entry(map, start, &temp_entry)) - RETURN(KERN_NO_SPACE); - - entry = temp_entry; - - /* - * ... the next region doesn't overlap the + if (vm_map_lookup_entry(map, start, &entry)) { + if (! (flags & VM_FLAGS_ALREADY)) { + RETURN(KERN_NO_SPACE); + } + /* + * Check if what's already there is what we want. + */ + tmp_start = start; + tmp_offset = offset; + if (entry->vme_start < start) { + tmp_start -= start - entry->vme_start; + tmp_offset -= start - entry->vme_start; + + } + for (; entry->vme_start < end; + entry = entry->vme_next) { + /* + * Check if the mapping's attributes + * match the existing map entry. + */ + if (entry == vm_map_to_entry(map) || + entry->vme_start != tmp_start || + entry->is_sub_map != is_submap || + entry->offset != tmp_offset || + entry->needs_copy != needs_copy || + entry->protection != cur_protection || + entry->max_protection != max_protection || + entry->inheritance != inheritance || + entry->alias != alias) { + /* not the same mapping ! */ + RETURN(KERN_NO_SPACE); + } + /* + * Check if the same object is being mapped. + */ + if (is_submap) { + if (entry->object.sub_map != + (vm_map_t) object) { + /* not the same submap */ + RETURN(KERN_NO_SPACE); + } + } else { + if (entry->object.vm_object != object) { + /* not the same VM object... */ + vm_object_t obj2; + + obj2 = entry->object.vm_object; + if ((obj2 == VM_OBJECT_NULL || + obj2->internal) && + (object == VM_OBJECT_NULL || + object->internal)) { + /* + * ... but both are + * anonymous memory, + * so equivalent. + */ + } else { + RETURN(KERN_NO_SPACE); + } + } + } + + tmp_offset += entry->vme_end - entry->vme_start; + tmp_start += entry->vme_end - entry->vme_start; + if (entry->vme_end >= end) { + /* reached the end of our mapping */ + break; + } + } + /* it all matches: let's use what's already there ! */ + RETURN(KERN_MEMORY_PRESENT); + } + + /* + * ... the next region doesn't overlap the * end point. */ @@ -1659,28 +1930,33 @@ vm_map_enter( if (object == VM_OBJECT_NULL) { object = vm_object_allocate(size); object->copy_strategy = MEMORY_OBJECT_COPY_NONE; - object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE; + object->purgable = VM_PURGABLE_NONVOLATILE; offset = (vm_object_offset_t)0; } - } else if ((object == VM_OBJECT_NULL) && - (entry != vm_map_to_entry(map)) && - (entry->vme_end == start) && - (!entry->is_shared) && - (!entry->is_sub_map) && - (entry->alias == alias) && - (entry->inheritance == inheritance) && - (entry->protection == cur_protection) && - (entry->max_protection == max_protection) && - (entry->behavior == VM_BEHAVIOR_DEFAULT) && - (entry->in_transition == 0) && - ((alias == VM_MEMORY_REALLOC) || ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) && - (entry->wired_count == 0)) { /* implies user_wired_count == 0 */ + } else if ((is_submap == FALSE) && + (object == VM_OBJECT_NULL) && + (entry != vm_map_to_entry(map)) && + (entry->vme_end == start) && + (!entry->is_shared) && + (!entry->is_sub_map) && + (entry->alias == alias) && + (entry->inheritance == inheritance) && + (entry->protection == cur_protection) && + (entry->max_protection == max_protection) && + (entry->behavior == VM_BEHAVIOR_DEFAULT) && + (entry->in_transition == 0) && + (entry->no_cache == no_cache) && + ((entry->vme_end - entry->vme_start) + size <= + (alias == VM_MEMORY_REALLOC ? + ANON_CHUNK_SIZE : + NO_COALESCE_LIMIT)) && + (entry->wired_count == 0)) { /* implies user_wired_count == 0 */ if (vm_object_coalesce(entry->object.vm_object, - VM_OBJECT_NULL, - entry->offset, - (vm_object_offset_t) 0, - (vm_map_size_t)(entry->vme_end - entry->vme_start), - (vm_map_size_t)(end - entry->vme_end))) { + VM_OBJECT_NULL, + entry->offset, + (vm_object_offset_t) 0, + (vm_map_size_t)(entry->vme_end - entry->vme_start), + (vm_map_size_t)(end - entry->vme_end))) { /* * Coalesced the two objects - can extend @@ -1694,39 +1970,118 @@ vm_map_enter( } } - /* - * Create a new entry - * LP64todo - for now, we can only allocate 4GB internal objects - * because the default pager can't page bigger ones. Remove this - * when it can. - * - * XXX FBDP - * The reserved "page zero" in each process's address space can - * be arbitrarily large. Splitting it into separate 4GB objects and - * therefore different VM map entries serves no purpose and just - * slows down operations on the VM map, so let's not split the - * allocation into 4GB chunks if the max protection is NONE. That - * memory should never be accessible, so it will never get to the - * default pager. - */ - tmp_start = start; - if (object == VM_OBJECT_NULL && - size > (vm_map_size_t)VM_MAX_ADDRESS && - max_protection != VM_PROT_NONE) - tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS; - else - tmp_end = end; - do { - new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end, - object, offset, needs_copy, FALSE, FALSE, - cur_protection, max_protection, - VM_BEHAVIOR_DEFAULT, inheritance, 0); - new_entry->alias = alias; - entry = new_entry; - } while (tmp_end != end && - (tmp_start = tmp_end) && - (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ? - tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end)); + step = superpage_size ? SUPERPAGE_SIZE : (end - start); + new_entry = NULL; + + for (tmp2_start = start; tmp2_start (vm_map_size_t)ANON_CHUNK_SIZE && + max_protection != VM_PROT_NONE && + superpage_size == 0) + tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE; + else + tmp_end = tmp2_end; + do { + new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end, + object, offset, needs_copy, + FALSE, FALSE, + cur_protection, max_protection, + VM_BEHAVIOR_DEFAULT, + inheritance, 0, no_cache, + permanent, superpage_size); + new_entry->alias = alias; + if (is_submap) { + vm_map_t submap; + boolean_t submap_is_64bit; + boolean_t use_pmap; + + new_entry->is_sub_map = TRUE; + submap = (vm_map_t) object; + submap_is_64bit = vm_map_is_64bit(submap); + use_pmap = (alias == VM_MEMORY_SHARED_PMAP); + #ifndef NO_NESTED_PMAP + if (use_pmap && submap->pmap == NULL) { + /* we need a sub pmap to nest... */ + submap->pmap = pmap_create(0, submap_is_64bit); + if (submap->pmap == NULL) { + /* let's proceed without nesting... */ + } + } + if (use_pmap && submap->pmap != NULL) { + kr = pmap_nest(map->pmap, + submap->pmap, + tmp_start, + tmp_start, + tmp_end - tmp_start); + if (kr != KERN_SUCCESS) { + printf("vm_map_enter: " + "pmap_nest(0x%llx,0x%llx) " + "error 0x%x\n", + (long long)tmp_start, + (long long)tmp_end, + kr); + } else { + /* we're now nested ! */ + new_entry->use_pmap = TRUE; + pmap_empty = FALSE; + } + } + #endif /* NO_NESTED_PMAP */ + } + entry = new_entry; + + if (superpage_size) { + vm_page_t pages, m; + vm_object_t sp_object; + + entry->offset = 0; + + /* allocate one superpage */ + kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0); + if (kr != KERN_SUCCESS) { + new_mapping_established = TRUE; /* will cause deallocation of whole range */ + RETURN(kr); + } + + /* create one vm_object per superpage */ + sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start)); + sp_object->phys_contiguous = TRUE; + sp_object->shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE; + entry->object.vm_object = sp_object; + + /* enter the base pages into the object */ + vm_object_lock(sp_object); + for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) { + m = pages; + pmap_zero_page(m->phys_page); + pages = NEXT_PAGE(m); + *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; + vm_page_insert(m, sp_object, offset); + } + vm_object_unlock(sp_object); + } + } while (tmp_end != tmp2_end && + (tmp_start = tmp_end) && + (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ? + tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end)); + } vm_map_unlock(map); map_locked = FALSE; @@ -1736,7 +2091,7 @@ vm_map_enter( /* Wire down the new entry if the user * requested all new map entries be wired. */ - if (map->wiring_required) { + if ((map->wiring_required)||(superpage_size)) { pmap_empty = FALSE; /* pmap won't be empty */ result = vm_map_wire(map, start, end, new_entry->protection, TRUE); @@ -1750,22 +2105,64 @@ vm_map_enter( (size < (128*1024))) { pmap_empty = FALSE; /* pmap won't be empty */ -#ifdef STACK_ONLY_NX - if (alias != VM_MEMORY_STACK && cur_protection) + if (override_nx(map, alias) && cur_protection) cur_protection |= VM_PROT_EXECUTE; -#endif + vm_map_pmap_enter(map, start, end, object, offset, cur_protection); } - BailOut: ; - if (result == KERN_SUCCESS && - pmap_empty && - !(flags & VM_FLAGS_NO_PMAP_CHECK)) { - assert(vm_map_pmap_is_empty(map, *address, *address+size)); - } +BailOut: ; + if (result == KERN_SUCCESS) { + vm_prot_t pager_prot; + memory_object_t pager; - if (result != KERN_SUCCESS) { + if (pmap_empty && + !(flags & VM_FLAGS_NO_PMAP_CHECK)) { + assert(vm_map_pmap_is_empty(map, + *address, + *address+size)); + } + + /* + * For "named" VM objects, let the pager know that the + * memory object is being mapped. Some pagers need to keep + * track of this, to know when they can reclaim the memory + * object, for example. + * VM calls memory_object_map() for each mapping (specifying + * the protection of each mapping) and calls + * memory_object_last_unmap() when all the mappings are gone. + */ + pager_prot = max_protection; + if (needs_copy) { + /* + * Copy-On-Write mapping: won't modify + * the memory object. + */ + pager_prot &= ~VM_PROT_WRITE; + } + if (!is_submap && + object != VM_OBJECT_NULL && + object->named && + object->pager != MEMORY_OBJECT_NULL) { + vm_object_lock(object); + pager = object->pager; + if (object->named && + pager != MEMORY_OBJECT_NULL) { + assert(object->pager_ready); + vm_object_mapping_wait(object, THREAD_UNINT); + vm_object_mapping_begin(object); + vm_object_unlock(object); + + kr = memory_object_map(pager, pager_prot); + assert(kr == KERN_SUCCESS); + + vm_object_lock(object); + vm_object_mapping_end(object); + } + vm_object_unlock(object); + } + } else { if (new_mapping_established) { /* * We have to get rid of the new mappings since we @@ -1776,7 +2173,7 @@ vm_map_enter( zap_new_map = vm_map_create(PMAP_NULL, *address, *address + size, - TRUE); + map->hdr.entries_pageable); if (!map_locked) { vm_map_lock(map); map_locked = TRUE; @@ -1819,9 +2216,15 @@ vm_map_enter( for (entry2 = vm_map_first_entry(zap_old_map); entry2 != vm_map_to_entry(zap_old_map); entry2 = vm_map_first_entry(zap_old_map)) { + vm_map_size_t entry_size; + + entry_size = (entry2->vme_end - + entry2->vme_start); vm_map_entry_unlink(zap_old_map, entry2); + zap_old_map->size -= entry_size; vm_map_entry_link(map, entry1, entry2); + map->size += entry_size; entry1 = entry2; } if (map->wiring_required) { @@ -1844,11 +2247,11 @@ vm_map_enter( * they may still contain. */ if (zap_old_map != VM_MAP_NULL) { - vm_map_destroy(zap_old_map); + vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); zap_old_map = VM_MAP_NULL; } if (zap_new_map != VM_MAP_NULL) { - vm_map_destroy(zap_new_map); + vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); zap_new_map = VM_MAP_NULL; } @@ -1857,6 +2260,496 @@ vm_map_enter( #undef RETURN } +kern_return_t +vm_map_enter_mem_object( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t initial_size, + vm_map_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_map_address_t map_addr; + vm_map_size_t map_size; + vm_object_t object; + vm_object_size_t size; + kern_return_t result; + + /* + * Check arguments for validity + */ + if ((target_map == VM_MAP_NULL) || + (cur_protection & ~VM_PROT_ALL) || + (max_protection & ~VM_PROT_ALL) || + (inheritance > VM_INHERIT_LAST_VALID) || + initial_size == 0) + return KERN_INVALID_ARGUMENT; + + map_addr = vm_map_trunc_page(*address); + map_size = vm_map_round_page(initial_size); + size = vm_object_round_page(initial_size); + + /* + * Find the vm object (if any) corresponding to this port. + */ + if (!IP_VALID(port)) { + object = VM_OBJECT_NULL; + offset = 0; + copy = FALSE; + } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) { + vm_named_entry_t named_entry; + + named_entry = (vm_named_entry_t) port->ip_kobject; + /* a few checks to make sure user is obeying rules */ + if (size == 0) { + if (offset >= named_entry->size) + return KERN_INVALID_RIGHT; + size = named_entry->size - offset; + } + if ((named_entry->protection & max_protection) != + max_protection) + return KERN_INVALID_RIGHT; + if ((named_entry->protection & cur_protection) != + cur_protection) + return KERN_INVALID_RIGHT; + if (named_entry->size < (offset + size)) + return KERN_INVALID_ARGUMENT; + + /* the callers parameter offset is defined to be the */ + /* offset from beginning of named entry offset in object */ + offset = offset + named_entry->offset; + + named_entry_lock(named_entry); + if (named_entry->is_sub_map) { + vm_map_t submap; + + submap = named_entry->backing.map; + vm_map_lock(submap); + vm_map_reference(submap); + vm_map_unlock(submap); + named_entry_unlock(named_entry); + + result = vm_map_enter(target_map, + &map_addr, + map_size, + mask, + flags | VM_FLAGS_SUBMAP, + (vm_object_t) submap, + offset, + copy, + cur_protection, + max_protection, + inheritance); + if (result != KERN_SUCCESS) { + vm_map_deallocate(submap); + } else { + /* + * No need to lock "submap" just to check its + * "mapped" flag: that flag is never reset + * once it's been set and if we race, we'll + * just end up setting it twice, which is OK. + */ + if (submap->mapped == FALSE) { + /* + * This submap has never been mapped. + * Set its "mapped" flag now that it + * has been mapped. + * This happens only for the first ever + * mapping of a "submap". + */ + vm_map_lock(submap); + submap->mapped = TRUE; + vm_map_unlock(submap); + } + *address = map_addr; + } + return result; + + } else if (named_entry->is_pager) { + unsigned int access; + vm_prot_t protections; + unsigned int wimg_mode; + boolean_t cache_attr; + + protections = named_entry->protection & VM_PROT_ALL; + access = GET_MAP_MEM(named_entry->protection); + + object = vm_object_enter(named_entry->backing.pager, + named_entry->size, + named_entry->internal, + FALSE, + FALSE); + if (object == VM_OBJECT_NULL) { + named_entry_unlock(named_entry); + return KERN_INVALID_OBJECT; + } + + /* JMM - drop reference on pager here */ + + /* create an extra ref for the named entry */ + vm_object_lock(object); + vm_object_reference_locked(object); + named_entry->backing.object = object; + named_entry->is_pager = FALSE; + named_entry_unlock(named_entry); + + wimg_mode = object->wimg_bits; + if (access == MAP_MEM_IO) { + wimg_mode = VM_WIMG_IO; + } else if (access == MAP_MEM_COPYBACK) { + wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_WTHRU) { + wimg_mode = VM_WIMG_WTHRU; + } else if (access == MAP_MEM_WCOMB) { + wimg_mode = VM_WIMG_WCOMB; + } + if (wimg_mode == VM_WIMG_IO || + wimg_mode == VM_WIMG_WCOMB) + cache_attr = TRUE; + else + cache_attr = FALSE; + + /* wait for object (if any) to be ready */ + if (!named_entry->internal) { + while (!object->pager_ready) { + vm_object_wait( + object, + VM_OBJECT_EVENT_PAGER_READY, + THREAD_UNINT); + vm_object_lock(object); + } + } + + if (object->wimg_bits != wimg_mode) { + vm_page_t p; + + vm_object_paging_wait(object, THREAD_UNINT); + + object->wimg_bits = wimg_mode; + queue_iterate(&object->memq, p, vm_page_t, listq) { + if (!p->fictitious) { + if (p->pmapped) + pmap_disconnect(p->phys_page); + if (cache_attr) + pmap_sync_page_attributes_phys(p->phys_page); + } + } + } + object->true_share = TRUE; + if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) + object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; + vm_object_unlock(object); + } else { + /* This is the case where we are going to map */ + /* an already mapped object. If the object is */ + /* not ready it is internal. An external */ + /* object cannot be mapped until it is ready */ + /* we can therefore avoid the ready check */ + /* in this case. */ + object = named_entry->backing.object; + assert(object != VM_OBJECT_NULL); + named_entry_unlock(named_entry); + vm_object_reference(object); + } + } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { + /* + * JMM - This is temporary until we unify named entries + * and raw memory objects. + * + * Detected fake ip_kotype for a memory object. In + * this case, the port isn't really a port at all, but + * instead is just a raw memory object. + */ + + object = vm_object_enter((memory_object_t)port, + size, FALSE, FALSE, FALSE); + if (object == VM_OBJECT_NULL) + return KERN_INVALID_OBJECT; + + /* wait for object (if any) to be ready */ + if (object != VM_OBJECT_NULL) { + if (object == kernel_object) { + printf("Warning: Attempt to map kernel object" + " by a non-private kernel entity\n"); + return KERN_INVALID_OBJECT; + } + if (!object->pager_ready) { + vm_object_lock(object); + + while (!object->pager_ready) { + vm_object_wait(object, + VM_OBJECT_EVENT_PAGER_READY, + THREAD_UNINT); + vm_object_lock(object); + } + vm_object_unlock(object); + } + } + } else { + return KERN_INVALID_OBJECT; + } + + if (object != VM_OBJECT_NULL && + object->named && + object->pager != MEMORY_OBJECT_NULL && + object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { + memory_object_t pager; + vm_prot_t pager_prot; + kern_return_t kr; + + /* + * For "named" VM objects, let the pager know that the + * memory object is being mapped. Some pagers need to keep + * track of this, to know when they can reclaim the memory + * object, for example. + * VM calls memory_object_map() for each mapping (specifying + * the protection of each mapping) and calls + * memory_object_last_unmap() when all the mappings are gone. + */ + pager_prot = max_protection; + if (copy) { + /* + * Copy-On-Write mapping: won't modify the + * memory object. + */ + pager_prot &= ~VM_PROT_WRITE; + } + vm_object_lock(object); + pager = object->pager; + if (object->named && + pager != MEMORY_OBJECT_NULL && + object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { + assert(object->pager_ready); + vm_object_mapping_wait(object, THREAD_UNINT); + vm_object_mapping_begin(object); + vm_object_unlock(object); + + kr = memory_object_map(pager, pager_prot); + assert(kr == KERN_SUCCESS); + + vm_object_lock(object); + vm_object_mapping_end(object); + } + vm_object_unlock(object); + } + + /* + * Perform the copy if requested + */ + + if (copy) { + vm_object_t new_object; + vm_object_offset_t new_offset; + + result = vm_object_copy_strategically(object, offset, size, + &new_object, &new_offset, + ©); + + + if (result == KERN_MEMORY_RESTART_COPY) { + boolean_t success; + boolean_t src_needs_copy; + + /* + * XXX + * We currently ignore src_needs_copy. + * This really is the issue of how to make + * MEMORY_OBJECT_COPY_SYMMETRIC safe for + * non-kernel users to use. Solution forthcoming. + * In the meantime, since we don't allow non-kernel + * memory managers to specify symmetric copy, + * we won't run into problems here. + */ + new_object = object; + new_offset = offset; + success = vm_object_copy_quickly(&new_object, + new_offset, size, + &src_needs_copy, + ©); + assert(success); + result = KERN_SUCCESS; + } + /* + * Throw away the reference to the + * original object, as it won't be mapped. + */ + + vm_object_deallocate(object); + + if (result != KERN_SUCCESS) + return result; + + object = new_object; + offset = new_offset; + } + + result = vm_map_enter(target_map, + &map_addr, map_size, + (vm_map_offset_t)mask, + flags, + object, offset, + copy, + cur_protection, max_protection, inheritance); + if (result != KERN_SUCCESS) + vm_object_deallocate(object); + *address = map_addr; + return result; +} + + + + +kern_return_t +vm_map_enter_mem_object_control( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t initial_size, + vm_map_offset_t mask, + int flags, + memory_object_control_t control, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_map_address_t map_addr; + vm_map_size_t map_size; + vm_object_t object; + vm_object_size_t size; + kern_return_t result; + memory_object_t pager; + vm_prot_t pager_prot; + kern_return_t kr; + + /* + * Check arguments for validity + */ + if ((target_map == VM_MAP_NULL) || + (cur_protection & ~VM_PROT_ALL) || + (max_protection & ~VM_PROT_ALL) || + (inheritance > VM_INHERIT_LAST_VALID) || + initial_size == 0) + return KERN_INVALID_ARGUMENT; + + map_addr = vm_map_trunc_page(*address); + map_size = vm_map_round_page(initial_size); + size = vm_object_round_page(initial_size); + + object = memory_object_control_to_vm_object(control); + + if (object == VM_OBJECT_NULL) + return KERN_INVALID_OBJECT; + + if (object == kernel_object) { + printf("Warning: Attempt to map kernel object" + " by a non-private kernel entity\n"); + return KERN_INVALID_OBJECT; + } + + vm_object_lock(object); + object->ref_count++; + vm_object_res_reference(object); + + /* + * For "named" VM objects, let the pager know that the + * memory object is being mapped. Some pagers need to keep + * track of this, to know when they can reclaim the memory + * object, for example. + * VM calls memory_object_map() for each mapping (specifying + * the protection of each mapping) and calls + * memory_object_last_unmap() when all the mappings are gone. + */ + pager_prot = max_protection; + if (copy) { + pager_prot &= ~VM_PROT_WRITE; + } + pager = object->pager; + if (object->named && + pager != MEMORY_OBJECT_NULL && + object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { + assert(object->pager_ready); + vm_object_mapping_wait(object, THREAD_UNINT); + vm_object_mapping_begin(object); + vm_object_unlock(object); + + kr = memory_object_map(pager, pager_prot); + assert(kr == KERN_SUCCESS); + + vm_object_lock(object); + vm_object_mapping_end(object); + } + vm_object_unlock(object); + + /* + * Perform the copy if requested + */ + + if (copy) { + vm_object_t new_object; + vm_object_offset_t new_offset; + + result = vm_object_copy_strategically(object, offset, size, + &new_object, &new_offset, + ©); + + + if (result == KERN_MEMORY_RESTART_COPY) { + boolean_t success; + boolean_t src_needs_copy; + + /* + * XXX + * We currently ignore src_needs_copy. + * This really is the issue of how to make + * MEMORY_OBJECT_COPY_SYMMETRIC safe for + * non-kernel users to use. Solution forthcoming. + * In the meantime, since we don't allow non-kernel + * memory managers to specify symmetric copy, + * we won't run into problems here. + */ + new_object = object; + new_offset = offset; + success = vm_object_copy_quickly(&new_object, + new_offset, size, + &src_needs_copy, + ©); + assert(success); + result = KERN_SUCCESS; + } + /* + * Throw away the reference to the + * original object, as it won't be mapped. + */ + + vm_object_deallocate(object); + + if (result != KERN_SUCCESS) + return result; + + object = new_object; + offset = new_offset; + } + + result = vm_map_enter(target_map, + &map_addr, map_size, + (vm_map_offset_t)mask, + flags, + object, offset, + copy, + cur_protection, max_protection, inheritance); + if (result != KERN_SUCCESS) + vm_object_deallocate(object); + *address = map_addr; + + return result; +} + #if VM_CPM @@ -1899,7 +2792,6 @@ vm_map_enter_cpm( *addr = 0; return KERN_SUCCESS; } - if (anywhere) *addr = vm_map_min(map); else @@ -1914,7 +2806,7 @@ vm_map_enter_cpm( if (size > VM_MAX_ADDRESS) return KERN_RESOURCE_SHORTAGE; if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size), - &pages, TRUE)) != KERN_SUCCESS) + &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) return kr; cpm_obj = vm_object_allocate((vm_object_size_t)size); @@ -1940,6 +2832,7 @@ vm_map_enter_cpm( assert(!m->wanted); assert(!m->pageout); assert(!m->tabled); + assert(VM_PAGE_WIRED(m)); /* * ENCRYPTED SWAP: * "m" is not supposed to be pageable, so it @@ -1974,17 +2867,17 @@ vm_map_enter_cpm( */ kr = vm_map_enter( - map, - addr, - size, - (vm_map_offset_t)0, - flags, - cpm_obj, - (vm_object_offset_t)0, - FALSE, - VM_PROT_ALL, - VM_PROT_ALL, - VM_INHERIT_DEFAULT); + map, + addr, + size, + (vm_map_offset_t)0, + flags, + cpm_obj, + (vm_object_offset_t)0, + FALSE, + VM_PROT_ALL, + VM_PROT_ALL, + VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) { /* @@ -2018,13 +2911,21 @@ vm_map_enter_cpm( */ for (offset = 0, va = start; offset < size; va += PAGE_SIZE, offset += PAGE_SIZE) { + int type_of_fault; + vm_object_lock(cpm_obj); m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); - vm_object_unlock(cpm_obj); assert(m != VM_PAGE_NULL); - PMAP_ENTER(pmap, va, m, VM_PROT_ALL, - ((unsigned int)(m->object->wimg_bits)) & VM_WIMG_MASK, - TRUE); + + vm_page_zero_fill(m); + + type_of_fault = DBG_ZERO_FILL_FAULT; + + vm_fault_enter(m, pmap, va, VM_PROT_ALL, + VM_PAGE_WIRED(m), FALSE, FALSE, + &type_of_fault); + + vm_object_unlock(cpm_obj); } #if MACH_ASSERT @@ -2087,61 +2988,123 @@ vm_map_enter_cpm( } #endif /* VM_CPM */ +/* Not used without nested pmaps */ +#ifndef NO_NESTED_PMAP /* - * vm_map_clip_start: [ internal use only ] - * - * Asserts that the given entry begins at or after - * the specified address; if necessary, - * it splits the entry into two. + * Clip and unnest a portion of a nested submap mapping. */ -#ifndef NO_NESTED_PMAP -#define vm_map_clip_start(map, entry, startaddr) \ -MACRO_BEGIN \ - vm_map_t VMCS_map; \ - vm_map_entry_t VMCS_entry; \ - vm_map_offset_t VMCS_startaddr; \ - VMCS_map = (map); \ - VMCS_entry = (entry); \ - VMCS_startaddr = (startaddr); \ - if (VMCS_startaddr > VMCS_entry->vme_start) { \ - if(entry->use_pmap) { \ - vm_map_offset_t pmap_base_addr; \ - \ - pmap_base_addr = 0xF0000000 & entry->vme_start; \ - pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \ - entry->use_pmap = FALSE; \ - } else if(entry->object.vm_object \ - && !entry->is_sub_map \ - && entry->object.vm_object->phys_contiguous) { \ - pmap_remove(map->pmap, \ - (addr64_t)(entry->vme_start), \ - (addr64_t)(entry->vme_end)); \ - } \ - _vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\ - } \ - UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \ -MACRO_END -#else /* NO_NESTED_PMAP */ -#define vm_map_clip_start(map, entry, startaddr) \ -MACRO_BEGIN \ - vm_map_t VMCS_map; \ - vm_map_entry_t VMCS_entry; \ - vm_map_offset_t VMCS_startaddr; \ - VMCS_map = (map); \ - VMCS_entry = (entry); \ - VMCS_startaddr = (startaddr); \ - if (VMCS_startaddr > VMCS_entry->vme_start) { \ - _vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\ - } \ - UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \ -MACRO_END -#endif /* NO_NESTED_PMAP */ -#define vm_map_copy_clip_start(copy, entry, startaddr) \ - MACRO_BEGIN \ - if ((startaddr) > (entry)->vme_start) \ - _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \ - MACRO_END + +static void +vm_map_clip_unnest( + vm_map_t map, + vm_map_entry_t entry, + vm_map_offset_t start_unnest, + vm_map_offset_t end_unnest) +{ + vm_map_offset_t old_start_unnest = start_unnest; + vm_map_offset_t old_end_unnest = end_unnest; + + assert(entry->is_sub_map); + assert(entry->object.sub_map != NULL); + + /* + * Query the platform for the optimal unnest range. + * DRK: There's some duplication of effort here, since + * callers may have adjusted the range to some extent. This + * routine was introduced to support 1GiB subtree nesting + * for x86 platforms, which can also nest on 2MiB boundaries + * depending on size/alignment. + */ + if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) { + log_unnest_badness(map, old_start_unnest, old_end_unnest); + } + + if (entry->vme_start > start_unnest || + entry->vme_end < end_unnest) { + panic("vm_map_clip_unnest(0x%llx,0x%llx): " + "bad nested entry: start=0x%llx end=0x%llx\n", + (long long)start_unnest, (long long)end_unnest, + (long long)entry->vme_start, (long long)entry->vme_end); + } + + if (start_unnest > entry->vme_start) { + _vm_map_clip_start(&map->hdr, + entry, + start_unnest); + UPDATE_FIRST_FREE(map, map->first_free); + } + if (entry->vme_end > end_unnest) { + _vm_map_clip_end(&map->hdr, + entry, + end_unnest); + UPDATE_FIRST_FREE(map, map->first_free); + } + + pmap_unnest(map->pmap, + entry->vme_start, + entry->vme_end - entry->vme_start); + if ((map->mapped) && (map->ref_count)) { + /* clean up parent map/maps */ + vm_map_submap_pmap_clean( + map, entry->vme_start, + entry->vme_end, + entry->object.sub_map, + entry->offset); + } + entry->use_pmap = FALSE; +} +#endif /* NO_NESTED_PMAP */ + +/* + * vm_map_clip_start: [ internal use only ] + * + * Asserts that the given entry begins at or after + * the specified address; if necessary, + * it splits the entry into two. + */ +static void +vm_map_clip_start( + vm_map_t map, + vm_map_entry_t entry, + vm_map_offset_t startaddr) +{ +#ifndef NO_NESTED_PMAP + if (entry->use_pmap && + startaddr >= entry->vme_start) { + vm_map_offset_t start_unnest, end_unnest; + + /* + * Make sure "startaddr" is no longer in a nested range + * before we clip. Unnest only the minimum range the platform + * can handle. + * vm_map_clip_unnest may perform additional adjustments to + * the unnest range. + */ + start_unnest = startaddr & ~(pmap_nesting_size_min - 1); + end_unnest = start_unnest + pmap_nesting_size_min; + vm_map_clip_unnest(map, entry, start_unnest, end_unnest); + } +#endif /* NO_NESTED_PMAP */ + if (startaddr > entry->vme_start) { + if (entry->object.vm_object && + !entry->is_sub_map && + entry->object.vm_object->phys_contiguous) { + pmap_remove(map->pmap, + (addr64_t)(entry->vme_start), + (addr64_t)(entry->vme_end)); + } + _vm_map_clip_start(&map->hdr, entry, startaddr); + UPDATE_FIRST_FREE(map, map->first_free); + } +} + + +#define vm_map_copy_clip_start(copy, entry, startaddr) \ + MACRO_BEGIN \ + if ((startaddr) > (entry)->vme_start) \ + _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \ + MACRO_END /* * This routine is called only when it is known that @@ -2173,7 +3136,7 @@ _vm_map_clip_start( _vm_map_entry_link(map_header, entry->vme_prev, new_entry); if (entry->is_sub_map) - vm_map_reference(new_entry->object.sub_map); + vm_map_reference(new_entry->object.sub_map); else vm_object_reference(new_entry->object.vm_object); } @@ -2186,48 +3149,49 @@ _vm_map_clip_start( * the specified address; if necessary, * it splits the entry into two. */ -#ifndef NO_NESTED_PMAP -#define vm_map_clip_end(map, entry, endaddr) \ -MACRO_BEGIN \ - vm_map_t VMCE_map; \ - vm_map_entry_t VMCE_entry; \ - vm_map_offset_t VMCE_endaddr; \ - VMCE_map = (map); \ - VMCE_entry = (entry); \ - VMCE_endaddr = (endaddr); \ - if (VMCE_endaddr < VMCE_entry->vme_end) { \ - if(entry->use_pmap) { \ - vm_map_offset_t pmap_base_addr; \ - \ - pmap_base_addr = 0xF0000000 & entry->vme_start; \ - pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \ - entry->use_pmap = FALSE; \ - } else if(entry->object.vm_object \ - && !entry->is_sub_map \ - && entry->object.vm_object->phys_contiguous) { \ - pmap_remove(map->pmap, \ - (addr64_t)(entry->vme_start), \ - (addr64_t)(entry->vme_end)); \ - } \ - _vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \ - } \ - UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \ -MACRO_END -#else /* NO_NESTED_PMAP */ -#define vm_map_clip_end(map, entry, endaddr) \ -MACRO_BEGIN \ - vm_map_t VMCE_map; \ - vm_map_entry_t VMCE_entry; \ - vm_map_offset_t VMCE_endaddr; \ - VMCE_map = (map); \ - VMCE_entry = (entry); \ - VMCE_endaddr = (endaddr); \ - if (VMCE_endaddr < VMCE_entry->vme_end) { \ - _vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \ - } \ - UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \ -MACRO_END -#endif /* NO_NESTED_PMAP */ +static void +vm_map_clip_end( + vm_map_t map, + vm_map_entry_t entry, + vm_map_offset_t endaddr) +{ + if (endaddr > entry->vme_end) { + /* + * Within the scope of this clipping, limit "endaddr" to + * the end of this map entry... + */ + endaddr = entry->vme_end; + } +#ifndef NO_NESTED_PMAP + if (entry->use_pmap) { + vm_map_offset_t start_unnest, end_unnest; + + /* + * Make sure the range between the start of this entry and + * the new "endaddr" is no longer nested before we clip. + * Unnest only the minimum range the platform can handle. + * vm_map_clip_unnest may perform additional adjustments to + * the unnest range. + */ + start_unnest = entry->vme_start; + end_unnest = + (endaddr + pmap_nesting_size_min - 1) & + ~(pmap_nesting_size_min - 1); + vm_map_clip_unnest(map, entry, start_unnest, end_unnest); + } +#endif /* NO_NESTED_PMAP */ + if (endaddr < entry->vme_end) { + if (entry->object.vm_object && + !entry->is_sub_map && + entry->object.vm_object->phys_contiguous) { + pmap_remove(map->pmap, + (addr64_t)(entry->vme_start), + (addr64_t)(entry->vme_end)); + } + _vm_map_clip_end(&map->hdr, entry, endaddr); + UPDATE_FIRST_FREE(map, map->first_free); + } +} #define vm_map_copy_clip_end(copy, entry, endaddr) \ @@ -2244,7 +3208,7 @@ static void _vm_map_clip_end( register struct vm_map_header *map_header, register vm_map_entry_t entry, - register vm_map_offset_t end) + register vm_map_offset_t end) { register vm_map_entry_t new_entry; @@ -2262,7 +3226,7 @@ _vm_map_clip_end( _vm_map_entry_link(map_header, entry, new_entry); if (entry->is_sub_map) - vm_map_reference(new_entry->object.sub_map); + vm_map_reference(new_entry->object.sub_map); else vm_object_reference(new_entry->object.vm_object); } @@ -2274,15 +3238,15 @@ _vm_map_clip_end( * Asserts that the starting and ending region * addresses fall within the valid range of the map. */ -#define VM_MAP_RANGE_CHECK(map, start, end) \ - { \ - if (start < vm_map_min(map)) \ - start = vm_map_min(map); \ - if (end > vm_map_max(map)) \ - end = vm_map_max(map); \ - if (start > end) \ - start = end; \ - } +#define VM_MAP_RANGE_CHECK(map, start, end) \ + MACRO_BEGIN \ + if (start < vm_map_min(map)) \ + start = vm_map_min(map); \ + if (end > vm_map_max(map)) \ + end = vm_map_max(map); \ + if (start > end) \ + start = end; \ + MACRO_END /* * vm_map_range_check: [ internal use only ] @@ -2381,21 +3345,18 @@ vm_map_submap( vm_map_lock(map); - submap->mapped = TRUE; - - VM_MAP_RANGE_CHECK(map, start, end); - - if (vm_map_lookup_entry(map, start, &entry)) { - vm_map_clip_start(map, entry, start); - } - else + if (! vm_map_lookup_entry(map, start, &entry)) { entry = entry->vme_next; + } - if(entry == vm_map_to_entry(map)) { + if (entry == vm_map_to_entry(map) || + entry->is_sub_map) { vm_map_unlock(map); return KERN_INVALID_ARGUMENT; } + assert(!entry->use_pmap); /* we don't want to unnest anything here */ + vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); if ((entry->vme_start == start) && (entry->vme_end == end) && @@ -2405,34 +3366,37 @@ vm_map_submap( (object->copy == VM_OBJECT_NULL) && (object->shadow == VM_OBJECT_NULL) && (!object->pager_created)) { - entry->offset = (vm_object_offset_t)offset; - entry->object.vm_object = VM_OBJECT_NULL; - vm_object_deallocate(object); - entry->is_sub_map = TRUE; - entry->object.sub_map = submap; - vm_map_reference(submap); + entry->offset = (vm_object_offset_t)offset; + entry->object.vm_object = VM_OBJECT_NULL; + vm_object_deallocate(object); + entry->is_sub_map = TRUE; + entry->object.sub_map = submap; + vm_map_reference(submap); + submap->mapped = TRUE; + #ifndef NO_NESTED_PMAP - if ((use_pmap) && (offset == 0)) { - /* nest if platform code will allow */ - if(submap->pmap == NULL) { - submap->pmap = pmap_create((vm_map_size_t) 0, FALSE); - if(submap->pmap == PMAP_NULL) { - vm_map_unlock(map); - return(KERN_NO_SPACE); - } + if (use_pmap) { + /* nest if platform code will allow */ + if(submap->pmap == NULL) { + submap->pmap = pmap_create((vm_map_size_t) 0, FALSE); + if(submap->pmap == PMAP_NULL) { + vm_map_unlock(map); + return(KERN_NO_SPACE); } - result = pmap_nest(map->pmap, (entry->object.sub_map)->pmap, - (addr64_t)start, - (addr64_t)start, - (uint64_t)(end - start)); - if(result) - panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result); - entry->use_pmap = TRUE; } + result = pmap_nest(map->pmap, + (entry->object.sub_map)->pmap, + (addr64_t)start, + (addr64_t)start, + (uint64_t)(end - start)); + if(result) + panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result); + entry->use_pmap = TRUE; + } #else /* NO_NESTED_PMAP */ - pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end); + pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end); #endif /* NO_NESTED_PMAP */ - result = KERN_SUCCESS; + result = KERN_SUCCESS; } vm_map_unlock(map); @@ -2456,14 +3420,13 @@ vm_map_protect( register boolean_t set_max) { register vm_map_entry_t current; - register vm_map_offset_t prev; + register vm_map_offset_t prev; vm_map_entry_t entry; vm_prot_t new_max; - boolean_t clip; XPR(XPR_VM_MAP, - "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d", - (integer_t)map, start, end, new_prot, set_max); + "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d", + map, start, end, new_prot, set_max); vm_map_lock(map); @@ -2476,18 +3439,24 @@ vm_map_protect( return(KERN_INVALID_ADDRESS); } - /* - * Lookup the entry. If it doesn't start in a valid - * entry, return an error. Remember if we need to - * clip the entry. We don't do it here because we don't - * want to make any changes until we've scanned the - * entire range below for address and protection - * violations. - */ - if (!(clip = vm_map_lookup_entry(map, start, &entry))) { - vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); - } + while(1) { + /* + * Lookup the entry. If it doesn't start in a valid + * entry, return an error. + */ + if (! vm_map_lookup_entry(map, start, &entry)) { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + + if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */ + start = SUPERPAGE_ROUND_DOWN(start); + continue; + } + break; + } + if (entry->superpage_size) + end = SUPERPAGE_ROUND_UP(end); /* * Make a first pass to check for protection and address @@ -2521,6 +3490,15 @@ vm_map_protect( } } +#if CONFIG_EMBEDDED + if (new_prot & VM_PROT_WRITE) { + if (new_prot & VM_PROT_EXECUTE) { + printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__); + new_prot &= ~VM_PROT_EXECUTE; + } + } +#endif + prev = current->vme_end; current = current->vme_next; } @@ -2536,9 +3514,11 @@ vm_map_protect( */ current = entry; - if (clip) { - vm_map_clip_start(map, entry, start); + if (current != vm_map_to_entry(map)) { + /* clip and unnest if necessary */ + vm_map_clip_start(map, current, start); } + while ((current != vm_map_to_entry(map)) && (current->vme_start < end)) { @@ -2546,6 +3526,8 @@ vm_map_protect( vm_map_clip_end(map, current, end); + assert(!current->use_pmap); /* clipping did unnest if needed */ + old_prot = current->protection; if(new_prot & VM_PROT_COPY) { @@ -2561,8 +3543,8 @@ vm_map_protect( if (set_max) current->protection = (current->max_protection = - new_prot & ~VM_PROT_COPY) & - old_prot; + new_prot & ~VM_PROT_COPY) & + old_prot; else current->protection = new_prot & ~VM_PROT_COPY; @@ -2575,54 +3557,29 @@ vm_map_protect( * write-protect fault occurred, it will be fixed up * properly, COW or not. */ - /* the 256M hack for existing hardware limitations */ if (current->protection != old_prot) { - if(current->is_sub_map && current->use_pmap) { - vm_map_offset_t pmap_base_addr; - vm_map_offset_t pmap_end_addr; -#ifdef NO_NESTED_PMAP - __unused -#endif /* NO_NESTED_PMAP */ - vm_map_entry_t local_entry; - - - pmap_base_addr = 0xF0000000 & current->vme_start; - pmap_end_addr = (pmap_base_addr + 0x10000000) - 1; -#ifndef NO_NESTED_PMAP - if(!vm_map_lookup_entry(map, - pmap_base_addr, &local_entry)) - panic("vm_map_protect: nested pmap area is missing"); - while ((local_entry != vm_map_to_entry(map)) && - (local_entry->vme_start < pmap_end_addr)) { - local_entry->use_pmap = FALSE; - local_entry = local_entry->vme_next; - } - pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); -#endif /* NO_NESTED_PMAP */ - } - if (!(current->protection & VM_PROT_WRITE)) { /* Look one level in we support nested pmaps */ /* from mapped submaps which are direct entries */ /* in our map */ - vm_prot_t prot; + vm_prot_t prot; - prot = current->protection; -#ifdef STACK_ONLY_NX - if (current->alias != VM_MEMORY_STACK && prot) + prot = current->protection & ~VM_PROT_WRITE; + + if (override_nx(map, current->alias) && prot) prot |= VM_PROT_EXECUTE; -#endif + if (current->is_sub_map && current->use_pmap) { pmap_protect(current->object.sub_map->pmap, - current->vme_start, - current->vme_end, - prot); + current->vme_start, + current->vme_end, + prot); } else { - pmap_protect(map->pmap, current->vme_start, - current->vme_end, - prot); + pmap_protect(map->pmap, + current->vme_start, + current->vme_end, + prot); } - } } current = current->vme_next; } @@ -2662,7 +3619,6 @@ vm_map_inherit( if (vm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; - vm_map_clip_start(map, entry, start); } else { temp_entry = temp_entry->vme_next; @@ -2683,9 +3639,14 @@ vm_map_inherit( } entry = temp_entry; + if (entry != vm_map_to_entry(map)) { + /* clip and unnest if necessary */ + vm_map_clip_start(map, entry, start); + } while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { vm_map_clip_end(map, entry, end); + assert(!entry->use_pmap); /* clip did unnest if needed */ entry->inheritance = new_inheritance; @@ -2696,6 +3657,117 @@ vm_map_inherit( return(KERN_SUCCESS); } +/* + * Update the accounting for the amount of wired memory in this map. If the user has + * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails. + */ + +static kern_return_t +add_wire_counts( + vm_map_t map, + vm_map_entry_t entry, + boolean_t user_wire) +{ + vm_map_size_t size; + + if (user_wire) { + + /* + * We're wiring memory at the request of the user. Check if this is the first time the user is wiring + * this map entry. + */ + + if (entry->user_wired_count == 0) { + size = entry->vme_end - entry->vme_start; + + /* + * Since this is the first time the user is wiring this map entry, check to see if we're + * exceeding the user wire limits. There is a per map limit which is the smaller of either + * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also + * a system-wide limit on the amount of memory all users can wire. If the user is over either + * limit, then we fail. + */ + + if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) || + size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit || + size + ptoa_64(vm_page_wire_count) > max_mem - vm_global_no_user_wire_amount) + return KERN_RESOURCE_SHORTAGE; + + /* + * The first time the user wires an entry, we also increment the wired_count and add this to + * the total that has been wired in the map. + */ + + if (entry->wired_count >= MAX_WIRE_COUNT) + return KERN_FAILURE; + + entry->wired_count++; + map->user_wire_size += size; + } + + if (entry->user_wired_count >= MAX_WIRE_COUNT) + return KERN_FAILURE; + + entry->user_wired_count++; + + } else { + + /* + * The kernel's wiring the memory. Just bump the count and continue. + */ + + if (entry->wired_count >= MAX_WIRE_COUNT) + panic("vm_map_wire: too many wirings"); + + entry->wired_count++; + } + + return KERN_SUCCESS; +} + +/* + * Update the memory wiring accounting now that the given map entry is being unwired. + */ + +static void +subtract_wire_counts( + vm_map_t map, + vm_map_entry_t entry, + boolean_t user_wire) +{ + + if (user_wire) { + + /* + * We're unwiring memory at the request of the user. See if we're removing the last user wire reference. + */ + + if (entry->user_wired_count == 1) { + + /* + * We're removing the last user wire reference. Decrement the wired_count and the total + * user wired memory for this map. + */ + + assert(entry->wired_count >= 1); + entry->wired_count--; + map->user_wire_size -= entry->vme_end - entry->vme_start; + } + + assert(entry->user_wired_count >= 1); + entry->user_wired_count--; + + } else { + + /* + * The kernel is unwiring the memory. Just update the count. + */ + + assert(entry->wired_count >= 1); + entry->wired_count--; + } +} + /* * vm_map_wire: * @@ -2745,19 +3817,38 @@ vm_map_wire_nested( return KERN_SUCCESS; } - if (vm_map_lookup_entry(map, start, &first_entry)) { + need_wakeup = FALSE; + cur_thread = current_thread(); + + s = start; + rc = KERN_SUCCESS; + + if (vm_map_lookup_entry(map, s, &first_entry)) { entry = first_entry; - /* vm_map_clip_start will be done later. */ + /* + * vm_map_clip_start will be done later. + * We don't want to unnest any nested submaps here ! + */ } else { /* Start address is not in map */ - vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); + rc = KERN_INVALID_ADDRESS; + goto done; } - s=start; - need_wakeup = FALSE; - cur_thread = current_thread(); - while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { + while ((entry != vm_map_to_entry(map)) && (s < end)) { + /* + * At this point, we have wired from "start" to "s". + * We still need to wire from "s" to "end". + * + * "entry" hasn't been clipped, so it could start before "s" + * and/or end after "end". + */ + + /* "e" is how far we want to wire in this entry */ + e = entry->vme_end; + if (e > end) + e = end; + /* * If another thread is wiring/unwiring this entry then * block after informing other thread to wake us up. @@ -2769,8 +3860,10 @@ vm_map_wire_nested( * We have not clipped the entry. Make sure that * the start address is in range so that the lookup * below will succeed. + * "s" is the current starting point: we've already + * wired from "start" to "s" and we still have + * to wire from "s" to "end". */ - s = entry->vme_start < start? start: entry->vme_start; entry->needs_wakeup = TRUE; @@ -2786,8 +3879,8 @@ vm_map_wire_nested( * User wiring is interruptible */ wait_result = vm_map_entry_wait(map, - (user_wire) ? THREAD_ABORTSAFE : - THREAD_UNINT); + (user_wire) ? THREAD_ABORTSAFE : + THREAD_UNINT); if (user_wire && wait_result == THREAD_INTERRUPTED) { /* * undo the wirings we have done so far @@ -2795,9 +3888,8 @@ vm_map_wire_nested( * because we cannot tell if we were the * only one waiting. */ - vm_map_unlock(map); - vm_map_unwire(map, start, s, user_wire); - return(KERN_FAILURE); + rc = KERN_FAILURE; + goto done; } /* @@ -2818,30 +3910,37 @@ vm_map_wire_nested( * entry. let vm_map_unwire worry about * checking the validity of the range. */ - vm_map_unlock(map); - vm_map_unwire(map, start, s, user_wire); - return(KERN_FAILURE); + rc = KERN_FAILURE; + goto done; } entry = first_entry; continue; } - - if(entry->is_sub_map) { + + if (entry->is_sub_map) { vm_map_offset_t sub_start; vm_map_offset_t sub_end; vm_map_offset_t local_start; vm_map_offset_t local_end; pmap_t pmap; - - vm_map_clip_start(map, entry, start); + + vm_map_clip_start(map, entry, s); vm_map_clip_end(map, entry, end); sub_start = entry->offset; - sub_end = entry->vme_end - entry->vme_start; - sub_end += entry->offset; - + sub_end = entry->vme_end; + sub_end += entry->offset - entry->vme_start; + local_end = entry->vme_end; if(map_pmap == NULL) { + vm_object_t object; + vm_object_offset_t offset; + vm_prot_t prot; + boolean_t wired; + vm_map_entry_t local_entry; + vm_map_version_t version; + vm_map_t lookup_map; + if(entry->use_pmap) { pmap = entry->object.sub_map->pmap; /* ppc implementation requires that */ @@ -2850,127 +3949,114 @@ vm_map_wire_nested( #ifdef notdef pmap_addr = sub_start; #endif - pmap_addr = start; + pmap_addr = s; } else { pmap = map->pmap; - pmap_addr = start; + pmap_addr = s; } + if (entry->wired_count) { - if (entry->wired_count - >= MAX_WIRE_COUNT) - panic("vm_map_wire: too many wirings"); - - if (user_wire && - entry->user_wired_count - >= MAX_WIRE_COUNT) { - vm_map_unlock(map); - vm_map_unwire(map, start, - entry->vme_start, user_wire); - return(KERN_FAILURE); - } - if(user_wire) - entry->user_wired_count++; - if((!user_wire) || - (entry->user_wired_count == 0)) - entry->wired_count++; + if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) + goto done; + + /* + * The map was not unlocked: + * no need to goto re-lookup. + * Just go directly to next entry. + */ entry = entry->vme_next; + s = entry->vme_start; continue; - } else { - vm_object_t object; - vm_map_offset_t offset_hi; - vm_map_offset_t offset_lo; - vm_object_offset_t offset; - vm_prot_t prot; - boolean_t wired; - vm_behavior_t behavior; - vm_map_entry_t local_entry; - vm_map_version_t version; - vm_map_t lookup_map; - - /* call vm_map_lookup_locked to */ - /* cause any needs copy to be */ - /* evaluated */ - local_start = entry->vme_start; - lookup_map = map; - vm_map_lock_write_to_read(map); - if(vm_map_lookup_locked( - &lookup_map, local_start, - access_type, - &version, &object, - &offset, &prot, &wired, - &behavior, &offset_lo, - &offset_hi, &real_map)) { - - vm_map_unlock_read(lookup_map); - vm_map_unwire(map, start, - entry->vme_start, user_wire); - return(KERN_FAILURE); - } - if(real_map != lookup_map) - vm_map_unlock(real_map); - vm_map_unlock_read(lookup_map); - vm_map_lock(map); - vm_object_unlock(object); + } - if (!vm_map_lookup_entry(map, - local_start, &local_entry)) { - vm_map_unlock(map); - vm_map_unwire(map, start, - entry->vme_start, user_wire); - return(KERN_FAILURE); - } - /* did we have a change of type? */ - if (!local_entry->is_sub_map) { - last_timestamp = map->timestamp; - continue; - } - entry = local_entry; - if (user_wire) - entry->user_wired_count++; - if((!user_wire) || - (entry->user_wired_count == 1)) - entry->wired_count++; + /* call vm_map_lookup_locked to */ + /* cause any needs copy to be */ + /* evaluated */ + local_start = entry->vme_start; + lookup_map = map; + vm_map_lock_write_to_read(map); + if(vm_map_lookup_locked( + &lookup_map, local_start, + access_type, + OBJECT_LOCK_EXCLUSIVE, + &version, &object, + &offset, &prot, &wired, + NULL, + &real_map)) { - entry->in_transition = TRUE; + vm_map_unlock_read(lookup_map); + vm_map_unwire(map, start, + s, user_wire); + return(KERN_FAILURE); + } + if(real_map != lookup_map) + vm_map_unlock(real_map); + vm_map_unlock_read(lookup_map); + vm_map_lock(map); + vm_object_unlock(object); - vm_map_unlock(map); - rc = vm_map_wire_nested( - entry->object.sub_map, - sub_start, sub_end, - access_type, - user_wire, pmap, pmap_addr); - vm_map_lock(map); + /* we unlocked, so must re-lookup */ + if (!vm_map_lookup_entry(map, + local_start, + &local_entry)) { + rc = KERN_FAILURE; + goto done; + } + + /* + * entry could have been "simplified", + * so re-clip + */ + entry = local_entry; + assert(s == local_start); + vm_map_clip_start(map, entry, s); + vm_map_clip_end(map, entry, end); + /* re-compute "e" */ + e = entry->vme_end; + if (e > end) + e = end; + + /* did we have a change of type? */ + if (!entry->is_sub_map) { + last_timestamp = map->timestamp; + continue; } } else { local_start = entry->vme_start; - if (user_wire) - entry->user_wired_count++; - if((!user_wire) || - (entry->user_wired_count == 1)) - entry->wired_count++; - vm_map_unlock(map); - rc = vm_map_wire_nested(entry->object.sub_map, - sub_start, sub_end, - access_type, - user_wire, map_pmap, pmap_addr); - vm_map_lock(map); + pmap = map_pmap; } - s = entry->vme_start; - e = entry->vme_end; - /* - * Find the entry again. It could have been clipped - * after we unlocked the map. - */ - if (!vm_map_lookup_entry(map, local_start, + if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) + goto done; + + entry->in_transition = TRUE; + + vm_map_unlock(map); + rc = vm_map_wire_nested(entry->object.sub_map, + sub_start, sub_end, + access_type, + user_wire, pmap, pmap_addr); + vm_map_lock(map); + + /* + * Find the entry again. It could have been clipped + * after we unlocked the map. + */ + if (!vm_map_lookup_entry(map, local_start, &first_entry)) panic("vm_map_wire: re-lookup failed"); entry = first_entry; + assert(local_start == s); + /* re-compute "e" */ + e = entry->vme_end; + if (e > end) + e = end; + last_timestamp = map->timestamp; while ((entry != vm_map_to_entry(map)) && - (entry->vme_start < e)) { + (entry->vme_start < e)) { assert(entry->in_transition); entry->in_transition = FALSE; if (entry->needs_wakeup) { @@ -2978,24 +4064,16 @@ vm_map_wire_nested( need_wakeup = TRUE; } if (rc != KERN_SUCCESS) {/* from vm_*_wire */ - if (user_wire) - entry->user_wired_count--; - if ((!user_wire) || - (entry->user_wired_count == 0)) - entry->wired_count--; + subtract_wire_counts(map, entry, user_wire); } entry = entry->vme_next; } if (rc != KERN_SUCCESS) { /* from vm_*_wire */ - vm_map_unlock(map); - if (need_wakeup) - vm_map_entry_wakeup(map); - /* - * undo everything upto the previous entry. - */ - (void)vm_map_unwire(map, start, s, user_wire); - return rc; + goto done; } + + /* no need to relookup again */ + s = entry->vme_start; continue; } @@ -3004,29 +4082,19 @@ vm_map_wire_nested( * the appropriate wire reference count. */ if (entry->wired_count) { - /* sanity check: wired_count is a short */ - if (entry->wired_count >= MAX_WIRE_COUNT) - panic("vm_map_wire: too many wirings"); - - if (user_wire && - entry->user_wired_count >= MAX_WIRE_COUNT) { - vm_map_unlock(map); - vm_map_unwire(map, start, - entry->vme_start, user_wire); - return(KERN_FAILURE); - } /* * entry is already wired down, get our reference * after clipping to our range. */ - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, entry, s); vm_map_clip_end(map, entry, end); - if (user_wire) - entry->user_wired_count++; - if ((!user_wire) || (entry->user_wired_count == 1)) - entry->wired_count++; + if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) + goto done; + + /* map was not unlocked: no need to relookup */ entry = entry->vme_next; + s = entry->vme_start; continue; } @@ -3056,11 +4124,13 @@ vm_map_wire_nested( entry->offset = (vm_object_offset_t)0; } - vm_map_clip_start(map, entry, start); + vm_map_clip_start(map, entry, s); vm_map_clip_end(map, entry, end); - s = entry->vme_start; + /* re-compute "e" */ e = entry->vme_end; + if (e > end) + e = end; /* * Check for holes and protection mismatch. @@ -3069,30 +4139,23 @@ vm_map_wire_nested( * Protection: Access requested must be allowed, unless * wiring is by protection class */ - if ((((entry->vme_end < end) && - ((entry->vme_next == vm_map_to_entry(map)) || - (entry->vme_next->vme_start > entry->vme_end))) || - ((entry->protection & access_type) != access_type))) { - /* - * Found a hole or protection problem. - * Unwire the region we wired so far. - */ - if (start != entry->vme_start) { - vm_map_unlock(map); - vm_map_unwire(map, start, s, user_wire); - } else { - vm_map_unlock(map); - } - return((entry->protection&access_type) != access_type? - KERN_PROTECTION_FAILURE: KERN_INVALID_ADDRESS); + if ((entry->vme_end < end) && + ((entry->vme_next == vm_map_to_entry(map)) || + (entry->vme_next->vme_start > entry->vme_end))) { + /* found a hole */ + rc = KERN_INVALID_ADDRESS; + goto done; + } + if ((entry->protection & access_type) != access_type) { + /* found a protection problem */ + rc = KERN_PROTECTION_FAILURE; + goto done; } assert(entry->wired_count == 0 && entry->user_wired_count == 0); - if (user_wire) - entry->user_wired_count++; - if ((!user_wire) || (entry->user_wired_count == 1)) - entry->wired_count++; + if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) + goto done; entry->in_transition = TRUE; @@ -3121,11 +4184,11 @@ vm_map_wire_nested( if(map_pmap) rc = vm_fault_wire(map, - &tmp_entry, map_pmap, pmap_addr); + &tmp_entry, map_pmap, pmap_addr); else rc = vm_fault_wire(map, - &tmp_entry, map->pmap, - tmp_entry.vme_start); + &tmp_entry, map->pmap, + tmp_entry.vme_start); if (!user_wire && cur_thread != THREAD_NULL) thread_interrupt_level(interruptible_state); @@ -3138,7 +4201,7 @@ vm_map_wire_nested( * after we unlocked the map. */ if (!vm_map_lookup_entry(map, tmp_entry.vme_start, - &first_entry)) + &first_entry)) panic("vm_map_wire: re-lookup failed"); entry = first_entry; @@ -3155,26 +4218,24 @@ vm_map_wire_nested( need_wakeup = TRUE; } if (rc != KERN_SUCCESS) { /* from vm_*_wire */ - if (user_wire) - entry->user_wired_count--; - if ((!user_wire) || - (entry->user_wired_count == 0)) - entry->wired_count--; + subtract_wire_counts(map, entry, user_wire); } entry = entry->vme_next; } if (rc != KERN_SUCCESS) { /* from vm_*_wire */ - vm_map_unlock(map); - if (need_wakeup) - vm_map_entry_wakeup(map); - /* - * undo everything upto the previous entry. - */ - (void)vm_map_unwire(map, start, s, user_wire); - return rc; + goto done; } + + s = entry->vme_start; } /* end while loop through map entries */ + +done: + if (rc == KERN_SUCCESS) { + /* repair any damage we may have made to the VM map */ + vm_map_simplify_range(map, start, end); + } + vm_map_unlock(map); /* @@ -3183,7 +4244,12 @@ vm_map_wire_nested( if (need_wakeup) vm_map_entry_wakeup(map); - return(KERN_SUCCESS); + if (rc != KERN_SUCCESS) { + /* undo what has been wired so far */ + vm_map_unwire(map, start, s, user_wire); + } + + return rc; } @@ -3209,10 +4275,11 @@ vm_map_wire( * existing mappings */ VM_MAP_RANGE_CHECK(map, start, end); - mapping_prealloc(end - start); + assert((unsigned int) (end - start) == (end - start)); + mapping_prealloc((unsigned int) (end - start)); #endif kret = vm_map_wire_nested(map, start, end, access_type, - user_wire, (pmap_t)NULL, 0); + user_wire, (pmap_t)NULL, 0); #ifdef ppc mapping_relpre(); #endif @@ -3256,16 +4323,34 @@ vm_map_unwire_nested( assert(page_aligned(start)); assert(page_aligned(end)); + if (start == end) { + /* We unwired what the caller asked for: zero pages */ + vm_map_unlock(map); + return KERN_SUCCESS; + } + if (vm_map_lookup_entry(map, start, &first_entry)) { entry = first_entry; - /* vm_map_clip_start will be done later. */ + /* + * vm_map_clip_start will be done later. + * We don't want to unnest any nested sub maps here ! + */ } else { + if (!user_wire) { + panic("vm_map_unwire: start not found"); + } /* Start address is not in map. */ vm_map_unlock(map); return(KERN_INVALID_ADDRESS); } + if (entry->superpage_size) { + /* superpages are always wired */ + vm_map_unlock(map); + return KERN_INVALID_ADDRESS; + } + need_wakeup = FALSE; while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { if (entry->in_transition) { @@ -3282,20 +4367,41 @@ vm_map_unwire_nested( * have a reference to it, because if we did, this * entry will not be getting unwired now. */ - if (!user_wire) + if (!user_wire) { + /* + * XXX FBDP + * This could happen: there could be some + * overlapping vslock/vsunlock operations + * going on. + * We should probably just wait and retry, + * but then we have to be careful that this + * entry could get "simplified" after + * "in_transition" gets unset and before + * we re-lookup the entry, so we would + * have to re-clip the entry to avoid + * re-unwiring what we have already unwired... + * See vm_map_wire_nested(). + * + * Or we could just ignore "in_transition" + * here and proceed to decement the wired + * count(s) on this entry. That should be fine + * as long as "wired_count" doesn't drop all + * the way to 0 (and we should panic if THAT + * happens). + */ panic("vm_map_unwire: in_transition entry"); + } entry = entry->vme_next; continue; } - if(entry->is_sub_map) { + if (entry->is_sub_map) { vm_map_offset_t sub_start; vm_map_offset_t sub_end; vm_map_offset_t local_end; pmap_t pmap; - - + vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); @@ -3304,127 +4410,126 @@ vm_map_unwire_nested( sub_end += entry->offset; local_end = entry->vme_end; if(map_pmap == NULL) { - if(entry->use_pmap) { + if(entry->use_pmap) { pmap = entry->object.sub_map->pmap; pmap_addr = sub_start; - } else { + } else { pmap = map->pmap; pmap_addr = start; - } - if (entry->wired_count == 0 || - (user_wire && entry->user_wired_count == 0)) { - if (!user_wire) - panic("vm_map_unwire: entry is unwired"); - entry = entry->vme_next; - continue; - } - - /* - * Check for holes - * Holes: Next entry should be contiguous unless - * this is the end of the region. - */ - if (((entry->vme_end < end) && - ((entry->vme_next == vm_map_to_entry(map)) || - (entry->vme_next->vme_start - > entry->vme_end)))) { - if (!user_wire) - panic("vm_map_unwire: non-contiguous region"); + } + if (entry->wired_count == 0 || + (user_wire && entry->user_wired_count == 0)) { + if (!user_wire) + panic("vm_map_unwire: entry is unwired"); + entry = entry->vme_next; + continue; + } + + /* + * Check for holes + * Holes: Next entry should be contiguous unless + * this is the end of the region. + */ + if (((entry->vme_end < end) && + ((entry->vme_next == vm_map_to_entry(map)) || + (entry->vme_next->vme_start + > entry->vme_end)))) { + if (!user_wire) + panic("vm_map_unwire: non-contiguous region"); /* - entry = entry->vme_next; - continue; + entry = entry->vme_next; + continue; */ - } + } - if (!user_wire || (--entry->user_wired_count == 0)) - entry->wired_count--; + subtract_wire_counts(map, entry, user_wire); - if (entry->wired_count != 0) { - entry = entry->vme_next; - continue; - } + if (entry->wired_count != 0) { + entry = entry->vme_next; + continue; + } - entry->in_transition = TRUE; - tmp_entry = *entry;/* see comment in vm_map_wire() */ + entry->in_transition = TRUE; + tmp_entry = *entry;/* see comment in vm_map_wire() */ + + /* + * We can unlock the map now. The in_transition state + * guarantees existance of the entry. + */ + vm_map_unlock(map); + vm_map_unwire_nested(entry->object.sub_map, + sub_start, sub_end, user_wire, pmap, pmap_addr); + vm_map_lock(map); - /* - * We can unlock the map now. The in_transition state - * guarantees existance of the entry. - */ - vm_map_unlock(map); - vm_map_unwire_nested(entry->object.sub_map, - sub_start, sub_end, user_wire, pmap, pmap_addr); - vm_map_lock(map); + if (last_timestamp+1 != map->timestamp) { + /* + * Find the entry again. It could have been + * clipped or deleted after we unlocked the map. + */ + if (!vm_map_lookup_entry(map, + tmp_entry.vme_start, + &first_entry)) { + if (!user_wire) + panic("vm_map_unwire: re-lookup failed"); + entry = first_entry->vme_next; + } else + entry = first_entry; + } + last_timestamp = map->timestamp; - if (last_timestamp+1 != map->timestamp) { /* - * Find the entry again. It could have been - * clipped or deleted after we unlocked the map. - */ - if (!vm_map_lookup_entry(map, - tmp_entry.vme_start, - &first_entry)) { - if (!user_wire) - panic("vm_map_unwire: re-lookup failed"); - entry = first_entry->vme_next; - } else - entry = first_entry; - } - last_timestamp = map->timestamp; - - /* - * clear transition bit for all constituent entries - * that were in the original entry (saved in - * tmp_entry). Also check for waiters. - */ - while ((entry != vm_map_to_entry(map)) && - (entry->vme_start < tmp_entry.vme_end)) { - assert(entry->in_transition); - entry->in_transition = FALSE; - if (entry->needs_wakeup) { - entry->needs_wakeup = FALSE; - need_wakeup = TRUE; + * clear transition bit for all constituent entries + * that were in the original entry (saved in + * tmp_entry). Also check for waiters. + */ + while ((entry != vm_map_to_entry(map)) && + (entry->vme_start < tmp_entry.vme_end)) { + assert(entry->in_transition); + entry->in_transition = FALSE; + if (entry->needs_wakeup) { + entry->needs_wakeup = FALSE; + need_wakeup = TRUE; + } + entry = entry->vme_next; } - entry = entry->vme_next; - } - continue; + continue; } else { - vm_map_unlock(map); - vm_map_unwire_nested(entry->object.sub_map, - sub_start, sub_end, user_wire, map_pmap, - pmap_addr); - vm_map_lock(map); + vm_map_unlock(map); + vm_map_unwire_nested(entry->object.sub_map, + sub_start, sub_end, user_wire, map_pmap, + pmap_addr); + vm_map_lock(map); - if (last_timestamp+1 != map->timestamp) { - /* - * Find the entry again. It could have been - * clipped or deleted after we unlocked the map. - */ - if (!vm_map_lookup_entry(map, - tmp_entry.vme_start, - &first_entry)) { - if (!user_wire) - panic("vm_map_unwire: re-lookup failed"); - entry = first_entry->vme_next; - } else - entry = first_entry; - } - last_timestamp = map->timestamp; + if (last_timestamp+1 != map->timestamp) { + /* + * Find the entry again. It could have been + * clipped or deleted after we unlocked the map. + */ + if (!vm_map_lookup_entry(map, + tmp_entry.vme_start, + &first_entry)) { + if (!user_wire) + panic("vm_map_unwire: re-lookup failed"); + entry = first_entry->vme_next; + } else + entry = first_entry; + } + last_timestamp = map->timestamp; } } if ((entry->wired_count == 0) || - (user_wire && entry->user_wired_count == 0)) { + (user_wire && entry->user_wired_count == 0)) { if (!user_wire) panic("vm_map_unwire: entry is unwired"); entry = entry->vme_next; continue; } - + assert(entry->wired_count > 0 && - (!user_wire || entry->user_wired_count > 0)); + (!user_wire || entry->user_wired_count > 0)); vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); @@ -3435,8 +4540,8 @@ vm_map_unwire_nested( * this is the end of the region. */ if (((entry->vme_end < end) && - ((entry->vme_next == vm_map_to_entry(map)) || - (entry->vme_next->vme_start > entry->vme_end)))) { + ((entry->vme_next == vm_map_to_entry(map)) || + (entry->vme_next->vme_start > entry->vme_end)))) { if (!user_wire) panic("vm_map_unwire: non-contiguous region"); @@ -3444,14 +4549,17 @@ vm_map_unwire_nested( continue; } - if (!user_wire || (--entry->user_wired_count == 0)) - entry->wired_count--; + subtract_wire_counts(map, entry, user_wire); if (entry->wired_count != 0) { entry = entry->vme_next; continue; } + if(entry->zero_wired_pages) { + entry->zero_wired_pages = FALSE; + } + entry->in_transition = TRUE; tmp_entry = *entry; /* see comment in vm_map_wire() */ @@ -3462,11 +4570,11 @@ vm_map_unwire_nested( vm_map_unlock(map); if(map_pmap) { vm_fault_unwire(map, - &tmp_entry, FALSE, map_pmap, pmap_addr); + &tmp_entry, FALSE, map_pmap, pmap_addr); } else { vm_fault_unwire(map, - &tmp_entry, FALSE, map->pmap, - tmp_entry.vme_start); + &tmp_entry, FALSE, map->pmap, + tmp_entry.vme_start); } vm_map_lock(map); @@ -3476,9 +4584,9 @@ vm_map_unwire_nested( * or deleted after we unlocked the map. */ if (!vm_map_lookup_entry(map, tmp_entry.vme_start, - &first_entry)) { + &first_entry)) { if (!user_wire) - panic("vm_map_unwire: re-lookup failed"); + panic("vm_map_unwire: re-lookup failed"); entry = first_entry->vme_next; } else entry = first_entry; @@ -3530,7 +4638,7 @@ vm_map_unwire( boolean_t user_wire) { return vm_map_unwire_nested(map, start, end, - user_wire, (pmap_t)NULL, 0); + user_wire, (pmap_t)NULL, 0); } @@ -3554,6 +4662,7 @@ vm_map_entry_delete( assert(page_aligned(e)); assert(entry->wired_count == 0); assert(entry->user_wired_count == 0); + assert(!entry->permanent); if (entry->is_sub_map) { object = NULL; @@ -3576,11 +4685,10 @@ vm_map_entry_delete( if (submap) vm_map_deallocate(submap); else - vm_object_deallocate(object); + vm_object_deallocate(object); } - void vm_map_submap_pmap_clean( vm_map_t map, @@ -3597,11 +4705,11 @@ vm_map_submap_pmap_clean( submap_end = offset + (end - start); submap_start = offset; if(vm_map_lookup_entry(sub_map, offset, &entry)) { - + remove_size = (entry->vme_end - entry->vme_start); if(offset > entry->vme_start) remove_size -= offset - entry->vme_start; - + if(submap_end < entry->vme_end) { remove_size -= @@ -3617,7 +4725,7 @@ vm_map_submap_pmap_clean( } else { if((map->mapped) && (map->ref_count) - && (entry->object.vm_object != NULL)) { + && (entry->object.vm_object != NULL)) { vm_object_pmap_protect( entry->object.vm_object, entry->offset, @@ -3627,16 +4735,16 @@ vm_map_submap_pmap_clean( VM_PROT_NONE); } else { pmap_remove(map->pmap, - (addr64_t)start, - (addr64_t)(start + remove_size)); + (addr64_t)start, + (addr64_t)(start + remove_size)); } } } entry = entry->vme_next; - + while((entry != vm_map_to_entry(sub_map)) - && (entry->vme_start < submap_end)) { + && (entry->vme_start < submap_end)) { remove_size = (entry->vme_end - entry->vme_start); if(submap_end < entry->vme_end) { remove_size -= entry->vme_end - submap_end; @@ -3650,7 +4758,7 @@ vm_map_submap_pmap_clean( entry->offset); } else { if((map->mapped) && (map->ref_count) - && (entry->object.vm_object != NULL)) { + && (entry->object.vm_object != NULL)) { vm_object_pmap_protect( entry->object.vm_object, entry->offset, @@ -3660,10 +4768,10 @@ vm_map_submap_pmap_clean( VM_PROT_NONE); } else { pmap_remove(map->pmap, - (addr64_t)((start + entry->vme_start) - - offset), - (addr64_t)(((start + entry->vme_start) - - offset) + remove_size)); + (addr64_t)((start + entry->vme_start) + - offset), + (addr64_t)(((start + entry->vme_start) + - offset) + remove_size)); } } entry = entry->vme_next; @@ -3692,14 +4800,14 @@ vm_map_delete( { vm_map_entry_t entry, next; struct vm_map_entry *first_entry, tmp_entry; - register vm_map_offset_t s, e; + register vm_map_offset_t s; register vm_object_t object; boolean_t need_wakeup; unsigned int last_timestamp = ~0; /* unlikely value */ int interruptible; interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ? - THREAD_ABORTSAFE : THREAD_UNINT; + THREAD_ABORTSAFE : THREAD_UNINT; /* * All our DMA I/O operations in IOKit are currently done by @@ -3714,29 +4822,85 @@ vm_map_delete( */ flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE; - /* - * Find the start of the region, and clip it - */ - if (vm_map_lookup_entry(map, start, &first_entry)) { - entry = first_entry; - vm_map_clip_start(map, entry, start); - + while(1) { /* - * Fix the lookup hint now, rather than each - * time through the loop. + * Find the start of the region, and clip it */ - SAVE_HINT_MAP_WRITE(map, entry->vme_prev); - } else { - entry = first_entry->vme_next; + if (vm_map_lookup_entry(map, start, &first_entry)) { + entry = first_entry; + if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ start = SUPERPAGE_ROUND_DOWN(start); + start = SUPERPAGE_ROUND_DOWN(start); + continue; + } + if (start == entry->vme_start) { + /* + * No need to clip. We don't want to cause + * any unnecessary unnesting in this case... + */ + } else { + vm_map_clip_start(map, entry, start); + } + + /* + * Fix the lookup hint now, rather than each + * time through the loop. + */ + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); + } else { + entry = first_entry->vme_next; + } + break; } + if (entry->superpage_size) + end = SUPERPAGE_ROUND_UP(end); need_wakeup = FALSE; /* * Step through all entries in this region */ - while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { + s = entry->vme_start; + while ((entry != vm_map_to_entry(map)) && (s < end)) { + /* + * At this point, we have deleted all the memory entries + * between "start" and "s". We still need to delete + * all memory entries between "s" and "end". + * While we were blocked and the map was unlocked, some + * new memory entries could have been re-allocated between + * "start" and "s" and we don't want to mess with those. + * Some of those entries could even have been re-assembled + * with an entry after "s" (in vm_map_simplify_entry()), so + * we may have to vm_map_clip_start() again. + */ + + if (entry->vme_start >= s) { + /* + * This entry starts on or after "s" + * so no need to clip its start. + */ + } else { + /* + * This entry has been re-assembled by a + * vm_map_simplify_entry(). We need to + * re-clip its start. + */ + vm_map_clip_start(map, entry, s); + } + if (entry->vme_end <= end) { + /* + * This entry is going away completely, so no need + * to clip and possibly cause an unnecessary unnesting. + */ + } else { + vm_map_clip_end(map, entry, end); + } + + if (entry->permanent) { + panic("attempt to remove permanent VM map entry " + "%p [0x%llx:0x%llx]\n", + entry, (uint64_t) s, (uint64_t) end); + } + - vm_map_clip_end(map, entry, end); if (entry->in_transition) { wait_result_t wait_result; @@ -3744,7 +4908,7 @@ vm_map_delete( * Another thread is wiring/unwiring this entry. * Let the other thread know we are waiting. */ - s = entry->vme_start; + assert(s == entry->vme_start); entry->needs_wakeup = TRUE; /* @@ -3779,6 +4943,7 @@ vm_map_delete( * User: use the next entry */ entry = first_entry->vme_next; + s = entry->vme_start; } else { entry = first_entry; SAVE_HINT_MAP_WRITE(map, entry->vme_prev); @@ -3788,20 +4953,27 @@ vm_map_delete( } /* end in_transition */ if (entry->wired_count) { + boolean_t user_wire; + + user_wire = entry->user_wired_count > 0; + /* - * Remove a kernel wiring if requested or if - * there are user wirings. + * Remove a kernel wiring if requested */ - if ((flags & VM_MAP_REMOVE_KUNWIRE) || - (entry->user_wired_count > 0)) + if (flags & VM_MAP_REMOVE_KUNWIRE) { entry->wired_count--; - - /* remove all user wire references */ - entry->user_wired_count = 0; + } + + /* + * Remove all user wirings for proper accounting + */ + if (entry->user_wired_count > 0) { + while (entry->user_wired_count) + subtract_wire_counts(map, entry, user_wire); + } if (entry->wired_count != 0) { - assert((map != kernel_map) && - (!entry->is_sub_map)); + assert(map != kernel_map); /* * Cannot continue. Typical case is when * a user thread has physical io pending on @@ -3812,36 +4984,36 @@ vm_map_delete( if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) { wait_result_t wait_result; - s = entry->vme_start; + assert(s == entry->vme_start); entry->needs_wakeup = TRUE; wait_result = vm_map_entry_wait(map, - interruptible); + interruptible); if (interruptible && - wait_result == THREAD_INTERRUPTED) { + wait_result == THREAD_INTERRUPTED) { /* - * We do not clear the + * We do not clear the * needs_wakeup flag, since we * cannot tell if we were the * only one. - */ + */ vm_map_unlock(map); return KERN_ABORTED; } /* - * The entry could have been clipped or + * The entry could have been clipped or * it may not exist anymore. Look it * up again. - */ + */ if (!vm_map_lookup_entry(map, s, - &first_entry)) { - assert((map != kernel_map) && - (!entry->is_sub_map)); + &first_entry)) { + assert(map != kernel_map); /* - * User: use the next entry - */ + * User: use the next entry + */ entry = first_entry->vme_next; + s = entry->vme_start; } else { entry = first_entry; SAVE_HINT_MAP_WRITE(map, entry->vme_prev); @@ -3859,17 +5031,43 @@ vm_map_delete( * copy current entry. see comment in vm_map_wire() */ tmp_entry = *entry; - s = entry->vme_start; - e = entry->vme_end; + assert(s == entry->vme_start); /* * We can unlock the map now. The in_transition * state guarentees existance of the entry. */ vm_map_unlock(map); - vm_fault_unwire(map, &tmp_entry, - tmp_entry.object.vm_object == kernel_object, - map->pmap, tmp_entry.vme_start); + + if (tmp_entry.is_sub_map) { + vm_map_t sub_map; + vm_map_offset_t sub_start, sub_end; + pmap_t pmap; + vm_map_offset_t pmap_addr; + + + sub_map = tmp_entry.object.sub_map; + sub_start = tmp_entry.offset; + sub_end = sub_start + (tmp_entry.vme_end - + tmp_entry.vme_start); + if (tmp_entry.use_pmap) { + pmap = sub_map->pmap; + pmap_addr = tmp_entry.vme_start; + } else { + pmap = map->pmap; + pmap_addr = tmp_entry.vme_start; + } + (void) vm_map_unwire_nested(sub_map, + sub_start, sub_end, + user_wire, + pmap, pmap_addr); + } else { + + vm_fault_unwire(map, &tmp_entry, + tmp_entry.object.vm_object == kernel_object, + map->pmap, tmp_entry.vme_start); + } + vm_map_lock(map); if (last_timestamp+1 != map->timestamp) { @@ -3879,8 +5077,9 @@ vm_map_delete( */ if (!vm_map_lookup_entry(map, s, &first_entry)){ assert((map != kernel_map) && - (!entry->is_sub_map)); + (!entry->is_sub_map)); first_entry = first_entry->vme_next; + s = first_entry->vme_start; } else { SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } @@ -3914,16 +5113,27 @@ vm_map_delete( assert(entry->wired_count == 0); assert(entry->user_wired_count == 0); - if ((!entry->is_sub_map && - entry->object.vm_object != kernel_object) || - entry->is_sub_map) { - if(entry->is_sub_map) { - if(entry->use_pmap) { + assert(s == entry->vme_start); + + if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) { + /* + * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to + * vm_map_delete(), some map entries might have been + * transferred to a "zap_map", which doesn't have a + * pmap. The original pmap has already been flushed + * in the vm_map_delete() call targeting the original + * map, but when we get to destroying the "zap_map", + * we don't have any pmap to flush, so let's just skip + * all this. + */ + } else if (entry->is_sub_map) { + if (entry->use_pmap) { #ifndef NO_NESTED_PMAP pmap_unnest(map->pmap, - (addr64_t)entry->vme_start); + (addr64_t)entry->vme_start, + entry->vme_end - entry->vme_start); #endif /* NO_NESTED_PMAP */ - if((map->mapped) && (map->ref_count)) { + if ((map->mapped) && (map->ref_count)) { /* clean up parent map/maps */ vm_map_submap_pmap_clean( map, entry->vme_start, @@ -3931,26 +5141,25 @@ vm_map_delete( entry->object.sub_map, entry->offset); } - } else { + } else { vm_map_submap_pmap_clean( map, entry->vme_start, entry->vme_end, entry->object.sub_map, entry->offset); - } - } else { - object = entry->object.vm_object; - if((map->mapped) && (map->ref_count)) { - vm_object_pmap_protect( + } + } else if (entry->object.vm_object != kernel_object) { + object = entry->object.vm_object; + if((map->mapped) && (map->ref_count)) { + vm_object_pmap_protect( object, entry->offset, entry->vme_end - entry->vme_start, PMAP_NULL, entry->vme_start, VM_PROT_NONE); - } else { - pmap_remove(map->pmap, - (addr64_t)entry->vme_start, - (addr64_t)entry->vme_end); - } + } else { + pmap_remove(map->pmap, + (addr64_t)entry->vme_start, + (addr64_t)entry->vme_end); } } @@ -3968,6 +5177,7 @@ vm_map_delete( if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) && zap_map != VM_MAP_NULL) { + vm_map_size_t entry_size; /* * The caller wants to save the affected VM map entries * into the "zap_map". The caller will take care of @@ -3979,6 +5189,11 @@ vm_map_delete( vm_map_entry_link(zap_map, vm_map_last_entry(zap_map), entry); + entry_size = entry->vme_end - entry->vme_start; + map->size -= entry_size; + zap_map->size += entry_size; + /* we didn't unlock the map, so no timestamp increase */ + last_timestamp--; } else { vm_map_entry_delete(map, entry); /* vm_map_entry_delete unlocks the map */ @@ -4001,8 +5216,9 @@ vm_map_delete( */ if (!vm_map_lookup_entry(map, s, &entry)){ entry = entry->vme_next; + s = entry->vme_start; } else { - SAVE_HINT_MAP_WRITE(map, entry->vme_prev); + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } /* * others can not only allocate behind us, we can @@ -4011,7 +5227,6 @@ vm_map_delete( if(entry == vm_map_to_entry(map)) { break; } - vm_map_clip_start(map, entry, s); } last_timestamp = map->timestamp; } @@ -4027,7 +5242,6 @@ vm_map_delete( return KERN_SUCCESS; } - /* * vm_map_remove: * @@ -4063,17 +5277,13 @@ void vm_map_copy_discard( vm_map_copy_t copy) { - TR_DECL("vm_map_copy_discard"); - -/* tr3("enter: copy 0x%x type %d", copy, copy->type);*/ - if (copy == VM_MAP_COPY_NULL) return; switch (copy->type) { case VM_MAP_COPY_ENTRY_LIST: while (vm_map_copy_first_entry(copy) != - vm_map_copy_to_entry(copy)) { + vm_map_copy_to_entry(copy)) { vm_map_entry_t entry = vm_map_copy_first_entry(copy); vm_map_copy_entry_unlink(copy, entry); @@ -4186,6 +5396,7 @@ start_pass_1: } vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); + assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */ for (entry = tmp_entry;;) { vm_map_entry_t next; @@ -4197,9 +5408,9 @@ start_pass_1: vm_map_offset_t local_end; if (entry->in_transition) { - /* - * Say that we are waiting, and wait for entry. - */ + /* + * Say that we are waiting, and wait for entry. + */ entry->needs_wakeup = TRUE; vm_map_entry_wait(dst_map, THREAD_UNINT); @@ -4219,9 +5430,9 @@ start_pass_1: vm_map_unlock(dst_map); result = vm_map_overwrite_submap_recurse( - entry->object.sub_map, - sub_start, - sub_end - sub_start); + entry->object.sub_map, + sub_start, + sub_end - sub_start); if(result != KERN_SUCCESS) return result; @@ -4278,8 +5489,8 @@ start_pass_1: * Check for permanent objects in the destination. */ if ((entry->object.vm_object != VM_OBJECT_NULL) && - ((!entry->object.vm_object->internal) || - (entry->object.vm_object->true_share))) { + ((!entry->object.vm_object->internal) || + (entry->object.vm_object->true_share))) { if(encountered_sub_map) { vm_map_unlock(dst_map); return(KERN_FAILURE); @@ -4378,8 +5589,8 @@ vm_map_copy_overwrite_nested( if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { return(vm_map_copyout_kernel_buffer( - dst_map, &dst_addr, - copy, TRUE)); + dst_map, &dst_addr, + copy, TRUE)); } /* @@ -4402,8 +5613,8 @@ vm_map_copy_overwrite_nested( */ if (!page_aligned(copy->size) || - !page_aligned (copy->offset) || - !page_aligned (dst_addr)) + !page_aligned (copy->offset) || + !page_aligned (dst_addr)) { aligned = FALSE; dst_end = vm_map_round_page(dst_addr + copy->size); @@ -4438,9 +5649,9 @@ start_pass_1: if (entry->in_transition) { - /* - * Say that we are waiting, and wait for entry. - */ + /* + * Say that we are waiting, and wait for entry. + */ entry->needs_wakeup = TRUE; vm_map_entry_wait(dst_map, THREAD_UNINT); @@ -4523,8 +5734,8 @@ start_pass_1: * Check for permanent objects in the destination. */ if ((entry->object.vm_object != VM_OBJECT_NULL) && - ((!entry->object.vm_object->internal) || - (entry->object.vm_object->true_share))) { + ((!entry->object.vm_object->internal) || + (entry->object.vm_object->true_share))) { contains_permanent_objects = TRUE; } @@ -4574,7 +5785,7 @@ start_overwrite: vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL; int nentries; int remaining_entries = 0; - int new_offset = 0; + vm_map_offset_t new_offset = 0; for (entry = tmp_entry; copy_size == 0;) { vm_map_entry_t next; @@ -4597,7 +5808,7 @@ start_overwrite: vm_map_entry_wait(dst_map, THREAD_UNINT); if(!vm_map_lookup_entry(dst_map, base_addr, - &tmp_entry)) { + &tmp_entry)) { vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } @@ -4626,6 +5837,7 @@ start_overwrite: dst_map, entry, sub_end); vm_map_clip_start( dst_map, entry, sub_start); + assert(!entry->use_pmap); entry->is_sub_map = FALSE; vm_map_deallocate( entry->object.sub_map); @@ -4633,13 +5845,21 @@ start_overwrite: entry->is_shared = FALSE; entry->needs_copy = FALSE; entry->offset = 0; + /* + * XXX FBDP + * We should propagate the protections + * of the submap entry here instead + * of forcing them to VM_PROT_ALL... + * Or better yet, we should inherit + * the protection of the copy_entry. + */ entry->protection = VM_PROT_ALL; entry->max_protection = VM_PROT_ALL; entry->wired_count = 0; entry->user_wired_count = 0; if(entry->inheritance - == VM_INHERIT_SHARE) - entry->inheritance = VM_INHERIT_COPY; + == VM_INHERIT_SHARE) + entry->inheritance = VM_INHERIT_COPY; continue; } /* first take care of any non-sub_map */ @@ -4667,45 +5887,45 @@ start_overwrite: vm_map_size_t local_size = 0; vm_map_size_t entry_size; - nentries = 1; - new_offset = copy->offset; - copy_entry = vm_map_copy_first_entry(copy); - while(copy_entry != - vm_map_copy_to_entry(copy)){ - entry_size = copy_entry->vme_end - - copy_entry->vme_start; - if((local_size < copy_size) && - ((local_size + entry_size) - >= copy_size)) { - vm_map_copy_clip_end(copy, - copy_entry, - copy_entry->vme_start + - (copy_size - local_size)); - entry_size = copy_entry->vme_end - - copy_entry->vme_start; - local_size += entry_size; - new_offset += entry_size; - } - if(local_size >= copy_size) { - next_copy = copy_entry->vme_next; - copy_entry->vme_next = - vm_map_copy_to_entry(copy); - previous_prev = - copy->cpy_hdr.links.prev; - copy->cpy_hdr.links.prev = copy_entry; - copy->size = copy_size; - remaining_entries = - copy->cpy_hdr.nentries; - remaining_entries -= nentries; - copy->cpy_hdr.nentries = nentries; - break; - } else { - local_size += entry_size; - new_offset += entry_size; - nentries++; - } - copy_entry = copy_entry->vme_next; - } + nentries = 1; + new_offset = copy->offset; + copy_entry = vm_map_copy_first_entry(copy); + while(copy_entry != + vm_map_copy_to_entry(copy)){ + entry_size = copy_entry->vme_end - + copy_entry->vme_start; + if((local_size < copy_size) && + ((local_size + entry_size) + >= copy_size)) { + vm_map_copy_clip_end(copy, + copy_entry, + copy_entry->vme_start + + (copy_size - local_size)); + entry_size = copy_entry->vme_end - + copy_entry->vme_start; + local_size += entry_size; + new_offset += entry_size; + } + if(local_size >= copy_size) { + next_copy = copy_entry->vme_next; + copy_entry->vme_next = + vm_map_copy_to_entry(copy); + previous_prev = + copy->cpy_hdr.links.prev; + copy->cpy_hdr.links.prev = copy_entry; + copy->size = copy_size; + remaining_entries = + copy->cpy_hdr.nentries; + remaining_entries -= nentries; + copy->cpy_hdr.nentries = nentries; + break; + } else { + local_size += entry_size; + new_offset += entry_size; + nentries++; + } + copy_entry = copy_entry->vme_next; + } } if((entry->use_pmap) && (pmap == NULL)) { @@ -4731,13 +5951,13 @@ start_overwrite: } if(kr != KERN_SUCCESS) { if(next_copy != NULL) { - copy->cpy_hdr.nentries += - remaining_entries; - copy->cpy_hdr.links.prev->vme_next = - next_copy; - copy->cpy_hdr.links.prev - = previous_prev; - copy->size = total_size; + copy->cpy_hdr.nentries += + remaining_entries; + copy->cpy_hdr.links.prev->vme_next = + next_copy; + copy->cpy_hdr.links.prev + = previous_prev; + copy->size = total_size; } return kr; } @@ -4747,10 +5967,10 @@ start_overwrite: /* otherwise copy no longer exists, it was */ /* destroyed after successful copy_overwrite */ copy = (vm_map_copy_t) - zalloc(vm_map_copy_zone); + zalloc(vm_map_copy_zone); vm_map_copy_first_entry(copy) = - vm_map_copy_last_entry(copy) = - vm_map_copy_to_entry(copy); + vm_map_copy_last_entry(copy) = + vm_map_copy_to_entry(copy); copy->type = VM_MAP_COPY_ENTRY_LIST; copy->offset = new_offset; @@ -4758,18 +5978,18 @@ start_overwrite: copy_size = 0; /* put back remainder of copy in container */ if(next_copy != NULL) { - copy->cpy_hdr.nentries = remaining_entries; - copy->cpy_hdr.links.next = next_copy; - copy->cpy_hdr.links.prev = previous_prev; - copy->size = total_size; - next_copy->vme_prev = - vm_map_copy_to_entry(copy); - next_copy = NULL; + copy->cpy_hdr.nentries = remaining_entries; + copy->cpy_hdr.links.next = next_copy; + copy->cpy_hdr.links.prev = previous_prev; + copy->size = total_size; + next_copy->vme_prev = + vm_map_copy_to_entry(copy); + next_copy = NULL; } base_addr = local_end; vm_map_lock(dst_map); if(!vm_map_lookup_entry(dst_map, - local_end, &tmp_entry)) { + local_end, &tmp_entry)) { vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } @@ -4782,7 +6002,7 @@ start_overwrite: } if ((next == vm_map_to_entry(dst_map)) || - (next->vme_start != entry->vme_end)) { + (next->vme_start != entry->vme_end)) { vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } @@ -4802,15 +6022,15 @@ start_overwrite: copy_entry = vm_map_copy_first_entry(copy); while(copy_entry != vm_map_copy_to_entry(copy)) { entry_size = copy_entry->vme_end - - copy_entry->vme_start; + copy_entry->vme_start; if((local_size < copy_size) && - ((local_size + entry_size) - >= copy_size)) { + ((local_size + entry_size) + >= copy_size)) { vm_map_copy_clip_end(copy, copy_entry, - copy_entry->vme_start + - (copy_size - local_size)); + copy_entry->vme_start + + (copy_size - local_size)); entry_size = copy_entry->vme_end - - copy_entry->vme_start; + copy_entry->vme_start; local_size += entry_size; new_offset += entry_size; } @@ -4845,39 +6065,39 @@ start_overwrite: local_pmap = dst_map->pmap; if ((kr = vm_map_copy_overwrite_aligned( - dst_map, tmp_entry, copy, - base_addr, local_pmap)) != KERN_SUCCESS) { + dst_map, tmp_entry, copy, + base_addr, local_pmap)) != KERN_SUCCESS) { if(next_copy != NULL) { copy->cpy_hdr.nentries += - remaining_entries; + remaining_entries; copy->cpy_hdr.links.prev->vme_next = - next_copy; + next_copy; copy->cpy_hdr.links.prev = - previous_prev; + previous_prev; copy->size += copy_size; } return kr; } vm_map_unlock(dst_map); } else { - /* - * Performance gain: - * - * if the copy and dst address are misaligned but the same - * offset within the page we can copy_not_aligned the - * misaligned parts and copy aligned the rest. If they are - * aligned but len is unaligned we simply need to copy - * the end bit unaligned. We'll need to split the misaligned - * bits of the region in this case ! - */ - /* ALWAYS UNLOCKS THE dst_map MAP */ + /* + * Performance gain: + * + * if the copy and dst address are misaligned but the same + * offset within the page we can copy_not_aligned the + * misaligned parts and copy aligned the rest. If they are + * aligned but len is unaligned we simply need to copy + * the end bit unaligned. We'll need to split the misaligned + * bits of the region in this case ! + */ + /* ALWAYS UNLOCKS THE dst_map MAP */ if ((kr = vm_map_copy_overwrite_unaligned( dst_map, - tmp_entry, copy, base_addr)) != KERN_SUCCESS) { + tmp_entry, copy, base_addr)) != KERN_SUCCESS) { if(next_copy != NULL) { copy->cpy_hdr.nentries += - remaining_entries; + remaining_entries; copy->cpy_hdr.links.prev->vme_next = - next_copy; + next_copy; copy->cpy_hdr.links.prev = previous_prev; copy->size += copy_size; @@ -4901,7 +6121,7 @@ start_overwrite: vm_map_lock(dst_map); while(TRUE) { if (!vm_map_lookup_entry(dst_map, - base_addr, &tmp_entry)) { + base_addr, &tmp_entry)) { vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } @@ -4933,7 +6153,7 @@ vm_map_copy_overwrite( boolean_t interruptible) { return vm_map_copy_overwrite_nested( - dst_map, dst_addr, copy, interruptible, (pmap_t) NULL); + dst_map, dst_addr, copy, interruptible, (pmap_t) NULL); } @@ -5027,16 +6247,16 @@ vm_map_copy_overwrite_unaligned( * Copy on write region. */ if (entry->needs_copy && - ((entry->protection & VM_PROT_WRITE) != 0)) + ((entry->protection & VM_PROT_WRITE) != 0)) { if (vm_map_lock_read_to_write(dst_map)) { vm_map_lock_read(dst_map); goto RetryLookup; } vm_object_shadow(&entry->object.vm_object, - &entry->offset, - (vm_map_size_t)(entry->vme_end - - entry->vme_start)); + &entry->offset, + (vm_map_size_t)(entry->vme_end + - entry->vme_start)); entry->needs_copy = FALSE; vm_map_lock_write_to_read(dst_map); } @@ -5051,7 +6271,7 @@ vm_map_copy_overwrite_unaligned( goto RetryLookup; } dst_object = vm_object_allocate((vm_map_size_t) - entry->vme_end - entry->vme_start); + entry->vme_end - entry->vme_start); entry->object.vm_object = dst_object; entry->offset = 0; vm_map_lock_write_to_read(dst_map); @@ -5092,7 +6312,7 @@ vm_map_copy_overwrite_unaligned( return kr; if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end - || amount_left == 0) + || amount_left == 0) { /* * all done with this copy entry, dispose. @@ -5102,7 +6322,7 @@ vm_map_copy_overwrite_unaligned( vm_map_copy_entry_dispose(copy, copy_entry); if ((copy_entry = vm_map_copy_first_entry(copy)) - == vm_map_copy_to_entry(copy) && amount_left) { + == vm_map_copy_to_entry(copy) && amount_left) { /* * not finished copying but run out of source */ @@ -5138,7 +6358,7 @@ vm_map_copy_overwrite_unaligned( * we must lookup the entry because somebody * might have changed the map behind our backs. */ -RetryLookup: + RetryLookup: if (!vm_map_lookup_entry(dst_map, start, &entry)) { vm_map_unlock_read(dst_map); @@ -5175,10 +6395,7 @@ vm_map_copy_overwrite_aligned( vm_map_entry_t tmp_entry, vm_map_copy_t copy, vm_map_offset_t start, -#if !BAD_OPTIMIZATION - __unused -#endif /* !BAD_OPTIMIZATION */ - pmap_t pmap) + __unused pmap_t pmap) { vm_object_t object; vm_map_entry_t copy_entry; @@ -5187,11 +6404,12 @@ vm_map_copy_overwrite_aligned( vm_map_entry_t entry; while ((copy_entry = vm_map_copy_first_entry(copy)) - != vm_map_copy_to_entry(copy)) + != vm_map_copy_to_entry(copy)) { copy_size = (copy_entry->vme_end - copy_entry->vme_start); entry = tmp_entry; + assert(!entry->use_pmap); /* unnested when clipped earlier */ if (entry == vm_map_to_entry(dst_map)) { vm_map_unlock(dst_map); return KERN_INVALID_ADDRESS; @@ -5205,7 +6423,7 @@ vm_map_copy_overwrite_aligned( */ if ((entry->vme_start != start) || ((entry->is_sub_map) - && !entry->needs_copy)) { + && !entry->needs_copy)) { vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } @@ -5235,7 +6453,7 @@ vm_map_copy_overwrite_aligned( if (size < copy_size) { vm_map_copy_clip_end(copy, copy_entry, - copy_entry->vme_start + size); + copy_entry->vme_start + size); copy_size = size; } @@ -5251,8 +6469,8 @@ vm_map_copy_overwrite_aligned( object = entry->object.vm_object; if ((!entry->is_shared && - ((object == VM_OBJECT_NULL) || - (object->internal && !object->true_share))) || + ((object == VM_OBJECT_NULL) || + (object->internal && !object->true_share))) || entry->needs_copy) { vm_object_t old_object = entry->object.vm_object; vm_object_offset_t old_offset = entry->offset; @@ -5280,16 +6498,17 @@ vm_map_copy_overwrite_aligned( if(entry->use_pmap) { #ifndef NO_NESTED_PMAP pmap_unnest(dst_map->pmap, - (addr64_t)entry->vme_start); + (addr64_t)entry->vme_start, + entry->vme_end - entry->vme_start); #endif /* NO_NESTED_PMAP */ if(dst_map->mapped) { /* clean up parent */ /* map/maps */ - vm_map_submap_pmap_clean( - dst_map, entry->vme_start, - entry->vme_end, - entry->object.sub_map, - entry->offset); + vm_map_submap_pmap_clean( + dst_map, entry->vme_start, + entry->vme_end, + entry->object.sub_map, + entry->offset); } } else { vm_map_submap_pmap_clean( @@ -5306,14 +6525,14 @@ vm_map_copy_overwrite_aligned( entry->object.vm_object, entry->offset, entry->vme_end - - entry->vme_start, + - entry->vme_start, PMAP_NULL, entry->vme_start, VM_PROT_NONE); } else { - pmap_remove(dst_map->pmap, - (addr64_t)(entry->vme_start), - (addr64_t)(entry->vme_end)); + pmap_remove(dst_map->pmap, + (addr64_t)(entry->vme_start), + (addr64_t)(entry->vme_end)); } vm_object_deallocate(old_object); } @@ -5329,89 +6548,16 @@ vm_map_copy_overwrite_aligned( vm_map_copy_entry_unlink(copy, copy_entry); vm_map_copy_entry_dispose(copy, copy_entry); -#if BAD_OPTIMIZATION + /* - * if we turn this optimization back on - * we need to revisit our use of pmap mappings - * large copies will cause us to run out and panic + * we could try to push pages into the pmap at this point, BUT * this optimization only saved on average 2 us per page if ALL * the pages in the source were currently mapped * and ALL the pages in the dest were touched, if there were fewer * than 2/3 of the pages touched, this optimization actually cost more cycles + * it also puts a lot of pressure on the pmap layer w/r to mapping structures */ - /* - * Try to aggressively enter physical mappings - * (but avoid uninstantiated objects) - */ - if (object != VM_OBJECT_NULL) { - vm_map_offset_t va = entry->vme_start; - - while (va < entry->vme_end) { - register vm_page_t m; - vm_prot_t prot; - - /* - * Look for the page in the top object - */ - prot = entry->protection; - vm_object_lock(object); - vm_object_paging_begin(object); - - /* - * ENCRYPTED SWAP: - * If the page is encrypted, skip it: - * we can't let the user see the encrypted - * contents. The page will get decrypted - * on demand when the user generates a - * soft-fault when trying to access it. - */ - if ((m = vm_page_lookup(object,offset)) != - VM_PAGE_NULL && !m->busy && - !m->fictitious && !m->encrypted && - (!m->unusual || (!m->error && - !m->restart && !m->absent && - (prot & m->page_lock) == 0))) { - - m->busy = TRUE; - vm_object_unlock(object); - - /* - * Honor COW obligations - */ - if (entry->needs_copy) - prot &= ~VM_PROT_WRITE; -#ifdef STACK_ONLY_NX - if (entry->alias != VM_MEMORY_STACK && prot) - prot |= VM_PROT_EXECUTE; -#endif - /* It is our policy to require */ - /* explicit sync from anyone */ - /* writing code and then */ - /* a pc to execute it. */ - /* No isync here */ - - PMAP_ENTER(pmap, va, m, prot, - ((unsigned int) - (m->object->wimg_bits)) - & VM_WIMG_MASK, - FALSE); - - vm_object_lock(object); - vm_page_lock_queues(); - if (!m->active && !m->inactive) - vm_page_activate(m); - vm_page_unlock_queues(); - PAGE_WAKEUP_DONE(m); - } - vm_object_paging_end(object); - vm_object_unlock(object); - - offset += PAGE_SIZE_64; - va += PAGE_SIZE; - } /* end while (va < entry->vme_end) */ - } /* end if (object) */ -#endif /* * Set up for the next iteration. The map * has not been unlocked, so the next @@ -5447,14 +6593,14 @@ vm_map_copy_overwrite_aligned( copy_size = size; r = vm_fault_copy( - copy_entry->object.vm_object, - copy_entry->offset, - ©_size, - dst_object, - dst_offset, - dst_map, - &version, - THREAD_UNINT ); + copy_entry->object.vm_object, + copy_entry->offset, + ©_size, + dst_object, + dst_offset, + dst_map, + &version, + THREAD_UNINT ); /* * Release the object reference @@ -5475,7 +6621,7 @@ vm_map_copy_overwrite_aligned( */ vm_map_copy_clip_end(copy, copy_entry, - copy_entry->vme_start + copy_size); + copy_entry->vme_start + copy_size); vm_map_copy_entry_unlink(copy, copy_entry); vm_object_deallocate(copy_entry->object.vm_object); vm_map_copy_entry_dispose(copy, copy_entry); @@ -5530,7 +6676,14 @@ vm_map_copyin_kernel_buffer( { kern_return_t kr; vm_map_copy_t copy; - vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len; + vm_size_t kalloc_size; + + if ((vm_size_t) len != len) { + /* "len" is too big and doesn't fit in a "vm_size_t" */ + return KERN_RESOURCE_SHORTAGE; + } + kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len); + assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len); copy = (vm_map_copy_t) kalloc(kalloc_size); if (copy == VM_MAP_COPY_NULL) { @@ -5542,18 +6695,18 @@ vm_map_copyin_kernel_buffer( copy->cpy_kdata = (void *) (copy + 1); copy->cpy_kalloc_size = kalloc_size; - kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len); + kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len); if (kr != KERN_SUCCESS) { kfree(copy, kalloc_size); return kr; } if (src_destroy) { (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr), - vm_map_round_page(src_addr + len), - VM_MAP_REMOVE_INTERRUPTIBLE | - VM_MAP_REMOVE_WAIT_FOR_KWIRE | - (src_map == kernel_map) ? - VM_MAP_REMOVE_KUNWIRE : 0); + vm_map_round_page(src_addr + len), + VM_MAP_REMOVE_INTERRUPTIBLE | + VM_MAP_REMOVE_WAIT_FOR_KWIRE | + (src_map == kernel_map) ? + VM_MAP_REMOVE_KUNWIRE : 0); } *copy_result = copy; return KERN_SUCCESS; @@ -5611,7 +6764,8 @@ vm_map_copyout_kernel_buffer( * If the target map is the current map, just do * the copy. */ - if (copyout(copy->cpy_kdata, *addr, copy->size)) { + assert((vm_size_t) copy->size == copy->size); + if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { kr = KERN_INVALID_ADDRESS; } } @@ -5626,7 +6780,8 @@ vm_map_copyout_kernel_buffer( vm_map_reference(map); oldmap = vm_map_switch(map); - if (copyout(copy->cpy_kdata, *addr, copy->size)) { + assert((vm_size_t) copy->size == copy->size); + if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { vm_map_copyout_kernel_buffer_failures++; kr = KERN_INVALID_ADDRESS; } @@ -5729,7 +6884,7 @@ vm_map_copyout( offset = vm_object_trunc_page(copy->offset); size = vm_map_round_page(copy->size + - (vm_map_size_t)(copy->offset - offset)); + (vm_map_size_t)(copy->offset - offset)); *dst_addr = 0; kr = vm_map_enter(dst_map, dst_addr, size, (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, @@ -5760,9 +6915,9 @@ vm_map_copyout( vm_copy_start = vm_object_trunc_page(copy->offset); size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size) - - vm_copy_start; + - vm_copy_start; - StartAgain: ; +StartAgain: ; vm_map_lock(dst_map); assert(first_free_is_valid(dst_map)); @@ -5802,45 +6957,45 @@ vm_map_copyout( */ if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) { - /* - * Mismatches occur when dealing with the default - * pager. - */ - zone_t old_zone; - vm_map_entry_t next, new; - - /* - * Find the zone that the copies were allocated from - */ - old_zone = (copy->cpy_hdr.entries_pageable) + /* + * Mismatches occur when dealing with the default + * pager. + */ + zone_t old_zone; + vm_map_entry_t next, new; + + /* + * Find the zone that the copies were allocated from + */ + old_zone = (copy->cpy_hdr.entries_pageable) ? vm_map_entry_zone : vm_map_kentry_zone; - entry = vm_map_copy_first_entry(copy); - - /* - * Reinitialize the copy so that vm_map_copy_entry_link - * will work. - */ - copy->cpy_hdr.nentries = 0; - copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable; - vm_map_copy_first_entry(copy) = - vm_map_copy_last_entry(copy) = - vm_map_copy_to_entry(copy); - - /* - * Copy each entry. - */ - while (entry != vm_map_copy_to_entry(copy)) { - new = vm_map_copy_entry_create(copy); - vm_map_entry_copy_full(new, entry); - new->use_pmap = FALSE; /* clr address space specifics */ - vm_map_copy_entry_link(copy, - vm_map_copy_last_entry(copy), - new); - next = entry->vme_next; - zfree(old_zone, entry); - entry = next; - } + entry = vm_map_copy_first_entry(copy); + + /* + * Reinitialize the copy so that vm_map_copy_entry_link + * will work. + */ + copy->cpy_hdr.nentries = 0; + copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable; + vm_map_copy_first_entry(copy) = + vm_map_copy_last_entry(copy) = + vm_map_copy_to_entry(copy); + + /* + * Copy each entry. + */ + while (entry != vm_map_copy_to_entry(copy)) { + new = vm_map_copy_entry_create(copy); + vm_map_entry_copy_full(new, entry); + new->use_pmap = FALSE; /* clr address space specifics */ + vm_map_copy_entry_link(copy, + vm_map_copy_last_entry(copy), + new); + next = entry->vme_next; + zfree(old_zone, entry); + entry = next; + } } /* @@ -5865,150 +7020,73 @@ vm_map_copyout( * map the pages into the destination map. */ if (entry->wired_count != 0) { - register vm_map_offset_t va; - vm_object_offset_t offset; - register vm_object_t object; - vm_prot_t prot; + register vm_map_offset_t va; + vm_object_offset_t offset; + register vm_object_t object; + vm_prot_t prot; + int type_of_fault; - object = entry->object.vm_object; - offset = entry->offset; - va = entry->vme_start; - - pmap_pageable(dst_map->pmap, - entry->vme_start, - entry->vme_end, - TRUE); - - while (va < entry->vme_end) { - register vm_page_t m; + object = entry->object.vm_object; + offset = entry->offset; + va = entry->vme_start; - /* - * Look up the page in the object. - * Assert that the page will be found in the - * top object: - * either - * the object was newly created by - * vm_object_copy_slowly, and has - * copies of all of the pages from - * the source object - * or - * the object was moved from the old - * map entry; because the old map - * entry was wired, all of the pages - * were in the top-level object. - * (XXX not true if we wire pages for - * reading) - */ - vm_object_lock(object); - vm_object_paging_begin(object); + pmap_pageable(dst_map->pmap, + entry->vme_start, + entry->vme_end, + TRUE); - m = vm_page_lookup(object, offset); - if (m == VM_PAGE_NULL || m->wire_count == 0 || - m->absent) - panic("vm_map_copyout: wiring 0x%x", m); + while (va < entry->vme_end) { + register vm_page_t m; - /* - * ENCRYPTED SWAP: - * The page is assumed to be wired here, so it - * shouldn't be encrypted. Otherwise, we - * couldn't enter it in the page table, since - * we don't want the user to see the encrypted - * data. - */ - ASSERT_PAGE_DECRYPTED(m); + /* + * Look up the page in the object. + * Assert that the page will be found in the + * top object: + * either + * the object was newly created by + * vm_object_copy_slowly, and has + * copies of all of the pages from + * the source object + * or + * the object was moved from the old + * map entry; because the old map + * entry was wired, all of the pages + * were in the top-level object. + * (XXX not true if we wire pages for + * reading) + */ + vm_object_lock(object); - m->busy = TRUE; - vm_object_unlock(object); - prot = entry->protection; -#ifdef STACK_ONLY_NX - if (entry->alias != VM_MEMORY_STACK && prot) - prot |= VM_PROT_EXECUTE; -#endif - PMAP_ENTER(dst_map->pmap, va, m, prot, - ((unsigned int) - (m->object->wimg_bits)) - & VM_WIMG_MASK, - TRUE); + m = vm_page_lookup(object, offset); + if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) || + m->absent) + panic("vm_map_copyout: wiring %p", m); - vm_object_lock(object); - PAGE_WAKEUP_DONE(m); - /* the page is wired, so we don't have to activate */ - vm_object_paging_end(object); - vm_object_unlock(object); + /* + * ENCRYPTED SWAP: + * The page is assumed to be wired here, so it + * shouldn't be encrypted. Otherwise, we + * couldn't enter it in the page table, since + * we don't want the user to see the encrypted + * data. + */ + ASSERT_PAGE_DECRYPTED(m); - offset += PAGE_SIZE_64; - va += PAGE_SIZE; - } - } - else if (size <= vm_map_aggressive_enter_max) { + prot = entry->protection; - register vm_map_offset_t va; - vm_object_offset_t offset; - register vm_object_t object; - vm_prot_t prot; + if (override_nx(dst_map, entry->alias) && prot) + prot |= VM_PROT_EXECUTE; - object = entry->object.vm_object; - if (object != VM_OBJECT_NULL) { + type_of_fault = DBG_CACHE_HIT_FAULT; - offset = entry->offset; - va = entry->vme_start; - while (va < entry->vme_end) { - register vm_page_t m; - - /* - * Look up the page in the object. - * Assert that the page will be found - * in the top object if at all... - */ - vm_object_lock(object); - vm_object_paging_begin(object); + vm_fault_enter(m, dst_map->pmap, va, prot, + VM_PAGE_WIRED(m), FALSE, FALSE, + &type_of_fault); - /* - * ENCRYPTED SWAP: - * If the page is encrypted, skip it: - * we can't let the user see the - * encrypted contents. The page will - * get decrypted on demand when the - * user generates a soft-fault when - * trying to access it. - */ - if (((m = vm_page_lookup(object, - offset)) - != VM_PAGE_NULL) && - !m->busy && !m->fictitious && - !m->encrypted && - !m->absent && !m->error) { - m->busy = TRUE; - vm_object_unlock(object); - - /* honor cow obligations */ - prot = entry->protection; - if (entry->needs_copy) - prot &= ~VM_PROT_WRITE; -#ifdef STACK_ONLY_NX - if (entry->alias != VM_MEMORY_STACK && prot) - prot |= VM_PROT_EXECUTE; -#endif - PMAP_ENTER(dst_map->pmap, va, - m, prot, - ((unsigned int) - (m->object->wimg_bits)) - & VM_WIMG_MASK, - FALSE); - - vm_object_lock(object); - vm_page_lock_queues(); - if (!m->active && !m->inactive) - vm_page_activate(m); - vm_page_unlock_queues(); - PAGE_WAKEUP_DONE(m); - } - vm_object_paging_end(object); - vm_object_unlock(object); + vm_object_unlock(object); - offset += PAGE_SIZE_64; - va += PAGE_SIZE; - } + offset += PAGE_SIZE_64; + va += PAGE_SIZE; } } } @@ -6046,6 +7124,28 @@ vm_map_copyout( * Routine: vm_map_copyin * * Description: + * see vm_map_copyin_common. Exported via Unsupported.exports. + * + */ + +#undef vm_map_copyin + +kern_return_t +vm_map_copyin( + vm_map_t src_map, + vm_map_address_t src_addr, + vm_map_size_t len, + boolean_t src_destroy, + vm_map_copy_t *copy_result) /* OUT */ +{ + return(vm_map_copyin_common(src_map, src_addr, len, src_destroy, + FALSE, copy_result, FALSE)); +} + +/* + * Routine: vm_map_copyin_common + * + * Description: * Copy the specified region (src_addr, len) from the * source address space (src_map), possibly removing * the region from the source address space (src_destroy). @@ -6065,6 +7165,7 @@ typedef struct submap_map { vm_map_t parent_map; vm_map_offset_t base_start; vm_map_offset_t base_end; + vm_map_size_t base_len; struct submap_map *next; } submap_map_t; @@ -6091,6 +7192,7 @@ vm_map_copyin_common( */ vm_map_offset_t src_end; /* End of entire region to be * copied */ + vm_map_offset_t src_base; vm_map_t base_map = src_map; boolean_t map_share=FALSE; submap_map_t *parent_maps = NULL; @@ -6122,8 +7224,8 @@ vm_map_copyin_common( * for small regions. */ if ((len < msg_ool_size_small) && !use_maxprot) - return vm_map_copyin_kernel_buffer(src_map, src_addr, len, - src_destroy, copy_result); + return vm_map_copyin_kernel_buffer(src_map, src_addr, len, + src_destroy, copy_result); /* * Compute (page aligned) start and end of region @@ -6131,7 +7233,7 @@ vm_map_copyin_common( src_start = vm_map_trunc_page(src_addr); src_end = vm_map_round_page(src_end); - XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0); + XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0); /* * Allocate a header element for the list. @@ -6142,7 +7244,7 @@ vm_map_copyin_common( copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); vm_map_copy_first_entry(copy) = - vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); + vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); copy->type = VM_MAP_COPY_ENTRY_LIST; copy->cpy_hdr.nentries = 0; copy->cpy_hdr.entries_pageable = TRUE; @@ -6230,7 +7332,7 @@ vm_map_copyin_common( submap_len = tmp_entry->vme_end - src_start; if(submap_len > (src_end-src_start)) submap_len = src_end-src_start; - ptr->base_start += submap_len; + ptr->base_len = submap_len; src_start -= tmp_entry->vme_start; src_start += tmp_entry->offset; @@ -6242,13 +7344,15 @@ vm_map_copyin_common( vm_map_reference(src_map); vm_map_unlock(ptr->parent_map); if (!vm_map_lookup_entry( - src_map, src_start, &tmp_entry)) + src_map, src_start, &tmp_entry)) RETURN(KERN_INVALID_ADDRESS); map_share = TRUE; if(!tmp_entry->is_sub_map) - vm_map_clip_start(src_map, tmp_entry, src_start); + vm_map_clip_start(src_map, tmp_entry, src_start); src_entry = tmp_entry; } + /* we are now in the lowest level submap... */ + if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) && (tmp_entry->object.vm_object->phys_contiguous)) { /* This is not, supported for now.In future */ @@ -6267,27 +7371,28 @@ vm_map_copyin_common( * to allocate a map entry. */ if (new_entry == VM_MAP_ENTRY_NULL) { - version.main_timestamp = src_map->timestamp; - vm_map_unlock(src_map); + version.main_timestamp = src_map->timestamp; + vm_map_unlock(src_map); - new_entry = vm_map_copy_entry_create(copy); + new_entry = vm_map_copy_entry_create(copy); - vm_map_lock(src_map); - if ((version.main_timestamp + 1) != src_map->timestamp) { - if (!vm_map_lookup_entry(src_map, src_start, - &tmp_entry)) { - RETURN(KERN_INVALID_ADDRESS); + vm_map_lock(src_map); + if ((version.main_timestamp + 1) != src_map->timestamp) { + if (!vm_map_lookup_entry(src_map, src_start, + &tmp_entry)) { + RETURN(KERN_INVALID_ADDRESS); + } + if (!tmp_entry->is_sub_map) + vm_map_clip_start(src_map, tmp_entry, src_start); + continue; /* restart w/ new tmp_entry */ } - vm_map_clip_start(src_map, tmp_entry, src_start); - continue; /* restart w/ new tmp_entry */ - } } /* * Verify that the region can be read. */ if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE && - !use_maxprot) || + !use_maxprot) || (src_entry->max_protection & VM_PROT_READ) == 0) RETURN(KERN_PROTECTION_FAILURE); @@ -6311,39 +7416,39 @@ vm_map_copyin_common( if (src_destroy && (src_object == VM_OBJECT_NULL || - (src_object->internal && !src_object->true_share - && !map_share))) { - /* - * If we are destroying the source, and the object - * is internal, we can move the object reference - * from the source to the copy. The copy is - * copy-on-write only if the source is. - * We make another reference to the object, because - * destroying the source entry will deallocate it. - */ - vm_object_reference(src_object); + (src_object->internal && !src_object->true_share + && !map_share))) { + /* + * If we are destroying the source, and the object + * is internal, we can move the object reference + * from the source to the copy. The copy is + * copy-on-write only if the source is. + * We make another reference to the object, because + * destroying the source entry will deallocate it. + */ + vm_object_reference(src_object); - /* - * Copy is always unwired. vm_map_copy_entry - * set its wired count to zero. - */ + /* + * Copy is always unwired. vm_map_copy_entry + * set its wired count to zero. + */ - goto CopySuccessful; + goto CopySuccessful; } -RestartCopy: + RestartCopy: XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n", src_object, new_entry, new_entry->object.vm_object, was_wired, 0); if ((src_object == VM_OBJECT_NULL || - (!was_wired && !map_share && !tmp_entry->is_shared)) && - vm_object_copy_quickly( - &new_entry->object.vm_object, - src_offset, - src_size, - &src_needs_copy, - &new_entry_needs_copy)) { + (!was_wired && !map_share && !tmp_entry->is_shared)) && + vm_object_copy_quickly( + &new_entry->object.vm_object, + src_offset, + src_size, + &src_needs_copy, + &new_entry_needs_copy)) { new_entry->needs_copy = new_entry_needs_copy; @@ -6355,17 +7460,17 @@ RestartCopy: vm_prot_t prot; prot = src_entry->protection & ~VM_PROT_WRITE; -#ifdef STACK_ONLY_NX - if (src_entry->alias != VM_MEMORY_STACK && prot) + + if (override_nx(src_map, src_entry->alias) && prot) prot |= VM_PROT_EXECUTE; -#endif + vm_object_pmap_protect( src_object, src_offset, src_size, (src_entry->is_shared ? - PMAP_NULL - : src_map->pmap), + PMAP_NULL + : src_map->pmap), src_entry->vme_start, prot); @@ -6405,24 +7510,25 @@ RestartCopy: CopySlowly: vm_object_lock(src_object); result = vm_object_copy_slowly( - src_object, - src_offset, - src_size, - THREAD_UNINT, - &new_entry->object.vm_object); + src_object, + src_offset, + src_size, + THREAD_UNINT, + &new_entry->object.vm_object); new_entry->offset = 0; new_entry->needs_copy = FALSE; } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && - (tmp_entry->is_shared || map_share)) { + (tmp_entry->is_shared || map_share)) { vm_object_t new_object; - vm_object_lock(src_object); + vm_object_lock_shared(src_object); new_object = vm_object_copy_delayed( - src_object, - src_offset, - src_size); + src_object, + src_offset, + src_size, + TRUE); if (new_object == VM_OBJECT_NULL) goto CopySlowly; @@ -6432,11 +7538,11 @@ RestartCopy: } else { result = vm_object_copy_strategically(src_object, - src_offset, - src_size, - &new_entry->object.vm_object, - &new_entry->offset, - &new_entry_needs_copy); + src_offset, + src_size, + &new_entry->object.vm_object, + &new_entry->offset, + &new_entry_needs_copy); new_entry->needs_copy = new_entry_needs_copy; } @@ -6499,7 +7605,7 @@ RestartCopy: * Start over with this top-level entry. */ - VerificationFailed: ; + VerificationFailed: ; vm_object_deallocate(new_entry->object.vm_object); tmp_entry = src_entry; @@ -6510,7 +7616,7 @@ RestartCopy: * Verification succeeded. */ - VerificationSuccessful: ; + VerificationSuccessful: ; if (result == KERN_MEMORY_RESTART_COPY) goto RestartCopy; @@ -6519,7 +7625,7 @@ RestartCopy: * Copy succeeded. */ - CopySuccessful: ; + CopySuccessful: ; /* * Link in the new copy entry. @@ -6532,6 +7638,7 @@ RestartCopy: * Determine whether the entire region * has been copied. */ + src_base = src_start; src_start = new_entry->vme_end; new_entry = VM_MAP_ENTRY_NULL; while ((src_start >= src_end) && (src_end != 0)) { @@ -6541,15 +7648,22 @@ RestartCopy: ptr = parent_maps; assert(ptr != NULL); parent_maps = parent_maps->next; + + /* fix up the damage we did in that submap */ + vm_map_simplify_range(src_map, + src_base, + src_end); + vm_map_unlock(src_map); vm_map_deallocate(src_map); vm_map_lock(ptr->parent_map); src_map = ptr->parent_map; - src_start = ptr->base_start; + src_base = ptr->base_start; + src_start = ptr->base_start + ptr->base_len; src_end = ptr->base_end; if ((src_end > src_start) && - !vm_map_lookup_entry( - src_map, src_start, &tmp_entry)) + !vm_map_lookup_entry( + src_map, src_start, &tmp_entry)) RETURN(KERN_INVALID_ADDRESS); kfree(ptr, sizeof(submap_map_t)); if(parent_maps == NULL) @@ -6567,7 +7681,7 @@ RestartCopy: tmp_entry = src_entry->vme_next; if ((tmp_entry->vme_start != src_start) || - (tmp_entry == vm_map_to_entry(src_map))) + (tmp_entry == vm_map_to_entry(src_map))) RETURN(KERN_INVALID_ADDRESS); } @@ -6580,9 +7694,14 @@ RestartCopy: vm_map_trunc_page(src_addr), src_end, (src_map == kernel_map) ? - VM_MAP_REMOVE_KUNWIRE : - VM_MAP_NO_FLAGS, + VM_MAP_REMOVE_KUNWIRE : + VM_MAP_NO_FLAGS, VM_MAP_NULL); + } else { + /* fix up the damage we did in the base map */ + vm_map_simplify_range(src_map, + vm_map_trunc_page(src_addr), + vm_map_round_page(src_end)); } vm_map_unlock(src_map); @@ -6664,22 +7783,22 @@ vm_map_fork_share( kern_return_t result; result = pmap_nest(new_map->pmap, - (old_entry->object.sub_map)->pmap, - (addr64_t)old_entry->vme_start, - (addr64_t)old_entry->vme_start, - (uint64_t)(old_entry->vme_end - old_entry->vme_start)); + (old_entry->object.sub_map)->pmap, + (addr64_t)old_entry->vme_start, + (addr64_t)old_entry->vme_start, + (uint64_t)(old_entry->vme_end - old_entry->vme_start)); if(result) panic("vm_map_fork_share: pmap_nest failed!"); } #endif /* NO_NESTED_PMAP */ } else if (object == VM_OBJECT_NULL) { object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end - - old_entry->vme_start)); + old_entry->vme_start)); old_entry->offset = 0; old_entry->object.vm_object = object; assert(!old_entry->needs_copy); } else if (object->copy_strategy != - MEMORY_OBJECT_COPY_SYMMETRIC) { + MEMORY_OBJECT_COPY_SYMMETRIC) { /* * We are already using an asymmetric @@ -6692,10 +7811,10 @@ vm_map_fork_share( else if (old_entry->needs_copy || /* case 1 */ object->shadowed || /* case 2 */ (!object->true_share && /* case 3 */ - !old_entry->is_shared && - (object->size > - (vm_map_size_t)(old_entry->vme_end - - old_entry->vme_start)))) { + !old_entry->is_shared && + (object->size > + (vm_map_size_t)(old_entry->vme_end - + old_entry->vme_start)))) { /* * We need to create a shadow. @@ -6774,11 +7893,10 @@ vm_map_fork_share( * case 2.) */ - assert(!(object->shadowed && old_entry->is_shared)); vm_object_shadow(&old_entry->object.vm_object, &old_entry->offset, (vm_map_size_t) (old_entry->vme_end - - old_entry->vme_start)); + old_entry->vme_start)); /* * If we're making a shadow for other than @@ -6791,24 +7909,24 @@ vm_map_fork_share( vm_prot_t prot; prot = old_entry->protection & ~VM_PROT_WRITE; -#ifdef STACK_ONLY_NX - if (old_entry->alias != VM_MEMORY_STACK && prot) + + if (override_nx(old_map, old_entry->alias) && prot) prot |= VM_PROT_EXECUTE; -#endif + if (old_map->mapped) { vm_object_pmap_protect( old_entry->object.vm_object, old_entry->offset, (old_entry->vme_end - - old_entry->vme_start), + old_entry->vme_start), PMAP_NULL, old_entry->vme_start, prot); } else { pmap_protect(old_map->pmap, - old_entry->vme_start, - old_entry->vme_end, - prot); + old_entry->vme_start, + old_entry->vme_end, + prot); } } @@ -6831,8 +7949,7 @@ vm_map_fork_share( vm_map_unlock(old_entry->object.sub_map); } else { vm_object_lock(object); - object->ref_count++; - vm_object_res_reference(object); + vm_object_reference_locked(object); if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) { object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; } @@ -6865,8 +7982,8 @@ vm_map_fork_share( /* Bill Angell pmap support goes here */ } else { pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start, - old_entry->vme_end - old_entry->vme_start, - old_entry->vme_start); + old_entry->vme_end - old_entry->vme_start, + old_entry->vme_start); } } @@ -6930,7 +8047,15 @@ vm_map_fork_copy( if (! vm_map_lookup_entry(old_map, start, &last)) { last = last->vme_next; } else { - vm_map_clip_start(old_map, last, start); + if (last->vme_start == start) { + /* + * No need to clip here and we don't + * want to cause any unnecessary + * unnesting... + */ + } else { + vm_map_clip_start(old_map, last, start); + } } *old_entry_p = last; @@ -6950,9 +8075,7 @@ vm_map_t vm_map_fork( vm_map_t old_map) { - pmap_t new_pmap = pmap_create( - (vm_map_size_t) 0, - task_has_64BitAddr(current_task())); + pmap_t new_pmap; vm_map_t new_map; vm_map_entry_t old_entry; vm_map_size_t new_size = 0, entry_size; @@ -6960,18 +8083,30 @@ vm_map_fork( boolean_t src_needs_copy; boolean_t new_entry_needs_copy; + new_pmap = pmap_create((vm_map_size_t) 0, +#if defined(__i386__) || defined(__x86_64__) + old_map->pmap->pm_task_map != TASK_MAP_32BIT +#else + 0 +#endif + ); +#if defined(__i386__) + if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED) + pmap_set_4GB_pagezero(new_pmap); +#endif + vm_map_reference_swap(old_map); vm_map_lock(old_map); new_map = vm_map_create(new_pmap, - old_map->min_offset, - old_map->max_offset, - old_map->hdr.entries_pageable); + old_map->min_offset, + old_map->max_offset, + old_map->hdr.entries_pageable); for ( - old_entry = vm_map_first_entry(old_map); - old_entry != vm_map_to_entry(old_map); - ) { + old_entry = vm_map_first_entry(old_map); + old_entry != vm_map_to_entry(old_map); + ) { entry_size = old_entry->vme_end - old_entry->vme_start; @@ -6995,8 +8130,8 @@ vm_map_fork( if(old_entry->is_sub_map) break; if ((old_entry->wired_count != 0) || - ((old_entry->object.vm_object != NULL) && - (old_entry->object.vm_object->true_share))) { + ((old_entry->object.vm_object != NULL) && + (old_entry->object.vm_object->true_share))) { goto slow_vm_map_fork_copy; } @@ -7006,12 +8141,12 @@ vm_map_fork( new_entry->use_pmap = FALSE; if (! vm_object_copy_quickly( - &new_entry->object.vm_object, - old_entry->offset, - (old_entry->vme_end - - old_entry->vme_start), - &src_needs_copy, - &new_entry_needs_copy)) { + &new_entry->object.vm_object, + old_entry->offset, + (old_entry->vme_end - + old_entry->vme_start), + &src_needs_copy, + &new_entry_needs_copy)) { vm_map_entry_dispose(new_map, new_entry); goto slow_vm_map_fork_copy; } @@ -7024,19 +8159,19 @@ vm_map_fork( vm_prot_t prot; prot = old_entry->protection & ~VM_PROT_WRITE; -#ifdef STACK_ONLY_NX - if (old_entry->alias != VM_MEMORY_STACK && prot) + + if (override_nx(old_map, old_entry->alias) && prot) prot |= VM_PROT_EXECUTE; -#endif + vm_object_pmap_protect( old_entry->object.vm_object, old_entry->offset, (old_entry->vme_end - - old_entry->vme_start), + old_entry->vme_start), ((old_entry->is_shared - || old_map->mapped) - ? PMAP_NULL : - old_map->pmap), + || old_map->mapped) + ? PMAP_NULL : + old_map->pmap), old_entry->vme_start, prot); @@ -7070,6 +8205,30 @@ vm_map_fork( return(new_map); } +/* + * vm_map_exec: + * + * Setup the "new_map" with the proper execution environment according + * to the type of executable (platform, 64bit, chroot environment). + * Map the comm page and shared region, etc... + */ +kern_return_t +vm_map_exec( + vm_map_t new_map, + task_t task, + void *fsroot, + cpu_type_t cpu) +{ + SHARED_REGION_TRACE_DEBUG( + ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n", + current_task(), new_map, task, fsroot, cpu)); + (void) vm_commpage_enter(new_map, task); + (void) vm_shared_region_enter(new_map, task, fsroot, cpu); + SHARED_REGION_TRACE_DEBUG( + ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n", + current_task(), new_map, task, fsroot, cpu)); + return KERN_SUCCESS; +} /* * vm_map_lookup_locked: @@ -7098,16 +8257,15 @@ vm_map_fork( kern_return_t vm_map_lookup_locked( vm_map_t *var_map, /* IN/OUT */ - vm_map_offset_t vaddr, + vm_map_offset_t vaddr, vm_prot_t fault_type, + int object_lock_type, vm_map_version_t *out_version, /* OUT */ vm_object_t *object, /* OUT */ vm_object_offset_t *offset, /* OUT */ vm_prot_t *out_prot, /* OUT */ boolean_t *wired, /* OUT */ - int *behavior, /* OUT */ - vm_map_offset_t *lo_offset, /* OUT */ - vm_map_offset_t *hi_offset, /* OUT */ + vm_object_fault_info_t fault_info, /* OUT */ vm_map_t *real_map) { vm_map_entry_t entry; @@ -7120,7 +8278,7 @@ vm_map_lookup_locked( register vm_prot_t prot; *real_map = map; - RetryLookup: ; +RetryLookup: ; /* * If the map has an interesting hint, try it before calling @@ -7140,7 +8298,7 @@ vm_map_lookup_locked( if((cow_sub_map_parent) && (cow_sub_map_parent != map)) vm_map_unlock(cow_sub_map_parent); if((*real_map != map) - && (*real_map != cow_sub_map_parent)) + && (*real_map != cow_sub_map_parent)) vm_map_unlock(*real_map); return KERN_INVALID_ADDRESS; } @@ -7167,18 +8325,19 @@ submap_recurse: local_vaddr = vaddr; - if ((!entry->needs_copy) && (entry->use_pmap)) { + if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) { /* if real_map equals map we unlock below */ if ((*real_map != map) && - (*real_map != cow_sub_map_parent)) + (*real_map != cow_sub_map_parent)) vm_map_unlock(*real_map); *real_map = entry->object.sub_map; } - if(entry->needs_copy) { + if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) { if (!mapped_needs_copy) { if (vm_map_lock_read_to_write(map)) { vm_map_lock_read(map); + /* XXX FBDP: entry still valid ? */ if(*real_map == entry->object.sub_map) *real_map = map; goto RetryLookup; @@ -7195,7 +8354,7 @@ submap_recurse: } else { vm_map_lock_read(entry->object.sub_map); if((cow_sub_map_parent != map) && - (*real_map != map)) + (*real_map != map)) vm_map_unlock(map); } } else { @@ -7209,23 +8368,25 @@ submap_recurse: vm_map_unlock_read(map); } + /* XXX FBDP: map has been unlocked, what protects "entry" !? */ *var_map = map = entry->object.sub_map; /* calculate the offset in the submap for vaddr */ local_vaddr = (local_vaddr - entry->vme_start) + entry->offset; -RetrySubMap: + RetrySubMap: if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) { if((cow_sub_map_parent) && (cow_sub_map_parent != map)){ vm_map_unlock(cow_sub_map_parent); } if((*real_map != map) - && (*real_map != cow_sub_map_parent)) { + && (*real_map != cow_sub_map_parent)) { vm_map_unlock(*real_map); } *real_map = map; return KERN_INVALID_ADDRESS; } + /* find the attenuated shadow of the underlying object */ /* on our target map */ @@ -7237,14 +8398,14 @@ RetrySubMap: /* to be as big as the portion of the underlying entry */ /* which is mapped */ start_delta = submap_entry->vme_start > entry->offset ? - submap_entry->vme_start - entry->offset : 0; + submap_entry->vme_start - entry->offset : 0; end_delta = - (entry->offset + start_delta + (old_end - old_start)) <= + (entry->offset + start_delta + (old_end - old_start)) <= submap_entry->vme_end ? - 0 : (entry->offset + - (old_end - old_start)) - - submap_entry->vme_end; + 0 : (entry->offset + + (old_end - old_start)) + - submap_entry->vme_end; old_start += start_delta; old_end -= end_delta; @@ -7257,7 +8418,8 @@ RetrySubMap: if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) { - vm_object_t copy_object; + vm_object_t sub_object, copy_object; + vm_object_offset_t copy_offset; vm_map_offset_t local_start; vm_map_offset_t local_end; boolean_t copied_slowly = FALSE; @@ -7270,20 +8432,24 @@ RetrySubMap: } - if (submap_entry->object.vm_object == VM_OBJECT_NULL) { - submap_entry->object.vm_object = + sub_object = submap_entry->object.vm_object; + if (sub_object == VM_OBJECT_NULL) { + sub_object = vm_object_allocate( (vm_map_size_t) - (submap_entry->vme_end - - submap_entry->vme_start)); + (submap_entry->vme_end - + submap_entry->vme_start)); + submap_entry->object.vm_object = sub_object; submap_entry->offset = 0; } local_start = local_vaddr - - (cow_parent_vaddr - old_start); + (cow_parent_vaddr - old_start); local_end = local_vaddr + - (old_end - cow_parent_vaddr); + (old_end - cow_parent_vaddr); vm_map_clip_start(map, submap_entry, local_start); vm_map_clip_end(map, submap_entry, local_end); + /* unnesting was done in vm_map_clip_start/end() */ + assert(!submap_entry->use_pmap); /* This is the COW case, lets connect */ /* an entry in our space to the underlying */ @@ -7291,41 +8457,48 @@ RetrySubMap: /* submap. */ - if(submap_entry->wired_count != 0) { - vm_object_lock( - submap_entry->object.vm_object); - vm_object_copy_slowly( - submap_entry->object.vm_object, - submap_entry->offset, - submap_entry->vme_end - - submap_entry->vme_start, - FALSE, - ©_object); - copied_slowly = TRUE; + if(submap_entry->wired_count != 0 || + (sub_object->copy_strategy == + MEMORY_OBJECT_COPY_NONE)) { + vm_object_lock(sub_object); + vm_object_copy_slowly(sub_object, + submap_entry->offset, + (submap_entry->vme_end - + submap_entry->vme_start), + FALSE, + ©_object); + copied_slowly = TRUE; } else { + /* set up shadow object */ - copy_object = submap_entry->object.vm_object; + copy_object = sub_object; vm_object_reference(copy_object); - submap_entry->object.vm_object->shadowed = TRUE; + sub_object->shadowed = TRUE; submap_entry->needs_copy = TRUE; prot = submap_entry->protection & ~VM_PROT_WRITE; -#ifdef STACK_ONLY_NX - if (submap_entry->alias != VM_MEMORY_STACK && prot) + + if (override_nx(map, submap_entry->alias) && prot) prot |= VM_PROT_EXECUTE; -#endif + vm_object_pmap_protect( - submap_entry->object.vm_object, + sub_object, submap_entry->offset, submap_entry->vme_end - - submap_entry->vme_start, + submap_entry->vme_start, (submap_entry->is_shared - || map->mapped) ? - PMAP_NULL : map->pmap, + || map->mapped) ? + PMAP_NULL : map->pmap, submap_entry->vme_start, prot); } + /* + * Adjust the fault offset to the submap entry. + */ + copy_offset = (local_vaddr - + submap_entry->vme_start + + submap_entry->offset); /* This works diffently than the */ /* normal submap case. We go back */ @@ -7343,20 +8516,44 @@ RetrySubMap: cow_sub_map_parent = NULL; if(!vm_map_lookup_entry(map, - vaddr, &entry)) { - vm_object_deallocate( - copy_object); - vm_map_lock_write_to_read(map); - return KERN_INVALID_ADDRESS; + vaddr, &entry)) { + vm_object_deallocate( + copy_object); + vm_map_lock_write_to_read(map); + return KERN_INVALID_ADDRESS; } /* clip out the portion of space */ /* mapped by the sub map which */ /* corresponds to the underlying */ /* object */ + + /* + * Clip (and unnest) the smallest nested chunk + * possible around the faulting address... + */ + local_start = vaddr & ~(pmap_nesting_size_min - 1); + local_end = local_start + pmap_nesting_size_min; + /* + * ... but don't go beyond the "old_start" to "old_end" + * range, to avoid spanning over another VM region + * with a possibly different VM object and/or offset. + */ + if (local_start < old_start) { + local_start = old_start; + } + if (local_end > old_end) { + local_end = old_end; + } + /* + * Adjust copy_offset to the start of the range. + */ + copy_offset -= (vaddr - local_start); + vm_map_clip_start(map, entry, local_start); vm_map_clip_end(map, entry, local_end); - + /* unnesting was done in vm_map_clip_start/end() */ + assert(!entry->use_pmap); /* substitute copy object for */ /* shared map entry */ @@ -7364,14 +8561,16 @@ RetrySubMap: entry->is_sub_map = FALSE; entry->object.vm_object = copy_object; - entry->protection |= VM_PROT_WRITE; - entry->max_protection |= VM_PROT_WRITE; + /* propagate the submap entry's protections */ + entry->protection |= submap_entry->protection; + entry->max_protection |= submap_entry->max_protection; + if(copied_slowly) { - entry->offset = 0; + entry->offset = local_start - old_start; entry->needs_copy = FALSE; entry->is_shared = FALSE; } else { - entry->offset = submap_entry->offset; + entry->offset = copy_offset; entry->needs_copy = TRUE; if(entry->inheritance == VM_INHERIT_SHARE) entry->inheritance = VM_INHERIT_COPY; @@ -7384,8 +8583,8 @@ RetrySubMap: vm_map_lock_write_to_read(map); } else { if((cow_sub_map_parent) - && (cow_sub_map_parent != *real_map) - && (cow_sub_map_parent != map)) { + && (cow_sub_map_parent != *real_map) + && (cow_sub_map_parent != map)) { vm_map_unlock(cow_sub_map_parent); } entry = submap_entry; @@ -7397,24 +8596,26 @@ RetrySubMap: * Check whether this task is allowed to have * this page. */ + prot = entry->protection; -#ifdef STACK_ONLY_NX - if (entry->alias != VM_MEMORY_STACK && prot) + if (override_nx(map, entry->alias) && prot) { /* - * HACK -- if not a stack, than allow execution + * HACK -- if not a stack, then allow execution */ prot |= VM_PROT_EXECUTE; -#endif + } + if ((fault_type & (prot)) != fault_type) { - if (*real_map != map) { - vm_map_unlock(*real_map); + if (*real_map != map) { + vm_map_unlock(*real_map); } *real_map = map; if ((fault_type & VM_PROT_EXECUTE) && prot) - log_nx_failure((addr64_t)vaddr, prot); + log_stack_execution_failure((addr64_t)vaddr, prot); + DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL); return KERN_PROTECTION_FAILURE; } @@ -7455,7 +8656,7 @@ RetrySubMap: vm_object_shadow(&entry->object.vm_object, &entry->offset, (vm_map_size_t) (entry->vme_end - - entry->vme_start)); + entry->vme_start)); entry->object.vm_object->shadowed = TRUE; entry->needs_copy = FALSE; @@ -7496,16 +8697,27 @@ RetrySubMap: *offset = (vaddr - entry->vme_start) + entry->offset; *object = entry->object.vm_object; *out_prot = prot; - *behavior = entry->behavior; - *lo_offset = entry->offset; - *hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; + + if (fault_info) { + fault_info->interruptible = THREAD_UNINT; /* for now... */ + /* ... the caller will change "interruptible" if needed */ + fault_info->cluster_size = 0; + fault_info->user_tag = entry->alias; + fault_info->behavior = entry->behavior; + fault_info->lo_offset = entry->offset; + fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; + fault_info->no_cache = entry->no_cache; + fault_info->stealth = FALSE; + } /* * Lock the object to prevent it from disappearing */ - - vm_object_lock(*object); - + if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) + vm_object_lock(*object); + else + vm_object_lock_shared(*object); + /* * Save the version number */ @@ -7603,17 +8815,33 @@ vm_map_region_recurse_64( unsigned int next_depth; vm_map_offset_t next_max_offset; + boolean_t look_for_pages; + vm_region_submap_short_info_64_t short_info; + if (map == VM_MAP_NULL) { /* no address space to work on */ return KERN_INVALID_ARGUMENT; } if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) { - /* "info" structure is not big enough and would overflow */ - return KERN_INVALID_ARGUMENT; + if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { + /* + * "info" structure is not big enough and + * would overflow + */ + return KERN_INVALID_ARGUMENT; + } else { + look_for_pages = FALSE; + *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; + short_info = (vm_region_submap_short_info_64_t) submap_info; + submap_info = NULL; + } + } else { + look_for_pages = TRUE; + *count = VM_REGION_SUBMAP_INFO_COUNT_64; + short_info = NULL; } - *count = VM_REGION_SUBMAP_INFO_COUNT_64; user_address = *address; user_max_depth = *nesting_depth; @@ -7722,7 +8950,8 @@ vm_map_region_recurse_64( /* keep "next_map" locked in case we need it */ } else { /* release this map */ - vm_map_unlock_read(curr_map); + if (not_in_kdp) + vm_map_unlock_read(curr_map); } /* @@ -7784,15 +9013,32 @@ vm_map_region_recurse_64( *size = curr_entry->vme_end - curr_entry->vme_start; *address = curr_entry->vme_start + curr_offset; - submap_info->user_tag = curr_entry->alias; - submap_info->offset = curr_entry->offset; - submap_info->protection = curr_entry->protection; - submap_info->inheritance = curr_entry->inheritance; - submap_info->max_protection = curr_entry->max_protection; - submap_info->behavior = curr_entry->behavior; - submap_info->user_wired_count = curr_entry->user_wired_count; - submap_info->is_submap = curr_entry->is_sub_map; - submap_info->object_id = (uint32_t) curr_entry->object.vm_object; +// LP64todo: all the current tools are 32bit, obviously never worked for 64b +// so probably should be a real 32b ID vs. ptr. +// Current users just check for equality +#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p) + + if (look_for_pages) { + submap_info->user_tag = curr_entry->alias; + submap_info->offset = curr_entry->offset; + submap_info->protection = curr_entry->protection; + submap_info->inheritance = curr_entry->inheritance; + submap_info->max_protection = curr_entry->max_protection; + submap_info->behavior = curr_entry->behavior; + submap_info->user_wired_count = curr_entry->user_wired_count; + submap_info->is_submap = curr_entry->is_sub_map; + submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object); + } else { + short_info->user_tag = curr_entry->alias; + short_info->offset = curr_entry->offset; + short_info->protection = curr_entry->protection; + short_info->inheritance = curr_entry->inheritance; + short_info->max_protection = curr_entry->max_protection; + short_info->behavior = curr_entry->behavior; + short_info->user_wired_count = curr_entry->user_wired_count; + short_info->is_submap = curr_entry->is_sub_map; + short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object); + } extended.pages_resident = 0; extended.pages_swapped_out = 0; @@ -7809,32 +9055,40 @@ vm_map_region_recurse_64( curr_entry->offset, (curr_entry->vme_end - curr_entry->vme_start), - &extended); - submap_info->share_mode = extended.share_mode; + &extended, + look_for_pages); if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) { - submap_info->share_mode = SM_PRIVATE; + extended.share_mode = SM_PRIVATE; } - submap_info->ref_count = extended.ref_count; } else { if (curr_entry->use_pmap) { - submap_info->share_mode = SM_TRUESHARED; + extended.share_mode = SM_TRUESHARED; } else { - submap_info->share_mode = SM_PRIVATE; + extended.share_mode = SM_PRIVATE; } - submap_info->ref_count = + extended.ref_count = curr_entry->object.sub_map->ref_count; } } - submap_info->pages_resident = extended.pages_resident; - submap_info->pages_swapped_out = extended.pages_swapped_out; - submap_info->pages_shared_now_private = - extended.pages_shared_now_private; - submap_info->pages_dirtied = extended.pages_dirtied; - submap_info->external_pager = extended.external_pager; - submap_info->shadow_depth = extended.shadow_depth; + if (look_for_pages) { + submap_info->pages_resident = extended.pages_resident; + submap_info->pages_swapped_out = extended.pages_swapped_out; + submap_info->pages_shared_now_private = + extended.pages_shared_now_private; + submap_info->pages_dirtied = extended.pages_dirtied; + submap_info->external_pager = extended.external_pager; + submap_info->shadow_depth = extended.shadow_depth; + submap_info->share_mode = extended.share_mode; + submap_info->ref_count = extended.ref_count; + } else { + short_info->external_pager = extended.external_pager; + short_info->shadow_depth = extended.shadow_depth; + short_info->share_mode = extended.share_mode; + short_info->ref_count = extended.ref_count; + } if (not_in_kdp) { vm_map_unlock_read(curr_map); @@ -7875,253 +9129,276 @@ vm_map_region( switch (flavor) { case VM_REGION_BASIC_INFO: - /* legacy for old 32-bit objects info */ + /* legacy for old 32-bit objects info */ { - vm_region_basic_info_t basic; + vm_region_basic_info_t basic; - if (*count < VM_REGION_BASIC_INFO_COUNT) - return(KERN_INVALID_ARGUMENT); + if (*count < VM_REGION_BASIC_INFO_COUNT) + return(KERN_INVALID_ARGUMENT); - basic = (vm_region_basic_info_t) info; - *count = VM_REGION_BASIC_INFO_COUNT; + basic = (vm_region_basic_info_t) info; + *count = VM_REGION_BASIC_INFO_COUNT; - vm_map_lock_read(map); + vm_map_lock_read(map); - start = *address; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); + start = *address; + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { + vm_map_unlock_read(map); + return(KERN_INVALID_ADDRESS); + } + } else { + entry = tmp_entry; } - } else { - entry = tmp_entry; - } - start = entry->vme_start; + start = entry->vme_start; - basic->offset = (uint32_t)entry->offset; - basic->protection = entry->protection; - basic->inheritance = entry->inheritance; - basic->max_protection = entry->max_protection; - basic->behavior = entry->behavior; - basic->user_wired_count = entry->user_wired_count; - basic->reserved = entry->is_sub_map; - *address = start; - *size = (entry->vme_end - start); + basic->offset = (uint32_t)entry->offset; + basic->protection = entry->protection; + basic->inheritance = entry->inheritance; + basic->max_protection = entry->max_protection; + basic->behavior = entry->behavior; + basic->user_wired_count = entry->user_wired_count; + basic->reserved = entry->is_sub_map; + *address = start; + *size = (entry->vme_end - start); - if (object_name) *object_name = IP_NULL; - if (entry->is_sub_map) { - basic->shared = FALSE; - } else { - basic->shared = entry->is_shared; - } + if (object_name) *object_name = IP_NULL; + if (entry->is_sub_map) { + basic->shared = FALSE; + } else { + basic->shared = entry->is_shared; + } - vm_map_unlock_read(map); - return(KERN_SUCCESS); + vm_map_unlock_read(map); + return(KERN_SUCCESS); } case VM_REGION_BASIC_INFO_64: { - vm_region_basic_info_64_t basic; + vm_region_basic_info_64_t basic; - if (*count < VM_REGION_BASIC_INFO_COUNT_64) - return(KERN_INVALID_ARGUMENT); + if (*count < VM_REGION_BASIC_INFO_COUNT_64) + return(KERN_INVALID_ARGUMENT); - basic = (vm_region_basic_info_64_t) info; - *count = VM_REGION_BASIC_INFO_COUNT_64; + basic = (vm_region_basic_info_64_t) info; + *count = VM_REGION_BASIC_INFO_COUNT_64; - vm_map_lock_read(map); + vm_map_lock_read(map); - start = *address; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); + start = *address; + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { + vm_map_unlock_read(map); + return(KERN_INVALID_ADDRESS); + } + } else { + entry = tmp_entry; } - } else { - entry = tmp_entry; - } - - start = entry->vme_start; - - basic->offset = entry->offset; - basic->protection = entry->protection; - basic->inheritance = entry->inheritance; - basic->max_protection = entry->max_protection; - basic->behavior = entry->behavior; - basic->user_wired_count = entry->user_wired_count; - basic->reserved = entry->is_sub_map; - *address = start; - *size = (entry->vme_end - start); - - if (object_name) *object_name = IP_NULL; - if (entry->is_sub_map) { - basic->shared = FALSE; - } else { - basic->shared = entry->is_shared; - } - - vm_map_unlock_read(map); - return(KERN_SUCCESS); + + start = entry->vme_start; + + basic->offset = entry->offset; + basic->protection = entry->protection; + basic->inheritance = entry->inheritance; + basic->max_protection = entry->max_protection; + basic->behavior = entry->behavior; + basic->user_wired_count = entry->user_wired_count; + basic->reserved = entry->is_sub_map; + *address = start; + *size = (entry->vme_end - start); + + if (object_name) *object_name = IP_NULL; + if (entry->is_sub_map) { + basic->shared = FALSE; + } else { + basic->shared = entry->is_shared; + } + + vm_map_unlock_read(map); + return(KERN_SUCCESS); } case VM_REGION_EXTENDED_INFO: { - vm_region_extended_info_t extended; + vm_region_extended_info_t extended; - if (*count < VM_REGION_EXTENDED_INFO_COUNT) - return(KERN_INVALID_ARGUMENT); + if (*count < VM_REGION_EXTENDED_INFO_COUNT) + return(KERN_INVALID_ARGUMENT); - extended = (vm_region_extended_info_t) info; - *count = VM_REGION_EXTENDED_INFO_COUNT; + extended = (vm_region_extended_info_t) info; + *count = VM_REGION_EXTENDED_INFO_COUNT; - vm_map_lock_read(map); + vm_map_lock_read(map); - start = *address; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); + start = *address; + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { + vm_map_unlock_read(map); + return(KERN_INVALID_ADDRESS); + } + } else { + entry = tmp_entry; } - } else { - entry = tmp_entry; - } - start = entry->vme_start; + start = entry->vme_start; - extended->protection = entry->protection; - extended->user_tag = entry->alias; - extended->pages_resident = 0; - extended->pages_swapped_out = 0; - extended->pages_shared_now_private = 0; - extended->pages_dirtied = 0; - extended->external_pager = 0; - extended->shadow_depth = 0; + extended->protection = entry->protection; + extended->user_tag = entry->alias; + extended->pages_resident = 0; + extended->pages_swapped_out = 0; + extended->pages_shared_now_private = 0; + extended->pages_dirtied = 0; + extended->external_pager = 0; + extended->shadow_depth = 0; - vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended); + vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE); - if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) - extended->share_mode = SM_PRIVATE; + if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) + extended->share_mode = SM_PRIVATE; - if (object_name) - *object_name = IP_NULL; - *address = start; - *size = (entry->vme_end - start); + if (object_name) + *object_name = IP_NULL; + *address = start; + *size = (entry->vme_end - start); - vm_map_unlock_read(map); - return(KERN_SUCCESS); + vm_map_unlock_read(map); + return(KERN_SUCCESS); } case VM_REGION_TOP_INFO: { - vm_region_top_info_t top; - - if (*count < VM_REGION_TOP_INFO_COUNT) - return(KERN_INVALID_ARGUMENT); + vm_region_top_info_t top; - top = (vm_region_top_info_t) info; - *count = VM_REGION_TOP_INFO_COUNT; + if (*count < VM_REGION_TOP_INFO_COUNT) + return(KERN_INVALID_ARGUMENT); - vm_map_lock_read(map); - - start = *address; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); - } - } else { - entry = tmp_entry; + top = (vm_region_top_info_t) info; + *count = VM_REGION_TOP_INFO_COUNT; - } - start = entry->vme_start; + vm_map_lock_read(map); - top->private_pages_resident = 0; - top->shared_pages_resident = 0; + start = *address; + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { + vm_map_unlock_read(map); + return(KERN_INVALID_ADDRESS); + } + } else { + entry = tmp_entry; - vm_map_region_top_walk(entry, top); + } + start = entry->vme_start; - if (object_name) - *object_name = IP_NULL; - *address = start; - *size = (entry->vme_end - start); + top->private_pages_resident = 0; + top->shared_pages_resident = 0; - vm_map_unlock_read(map); - return(KERN_SUCCESS); + vm_map_region_top_walk(entry, top); + + if (object_name) + *object_name = IP_NULL; + *address = start; + *size = (entry->vme_end - start); + + vm_map_unlock_read(map); + return(KERN_SUCCESS); } default: - return(KERN_INVALID_ARGUMENT); + return(KERN_INVALID_ARGUMENT); } } +#define OBJ_RESIDENT_COUNT(obj, entry_size) \ + MIN((entry_size), \ + ((obj)->all_reusable ? \ + (obj)->wired_page_count : \ + (obj)->resident_page_count - (obj)->reusable_page_count)) + void vm_map_region_top_walk( vm_map_entry_t entry, vm_region_top_info_t top) { - register struct vm_object *obj, *tmp_obj; - register int ref_count; if (entry->object.vm_object == 0 || entry->is_sub_map) { - top->share_mode = SM_EMPTY; - top->ref_count = 0; - top->obj_id = 0; - return; + top->share_mode = SM_EMPTY; + top->ref_count = 0; + top->obj_id = 0; + return; } + { - obj = entry->object.vm_object; + struct vm_object *obj, *tmp_obj; + int ref_count; + uint32_t entry_size; - vm_object_lock(obj); + entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64); - if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) - ref_count--; + obj = entry->object.vm_object; - if (obj->shadow) { - if (ref_count == 1) - top->private_pages_resident = obj->resident_page_count; - else - top->shared_pages_resident = obj->resident_page_count; - top->ref_count = ref_count; - top->share_mode = SM_COW; - - while ((tmp_obj = obj->shadow)) { - vm_object_lock(tmp_obj); - vm_object_unlock(obj); - obj = tmp_obj; + vm_object_lock(obj); - if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) - ref_count--; + if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) + ref_count--; - top->shared_pages_resident += obj->resident_page_count; - top->ref_count += ref_count - 1; - } - } else { - if (entry->needs_copy) { - top->share_mode = SM_COW; - top->shared_pages_resident = obj->resident_page_count; + assert(obj->reusable_page_count <= obj->resident_page_count); + if (obj->shadow) { + if (ref_count == 1) + top->private_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); + else + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); + top->ref_count = ref_count; + top->share_mode = SM_COW; + + while ((tmp_obj = obj->shadow)) { + vm_object_lock(tmp_obj); + vm_object_unlock(obj); + obj = tmp_obj; + + if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) + ref_count--; + + assert(obj->reusable_page_count <= obj->resident_page_count); + top->shared_pages_resident += + OBJ_RESIDENT_COUNT(obj, entry_size); + top->ref_count += ref_count - 1; + } } else { - if (ref_count == 1 || - (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { - top->share_mode = SM_PRIVATE; - top->private_pages_resident = obj->resident_page_count; - } else { - top->share_mode = SM_SHARED; - top->shared_pages_resident = obj->resident_page_count; - } + if (entry->needs_copy) { + top->share_mode = SM_COW; + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); + } else { + if (ref_count == 1 || + (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { + top->share_mode = SM_PRIVATE; + top->private_pages_resident = + OBJ_RESIDENT_COUNT(obj, + entry_size); + } else { + top->share_mode = SM_SHARED; + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, + entry_size); + } + } + top->ref_count = ref_count; } - top->ref_count = ref_count; - } - top->obj_id = (int)obj; + /* XXX K64: obj_id will be truncated */ + top->obj_id = (unsigned int) (uintptr_t)obj; - vm_object_unlock(obj); + vm_object_unlock(obj); } } void vm_map_region_walk( vm_map_t map, - vm_map_offset_t va, - vm_map_entry_t entry, + vm_map_offset_t va, + vm_map_entry_t entry, vm_object_offset_t offset, vm_object_size_t range, - vm_region_extended_info_t extended) + vm_region_extended_info_t extended, + boolean_t look_for_pages) { register struct vm_object *obj, *tmp_obj; register vm_map_offset_t last_offset; @@ -8131,89 +9408,103 @@ vm_map_region_walk( int shadow_depth; if ((entry->object.vm_object == 0) || - (entry->is_sub_map) || - (entry->object.vm_object->phys_contiguous)) { - extended->share_mode = SM_EMPTY; - extended->ref_count = 0; - return; + (entry->is_sub_map) || + (entry->object.vm_object->phys_contiguous)) { + extended->share_mode = SM_EMPTY; + extended->ref_count = 0; + return; } { - obj = entry->object.vm_object; - - vm_object_lock(obj); - - if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) - ref_count--; - - for (last_offset = offset + range; offset < last_offset; offset += PAGE_SIZE_64, va += PAGE_SIZE) - vm_map_region_look_for_page(map, va, obj, offset, ref_count, 0, extended); - - shadow_object = obj->shadow; - shadow_depth = 0; - if (shadow_object != VM_OBJECT_NULL) { - vm_object_lock(shadow_object); - for (; - shadow_object != VM_OBJECT_NULL; - shadow_depth++) { - vm_object_t next_shadow; - - next_shadow = shadow_object->shadow; - if (next_shadow) { - vm_object_lock(next_shadow); - } - vm_object_unlock(shadow_object); - shadow_object = next_shadow; - } - } - extended->shadow_depth = shadow_depth; - - if (extended->shadow_depth || entry->needs_copy) - extended->share_mode = SM_COW; - else { - if (ref_count == 1) - extended->share_mode = SM_PRIVATE; + obj = entry->object.vm_object; + + vm_object_lock(obj); + + if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) + ref_count--; + + if (look_for_pages) { + for (last_offset = offset + range; + offset < last_offset; + offset += PAGE_SIZE_64, va += PAGE_SIZE) + vm_map_region_look_for_page(map, va, obj, + offset, ref_count, + 0, extended); + } else { + shadow_object = obj->shadow; + shadow_depth = 0; + + if ( !(obj->pager_trusted) && !(obj->internal)) + extended->external_pager = 1; + + if (shadow_object != VM_OBJECT_NULL) { + vm_object_lock(shadow_object); + for (; + shadow_object != VM_OBJECT_NULL; + shadow_depth++) { + vm_object_t next_shadow; + + if ( !(shadow_object->pager_trusted) && + !(shadow_object->internal)) + extended->external_pager = 1; + + next_shadow = shadow_object->shadow; + if (next_shadow) { + vm_object_lock(next_shadow); + } + vm_object_unlock(shadow_object); + shadow_object = next_shadow; + } + } + extended->shadow_depth = shadow_depth; + } + + if (extended->shadow_depth || entry->needs_copy) + extended->share_mode = SM_COW; else { - if (obj->true_share) - extended->share_mode = SM_TRUESHARED; - else - extended->share_mode = SM_SHARED; + if (ref_count == 1) + extended->share_mode = SM_PRIVATE; + else { + if (obj->true_share) + extended->share_mode = SM_TRUESHARED; + else + extended->share_mode = SM_SHARED; + } } - } - extended->ref_count = ref_count - extended->shadow_depth; + extended->ref_count = ref_count - extended->shadow_depth; - for (i = 0; i < extended->shadow_depth; i++) { - if ((tmp_obj = obj->shadow) == 0) - break; - vm_object_lock(tmp_obj); - vm_object_unlock(obj); + for (i = 0; i < extended->shadow_depth; i++) { + if ((tmp_obj = obj->shadow) == 0) + break; + vm_object_lock(tmp_obj); + vm_object_unlock(obj); - if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) - ref_count--; + if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) + ref_count--; - extended->ref_count += ref_count; - obj = tmp_obj; - } - vm_object_unlock(obj); + extended->ref_count += ref_count; + obj = tmp_obj; + } + vm_object_unlock(obj); - if (extended->share_mode == SM_SHARED) { - register vm_map_entry_t cur; - register vm_map_entry_t last; - int my_refs; + if (extended->share_mode == SM_SHARED) { + register vm_map_entry_t cur; + register vm_map_entry_t last; + int my_refs; - obj = entry->object.vm_object; - last = vm_map_to_entry(map); - my_refs = 0; + obj = entry->object.vm_object; + last = vm_map_to_entry(map); + my_refs = 0; - if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) - ref_count--; - for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) - my_refs += vm_map_region_count_obj_refs(cur, obj); + if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) + ref_count--; + for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) + my_refs += vm_map_region_count_obj_refs(cur, obj); - if (my_refs == ref_count) - extended->share_mode = SM_PRIVATE_ALIASED; - else if (my_refs > 1) - extended->share_mode = SM_SHARED_ALIASED; - } + if (my_refs == ref_count) + extended->share_mode = SM_PRIVATE_ALIASED; + else if (my_refs > 1) + extended->share_mode = SM_SHARED_ALIASED; + } } } @@ -8224,18 +9515,20 @@ vm_map_region_walk( static void vm_map_region_look_for_page( __unused vm_map_t map, - __unused vm_map_offset_t va, - vm_object_t object, - vm_object_offset_t offset, + __unused vm_map_offset_t va, + vm_object_t object, + vm_object_offset_t offset, int max_refcnt, int depth, vm_region_extended_info_t extended) { - register vm_page_t p; - register vm_object_t shadow; - register int ref_count; - vm_object_t caller_object; - + register vm_page_t p; + register vm_object_t shadow; + register int ref_count; + vm_object_t caller_object; +#if MACH_PAGEMAP + kern_return_t kr; +#endif shadow = object->shadow; caller_object = object; @@ -8243,7 +9536,7 @@ vm_map_region_look_for_page( while (TRUE) { if ( !(object->pager_trusted) && !(object->internal)) - extended->external_pager = 1; + extended->external_pager = 1; if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { if (shadow && (max_refcnt == 1)) @@ -8256,23 +9549,54 @@ vm_map_region_look_for_page( extended->pages_resident++; if(object != caller_object) - vm_object_unlock(object); + vm_object_unlock(object); return; } +#if MACH_PAGEMAP if (object->existence_map) { if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) { extended->pages_swapped_out++; if(object != caller_object) - vm_object_unlock(object); + vm_object_unlock(object); return; } + } else if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + memory_object_t pager; + + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); + + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); + + vm_object_lock(object); + vm_object_paging_end(object); + + if (kr == KERN_SUCCESS) { + /* the pager has that page */ + extended->pages_swapped_out++; + if (object != caller_object) + vm_object_unlock(object); + return; + } } +#endif /* MACH_PAGEMAP */ + if (shadow) { - vm_object_lock(shadow); + vm_object_lock(shadow); if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) ref_count--; @@ -8284,7 +9608,7 @@ vm_map_region_look_for_page( max_refcnt = ref_count; if(object != caller_object) - vm_object_unlock(object); + vm_object_unlock(object); offset = offset + object->shadow_offset; object = shadow; @@ -8292,7 +9616,7 @@ vm_map_region_look_for_page( continue; } if(object != caller_object) - vm_object_unlock(object); + vm_object_unlock(object); break; } } @@ -8307,26 +9631,26 @@ vm_map_region_count_obj_refs( register vm_object_t tmp_obj; if (entry->object.vm_object == 0) - return(0); + return(0); if (entry->is_sub_map) - return(0); + return(0); else { - ref_count = 0; + ref_count = 0; - chk_obj = entry->object.vm_object; - vm_object_lock(chk_obj); + chk_obj = entry->object.vm_object; + vm_object_lock(chk_obj); - while (chk_obj) { - if (chk_obj == object) - ref_count++; - tmp_obj = chk_obj->shadow; - if (tmp_obj) - vm_object_lock(tmp_obj); - vm_object_unlock(chk_obj); + while (chk_obj) { + if (chk_obj == object) + ref_count++; + tmp_obj = chk_obj->shadow; + if (tmp_obj) + vm_object_lock(tmp_obj); + vm_object_unlock(chk_obj); - chk_obj = tmp_obj; - } + chk_obj = tmp_obj; + } } return(ref_count); } @@ -8357,12 +9681,11 @@ vm_map_simplify_entry( prev_entry = this_entry->vme_prev; if ((this_entry != vm_map_to_entry(map)) && - (prev_entry != vm_map_to_entry(map)) && + (prev_entry != vm_map_to_entry(map)) && (prev_entry->vme_end == this_entry->vme_start) && - (prev_entry->is_sub_map == FALSE) && - (this_entry->is_sub_map == FALSE) && + (prev_entry->is_sub_map == this_entry->is_sub_map) && (prev_entry->object.vm_object == this_entry->object.vm_object) && ((prev_entry->offset + (prev_entry->vme_end - @@ -8374,10 +9697,13 @@ vm_map_simplify_entry( (prev_entry->max_protection == this_entry->max_protection) && (prev_entry->behavior == this_entry->behavior) && (prev_entry->alias == this_entry->alias) && + (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) && + (prev_entry->no_cache == this_entry->no_cache) && (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->user_wired_count == this_entry->user_wired_count) && (prev_entry->needs_copy == this_entry->needs_copy) && + (prev_entry->permanent == this_entry->permanent) && (prev_entry->use_pmap == FALSE) && (this_entry->use_pmap == FALSE) && @@ -8387,11 +9713,15 @@ vm_map_simplify_entry( (this_entry->needs_wakeup == FALSE) && (prev_entry->is_shared == FALSE) && (this_entry->is_shared == FALSE) - ) { + ) { _vm_map_entry_unlink(&map->hdr, prev_entry); this_entry->vme_start = prev_entry->vme_start; this_entry->offset = prev_entry->offset; - vm_object_deallocate(prev_entry->object.vm_object); + if (prev_entry->is_sub_map) { + vm_map_deallocate(prev_entry->object.sub_map); + } else { + vm_object_deallocate(prev_entry->object.vm_object); + } vm_map_entry_dispose(map, prev_entry); SAVE_HINT_MAP_WRITE(map, this_entry); counter(c_vm_map_simplified++); @@ -8431,6 +9761,9 @@ vm_map_simplify_range( return; } + start = vm_map_trunc_page(start); + end = vm_map_round_page(end); + if (!vm_map_lookup_entry(map, start, &entry)) { /* "start" is not mapped and "entry" ends before "start" */ if (entry == vm_map_to_entry(map)) { @@ -8504,14 +9837,14 @@ vm_map_machine_attribute( sync_size = 0; } else { sub_size = entry->vme_end - start; - sync_size -= sub_size; + sync_size -= sub_size; } if(entry->is_sub_map) { vm_map_offset_t sub_start; vm_map_offset_t sub_end; sub_start = (start - entry->vme_start) - + entry->offset; + + entry->offset; sub_end = sub_start + sub_size; vm_map_machine_attribute( entry->object.sub_map, @@ -8529,7 +9862,7 @@ vm_map_machine_attribute( vm_map_size_t range; range = sub_size; offset = (start - entry->vme_start) - + entry->offset; + + entry->offset; base_offset = offset; object = entry->object.vm_object; base_object = object; @@ -8543,10 +9876,10 @@ vm_map_machine_attribute( if (m && !m->fictitious) { ret = - pmap_attribute_cache_sync( - m->phys_page, - PAGE_SIZE, - attribute, value); + pmap_attribute_cache_sync( + m->phys_page, + PAGE_SIZE, + attribute, value); } else if (object->shadow) { offset = offset + object->shadow_offset; @@ -8583,73 +9916,530 @@ vm_map_machine_attribute( return ret; } -/* - * vm_map_behavior_set: - * - * Sets the paging reference behavior of the specified address - * range in the target map. Paging reference behavior affects - * how pagein operations resulting from faults on the map will be - * clustered. - */ -kern_return_t -vm_map_behavior_set( +/* + * vm_map_behavior_set: + * + * Sets the paging reference behavior of the specified address + * range in the target map. Paging reference behavior affects + * how pagein operations resulting from faults on the map will be + * clustered. + */ +kern_return_t +vm_map_behavior_set( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_behavior_t new_behavior) +{ + register vm_map_entry_t entry; + vm_map_entry_t temp_entry; + + XPR(XPR_VM_MAP, + "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", + map, start, end, new_behavior, 0); + + switch (new_behavior) { + + /* + * This first block of behaviors all set a persistent state on the specified + * memory range. All we have to do here is to record the desired behavior + * in the vm_map_entry_t's. + */ + + case VM_BEHAVIOR_DEFAULT: + case VM_BEHAVIOR_RANDOM: + case VM_BEHAVIOR_SEQUENTIAL: + case VM_BEHAVIOR_RSEQNTL: + case VM_BEHAVIOR_ZERO_WIRED_PAGES: + vm_map_lock(map); + + /* + * The entire address range must be valid for the map. + * Note that vm_map_range_check() does a + * vm_map_lookup_entry() internally and returns the + * entry containing the start of the address range if + * the entire range is valid. + */ + if (vm_map_range_check(map, start, end, &temp_entry)) { + entry = temp_entry; + vm_map_clip_start(map, entry, start); + } + else { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + + while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { + vm_map_clip_end(map, entry, end); + assert(!entry->use_pmap); + + if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) { + entry->zero_wired_pages = TRUE; + } else { + entry->behavior = new_behavior; + } + entry = entry->vme_next; + } + + vm_map_unlock(map); + break; + + /* + * The rest of these are different from the above in that they cause + * an immediate action to take place as opposed to setting a behavior that + * affects future actions. + */ + + case VM_BEHAVIOR_WILLNEED: + return vm_map_willneed(map, start, end); + + case VM_BEHAVIOR_DONTNEED: + return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS); + + case VM_BEHAVIOR_FREE: + return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS); + + case VM_BEHAVIOR_REUSABLE: + return vm_map_reusable_pages(map, start, end); + + case VM_BEHAVIOR_REUSE: + return vm_map_reuse_pages(map, start, end); + + case VM_BEHAVIOR_CAN_REUSE: + return vm_map_can_reuse(map, start, end); + + default: + return(KERN_INVALID_ARGUMENT); + } + + return(KERN_SUCCESS); +} + + +/* + * Internals for madvise(MADV_WILLNEED) system call. + * + * The present implementation is to do a read-ahead if the mapping corresponds + * to a mapped regular file. If it's an anonymous mapping, then we do nothing + * and basically ignore the "advice" (which we are always free to do). + */ + + +static kern_return_t +vm_map_willneed( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end +) +{ + vm_map_entry_t entry; + vm_object_t object; + memory_object_t pager; + struct vm_object_fault_info fault_info; + kern_return_t kr; + vm_object_size_t len; + vm_object_offset_t offset; + + /* + * Fill in static values in fault_info. Several fields get ignored by the code + * we call, but we'll fill them in anyway since uninitialized fields are bad + * when it comes to future backwards compatibility. + */ + + fault_info.interruptible = THREAD_UNINT; /* ignored value */ + fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; + fault_info.no_cache = FALSE; /* ignored value */ + fault_info.stealth = TRUE; + + /* + * The MADV_WILLNEED operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ + + vm_map_lock_read(map); + + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ + + if (vm_map_range_check(map, start, end, &entry)) { + + /* + * Examine each vm_map_entry_t in the range. + */ + + for (; entry->vme_start < end; start += len, entry = entry->vme_next) { + + /* + * The first time through, the start address could be anywhere within the + * vm_map_entry we found. So adjust the offset to correspond. After that, + * the offset will always be zero to correspond to the beginning of the current + * vm_map_entry. + */ + + offset = (start - entry->vme_start) + entry->offset; + + /* + * Set the length so we don't go beyond the end of the map_entry or beyond the + * end of the range we were given. This range could span also multiple map + * entries all of which map different files, so make sure we only do the right + * amount of I/O for each object. Note that it's possible for there to be + * multiple map entries all referring to the same object but with different + * page permissions, but it's not worth trying to optimize that case. + */ + + len = MIN(entry->vme_end - start, end - start); + + if ((vm_size_t) len != len) { + /* 32-bit overflow */ + len = (vm_size_t) (0 - PAGE_SIZE); + } + fault_info.cluster_size = (vm_size_t) len; + fault_info.lo_offset = offset; + fault_info.hi_offset = offset + len; + fault_info.user_tag = entry->alias; + + /* + * If there's no read permission to this mapping, then just skip it. + */ + + if ((entry->protection & VM_PROT_READ) == 0) { + continue; + } + + /* + * Find the file object backing this map entry. If there is none, + * then we simply ignore the "will need" advice for this entry and + * go on to the next one. + */ + + if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) { + continue; + } + + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); + + /* + * Get the data from the object asynchronously. + * + * Note that memory_object_data_request() places limits on the amount + * of I/O it will do. Regardless of the len we specified, it won't do + * more than MAX_UPL_TRANSFER and it silently truncates the len to that + * size. This isn't necessarily bad since madvise shouldn't really be + * used to page in unlimited amounts of data. Other Unix variants limit + * the willneed case as well. If this turns out to be an issue for + * developers, then we can always adjust the policy here and still be + * backwards compatible since this is all just "advice". + */ + + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* ignored */ + VM_PROT_READ, + (memory_object_fault_info_t)&fault_info); + + vm_object_lock(object); + vm_object_paging_end(object); + vm_object_unlock(object); + + /* + * If we couldn't do the I/O for some reason, just give up on the + * madvise. We still return success to the user since madvise isn't + * supposed to fail when the advice can't be taken. + */ + + if (kr != KERN_SUCCESS) { + break; + } + } + + kr = KERN_SUCCESS; + } else + kr = KERN_INVALID_ADDRESS; + + vm_map_unlock_read(map); + return kr; +} + +static boolean_t +vm_map_entry_is_reusable( + vm_map_entry_t entry) +{ + vm_object_t object; + + if (entry->is_shared || + entry->is_sub_map || + entry->in_transition || + entry->protection != VM_PROT_DEFAULT || + entry->max_protection != VM_PROT_ALL || + entry->inheritance != VM_INHERIT_DEFAULT || + entry->no_cache || + entry->permanent || + entry->superpage_size != 0 || + entry->zero_wired_pages || + entry->wired_count != 0 || + entry->user_wired_count != 0) { + return FALSE; + } + + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + return TRUE; + } + if (object->ref_count == 1 && + object->wired_page_count == 0 && + object->copy == VM_OBJECT_NULL && + object->shadow == VM_OBJECT_NULL && + object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && + object->internal && + !object->true_share && + object->wimg_bits == VM_WIMG_DEFAULT && + !object->code_signed) { + return TRUE; + } + return FALSE; + + +} + +static kern_return_t +vm_map_reuse_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_entry_t entry; + vm_object_t object; + vm_object_offset_t start_offset, end_offset; + + /* + * The MADV_REUSE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ + + vm_map_lock_read(map); + + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ + + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * The first time through, the start address could be anywhere + * within the vm_map_entry we found. So adjust the offset to + * correspond. + */ + if (entry->vme_start < start) { + start_offset = start - entry->vme_start; + } else { + start_offset = 0; + } + end_offset = MIN(end, entry->vme_end) - entry->vme_start; + start_offset += entry->offset; + end_offset += entry->offset; + + object = entry->object.vm_object; + if (object != VM_OBJECT_NULL) { + vm_object_lock(object); + vm_object_reuse_pages(object, start_offset, end_offset, + TRUE); + vm_object_unlock(object); + } + + if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) { + /* + * XXX + * We do not hold the VM map exclusively here. + * The "alias" field is not that critical, so it's + * safe to update it here, as long as it is the only + * one that can be modified while holding the VM map + * "shared". + */ + entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED; + } + } + + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_success++; + return KERN_SUCCESS; +} + + +static kern_return_t +vm_map_reusable_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_entry_t entry; + vm_object_t object; + vm_object_offset_t start_offset, end_offset; + + /* + * The MADV_REUSABLE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ + + vm_map_lock_read(map); + + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ + + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + int kill_pages = 0; + + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * The first time through, the start address could be anywhere + * within the vm_map_entry we found. So adjust the offset to + * correspond. + */ + if (entry->vme_start < start) { + start_offset = start - entry->vme_start; + } else { + start_offset = 0; + } + end_offset = MIN(end, entry->vme_end) - entry->vme_start; + start_offset += entry->offset; + end_offset += entry->offset; + + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) + continue; + + + vm_object_lock(object); + if (object->ref_count == 1 && !object->shadow) + kill_pages = 1; + else + kill_pages = -1; + if (kill_pages != -1) { + vm_object_deactivate_pages(object, + start_offset, + end_offset - start_offset, + kill_pages, + TRUE /*reusable_pages*/); + } else { + vm_page_stats_reusable.reusable_pages_shared++; + } + vm_object_unlock(object); + + if (entry->alias == VM_MEMORY_MALLOC_LARGE || + entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) { + /* + * XXX + * We do not hold the VM map exclusively here. + * The "alias" field is not that critical, so it's + * safe to update it here, as long as it is the only + * one that can be modified while holding the VM map + * "shared". + */ + entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE; + } + } + + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_success++; + return KERN_SUCCESS; +} + + +static kern_return_t +vm_map_can_reuse( vm_map_t map, vm_map_offset_t start, - vm_map_offset_t end, - vm_behavior_t new_behavior) + vm_map_offset_t end) { - register vm_map_entry_t entry; - vm_map_entry_t temp_entry; - - XPR(XPR_VM_MAP, - "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", - (integer_t)map, start, end, new_behavior, 0); - - switch (new_behavior) { - case VM_BEHAVIOR_DEFAULT: - case VM_BEHAVIOR_RANDOM: - case VM_BEHAVIOR_SEQUENTIAL: - case VM_BEHAVIOR_RSEQNTL: - break; - case VM_BEHAVIOR_WILLNEED: - case VM_BEHAVIOR_DONTNEED: - new_behavior = VM_BEHAVIOR_DEFAULT; - break; - default: - return(KERN_INVALID_ARGUMENT); - } - - vm_map_lock(map); + vm_map_entry_t entry; /* - * The entire address range must be valid for the map. - * Note that vm_map_range_check() does a - * vm_map_lookup_entry() internally and returns the - * entry containing the start of the address range if - * the entire range is valid. + * The MADV_REUSABLE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. */ - if (vm_map_range_check(map, start, end, &temp_entry)) { - entry = temp_entry; - vm_map_clip_start(map, entry, start); - } - else { - vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); - } - while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { - vm_map_clip_end(map, entry, end); + vm_map_lock_read(map); - entry->behavior = new_behavior; + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ - entry = entry->vme_next; + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_failure++; + return KERN_INVALID_ADDRESS; } - vm_map_unlock(map); - return(KERN_SUCCESS); + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_failure++; + return KERN_INVALID_ADDRESS; + } + } + + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_success++; + return KERN_SUCCESS; } + #include #if MACH_KDB #include @@ -8661,19 +10451,19 @@ vm_map_behavior_set( * Forward declarations for internal functions. */ extern void vm_map_links_print( - struct vm_map_links *links); + struct vm_map_links *links); extern void vm_map_header_print( - struct vm_map_header *header); + struct vm_map_header *header); extern void vm_map_entry_print( - vm_map_entry_t entry); + vm_map_entry_t entry); extern void vm_follow_entry( - vm_map_entry_t entry); + vm_map_entry_t entry); extern void vm_follow_map( - vm_map_t map); + vm_map_t map); /* * vm_map_links_print: [ debug ] @@ -8756,8 +10546,8 @@ vm_map_entry_print( if (entry->is_sub_map) { iprintf("submap = %08X - offset = %016llX\n", - entry->object.sub_map, - (unsigned long long)entry->offset); + entry->object.sub_map, + (unsigned long long)entry->offset); } else { iprintf("object = %08X offset = %016llX - ", entry->object.vm_object, @@ -8786,7 +10576,7 @@ vm_follow_map( for (entry = vm_map_first_entry(map); entry && entry != vm_map_to_entry(map); entry = entry->vme_next) { - vm_follow_entry(entry); + vm_follow_entry(entry); } db_indent -= 2; @@ -8828,13 +10618,13 @@ vm_map_print( #if TASK_SWAPPER switch (map->sw_state) { - case MAP_SW_IN: + case MAP_SW_IN: swstate = "SW_IN"; break; - case MAP_SW_OUT: + case MAP_SW_OUT: swstate = "SW_OUT"; break; - default: + default: swstate = "????"; break; } @@ -8872,19 +10662,19 @@ vm_map_copy_print( iprintf("type=%d", copy->type); switch (copy->type) { - case VM_MAP_COPY_ENTRY_LIST: + case VM_MAP_COPY_ENTRY_LIST: printf("[entry_list]"); break; - case VM_MAP_COPY_OBJECT: + case VM_MAP_COPY_OBJECT: printf("[object]"); break; - case VM_MAP_COPY_KERNEL_BUFFER: + case VM_MAP_COPY_KERNEL_BUFFER: printf("[kernel_buffer]"); break; - default: + default: printf("[bad type]"); break; } @@ -8892,7 +10682,7 @@ vm_map_copy_print( printf(", size=0x%x\n", copy->size); switch (copy->type) { - case VM_MAP_COPY_ENTRY_LIST: + case VM_MAP_COPY_ENTRY_LIST: vm_map_header_print(©->cpy_hdr); for (entry = vm_map_copy_first_entry(copy); entry && entry != vm_map_copy_to_entry(copy); @@ -8901,11 +10691,11 @@ vm_map_copy_print( } break; - case VM_MAP_COPY_OBJECT: + case VM_MAP_COPY_OBJECT: iprintf("object=0x%x\n", copy->cpy_object); break; - case VM_MAP_COPY_KERNEL_BUFFER: + case VM_MAP_COPY_KERNEL_BUFFER: iprintf("kernel buffer=0x%x", copy->cpy_kdata); printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size); break; @@ -8963,7 +10753,10 @@ vm_map_entry_insert( vm_prot_t max_protection, vm_behavior_t behavior, vm_inherit_t inheritance, - unsigned wired_count) + unsigned wired_count, + boolean_t no_cache, + boolean_t permanent, + unsigned int superpage_size) { vm_map_entry_t new_entry; @@ -8991,6 +10784,10 @@ vm_map_entry_insert( new_entry->user_wired_count = 0; new_entry->use_pmap = FALSE; new_entry->alias = 0; + new_entry->zero_wired_pages = FALSE; + new_entry->no_cache = no_cache; + new_entry->permanent = permanent; + new_entry->superpage_size = superpage_size; /* * Insert the new entry into the list. @@ -9088,17 +10885,12 @@ vm_map_remap_extract( break; } - if(src_entry->is_sub_map) { - result = KERN_INVALID_ADDRESS; - break; - } - tmp_size = size - mapped_size; if (src_end > src_entry->vme_end) tmp_size -= (src_end - src_entry->vme_end); entry_size = (vm_map_size_t)(src_entry->vme_end - - src_entry->vme_start); + src_entry->vme_start); if(src_entry->is_sub_map) { vm_map_reference(src_entry->object.sub_map); @@ -9120,7 +10912,7 @@ vm_map_remap_extract( assert(!src_entry->needs_copy); } else if (src_entry->needs_copy || object->shadowed || (object->internal && !object->true_share && - !src_entry->is_shared && + !src_entry->is_shared && object->size > entry_size)) { vm_object_shadow(&src_entry->object.vm_object, @@ -9132,23 +10924,23 @@ vm_map_remap_extract( vm_prot_t prot; prot = src_entry->protection & ~VM_PROT_WRITE; -#ifdef STACK_ONLY_NX - if (src_entry->alias != VM_MEMORY_STACK && prot) + + if (override_nx(map, src_entry->alias) && prot) prot |= VM_PROT_EXECUTE; -#endif + if(map->mapped) { - vm_object_pmap_protect( - src_entry->object.vm_object, - src_entry->offset, - entry_size, - PMAP_NULL, - src_entry->vme_start, - prot); - } else { - pmap_protect(vm_map_pmap(map), + vm_object_pmap_protect( + src_entry->object.vm_object, + src_entry->offset, + entry_size, + PMAP_NULL, src_entry->vme_start, - src_entry->vme_end, prot); + } else { + pmap_protect(vm_map_pmap(map), + src_entry->vme_start, + src_entry->vme_end, + prot); } } @@ -9158,10 +10950,9 @@ vm_map_remap_extract( vm_object_lock(object); - object->ref_count++; /* object ref. for new entry */ - VM_OBJ_RES_INCR(object); + vm_object_reference_locked(object); /* object ref. for new entry */ if (object->copy_strategy == - MEMORY_OBJECT_COPY_SYMMETRIC) { + MEMORY_OBJECT_COPY_SYMMETRIC) { object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; } @@ -9194,12 +10985,12 @@ vm_map_remap_extract( new_entry->needs_copy = TRUE; object = VM_OBJECT_NULL; } else if (src_entry->wired_count == 0 && - vm_object_copy_quickly(&new_entry->object.vm_object, - new_entry->offset, - (new_entry->vme_end - - new_entry->vme_start), - &src_needs_copy, - &new_entry_needs_copy)) { + vm_object_copy_quickly(&new_entry->object.vm_object, + new_entry->offset, + (new_entry->vme_end - + new_entry->vme_start), + &src_needs_copy, + &new_entry_needs_copy)) { new_entry->needs_copy = new_entry_needs_copy; new_entry->is_shared = FALSE; @@ -9211,15 +11002,15 @@ vm_map_remap_extract( vm_prot_t prot; prot = src_entry->protection & ~VM_PROT_WRITE; -#ifdef STACK_ONLY_NX - if (src_entry->alias != VM_MEMORY_STACK && prot) + + if (override_nx(map, src_entry->alias) && prot) prot |= VM_PROT_EXECUTE; -#endif + vm_object_pmap_protect(object, offset, entry_size, ((src_entry->is_shared - || map->mapped) ? + || map->mapped) ? PMAP_NULL : map->pmap), src_entry->vme_start, prot); @@ -9250,22 +11041,22 @@ vm_map_remap_extract( if (src_entry->wired_count > 0) { vm_object_lock(object); result = vm_object_copy_slowly( - object, - offset, - entry_size, - THREAD_UNINT, - &new_entry->object.vm_object); + object, + offset, + entry_size, + THREAD_UNINT, + &new_entry->object.vm_object); new_entry->offset = 0; new_entry->needs_copy = FALSE; } else { result = vm_object_copy_strategically( - object, - offset, - entry_size, - &new_entry->object.vm_object, - &new_entry->offset, - &new_entry_needs_copy); + object, + offset, + entry_size, + &new_entry->object.vm_object, + &new_entry->offset, + &new_entry_needs_copy); new_entry->needs_copy = new_entry_needs_copy; } @@ -9372,25 +11163,25 @@ vm_map_remap( return KERN_INVALID_ARGUMENT; switch (inheritance) { - case VM_INHERIT_NONE: - case VM_INHERIT_COPY: - case VM_INHERIT_SHARE: + case VM_INHERIT_NONE: + case VM_INHERIT_COPY: + case VM_INHERIT_SHARE: if (size != 0 && src_map != VM_MAP_NULL) break; /*FALL THRU*/ - default: + default: return KERN_INVALID_ARGUMENT; } size = vm_map_round_page(size); result = vm_map_remap_extract(src_map, memory_address, - size, copy, &map_header, - cur_protection, - max_protection, - inheritance, - target_map->hdr. - entries_pageable); + size, copy, &map_header, + cur_protection, + max_protection, + inheritance, + target_map->hdr. + entries_pageable); if (result != KERN_SUCCESS) { return result; @@ -9403,7 +11194,7 @@ vm_map_remap( *address = vm_map_trunc_page(*address); vm_map_lock(target_map); result = vm_map_remap_range_allocate(target_map, address, size, - mask, anywhere, &insp_entry); + mask, anywhere, &insp_entry); for (entry = map_header.links.next; entry != (struct vm_map_entry *)&map_header.links; @@ -9420,7 +11211,7 @@ vm_map_remap( vm_object_deallocate(entry->object.vm_object); } else { vm_map_deallocate(entry->object.sub_map); - } + } _vm_map_entry_dispose(&map_header, entry); } } @@ -9461,143 +11252,143 @@ vm_map_remap_range_allocate( register vm_map_offset_t start; register vm_map_offset_t end; - StartAgain: ; +StartAgain: ; - start = *address; + start = *address; - if (anywhere) - { - /* - * Calculate the first possible address. - */ + if (anywhere) + { + /* + * Calculate the first possible address. + */ - if (start < map->min_offset) - start = map->min_offset; - if (start > map->max_offset) - return(KERN_NO_SPACE); + if (start < map->min_offset) + start = map->min_offset; + if (start > map->max_offset) + return(KERN_NO_SPACE); - /* - * Look for the first possible address; - * if there's already something at this - * address, we have to start after it. - */ + /* + * Look for the first possible address; + * if there's already something at this + * address, we have to start after it. + */ - assert(first_free_is_valid(map)); - if (start == map->min_offset) { - if ((entry = map->first_free) != vm_map_to_entry(map)) - start = entry->vme_end; - } else { - vm_map_entry_t tmp_entry; - if (vm_map_lookup_entry(map, start, &tmp_entry)) - start = tmp_entry->vme_end; - entry = tmp_entry; - } + assert(first_free_is_valid(map)); + if (start == map->min_offset) { + if ((entry = map->first_free) != vm_map_to_entry(map)) + start = entry->vme_end; + } else { + vm_map_entry_t tmp_entry; + if (vm_map_lookup_entry(map, start, &tmp_entry)) + start = tmp_entry->vme_end; + entry = tmp_entry; + } - /* - * In any case, the "entry" always precedes - * the proposed new region throughout the - * loop: - */ + /* + * In any case, the "entry" always precedes + * the proposed new region throughout the + * loop: + */ - while (TRUE) { - register vm_map_entry_t next; - - /* - * Find the end of the proposed new region. - * Be sure we didn't go beyond the end, or - * wrap around the address. - */ - - end = ((start + mask) & ~mask); - if (end < start) - return(KERN_NO_SPACE); - start = end; - end += size; - - if ((end > map->max_offset) || (end < start)) { - if (map->wait_for_space) { - if (size <= (map->max_offset - - map->min_offset)) { - assert_wait((event_t) map, THREAD_INTERRUPTIBLE); - vm_map_unlock(map); - thread_block(THREAD_CONTINUE_NULL); - vm_map_lock(map); - goto StartAgain; - } - } + while (TRUE) { + register vm_map_entry_t next; + + /* + * Find the end of the proposed new region. + * Be sure we didn't go beyond the end, or + * wrap around the address. + */ + + end = ((start + mask) & ~mask); + if (end < start) + return(KERN_NO_SPACE); + start = end; + end += size; + + if ((end > map->max_offset) || (end < start)) { + if (map->wait_for_space) { + if (size <= (map->max_offset - + map->min_offset)) { + assert_wait((event_t) map, THREAD_INTERRUPTIBLE); + vm_map_unlock(map); + thread_block(THREAD_CONTINUE_NULL); + vm_map_lock(map); + goto StartAgain; + } + } - return(KERN_NO_SPACE); - } + return(KERN_NO_SPACE); + } - /* - * If there are no more entries, we must win. - */ + /* + * If there are no more entries, we must win. + */ - next = entry->vme_next; - if (next == vm_map_to_entry(map)) - break; + next = entry->vme_next; + if (next == vm_map_to_entry(map)) + break; - /* - * If there is another entry, it must be - * after the end of the potential new region. - */ + /* + * If there is another entry, it must be + * after the end of the potential new region. + */ - if (next->vme_start >= end) - break; + if (next->vme_start >= end) + break; - /* - * Didn't fit -- move to the next entry. - */ + /* + * Didn't fit -- move to the next entry. + */ - entry = next; - start = entry->vme_end; - } - *address = start; - } else { - vm_map_entry_t temp_entry; + entry = next; + start = entry->vme_end; + } + *address = start; + } else { + vm_map_entry_t temp_entry; - /* - * Verify that: - * the address doesn't itself violate - * the mask requirement. - */ + /* + * Verify that: + * the address doesn't itself violate + * the mask requirement. + */ - if ((start & mask) != 0) - return(KERN_NO_SPACE); + if ((start & mask) != 0) + return(KERN_NO_SPACE); - /* - * ... the address is within bounds - */ + /* + * ... the address is within bounds + */ - end = start + size; + end = start + size; - if ((start < map->min_offset) || - (end > map->max_offset) || - (start >= end)) { - return(KERN_INVALID_ADDRESS); - } + if ((start < map->min_offset) || + (end > map->max_offset) || + (start >= end)) { + return(KERN_INVALID_ADDRESS); + } - /* - * ... the starting address isn't allocated - */ + /* + * ... the starting address isn't allocated + */ - if (vm_map_lookup_entry(map, start, &temp_entry)) - return(KERN_NO_SPACE); + if (vm_map_lookup_entry(map, start, &temp_entry)) + return(KERN_NO_SPACE); - entry = temp_entry; + entry = temp_entry; - /* - * ... the next region doesn't overlap the - * end point. - */ + /* + * ... the next region doesn't overlap the + * end point. + */ - if ((entry->vme_next != vm_map_to_entry(map)) && - (entry->vme_next->vme_start < end)) - return(KERN_NO_SPACE); - } - *map_entry = entry; - return(KERN_SUCCESS); + if ((entry->vme_next != vm_map_to_entry(map)) && + (entry->vme_next->vme_start < end)) + return(KERN_NO_SPACE); + } + *map_entry = entry; + return(KERN_SUCCESS); } /* @@ -9719,21 +11510,19 @@ vm_map_read_user( * privilege on the entire address region given. * The entire region must be allocated. */ -boolean_t vm_map_check_protection(map, start, end, protection) - register vm_map_t map; - register vm_map_offset_t start; - register vm_map_offset_t end; - register vm_prot_t protection; +boolean_t +vm_map_check_protection(vm_map_t map, vm_map_offset_t start, + vm_map_offset_t end, vm_prot_t protection) { - register vm_map_entry_t entry; - vm_map_entry_t tmp_entry; + vm_map_entry_t entry; + vm_map_entry_t tmp_entry; vm_map_lock(map); - if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) + if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) { - vm_map_unlock(map); - return (FALSE); + vm_map_unlock(map); + return (FALSE); } if (!vm_map_lookup_entry(map, start, &tmp_entry)) { @@ -9795,22 +11584,28 @@ vm_map_purgable_control( return(KERN_INVALID_ARGUMENT); if (control != VM_PURGABLE_SET_STATE && - control != VM_PURGABLE_GET_STATE) + control != VM_PURGABLE_GET_STATE && + control != VM_PURGABLE_PURGE_ALL) return(KERN_INVALID_ARGUMENT); + if (control == VM_PURGABLE_PURGE_ALL) { + vm_purgeable_object_purge_all(); + return KERN_SUCCESS; + } + if (control == VM_PURGABLE_SET_STATE && - (*state < VM_PURGABLE_STATE_MIN || - *state > VM_PURGABLE_STATE_MAX)) + (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || + ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) return(KERN_INVALID_ARGUMENT); - vm_map_lock(map); + vm_map_lock_read(map); if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) { /* * Must pass a valid non-submap address. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return(KERN_INVALID_ADDRESS); } @@ -9818,7 +11613,7 @@ vm_map_purgable_control( /* * Can't apply purgable controls to something you can't write. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return(KERN_PROTECTION_FAILURE); } @@ -9827,7 +11622,7 @@ vm_map_purgable_control( /* * Object must already be present or it can't be purgable. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return KERN_INVALID_ARGUMENT; } @@ -9839,12 +11634,12 @@ vm_map_purgable_control( * Can only apply purgable controls to the whole (existing) * object at once. */ - vm_map_unlock(map); + vm_map_unlock_read(map); vm_object_unlock(object); return KERN_INVALID_ARGUMENT; } - vm_map_unlock(map); + vm_map_unlock_read(map); kr = vm_object_purgable_control(object, control, state); @@ -9854,58 +11649,194 @@ vm_map_purgable_control( } kern_return_t -vm_map_page_info( - vm_map_t target_map, +vm_map_page_query_internal( + vm_map_t target_map, vm_map_offset_t offset, - int *disposition, - int *ref_count) + int *disposition, + int *ref_count) { - vm_map_entry_t map_entry; - vm_object_t object; - vm_page_t m; + kern_return_t kr; + vm_page_info_basic_data_t info; + mach_msg_type_number_t count; + + count = VM_PAGE_INFO_BASIC_COUNT; + kr = vm_map_page_info(target_map, + offset, + VM_PAGE_INFO_BASIC, + (vm_page_info_t) &info, + &count); + if (kr == KERN_SUCCESS) { + *disposition = info.disposition; + *ref_count = info.ref_count; + } else { + *disposition = 0; + *ref_count = 0; + } -restart_page_query: - *disposition = 0; - *ref_count = 0; - vm_map_lock(target_map); - if(!vm_map_lookup_entry(target_map, offset, &map_entry)) { - vm_map_unlock(target_map); - return KERN_FAILURE; + return kr; +} + +kern_return_t +vm_map_page_info( + vm_map_t map, + vm_map_offset_t offset, + vm_page_info_flavor_t flavor, + vm_page_info_t info, + mach_msg_type_number_t *count) +{ + vm_map_entry_t map_entry; + vm_object_t object; + vm_page_t m; + kern_return_t kr; + kern_return_t retval = KERN_SUCCESS; + boolean_t top_object; + int disposition; + int ref_count; + vm_object_id_t object_id; + vm_page_info_basic_t basic_info; + int depth; + + switch (flavor) { + case VM_PAGE_INFO_BASIC: + if (*count != VM_PAGE_INFO_BASIC_COUNT) { + return KERN_INVALID_ARGUMENT; + } + break; + default: + return KERN_INVALID_ARGUMENT; } - offset -= map_entry->vme_start; /* adjust to offset within entry */ - offset += map_entry->offset; /* adjust to target object offset */ - if(map_entry->object.vm_object != VM_OBJECT_NULL) { - if(!map_entry->is_sub_map) { - object = map_entry->object.vm_object; - } else { - vm_map_unlock(target_map); - target_map = map_entry->object.sub_map; - goto restart_page_query; + + disposition = 0; + ref_count = 0; + object_id = 0; + top_object = TRUE; + depth = 0; + + retval = KERN_SUCCESS; + offset = vm_map_trunc_page(offset); + + vm_map_lock_read(map); + + /* + * First, find the map entry covering "offset", going down + * submaps if necessary. + */ + for (;;) { + if (!vm_map_lookup_entry(map, offset, &map_entry)) { + vm_map_unlock_read(map); + return KERN_INVALID_ADDRESS; } - } else { - vm_map_unlock(target_map); - return KERN_FAILURE; + /* compute offset from this map entry's start */ + offset -= map_entry->vme_start; + /* compute offset into this map entry's object (or submap) */ + offset += map_entry->offset; + + if (map_entry->is_sub_map) { + vm_map_t sub_map; + + sub_map = map_entry->object.sub_map; + vm_map_lock_read(sub_map); + vm_map_unlock_read(map); + + map = sub_map; + + ref_count = MAX(ref_count, map->ref_count); + continue; + } + break; + } + + object = map_entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + /* no object -> no page */ + vm_map_unlock_read(map); + goto done; } + vm_object_lock(object); - vm_map_unlock(target_map); - while(TRUE) { + vm_map_unlock_read(map); + + /* + * Go down the VM object shadow chain until we find the page + * we're looking for. + */ + for (;;) { + ref_count = MAX(ref_count, object->ref_count); + m = vm_page_lookup(object, offset); + if (m != VM_PAGE_NULL) { - *disposition |= VM_PAGE_QUERY_PAGE_PRESENT; + disposition |= VM_PAGE_QUERY_PAGE_PRESENT; break; } else { - if(object->shadow) { +#if MACH_PAGEMAP + if (object->existence_map) { + if (vm_external_state_get(object->existence_map, + offset) == + VM_EXTERNAL_STATE_EXISTS) { + /* + * this page has been paged out + */ + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + break; + } + } else +#endif + { + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + memory_object_t pager; + + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); + + /* + * Ask the default pager if + * it has this page. + */ + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); + + vm_object_lock(object); + vm_object_paging_end(object); + + if (kr == KERN_SUCCESS) { + /* the default pager has it */ + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + break; + } + } + } + + if (object->shadow != VM_OBJECT_NULL) { + vm_object_t shadow; + offset += object->shadow_offset; + shadow = object->shadow; + + vm_object_lock(shadow); vm_object_unlock(object); - object = object->shadow; - vm_object_lock(object); - continue; + + object = shadow; + top_object = FALSE; + depth++; + } else { +// if (!object->internal) +// break; +// retval = KERN_FAILURE; +// goto done_with_object; + break; } - vm_object_unlock(object); - return KERN_FAILURE; } } - /* The ref_count is not strictly accurate, it measures the number */ /* of entities holding a ref on the object, they may not be mapping */ /* the object or may not be mapping the section holding the */ @@ -9916,133 +11847,49 @@ restart_page_query: /* but this would under count as only faulted-in mappings would */ /* show up. */ - *ref_count = object->ref_count; + if (top_object == TRUE && object->shadow) + disposition |= VM_PAGE_QUERY_PAGE_COPIED; - if (m->fictitious) { - *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; - vm_object_unlock(object); - return KERN_SUCCESS; - } + if (! object->internal) + disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL; - if (m->dirty) - *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; - else if(pmap_is_modified(m->phys_page)) - *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; + if (m == VM_PAGE_NULL) + goto done_with_object; - if (m->reference) - *disposition |= VM_PAGE_QUERY_PAGE_REF; - else if(pmap_is_referenced(m->phys_page)) - *disposition |= VM_PAGE_QUERY_PAGE_REF; + if (m->fictitious) { + disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; + goto done_with_object; + } + if (m->dirty || pmap_is_modified(m->phys_page)) + disposition |= VM_PAGE_QUERY_PAGE_DIRTY; - vm_object_unlock(object); - return KERN_SUCCESS; - -} + if (m->reference || pmap_is_referenced(m->phys_page)) + disposition |= VM_PAGE_QUERY_PAGE_REF; + if (m->speculative) + disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE; -/* For a given range, check all map entries. If the entry coresponds to */ -/* the old vm_region/map provided on the call, replace it with the */ -/* corresponding range in the new vm_region/map */ -kern_return_t vm_map_region_replace( - vm_map_t target_map, - ipc_port_t old_region, - ipc_port_t new_region, - vm_map_offset_t start, - vm_map_offset_t end) -{ - vm_named_entry_t old_object; - vm_named_entry_t new_object; - vm_map_t old_submap; - vm_map_t new_submap; - vm_map_offset_t addr; - vm_map_entry_t entry; - int nested_pmap = 0; + if (m->cs_validated) + disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED; + if (m->cs_tainted) + disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED; +done_with_object: + vm_object_unlock(object); +done: - vm_map_lock(target_map); - old_object = (vm_named_entry_t)old_region->ip_kobject; - new_object = (vm_named_entry_t)new_region->ip_kobject; - if((!old_object->is_sub_map) || (!new_object->is_sub_map)) { - vm_map_unlock(target_map); - return KERN_INVALID_ARGUMENT; - } - old_submap = (vm_map_t)old_object->backing.map; - new_submap = (vm_map_t)new_object->backing.map; - vm_map_lock(old_submap); - if((old_submap->min_offset != new_submap->min_offset) || - (old_submap->max_offset != new_submap->max_offset)) { - vm_map_unlock(old_submap); - vm_map_unlock(target_map); - return KERN_INVALID_ARGUMENT; - } - if(!vm_map_lookup_entry(target_map, start, &entry)) { - /* if the src is not contained, the entry preceeds */ - /* our range */ - addr = entry->vme_start; - if(entry == vm_map_to_entry(target_map)) { - vm_map_unlock(old_submap); - vm_map_unlock(target_map); - return KERN_SUCCESS; - } - } - if ((entry->use_pmap) && - (new_submap->pmap == NULL)) { - new_submap->pmap = pmap_create((vm_map_size_t) 0, FALSE); - if(new_submap->pmap == PMAP_NULL) { - vm_map_unlock(old_submap); - vm_map_unlock(target_map); - return(KERN_NO_SPACE); - } + switch (flavor) { + case VM_PAGE_INFO_BASIC: + basic_info = (vm_page_info_basic_t) info; + basic_info->disposition = disposition; + basic_info->ref_count = ref_count; + basic_info->object_id = (vm_object_id_t) (uintptr_t) object; + basic_info->offset = (memory_object_offset_t) offset; + basic_info->depth = depth; + break; } - /* - * Mark the new submap as "mapped", so that we get proper - * cleanup of the sub-pmap when we unmap it. - */ - new_submap->mapped = TRUE; - - addr = entry->vme_start; - vm_map_reference(old_submap); - while((entry != vm_map_to_entry(target_map)) && - (entry->vme_start < end)) { - if((entry->is_sub_map) && - (entry->object.sub_map == old_submap)) { - if(entry->use_pmap) { - if((start & 0x0fffffff) || - ((end - start) != 0x10000000)) { - vm_map_unlock(old_submap); - vm_map_deallocate(old_submap); - vm_map_unlock(target_map); - return KERN_INVALID_ARGUMENT; - } - nested_pmap = 1; - } - entry->object.sub_map = new_submap; - vm_map_reference(new_submap); - vm_map_deallocate(old_submap); - } - entry = entry->vme_next; - addr = entry->vme_start; - } - if(nested_pmap) { -#ifndef NO_NESTED_PMAP - pmap_unnest(target_map->pmap, (addr64_t)start); - if(target_map->mapped) { - vm_map_submap_pmap_clean(target_map, - start, end, old_submap, 0); - } - pmap_nest(target_map->pmap, new_submap->pmap, - (addr64_t)start, (addr64_t)start, - (uint64_t)(end - start)); -#endif /* NO_NESTED_PMAP */ - } else { - vm_map_submap_pmap_clean(target_map, - start, end, old_submap, 0); - } - vm_map_unlock(old_submap); - vm_map_deallocate(old_submap); - vm_map_unlock(target_map); - return KERN_SUCCESS; + return retval; } /* @@ -10097,8 +11944,8 @@ vm_map_msync( vm_map_size_t amount_left; vm_object_offset_t offset; boolean_t do_sync_req; - boolean_t modifiable; boolean_t had_hole = FALSE; + memory_object_t pager; if ((sync_flags & VM_SYNC_ASYNCHRONOUS) && (sync_flags & VM_SYNC_SYNCHRONOUS)) @@ -10125,9 +11972,9 @@ vm_map_msync( vm_map_lock(map); if (!vm_map_lookup_entry(map, - vm_map_trunc_page(address), &entry)) { + vm_map_trunc_page(address), &entry)) { - vm_size_t skip; + vm_map_size_t skip; /* * hole in the address map. @@ -10172,7 +12019,7 @@ vm_map_msync( */ if (amount_left + entry->vme_start + offset > entry->vme_end) { flush_size = entry->vme_end - - (entry->vme_start + offset); + (entry->vme_start + offset); } else { flush_size = amount_left; } @@ -10187,10 +12034,10 @@ vm_map_msync( local_offset = entry->offset; vm_map_unlock(map); if (vm_map_msync( - local_map, - local_offset, - flush_size, - sync_flags) == KERN_INVALID_ADDRESS) { + local_map, + local_offset, + flush_size, + sync_flags) == KERN_INVALID_ADDRESS) { had_hole = TRUE; } continue; @@ -10206,23 +12053,22 @@ vm_map_msync( continue; } offset += entry->offset; - modifiable = (entry->protection & VM_PROT_WRITE) - != VM_PROT_NONE; vm_object_lock(object); if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { - boolean_t kill_pages = 0; + int kill_pages = 0; + boolean_t reusable_pages = FALSE; if (sync_flags & VM_SYNC_KILLPAGES) { - if (object->ref_count == 1 && !entry->needs_copy && !object->shadow) + if (object->ref_count == 1 && !object->shadow) kill_pages = 1; else kill_pages = -1; } if (kill_pages != -1) vm_object_deactivate_pages(object, offset, - (vm_object_size_t)flush_size, kill_pages); + (vm_object_size_t)flush_size, kill_pages, reusable_pages); vm_object_unlock(object); vm_map_unlock(map); continue; @@ -10241,26 +12087,36 @@ vm_map_msync( /* * keep reference on the object until syncing is done */ - assert(object->ref_count > 0); - object->ref_count++; - vm_object_res_reference(object); + vm_object_reference_locked(object); vm_object_unlock(object); vm_map_unlock(map); do_sync_req = vm_object_sync(object, - offset, - flush_size, - sync_flags & VM_SYNC_INVALIDATE, - (modifiable && - (sync_flags & VM_SYNC_SYNCHRONOUS || - sync_flags & VM_SYNC_ASYNCHRONOUS)), - sync_flags & VM_SYNC_SYNCHRONOUS); + offset, + flush_size, + sync_flags & VM_SYNC_INVALIDATE, + ((sync_flags & VM_SYNC_SYNCHRONOUS) || + (sync_flags & VM_SYNC_ASYNCHRONOUS)), + sync_flags & VM_SYNC_SYNCHRONOUS); /* * only send a m_o_s if we returned pages or if the entry * is writable (ie dirty pages may have already been sent back) */ - if (!do_sync_req && !modifiable) { + if (!do_sync_req) { + if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) { + /* + * clear out the clustering and read-ahead hints + */ + vm_object_lock(object); + + object->pages_created = 0; + object->pages_used = 0; + object->sequential = 0; + object->last_alloc = 0; + + vm_object_unlock(object); + } vm_object_deallocate(object); continue; } @@ -10273,7 +12129,22 @@ vm_map_msync( new_msr->length = flush_size; new_msr->object = object; new_msr->flag = VM_MSYNC_SYNCHRONIZING; -re_iterate: + re_iterate: + + /* + * We can't sync this object if there isn't a pager. The + * pager can disappear anytime we're not holding the object + * lock. So this has to be checked anytime we goto re_iterate. + */ + + pager = object->pager; + + if (pager == MEMORY_OBJECT_NULL) { + vm_object_unlock(object); + vm_object_deallocate(object); + continue; + } + queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) { /* * need to check for overlapping entry, if found, wait @@ -10297,15 +12168,21 @@ re_iterate: }/* queue_iterate */ queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q); + + vm_object_paging_begin(object); vm_object_unlock(object); queue_enter(&req_q, new_msr, msync_req_t, req_q); (void) memory_object_synchronize( - object->pager, - offset, - flush_size, - sync_flags & ~VM_SYNC_CONTIGUOUS); + pager, + offset, + flush_size, + sync_flags & ~VM_SYNC_CONTIGUOUS); + + vm_object_lock(object); + vm_object_paging_end(object); + vm_object_unlock(object); }/* while */ /* @@ -10334,75 +12211,6 @@ re_iterate: return(KERN_SUCCESS); }/* vm_msync */ -/* Takes existing source and destination sub-maps and clones the contents of */ -/* the source map */ -kern_return_t -vm_region_clone( - ipc_port_t src_region, - ipc_port_t dst_region) -{ - vm_named_entry_t src_object; - vm_named_entry_t dst_object; - vm_map_t src_map; - vm_map_t dst_map; - vm_map_offset_t addr; - vm_map_offset_t max_off; - vm_map_entry_t entry; - vm_map_entry_t new_entry; - vm_map_entry_t insert_point; - - src_object = (vm_named_entry_t)src_region->ip_kobject; - dst_object = (vm_named_entry_t)dst_region->ip_kobject; - if((!src_object->is_sub_map) || (!dst_object->is_sub_map)) { - return KERN_INVALID_ARGUMENT; - } - src_map = (vm_map_t)src_object->backing.map; - dst_map = (vm_map_t)dst_object->backing.map; - /* destination map is assumed to be unavailable to any other */ - /* activity. i.e. it is new */ - vm_map_lock(src_map); - if((src_map->min_offset != dst_map->min_offset) - || (src_map->max_offset != dst_map->max_offset)) { - vm_map_unlock(src_map); - return KERN_INVALID_ARGUMENT; - } - addr = src_map->min_offset; - vm_map_lookup_entry(dst_map, addr, &entry); - if(entry == vm_map_to_entry(dst_map)) { - entry = entry->vme_next; - } - if(entry == vm_map_to_entry(dst_map)) { - max_off = src_map->max_offset; - } else { - max_off = entry->vme_start; - } - vm_map_lookup_entry(src_map, addr, &entry); - if(entry == vm_map_to_entry(src_map)) { - entry = entry->vme_next; - } - vm_map_lookup_entry(dst_map, addr, &insert_point); - while((entry != vm_map_to_entry(src_map)) && - (entry->vme_end <= max_off)) { - addr = entry->vme_start; - new_entry = vm_map_entry_create(dst_map); - vm_map_entry_copy(new_entry, entry); - vm_map_entry_link(dst_map, insert_point, new_entry); - insert_point = new_entry; - if (entry->object.vm_object != VM_OBJECT_NULL) { - if (new_entry->is_sub_map) { - vm_map_reference(new_entry->object.sub_map); - } else { - vm_object_reference( - new_entry->object.vm_object); - } - } - dst_map->size += new_entry->vme_end - new_entry->vme_start; - entry = entry->vme_next; - } - vm_map_unlock(src_map); - return KERN_SUCCESS; -} - /* * Routine: convert_port_entry_to_map * Purpose: @@ -10421,25 +12229,28 @@ convert_port_entry_to_map( { vm_map_t map; vm_named_entry_t named_entry; + uint32_t try_failed_count = 0; if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { while(TRUE) { ip_lock(port); if(ip_active(port) && (ip_kotype(port) - == IKOT_NAMED_ENTRY)) { + == IKOT_NAMED_ENTRY)) { named_entry = - (vm_named_entry_t)port->ip_kobject; - if (!(mutex_try(&(named_entry)->Lock))) { + (vm_named_entry_t)port->ip_kobject; + if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { ip_unlock(port); - mutex_pause(); + + try_failed_count++; + mutex_pause(try_failed_count); continue; } named_entry->ref_count++; - mutex_unlock(&(named_entry)->Lock); + lck_mtx_unlock(&(named_entry)->Lock); ip_unlock(port); if ((named_entry->is_sub_map) && - (named_entry->protection - & VM_PROT_WRITE)) { + (named_entry->protection + & VM_PROT_WRITE)) { map = named_entry->backing.map; } else { mach_destroy_memory_entry(port); @@ -10476,26 +12287,29 @@ convert_port_entry_to_object( { vm_object_t object; vm_named_entry_t named_entry; + uint32_t try_failed_count = 0; if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { while(TRUE) { ip_lock(port); if(ip_active(port) && (ip_kotype(port) - == IKOT_NAMED_ENTRY)) { + == IKOT_NAMED_ENTRY)) { named_entry = - (vm_named_entry_t)port->ip_kobject; - if (!(mutex_try(&(named_entry)->Lock))) { + (vm_named_entry_t)port->ip_kobject; + if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { ip_unlock(port); - mutex_pause(); + + try_failed_count++; + mutex_pause(try_failed_count); continue; } named_entry->ref_count++; - mutex_unlock(&(named_entry)->Lock); + lck_mtx_unlock(&(named_entry)->Lock); ip_unlock(port); if ((!named_entry->is_sub_map) && - (!named_entry->is_pager) && - (named_entry->protection - & VM_PROT_WRITE)) { + (!named_entry->is_pager) && + (named_entry->protection + & VM_PROT_WRITE)) { object = named_entry->backing.object; } else { mach_destroy_memory_entry(port); @@ -10541,14 +12355,14 @@ vm_map_reference( if (map == VM_MAP_NULL) return; - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); #if TASK_SWAPPER assert(map->res_count > 0); assert(map->ref_count >= map->res_count); map->res_count++; #endif map->ref_count++; - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); } /* @@ -10567,15 +12381,15 @@ vm_map_deallocate( if (map == VM_MAP_NULL) return; - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); ref = --map->ref_count; if (ref > 0) { vm_map_res_deallocate(map); - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); return; } assert(map->ref_count == 0); - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); #if TASK_SWAPPER /* @@ -10586,114 +12400,9 @@ vm_map_deallocate( */ #endif - vm_map_destroy(map); -} - - -/* LP64todo - this whole mechanism is temporary. It should be redone when - * the pmap layer can handle 64-bit address spaces. Until then, we trump - * up a map entry for the 64-bit commpage above the map's max_offset. - */ -extern vm_map_t com_region_map64; /* the submap for 64-bit commpage */ -extern vm_map_t com_region_map32; /* the submap for 32-bit commpage */ - - -static void -vm_map_commpage( - vm_map_t user_map, - vm_map_t com_region_map, /* com_region_map32 or com_region_map64 */ - vm_map_offset_t base_address, - vm_map_size_t size) -{ - vm_map_entry_t entry; - vm_object_t object; - - vm_map_lock(user_map); - - /* The commpage is necessarily the last entry in the map. - * See if one is already there (not sure if this can happen???) - */ - entry = vm_map_last_entry(user_map); - if (entry != vm_map_to_entry(user_map)) { - if (entry->vme_end >= base_address) { - vm_map_unlock(user_map); - return; - } - } - - entry = vm_map_first_entry(com_region_map); - object = entry->object.vm_object; - vm_object_reference(object); - - /* We bypass vm_map_enter() because we are adding the entry past the - * map's max_offset. - */ - entry = vm_map_entry_insert( - user_map, - vm_map_last_entry(user_map), /* insert after last entry */ - base_address, - base_address + size, - object, - 0, /* offset */ - FALSE, /* needs_copy */ - FALSE, /* is_shared */ - FALSE, /* in_transition */ - VM_PROT_READ|VM_PROT_EXECUTE, - VM_PROT_READ|VM_PROT_EXECUTE, - VM_BEHAVIOR_DEFAULT, - VM_INHERIT_NONE, - 1 ); /* wired_count */ - - vm_map_unlock(user_map); -} - -#ifdef __i386__ -void -vm_map_commpage32( - vm_map_t map) -{ - vm_map_commpage(map, - com_region_map32, - (vm_map_offset_t) (unsigned) _COMM_PAGE32_BASE_ADDRESS, - (vm_map_size_t) (unsigned) _COMM_PAGE32_AREA_USED); -} -#endif /* __i386__ */ - - - -void -vm_map_commpage64( - vm_map_t map) -{ - - vm_map_commpage(map, - com_region_map64, - (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS, - (vm_map_size_t) _COMM_PAGE64_AREA_USED); + vm_map_destroy(map, VM_MAP_NO_FLAGS); } -void -vm_map_remove_commpage( - vm_map_t map ) -{ - vm_map_entry_t entry; - - while( 1 ) { - vm_map_lock(map); - - entry = vm_map_last_entry(map); - - if ((entry == vm_map_to_entry(map)) || - (entry->vme_start < map->max_offset)) - break; - - /* clearing the wired count isn't strictly correct */ - entry->wired_count = 0; - vm_map_entry_delete(map,entry); - } - - vm_map_unlock(map); -} void vm_map_disable_NX(vm_map_t map) @@ -10729,7 +12438,15 @@ vm_compute_max_offset(unsigned is64) } boolean_t -vm_map_has_4GB_pagezero(vm_map_t map) +vm_map_is_64bit( + vm_map_t map) +{ + return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS); +} + +boolean_t +vm_map_has_4GB_pagezero( + vm_map_t map) { /* * XXX FBDP @@ -10747,13 +12464,22 @@ vm_map_has_4GB_pagezero(vm_map_t map) void vm_map_set_4GB_pagezero(vm_map_t map) { +#ifdef __i386__ pmap_set_4GB_pagezero(map->pmap); +#else +#pragma unused(map) +#endif + } void vm_map_clear_4GB_pagezero(vm_map_t map) { +#ifdef __i386__ pmap_clear_4GB_pagezero(map->pmap); +#else +#pragma unused(map) +#endif } /* @@ -10798,3 +12524,26 @@ vm_map_raise_min_offset( return KERN_SUCCESS; } + +/* + * Set the limit on the maximum amount of user wired memory allowed for this map. + * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of + * the kernel. The limits are checked in the mach VM side, so we keep a copy so we + * don't have to reach over to the BSD data structures. + */ + +void +vm_map_set_user_wire_limit(vm_map_t map, + vm_size_t limit) +{ + map->user_wire_limit = limit; +} + + +void vm_map_switch_protect(vm_map_t map, + boolean_t val) +{ + vm_map_lock(map); + map->switch_protect=val; + vm_map_unlock(map); +}