X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/ebb1b9f42b62218f29061826217bb0f71cd375a6..143464d58d2bd6378e74eec636961ceb0d32fb91:/osfmk/vm/vm_map.c?ds=sidebyside diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index 2e1fbe691..a30ff18e1 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,6 +84,7 @@ #include #include +#include #include #include #include @@ -94,7 +95,6 @@ #include #include #include -#include #include #include @@ -106,6 +106,7 @@ #include #include +extern u_int32_t random(void); /* from */ /* Internal prototypes */ @@ -121,7 +122,7 @@ static boolean_t vm_map_range_check( vm_map_entry_t *entry); static vm_map_entry_t _vm_map_entry_create( - struct vm_map_header *map_header); + struct vm_map_header *map_header, boolean_t map_locked); static void _vm_map_entry_dispose( struct vm_map_header *map_header, @@ -160,7 +161,8 @@ static kern_return_t vm_map_copy_overwrite_unaligned( vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, - vm_map_address_t start); + vm_map_address_t start, + boolean_t discard_on_success); static kern_return_t vm_map_copy_overwrite_aligned( vm_map_t dst_map, @@ -180,7 +182,8 @@ static kern_return_t vm_map_copyout_kernel_buffer( vm_map_t map, vm_map_address_t *addr, /* IN/OUT */ vm_map_copy_t copy, - boolean_t overwrite); + boolean_t overwrite, + boolean_t consume_on_success); static void vm_map_fork_share( vm_map_t old_map, @@ -203,7 +206,8 @@ void vm_map_region_walk( vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended, - boolean_t look_for_pages); + boolean_t look_for_pages, + mach_msg_type_number_t count); static kern_return_t vm_map_wire_nested( vm_map_t map, @@ -261,7 +265,8 @@ static void vm_map_region_look_for_page( vm_object_offset_t offset, int max_refcnt, int depth, - 
vm_region_extended_info_t extended); + vm_region_extended_info_t extended, + mach_msg_type_number_t count); static int vm_map_region_count_obj_refs( vm_map_entry_t entry, @@ -288,11 +293,6 @@ static kern_return_t vm_map_can_reuse( vm_map_offset_t start, vm_map_offset_t end); -#if CONFIG_FREEZE -struct default_freezer_table; -__private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t); -__private_extern__ void default_freezer_mapping_free(void**, boolean_t all); -#endif /* * Macros to copy a vm_map_entry. We must be careful to correctly @@ -303,8 +303,10 @@ __private_extern__ void default_freezer_mapping_free(void**, boolean_t all); * wire count; it's used for map splitting and zone changing in * vm_map_copyout. */ -#define vm_map_entry_copy(NEW,OLD) \ -MACRO_BEGIN \ + +#define vm_map_entry_copy(NEW,OLD) \ +MACRO_BEGIN \ +boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \ *(NEW) = *(OLD); \ (NEW)->is_shared = FALSE; \ (NEW)->needs_wakeup = FALSE; \ @@ -312,9 +314,16 @@ MACRO_BEGIN \ (NEW)->wired_count = 0; \ (NEW)->user_wired_count = 0; \ (NEW)->permanent = FALSE; \ + (NEW)->used_for_jit = FALSE; \ + (NEW)->from_reserved_zone = _vmec_reserved; \ MACRO_END -#define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD)) +#define vm_map_entry_copy_full(NEW,OLD) \ +MACRO_BEGIN \ +boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \ +(*(NEW) = *(OLD)); \ +(NEW)->from_reserved_zone = _vmecf_reserved; \ +MACRO_END /* * Decide if we want to allow processes to execute from their data or stack areas. 
@@ -419,7 +428,8 @@ override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */ static zone_t vm_map_zone; /* zone for vm_map structures */ static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */ -static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */ +static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking + * allocations */ static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ @@ -435,13 +445,8 @@ static void *map_data; static vm_size_t map_data_size; static void *kentry_data; static vm_size_t kentry_data_size; -static int kentry_count = 2048; /* to init kentry_data_size */ -#if CONFIG_EMBEDDED -#define NO_COALESCE_LIMIT 0 -#else #define NO_COALESCE_LIMIT ((1024 * 128) - 1) -#endif /* Skip acquiring locks if we're in the midst of a kernel core dump */ unsigned int not_in_kdp = 1; @@ -603,7 +608,7 @@ lck_attr_t vm_map_lck_attr; * * vm_map_zone: used to allocate maps. * vm_map_entry_zone: used to allocate map entries. - * vm_map_kentry_zone: used to allocate map entries for the kernel. + * vm_map_entry_reserved_zone: fallback zone for kernel map entries * * The kernel allocates map entries from a special zone that is initially * "crammed" with memory. 
It would be difficult (perhaps impossible) for @@ -615,67 +620,96 @@ void vm_map_init( void) { + vm_size_t entry_zone_alloc_size; + const char *mez_name = "VM map entries"; + vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024, PAGE_SIZE, "maps"); zone_change(vm_map_zone, Z_NOENCRYPT, TRUE); - +#if defined(__LP64__) + entry_zone_alloc_size = PAGE_SIZE * 5; +#else + entry_zone_alloc_size = PAGE_SIZE * 6; +#endif vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), - 1024*1024, PAGE_SIZE*5, - "non-kernel map entries"); + 1024*1024, entry_zone_alloc_size, + mez_name); zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE); + zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE); + zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE); - vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), - kentry_data_size, kentry_data_size, - "kernel map entries"); - zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE); + vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), + kentry_data_size * 64, kentry_data_size, + "Reserved VM map entries"); + zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE); vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy), - 16*1024, PAGE_SIZE, "map copies"); + 16*1024, PAGE_SIZE, "VM map copies"); zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE); /* * Cram the map and kentry zones with initial data. - * Set kentry_zone non-collectible to aid zone_gc(). + * Set reserved_zone non-collectible to aid zone_gc(). 
*/ zone_change(vm_map_zone, Z_COLLECT, FALSE); - zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE); - zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE); - zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE); - zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ + + zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE); + zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE); + zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE); + zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE); + zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ + zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE); - zcram(vm_map_zone, map_data, map_data_size); - zcram(vm_map_kentry_zone, kentry_data, kentry_data_size); + zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size); + zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size); lck_grp_attr_setdefault(&vm_map_lck_grp_attr); lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr); lck_attr_setdefault(&vm_map_lck_attr); + +#if CONFIG_FREEZE + default_freezer_init(); +#endif /* CONFIG_FREEZE */ } void vm_map_steal_memory( void) { + uint32_t kentry_initial_pages; + map_data_size = round_page(10 * sizeof(struct _vm_map)); map_data = pmap_steal_memory(map_data_size); -#if 0 /* - * Limiting worst case: vm_map_kentry_zone needs to map each "available" - * physical page (i.e. that beyond the kernel image and page tables) - * individually; we guess at most one entry per eight pages in the - * real world. This works out to roughly .1 of 1% of physical memory, - * or roughly 1900 entries (64K) for a 64M machine with 4K pages. 
+ * kentry_initial_pages corresponds to the number of kernel map entries + * required during bootstrap until the asynchronous replenishment + * scheme is activated and/or entries are available from the general + * map entry pool. */ +#if defined(__LP64__) + kentry_initial_pages = 10; +#else + kentry_initial_pages = 6; #endif - kentry_count = pmap_free_pages() / 8; +#if CONFIG_GZALLOC + /* If using the guard allocator, reserve more memory for the kernel + * reserved map entry pool. + */ + if (gzalloc_enabled()) + kentry_initial_pages *= 1024; +#endif - kentry_data_size = - round_page(kentry_count * sizeof(struct vm_map_entry)); + kentry_data_size = kentry_initial_pages * PAGE_SIZE; kentry_data = pmap_steal_memory(kentry_data_size); } +void vm_kernel_reserved_entry_init(void) { + zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry)); +} + /* * vm_map_create: * @@ -704,6 +738,8 @@ vm_map_create( vm_map_store_init( &(result->hdr) ); + result->hdr.page_shift = PAGE_SHIFT; + result->size = 0; result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */ result->user_wire_size = 0; @@ -717,7 +753,7 @@ vm_map_create( result->max_offset = max; result->wiring_required = FALSE; result->no_zero_fill = FALSE; - result->mapped = FALSE; + result->mapped_in_other_pmaps = FALSE; result->wait_for_space = FALSE; result->switch_protect = FALSE; result->disable_vmentry_reuse = FALSE; @@ -728,7 +764,7 @@ vm_map_create( result->color_rr = (color_seed++) & vm_color_mask; result->jit_entry_exists = FALSE; #if CONFIG_FREEZE - result->default_freezer_toc = NULL; + result->default_freezer_handle = NULL; #endif vm_map_lock_init(result); lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr); @@ -742,29 +778,47 @@ vm_map_create( * Allocates a VM map entry for insertion in the * given map (or map copy). No fields are filled. 
*/ -#define vm_map_entry_create(map) \ - _vm_map_entry_create(&(map)->hdr) +#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked) -#define vm_map_copy_entry_create(copy) \ - _vm_map_entry_create(&(copy)->cpy_hdr) +#define vm_map_copy_entry_create(copy, map_locked) \ + _vm_map_entry_create(&(copy)->cpy_hdr, map_locked) +unsigned reserved_zalloc_count, nonreserved_zalloc_count; static vm_map_entry_t _vm_map_entry_create( - register struct vm_map_header *map_header) + struct vm_map_header *map_header, boolean_t __unused map_locked) { - register zone_t zone; - register vm_map_entry_t entry; + zone_t zone; + vm_map_entry_t entry; - if (map_header->entries_pageable) - zone = vm_map_entry_zone; - else - zone = vm_map_kentry_zone; + zone = vm_map_entry_zone; + + assert(map_header->entries_pageable ? !map_locked : TRUE); + + if (map_header->entries_pageable) { + entry = (vm_map_entry_t) zalloc(zone); + } + else { + entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE); + + if (entry == VM_MAP_ENTRY_NULL) { + zone = vm_map_entry_reserved_zone; + entry = (vm_map_entry_t) zalloc(zone); + OSAddAtomic(1, &reserved_zalloc_count); + } else + OSAddAtomic(1, &nonreserved_zalloc_count); + } - entry = (vm_map_entry_t) zalloc(zone); if (entry == VM_MAP_ENTRY_NULL) panic("vm_map_entry_create"); - vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE); + entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone); + vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE); +#if MAP_ENTRY_CREATION_DEBUG + entry->vme_creation_maphdr = map_header; + fastbacktrace(&entry->vme_creation_bt[0], + (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t))); +#endif return(entry); } @@ -778,7 +832,6 @@ _vm_map_entry_create( * of the stores */ #define vm_map_entry_dispose(map, entry) \ - vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE); \ _vm_map_entry_dispose(&(map)->hdr, (entry)) #define vm_map_copy_entry_dispose(map, entry) \ @@ -791,10 
+844,17 @@ _vm_map_entry_dispose( { register zone_t zone; - if (map_header->entries_pageable) + if (map_header->entries_pageable || !(entry->from_reserved_zone)) zone = vm_map_entry_zone; else - zone = vm_map_kentry_zone; + zone = vm_map_entry_reserved_zone; + + if (!map_header->entries_pageable) { + if (zone == vm_map_entry_zone) + OSAddAtomic(-1, &nonreserved_zalloc_count); + else + OSAddAtomic(-1, &reserved_zalloc_count); + } zfree(zone, entry); } @@ -908,8 +968,9 @@ vm_map_destroy( flags, VM_MAP_NULL); #if CONFIG_FREEZE - if (map->default_freezer_toc){ - default_freezer_mapping_free( &(map->default_freezer_toc), TRUE); + if (map->default_freezer_handle) { + default_freezer_handle_deallocate(map->default_freezer_handle); + map->default_freezer_handle = NULL; } #endif vm_map_unlock(map); @@ -1157,10 +1218,10 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_AFTER) { /* account for the back guard page in the size */ - size += PAGE_SIZE_64; + size += VM_MAP_PAGE_SIZE(map); } - new_entry = vm_map_entry_create(map); + new_entry = vm_map_entry_create(map, FALSE); /* * Look for the first possible address; if there's already @@ -1195,7 +1256,7 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_BEFORE) { /* reserve space for the front guard page */ - start += PAGE_SIZE_64; + start += VM_MAP_PAGE_SIZE(map); } end = ((start + mask) & ~mask); @@ -1249,14 +1310,19 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_BEFORE) { /* go back for the front guard page */ - start -= PAGE_SIZE_64; + start -= VM_MAP_PAGE_SIZE(map); } *address = start; + assert(start < end); new_entry->vme_start = start; new_entry->vme_end = end; assert(page_aligned(new_entry->vme_start)); assert(page_aligned(new_entry->vme_end)); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, + VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, + VM_MAP_PAGE_MASK(map))); new_entry->is_shared = FALSE; new_entry->is_sub_map = FALSE; @@ -1277,7 +1343,14 @@ vm_map_find_space( new_entry->needs_wakeup = 
FALSE; new_entry->no_cache = FALSE; new_entry->permanent = FALSE; - new_entry->superpage_size = 0; + new_entry->superpage_size = FALSE; + if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) { + new_entry->map_aligned = TRUE; + } else { + new_entry->map_aligned = FALSE; + } + + new_entry->used_for_jit = 0; new_entry->alias = 0; new_entry->zero_wired_pages = FALSE; @@ -1358,7 +1431,7 @@ vm_map_pmap_enter( } type_of_fault = DBG_CACHE_HIT_FAULT; kr = vm_fault_enter(m, map->pmap, addr, protection, protection, - VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, + VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, &type_of_fault); vm_object_unlock(object); @@ -1403,6 +1476,59 @@ boolean_t vm_map_pmap_is_empty( #endif /* MACHINE_PMAP_IS_EMPTY */ } +#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000 +kern_return_t +vm_map_random_address_for_size( + vm_map_t map, + vm_map_offset_t *address, + vm_map_size_t size) +{ + kern_return_t kr = KERN_SUCCESS; + int tries = 0; + vm_map_offset_t random_addr = 0; + vm_map_offset_t hole_end; + + vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL; + vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL; + vm_map_size_t vm_hole_size = 0; + vm_map_size_t addr_space_size; + + addr_space_size = vm_map_max(map) - vm_map_min(map); + + assert(page_aligned(size)); + + while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) { + random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT; + random_addr = vm_map_trunc_page( + vm_map_min(map) +(random_addr % addr_space_size), + VM_MAP_PAGE_MASK(map)); + + if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) { + if (prev_entry == vm_map_to_entry(map)) { + next_entry = vm_map_first_entry(map); + } else { + next_entry = prev_entry->vme_next; + } + if (next_entry == vm_map_to_entry(map)) { + hole_end = vm_map_max(map); + } else { + hole_end = next_entry->vme_start; + } + vm_hole_size = hole_end - random_addr; + if (vm_hole_size >= size) { + *address = random_addr; + break; + } + } + tries++; + } + + if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) { + 
kr = KERN_NO_SPACE; + } + return kr; +} + /* * Routine: vm_map_enter * @@ -1447,10 +1573,12 @@ vm_map_enter( boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0); boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0); boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0); + boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0); unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT); char alias; vm_map_offset_t effective_min_offset, effective_max_offset; kern_return_t kr; + boolean_t clear_map_aligned = FALSE; if (superpage_size) { switch (superpage_size) { @@ -1478,14 +1606,6 @@ vm_map_enter( } -#if CONFIG_EMBEDDED - if (cur_protection & VM_PROT_WRITE){ - if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){ - printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); - cur_protection &= ~VM_PROT_EXECUTE; - } - } -#endif /* CONFIG_EMBEDDED */ if (is_submap) { if (purgable) { @@ -1538,6 +1658,18 @@ vm_map_enter( assert(page_aligned(*address)); assert(page_aligned(size)); + if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) { + /* + * In most cases, the caller rounds the size up to the + * map's page size. + * If we get a size that is explicitly not map-aligned here, + * we'll have to respect the caller's wish and mark the + * mapping as "not map-aligned" to avoid tripping the + * map alignment checks later. + */ + clear_map_aligned = TRUE; + } + /* * Only zero-fill objects are allowed to be purgable. 
* LP64todo - limit purgable objects to 32-bits for now @@ -1564,6 +1696,7 @@ vm_map_enter( *address, *address + size, map->hdr.entries_pageable); + vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map)); } StartAgain: ; @@ -1574,11 +1707,22 @@ StartAgain: ; vm_map_lock(map); map_locked = TRUE; - if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){ - result = KERN_INVALID_ARGUMENT; - goto BailOut; + if (entry_for_jit) { + if (map->jit_entry_exists) { + result = KERN_INVALID_ARGUMENT; + goto BailOut; + } + /* + * Get a random start address. + */ + result = vm_map_random_address_for_size(map, address, size); + if (result != KERN_SUCCESS) { + goto BailOut; + } + start = *address; } + /* * Calculate the first possible address. */ @@ -1612,6 +1756,8 @@ StartAgain: ; } else { if (start < (entry->vme_next)->vme_start ) { start = entry->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); } else { /* * Need to do a lookup. @@ -1623,8 +1769,12 @@ StartAgain: ; if (entry == NULL) { vm_map_entry_t tmp_entry; - if (vm_map_lookup_entry(map, start, &tmp_entry)) + if (vm_map_lookup_entry(map, start, &tmp_entry)) { + assert(!entry_for_jit); start = tmp_entry->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); + } entry = tmp_entry; } } @@ -1645,9 +1795,13 @@ StartAgain: ; */ end = ((start + mask) & ~mask); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (end < start) RETURN(KERN_NO_SPACE); start = end; + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_PAGE_MASK(map))); end += size; if ((end > effective_max_offset) || (end < start)) { @@ -1687,8 +1841,12 @@ StartAgain: ; entry = next; start = entry->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); } *address = start; + assert(VM_MAP_PAGE_ALIGNED(*address, + VM_MAP_PAGE_MASK(map))); } else { /* * Verify that: @@ -1830,11 +1988,13 @@ StartAgain: ; * semantics. 
*/ - if (purgable) { + if (purgable || entry_for_jit) { if (object == VM_OBJECT_NULL) { object = vm_object_allocate(size); object->copy_strategy = MEMORY_OBJECT_COPY_NONE; - object->purgable = VM_PURGABLE_NONVOLATILE; + if (purgable) { + object->purgable = VM_PURGABLE_NONVOLATILE; + } offset = (vm_object_offset_t)0; } } else if ((is_submap == FALSE) && @@ -1850,6 +2010,11 @@ StartAgain: ; (entry->behavior == VM_BEHAVIOR_DEFAULT) && (entry->in_transition == 0) && (entry->no_cache == no_cache) && + /* + * No coalescing if not map-aligned, to avoid propagating + * that condition any further than needed: + */ + (!entry->map_aligned || !clear_map_aligned) && ((entry->vme_end - entry->vme_start) + size <= (alias == VM_MEMORY_REALLOC ? ANON_CHUNK_SIZE : @@ -1868,6 +2033,9 @@ StartAgain: ; * new range. */ map->size += (end - entry->vme_end); + assert(entry->vme_start < end); + assert(VM_MAP_PAGE_ALIGNED(end, + VM_MAP_PAGE_MASK(map))); entry->vme_end = end; vm_map_store_update_first_free(map, map->first_free); RETURN(KERN_SUCCESS); @@ -1908,11 +2076,13 @@ StartAgain: ; FALSE, FALSE, cur_protection, max_protection, VM_BEHAVIOR_DEFAULT, - (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance, + (entry_for_jit)? VM_INHERIT_NONE: inheritance, 0, no_cache, - permanent, superpage_size); + permanent, + superpage_size, + clear_map_aligned); new_entry->alias = alias; - if (flags & VM_FLAGS_MAP_JIT){ + if (entry_for_jit){ if (!(map->jit_entry_exists)){ new_entry->used_for_jit = TRUE; map->jit_entry_exists = TRUE; @@ -1930,8 +2100,10 @@ StartAgain: ; use_pmap = (alias == VM_MEMORY_SHARED_PMAP); #ifndef NO_NESTED_PMAP if (use_pmap && submap->pmap == NULL) { + ledger_t ledger = map->pmap->ledger; /* we need a sub pmap to nest... */ - submap->pmap = pmap_create(0, submap_is_64bit); + submap->pmap = pmap_create(ledger, 0, + submap_is_64bit); if (submap->pmap == NULL) { /* let's proceed without nesting... 
*/ } @@ -2005,9 +2177,9 @@ StartAgain: ; */ if ((map->wiring_required)||(superpage_size)) { pmap_empty = FALSE; /* pmap won't be empty */ - result = vm_map_wire(map, start, end, + kr = vm_map_wire(map, start, end, new_entry->protection, TRUE); - RETURN(result); + RETURN(kr); } if ((object != VM_OBJECT_NULL) && @@ -2086,6 +2258,8 @@ BailOut: ; *address, *address + size, map->hdr.entries_pageable); + vm_map_set_page_shift(zap_new_map, + VM_MAP_PAGE_SHIFT(map)); if (!map_locked) { vm_map_lock(map); map_locked = TRUE; @@ -2192,6 +2366,7 @@ vm_map_enter_mem_object( vm_object_size_t size; kern_return_t result; boolean_t mask_cur_protection, mask_max_protection; + vm_map_offset_t offset_in_mapping; mask_cur_protection = cur_protection & VM_PROT_IS_MASK; mask_max_protection = max_protection & VM_PROT_IS_MASK; @@ -2208,9 +2383,11 @@ vm_map_enter_mem_object( initial_size == 0) return KERN_INVALID_ARGUMENT; - map_addr = vm_map_trunc_page(*address); - map_size = vm_map_round_page(initial_size); - size = vm_object_round_page(initial_size); + map_addr = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); + map_size = vm_map_round_page(initial_size, + VM_MAP_PAGE_MASK(target_map)); + size = vm_object_round_page(initial_size); /* * Find the vm object (if any) corresponding to this port. 
@@ -2223,6 +2400,11 @@ vm_map_enter_mem_object( vm_named_entry_t named_entry; named_entry = (vm_named_entry_t) port->ip_kobject; + + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset += named_entry->data_offset; + } + /* a few checks to make sure user is obeying rules */ if (size == 0) { if (offset >= named_entry->size) @@ -2241,17 +2423,53 @@ vm_map_enter_mem_object( if ((named_entry->protection & cur_protection) != cur_protection) return KERN_INVALID_RIGHT; + if (offset + size < offset) { + /* overflow */ + return KERN_INVALID_ARGUMENT; + } if (named_entry->size < (offset + size)) return KERN_INVALID_ARGUMENT; + if (named_entry->is_copy) { + /* for a vm_map_copy, we can only map it whole */ + if ((size != named_entry->size) && + (vm_map_round_page(size, + VM_MAP_PAGE_MASK(target_map)) == + named_entry->size)) { + /* XXX FBDP use the rounded size... */ + size = vm_map_round_page( + size, + VM_MAP_PAGE_MASK(target_map)); + } + + if (offset != 0 || + size != named_entry->size) { + return KERN_INVALID_ARGUMENT; + } + } + /* the callers parameter offset is defined to be the */ /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; + if (! VM_MAP_PAGE_ALIGNED(size, + VM_MAP_PAGE_MASK(target_map))) { + /* + * Let's not map more than requested; + * vm_map_enter() will handle this "not map-aligned" + * case. + */ + map_size = size; + } + named_entry_lock(named_entry); if (named_entry->is_sub_map) { vm_map_t submap; + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap."); + } + submap = named_entry->backing.map; vm_map_lock(submap); vm_map_reference(submap); @@ -2278,16 +2496,20 @@ vm_map_enter_mem_object( * once it's been set and if we race, we'll * just end up setting it twice, which is OK. 
*/ - if (submap->mapped == FALSE) { + if (submap->mapped_in_other_pmaps == FALSE && + vm_map_pmap(submap) != PMAP_NULL && + vm_map_pmap(submap) != + vm_map_pmap(target_map)) { /* - * This submap has never been mapped. - * Set its "mapped" flag now that it - * has been mapped. - * This happens only for the first ever - * mapping of a "submap". + * This submap is being mapped in a map + * that uses a different pmap. + * Set its "mapped_in_other_pmaps" flag + * to indicate that we now need to + * remove mappings from all pmaps rather + * than just the submap's pmap. */ vm_map_lock(submap); - submap->mapped = TRUE; + submap->mapped_in_other_pmaps = TRUE; vm_map_unlock(submap); } *address = map_addr; @@ -2302,6 +2524,10 @@ vm_map_enter_mem_object( protections = named_entry->protection & VM_PROT_ALL; access = GET_MAP_MEM(named_entry->protection); + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap."); + } + object = vm_object_enter(named_entry->backing.pager, named_entry->size, named_entry->internal, @@ -2327,6 +2553,8 @@ vm_map_enter_mem_object( wimg_mode = VM_WIMG_IO; } else if (access == MAP_MEM_COPYBACK) { wimg_mode = VM_WIMG_USE_DEFAULT; + } else if (access == MAP_MEM_INNERWBACK) { + wimg_mode = VM_WIMG_INNERWBACK; } else if (access == MAP_MEM_WTHRU) { wimg_mode = VM_WIMG_WTHRU; } else if (access == MAP_MEM_WCOMB) { @@ -2352,6 +2580,147 @@ vm_map_enter_mem_object( if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; vm_object_unlock(object); + + } else if (named_entry->is_copy) { + kern_return_t kr; + vm_map_copy_t copy_map; + vm_map_entry_t copy_entry; + vm_map_offset_t copy_addr; + + if (flags & ~(VM_FLAGS_FIXED | + VM_FLAGS_ANYWHERE | + VM_FLAGS_OVERWRITE | + VM_FLAGS_RETURN_DATA_ADDR)) { + named_entry_unlock(named_entry); + return KERN_INVALID_ARGUMENT; + } + + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset_in_mapping = offset - 
vm_object_trunc_page(offset); + offset = vm_object_trunc_page(offset); + map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset; + } + + copy_map = named_entry->backing.copy; + assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST); + if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) { + /* unsupported type; should not happen */ + printf("vm_map_enter_mem_object: " + "memory_entry->backing.copy " + "unsupported type 0x%x\n", + copy_map->type); + named_entry_unlock(named_entry); + return KERN_INVALID_ARGUMENT; + } + + /* reserve a contiguous range */ + kr = vm_map_enter(target_map, + &map_addr, + map_size, + mask, + flags & (VM_FLAGS_ANYWHERE | + VM_FLAGS_OVERWRITE | + VM_FLAGS_RETURN_DATA_ADDR), + VM_OBJECT_NULL, + 0, + FALSE, /* copy */ + cur_protection, + max_protection, + inheritance); + if (kr != KERN_SUCCESS) { + named_entry_unlock(named_entry); + return kr; + } + + copy_addr = map_addr; + + for (copy_entry = vm_map_copy_first_entry(copy_map); + copy_entry != vm_map_copy_to_entry(copy_map); + copy_entry = copy_entry->vme_next) { + int remap_flags = 0; + vm_map_t copy_submap; + vm_object_t copy_object; + vm_map_size_t copy_size; + vm_object_offset_t copy_offset; + + copy_offset = copy_entry->offset; + copy_size = (copy_entry->vme_end - + copy_entry->vme_start); + + /* sanity check */ + if (copy_addr + copy_size > + map_addr + map_size) { + /* over-mapping too much !? 
*/ + kr = KERN_INVALID_ARGUMENT; + /* abort */ + break; + } + + /* take a reference on the object */ + if (copy_entry->is_sub_map) { + remap_flags |= VM_FLAGS_SUBMAP; + copy_submap = + copy_entry->object.sub_map; + vm_map_lock(copy_submap); + vm_map_reference(copy_submap); + vm_map_unlock(copy_submap); + copy_object = (vm_object_t) copy_submap; + } else { + copy_object = + copy_entry->object.vm_object; + vm_object_reference(copy_object); + } + + /* over-map the object into destination */ + remap_flags |= flags; + remap_flags |= VM_FLAGS_FIXED; + remap_flags |= VM_FLAGS_OVERWRITE; + remap_flags &= ~VM_FLAGS_ANYWHERE; + kr = vm_map_enter(target_map, + &copy_addr, + copy_size, + (vm_map_offset_t) 0, + remap_flags, + copy_object, + copy_offset, + copy, + cur_protection, + max_protection, + inheritance); + if (kr != KERN_SUCCESS) { + if (copy_entry->is_sub_map) { + vm_map_deallocate(copy_submap); + } else { + vm_object_deallocate(copy_object); + } + /* abort */ + break; + } + + /* next mapping */ + copy_addr += copy_size; + } + + if (kr == KERN_SUCCESS) { + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + *address = map_addr + offset_in_mapping; + } else { + *address = map_addr; + } + } + named_entry_unlock(named_entry); + + if (kr != KERN_SUCCESS) { + if (! (flags & VM_FLAGS_OVERWRITE)) { + /* deallocate the contiguous range */ + (void) vm_deallocate(target_map, + map_addr, + map_size); + } + } + + return kr; + } else { /* This is the case where we are going to map */ /* an already mapped object. If the object is */ @@ -2359,6 +2728,12 @@ vm_map_enter_mem_object( /* object cannot be mapped until it is ready */ /* we can therefore avoid the ready check */ /* in this case. 
*/ + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset_in_mapping = offset - vm_object_trunc_page(offset); + offset = vm_object_trunc_page(offset); + map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset; + } + object = named_entry->backing.object; assert(object != VM_OBJECT_NULL); named_entry_unlock(named_entry); @@ -2373,7 +2748,10 @@ vm_map_enter_mem_object( * this case, the port isn't really a port at all, but * instead is just a raw memory object. */ - + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object."); + } + object = vm_object_enter((memory_object_t)port, size, FALSE, FALSE, FALSE); if (object == VM_OBJECT_NULL) @@ -2505,7 +2883,12 @@ vm_map_enter_mem_object( cur_protection, max_protection, inheritance); if (result != KERN_SUCCESS) vm_object_deallocate(object); - *address = map_addr; + + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + *address = map_addr + offset_in_mapping; + } else { + *address = map_addr; + } return result; } @@ -2545,8 +2928,10 @@ vm_map_enter_mem_object_control( initial_size == 0) return KERN_INVALID_ARGUMENT; - map_addr = vm_map_trunc_page(*address); - map_size = vm_map_round_page(initial_size); + map_addr = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); + map_size = vm_map_round_page(initial_size, + VM_MAP_PAGE_MASK(target_map)); size = vm_object_round_page(initial_size); object = memory_object_control_to_vm_object(control); @@ -2688,14 +3073,11 @@ vm_map_enter_cpm( kern_return_t kr; vm_map_offset_t va, start, end, offset; #if MACH_ASSERT - vm_map_offset_t prev_addr; + vm_map_offset_t prev_addr = 0; #endif /* MACH_ASSERT */ boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); - if (!vm_allocate_cpm_enabled) - return KERN_FAILURE; - if (size == 0) { *addr = 0; return KERN_SUCCESS; @@ -2703,8 +3085,10 @@ vm_map_enter_cpm( if (anywhere) *addr = vm_map_min(map); else - *addr = vm_map_trunc_page(*addr); - size = 
vm_map_round_page(size); + *addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); /* * LP64todo - cpm_allocate should probably allow @@ -2720,7 +3104,7 @@ vm_map_enter_cpm( cpm_obj = vm_object_allocate((vm_object_size_t)size); assert(cpm_obj != VM_OBJECT_NULL); assert(cpm_obj->internal); - assert(cpm_obj->size == (vm_object_size_t)size); + assert(cpm_obj->vo_size == (vm_object_size_t)size); assert(cpm_obj->can_persist == FALSE); assert(cpm_obj->pager_created == FALSE); assert(cpm_obj->pageout == FALSE); @@ -2830,7 +3214,7 @@ vm_map_enter_cpm( type_of_fault = DBG_ZERO_FILL_FAULT; vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE, - VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, + VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, &type_of_fault); vm_object_unlock(cpm_obj); @@ -2845,8 +3229,8 @@ vm_map_enter_cpm( m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); vm_object_unlock(cpm_obj); if (m == VM_PAGE_NULL) - panic("vm_allocate_cpm: obj 0x%x off 0x%x no page", - cpm_obj, offset); + panic("vm_allocate_cpm: obj %p off 0x%llx no page", + cpm_obj, (uint64_t)offset); assert(m->tabled); assert(!m->busy); assert(!m->wanted); @@ -2855,15 +3239,15 @@ vm_map_enter_cpm( assert(!m->absent); assert(!m->error); assert(!m->cleaning); + assert(!m->laundry); assert(!m->precious); assert(!m->clustered); if (offset != 0) { if (m->phys_page != prev_addr + 1) { - printf("start 0x%x end 0x%x va 0x%x\n", - start, end, va); - printf("obj 0x%x off 0x%x\n", cpm_obj, offset); - printf("m 0x%x prev_address 0x%x\n", m, - prev_addr); + printf("start 0x%llx end 0x%llx va 0x%llx\n", + (uint64_t)start, (uint64_t)end, (uint64_t)va); + printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset); + printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr); panic("vm_allocate_cpm: pages not contig!"); } } @@ -2952,7 +3336,7 @@ vm_map_clip_unnest( pmap_unnest(map->pmap, entry->vme_start, entry->vme_end - entry->vme_start); - if 
((map->mapped) && (map->ref_count)) { + if ((map->mapped_in_other_pmaps) && (map->ref_count)) { /* clean up parent map/maps */ vm_map_submap_pmap_clean( map, entry->vme_start, @@ -2961,6 +3345,9 @@ vm_map_clip_unnest( entry->offset); } entry->use_pmap = FALSE; + if (entry->alias == VM_MEMORY_SHARED_PMAP) { + entry->alias = VM_MEMORY_UNSHARED_PMAP; + } } #endif /* NO_NESTED_PMAP */ @@ -2971,7 +3358,7 @@ vm_map_clip_unnest( * the specified address; if necessary, * it splits the entry into two. */ -static void +void vm_map_clip_start( vm_map_t map, vm_map_entry_t entry, @@ -3034,11 +3421,17 @@ _vm_map_clip_start( * address. */ - new_entry = _vm_map_entry_create(map_header); + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_HDR_PAGE_MASK(map_header))); new_entry->vme_end = start; + assert(new_entry->vme_start < new_entry->vme_end); entry->offset += (start - entry->vme_start); + assert(start < entry->vme_end); + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_HDR_PAGE_MASK(map_header))); entry->vme_start = start; _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry); @@ -3057,7 +3450,7 @@ _vm_map_clip_start( * the specified address; if necessary, * it splits the entry into two. 
*/ -static void +void vm_map_clip_end( vm_map_t map, vm_map_entry_t entry, @@ -3125,11 +3518,15 @@ _vm_map_clip_end( * AFTER the specified entry */ - new_entry = _vm_map_entry_create(map_header); + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); + assert(entry->vme_start < end); + assert(VM_MAP_PAGE_ALIGNED(end, + VM_MAP_HDR_PAGE_MASK(map_header))); new_entry->vme_start = entry->vme_end = end; new_entry->offset += (end - entry->vme_start); + assert(new_entry->vme_start < new_entry->vme_end); _vm_map_store_entry_link(map_header, entry, new_entry); @@ -3280,13 +3677,27 @@ vm_map_submap( entry->is_sub_map = TRUE; entry->object.sub_map = submap; vm_map_reference(submap); - submap->mapped = TRUE; + if (submap->mapped_in_other_pmaps == FALSE && + vm_map_pmap(submap) != PMAP_NULL && + vm_map_pmap(submap) != vm_map_pmap(map)) { + /* + * This submap is being mapped in a map + * that uses a different pmap. + * Set its "mapped_in_other_pmaps" flag + * to indicate that we now need to + * remove mappings from all pmaps rather + * than just the submap's pmap. 
+ */ + submap->mapped_in_other_pmaps = TRUE; + } #ifndef NO_NESTED_PMAP if (use_pmap) { /* nest if platform code will allow */ if(submap->pmap == NULL) { - submap->pmap = pmap_create((vm_map_size_t) 0, FALSE); + ledger_t ledger = map->pmap->ledger; + submap->pmap = pmap_create(ledger, + (vm_map_size_t) 0, FALSE); if(submap->pmap == PMAP_NULL) { vm_map_unlock(map); return(KERN_NO_SPACE); @@ -3398,14 +3809,6 @@ vm_map_protect( } } -#if CONFIG_EMBEDDED - if (new_prot & VM_PROT_WRITE) { - if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) { - printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__); - new_prot &= ~VM_PROT_EXECUTE; - } - } -#endif prev = current->vme_end; current = current->vme_next; @@ -3725,6 +4128,8 @@ vm_map_wire_nested( VM_MAP_RANGE_CHECK(map, start, end); assert(page_aligned(start)); assert(page_aligned(end)); + assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))); if (start == end) { /* We wired what the caller asked for, zero pages */ vm_map_unlock(map); @@ -3901,11 +4306,11 @@ vm_map_wire_nested( s, user_wire); return(KERN_FAILURE); } + vm_object_unlock(object); if(real_map != lookup_map) vm_map_unlock(real_map); vm_map_unlock_read(lookup_map); vm_map_lock(map); - vm_object_unlock(object); /* we unlocked, so must re-lookup */ if (!vm_map_lookup_entry(map, @@ -4216,6 +4621,8 @@ vm_map_unwire_nested( VM_MAP_RANGE_CHECK(map, start, end); assert(page_aligned(start)); assert(page_aligned(end)); + assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))); if (start == end) { /* We unwired what the caller asked for: zero pages */ @@ -4554,6 +4961,10 @@ vm_map_entry_delete( e = entry->vme_end; assert(page_aligned(s)); assert(page_aligned(e)); + if (entry->map_aligned == TRUE) { + assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map))); + 
} assert(entry->wired_count == 0); assert(entry->user_wired_count == 0); assert(!entry->permanent); @@ -4620,7 +5031,7 @@ vm_map_submap_pmap_clean( entry->offset); } else { - if((map->mapped) && (map->ref_count) + if((map->mapped_in_other_pmaps) && (map->ref_count) && (entry->object.vm_object != NULL)) { vm_object_pmap_protect( entry->object.vm_object, @@ -4653,7 +5064,7 @@ vm_map_submap_pmap_clean( entry->object.sub_map, entry->offset); } else { - if((map->mapped) && (map->ref_count) + if((map->mapped_in_other_pmaps) && (map->ref_count) && (entry->object.vm_object != NULL)) { vm_object_pmap_protect( entry->object.vm_object, @@ -4960,6 +5371,15 @@ vm_map_delete( pmap, pmap_addr); } else { + if (tmp_entry.object.vm_object == kernel_object) { + pmap_protect_options( + map->pmap, + tmp_entry.vme_start, + tmp_entry.vme_end, + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE, + NULL); + } vm_fault_unwire(map, &tmp_entry, tmp_entry.object.vm_object == kernel_object, map->pmap, tmp_entry.vme_start); @@ -5030,7 +5450,7 @@ vm_map_delete( (addr64_t)entry->vme_start, entry->vme_end - entry->vme_start); #endif /* NO_NESTED_PMAP */ - if ((map->mapped) && (map->ref_count)) { + if ((map->mapped_in_other_pmaps) && (map->ref_count)) { /* clean up parent map/maps */ vm_map_submap_pmap_clean( map, entry->vme_start, @@ -5044,19 +5464,35 @@ vm_map_delete( entry->object.sub_map, entry->offset); } - } else if (entry->object.vm_object != kernel_object) { + } else if (entry->object.vm_object != kernel_object && + entry->object.vm_object != compressor_object) { object = entry->object.vm_object; - if((map->mapped) && (map->ref_count)) { - vm_object_pmap_protect( + if ((map->mapped_in_other_pmaps) && (map->ref_count)) { + vm_object_pmap_protect_options( object, entry->offset, entry->vme_end - entry->vme_start, PMAP_NULL, entry->vme_start, - VM_PROT_NONE); - } else { - pmap_remove(map->pmap, - (addr64_t)entry->vme_start, - (addr64_t)entry->vme_end); + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE); + } else if 
((entry->object.vm_object != + VM_OBJECT_NULL) || + (map->pmap == kernel_pmap)) { + /* Remove translations associated + * with this range unless the entry + * does not have an object, or + * it's the kernel map or a descendant + * since the platform could potentially + * create "backdoor" mappings invisible + * to the VM. It is expected that + * objectless, non-kernel ranges + * do not have such VM invisible + * translations. + */ + pmap_remove_options(map->pmap, + (addr64_t)entry->vme_start, + (addr64_t)entry->vme_end, + PMAP_OPTIONS_REMOVE); } } @@ -5156,6 +5592,15 @@ vm_map_remove( vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); + /* + * For the zone_map, the kernel controls the allocation/freeing of memory. + * Any free to the zone_map should be within the bounds of the map and + * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a + * free to the zone_map into a no-op, there is a problem and we should + * panic. + */ + if ((map == zone_map) && (start == end)) + panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start); result = vm_map_delete(map, start, end, flags, VM_MAP_NULL); vm_map_unlock(map); @@ -5184,7 +5629,11 @@ vm_map_copy_discard( vm_map_entry_t entry = vm_map_copy_first_entry(copy); vm_map_copy_entry_unlink(copy, entry); - vm_object_deallocate(entry->object.vm_object); + if (entry->is_sub_map) { + vm_map_deallocate(entry->object.sub_map); + } else { + vm_object_deallocate(entry->object.vm_object); + } vm_map_copy_entry_dispose(copy, entry); } break; @@ -5283,7 +5732,8 @@ vm_map_overwrite_submap_recurse( * splitting entries in strange ways. 
*/ - dst_end = vm_map_round_page(dst_addr + dst_size); + dst_end = vm_map_round_page(dst_addr + dst_size, + VM_MAP_PAGE_MASK(dst_map)); vm_map_lock(dst_map); start_pass_1: @@ -5292,7 +5742,10 @@ start_pass_1: return(KERN_INVALID_ADDRESS); } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(dst_addr, + VM_MAP_PAGE_MASK(dst_map))); assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */ for (entry = tmp_entry;;) { @@ -5488,7 +5941,7 @@ vm_map_copy_overwrite_nested( if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { return(vm_map_copyout_kernel_buffer( dst_map, &dst_addr, - copy, TRUE)); + copy, TRUE, discard_on_success)); } /* @@ -5511,12 +5964,17 @@ vm_map_copy_overwrite_nested( * splitting entries in strange ways. */ - if (!page_aligned(copy->size) || - !page_aligned (copy->offset) || - !page_aligned (dst_addr)) + if (!VM_MAP_PAGE_ALIGNED(copy->size, + VM_MAP_PAGE_MASK(dst_map)) || + !VM_MAP_PAGE_ALIGNED(copy->offset, + VM_MAP_PAGE_MASK(dst_map)) || + !VM_MAP_PAGE_ALIGNED(dst_addr, + VM_MAP_PAGE_MASK(dst_map)) || + dst_map->hdr.page_shift != copy->cpy_hdr.page_shift) { aligned = FALSE; - dst_end = vm_map_round_page(dst_addr + copy->size); + dst_end = vm_map_round_page(dst_addr + copy->size, + VM_MAP_PAGE_MASK(dst_map)); } else { dst_end = dst_addr + copy->size; } @@ -5537,7 +5995,10 @@ start_pass_1: vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(dst_addr, + VM_MAP_PAGE_MASK(dst_map))); for (entry = tmp_entry;;) { vm_map_entry_t next = entry->vme_next; @@ -5876,6 +6337,12 @@ start_overwrite: copy->type = VM_MAP_COPY_ENTRY_LIST; copy->offset = new_offset; + /* + * XXX FBDP + * this does not seem to deal with + * the VM map store (R&B tree) + */ + total_size -= copy_size; copy_size = 0; /* put back remainder of copy in container */ @@ -5993,8 
+6460,13 @@ start_overwrite: * bits of the region in this case ! */ /* ALWAYS UNLOCKS THE dst_map MAP */ - if ((kr = vm_map_copy_overwrite_unaligned( dst_map, - tmp_entry, copy, base_addr)) != KERN_SUCCESS) { + kr = vm_map_copy_overwrite_unaligned( + dst_map, + tmp_entry, + copy, + base_addr, + discard_on_success); + if (kr != KERN_SUCCESS) { if(next_copy != NULL) { copy->cpy_hdr.nentries += remaining_entries; @@ -6034,7 +6506,10 @@ start_overwrite: break; } } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr)); + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(base_addr, + VM_MAP_PAGE_MASK(dst_map))); entry = tmp_entry; } /* while */ @@ -6091,7 +6566,8 @@ vm_map_copy_overwrite( goto blunt_copy; } - if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) { + if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) != + (copy->offset & VM_MAP_PAGE_MASK(dst_map))) { /* * Incompatible mis-alignment of source and destination... */ @@ -6105,7 +6581,8 @@ vm_map_copy_overwrite( */ if (!page_aligned(dst_addr)) { head_addr = dst_addr; - head_size = PAGE_SIZE - (copy->offset & PAGE_MASK); + head_size = (VM_MAP_PAGE_SIZE(dst_map) - + (copy->offset & VM_MAP_PAGE_MASK(dst_map))); } if (!page_aligned(copy->offset + copy->size)) { /* @@ -6113,7 +6590,8 @@ vm_map_copy_overwrite( * Do an aligned copy up to the last page and * then an unaligned copy for the remaining bytes. 
*/ - tail_size = (copy->offset + copy->size) & PAGE_MASK; + tail_size = ((copy->offset + copy->size) & + VM_MAP_PAGE_MASK(dst_map)); tail_addr = dst_addr + copy->size - tail_size; } @@ -6316,9 +6794,11 @@ vm_map_copy_overwrite_unaligned( vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, - vm_map_offset_t start) + vm_map_offset_t start, + boolean_t discard_on_success) { - vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy); + vm_map_entry_t copy_entry; + vm_map_entry_t copy_entry_next; vm_map_version_t version; vm_object_t dst_object; vm_object_offset_t dst_offset; @@ -6331,6 +6811,9 @@ vm_map_copy_overwrite_unaligned( amount_left; kern_return_t kr = KERN_SUCCESS; + + copy_entry = vm_map_copy_first_entry(copy); + vm_map_lock_write_to_read(dst_map); src_offset = copy->offset - vm_object_trunc_page(copy->offset); @@ -6448,17 +6931,26 @@ vm_map_copy_overwrite_unaligned( /* * all done with this copy entry, dispose. */ - vm_map_copy_entry_unlink(copy, copy_entry); - vm_object_deallocate(copy_entry->object.vm_object); - vm_map_copy_entry_dispose(copy, copy_entry); + copy_entry_next = copy_entry->vme_next; + + if (discard_on_success) { + vm_map_copy_entry_unlink(copy, copy_entry); + assert(!copy_entry->is_sub_map); + vm_object_deallocate( + copy_entry->object.vm_object); + vm_map_copy_entry_dispose(copy, copy_entry); + } - if ((copy_entry = vm_map_copy_first_entry(copy)) - == vm_map_copy_to_entry(copy) && amount_left) { + if (copy_entry_next == vm_map_copy_to_entry(copy) && + amount_left) { /* * not finished copying but run out of source */ return KERN_INVALID_ADDRESS; } + + copy_entry = copy_entry_next; + src_offset = 0; } @@ -6520,6 +7012,10 @@ vm_map_copy_overwrite_unaligned( * to the above pass and make sure that no wiring is involved. 
*/ +int vm_map_copy_overwrite_aligned_src_not_internal = 0; +int vm_map_copy_overwrite_aligned_src_not_symmetric = 0; +int vm_map_copy_overwrite_aligned_src_large = 0; + static kern_return_t vm_map_copy_overwrite_aligned( vm_map_t dst_map, @@ -6624,6 +7120,24 @@ vm_map_copy_overwrite_aligned( continue; } +#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */ +#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */ + if (copy_entry->object.vm_object != VM_OBJECT_NULL && + copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE && + copy_size <= __TRADEOFF1_COPY_SIZE) { + /* + * Virtual vs. Physical copy tradeoff #1. + * + * Copying only a few pages out of a large + * object: do a physical copy instead of + * a virtual copy, to avoid possibly keeping + * the entire large object alive because of + * those few copy-on-write pages. + */ + vm_map_copy_overwrite_aligned_src_large++; + goto slow_copy; + } + if (entry->alias >= VM_MEMORY_MALLOC && entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) { vm_object_t new_object, new_shadow; @@ -6637,6 +7151,8 @@ vm_map_copy_overwrite_aligned( vm_object_lock_shared(new_object); } while (new_object != VM_OBJECT_NULL && + !new_object->true_share && + new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && new_object->internal) { new_shadow = new_object->shadow; if (new_shadow == VM_OBJECT_NULL) { @@ -6657,6 +7173,19 @@ vm_map_copy_overwrite_aligned( * let's go off the optimized * path... */ + vm_map_copy_overwrite_aligned_src_not_internal++; + vm_object_unlock(new_object); + goto slow_copy; + } + if (new_object->true_share || + new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { + /* + * Same if there's a "true_share" + * object in the shadow chain, or + * an object with a non-default + * (SYMMETRIC) copy strategy. 
+ */ + vm_map_copy_overwrite_aligned_src_not_symmetric++; vm_object_unlock(new_object); goto slow_copy; } @@ -6678,7 +7207,7 @@ vm_map_copy_overwrite_aligned( (addr64_t)entry->vme_start, entry->vme_end - entry->vme_start); #endif /* NO_NESTED_PMAP */ - if(dst_map->mapped) { + if(dst_map->mapped_in_other_pmaps) { /* clean up parent */ /* map/maps */ vm_map_submap_pmap_clean( @@ -6697,19 +7226,22 @@ vm_map_copy_overwrite_aligned( vm_map_deallocate( entry->object.sub_map); } else { - if(dst_map->mapped) { - vm_object_pmap_protect( + if(dst_map->mapped_in_other_pmaps) { + vm_object_pmap_protect_options( entry->object.vm_object, entry->offset, entry->vme_end - entry->vme_start, PMAP_NULL, entry->vme_start, - VM_PROT_NONE); + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE); } else { - pmap_remove(dst_map->pmap, - (addr64_t)(entry->vme_start), - (addr64_t)(entry->vme_end)); + pmap_remove_options( + dst_map->pmap, + (addr64_t)(entry->vme_start), + (addr64_t)(entry->vme_end), + PMAP_OPTIONS_REMOVE); } vm_object_deallocate(old_object); } @@ -6752,6 +7284,14 @@ vm_map_copy_overwrite_aligned( kern_return_t r; slow_copy: + if (entry->needs_copy) { + vm_object_shadow(&entry->object.vm_object, + &entry->offset, + (entry->vme_end - + entry->vme_start)); + entry->needs_copy = FALSE; + } + dst_object = entry->object.vm_object; dst_offset = entry->offset; @@ -6838,7 +7378,8 @@ vm_map_copy_overwrite_aligned( start += copy_size; vm_map_lock(dst_map); - if (version.main_timestamp == dst_map->timestamp) { + if (version.main_timestamp == dst_map->timestamp && + copy_size != 0) { /* We can safely use saved tmp_entry value */ vm_map_clip_end(dst_map, tmp_entry, start); @@ -6903,12 +7444,15 @@ vm_map_copyin_kernel_buffer( return kr; } if (src_destroy) { - (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr), - vm_map_round_page(src_addr + len), - VM_MAP_REMOVE_INTERRUPTIBLE | - VM_MAP_REMOVE_WAIT_FOR_KWIRE | - (src_map == kernel_map) ? 
- VM_MAP_REMOVE_KUNWIRE : 0); + (void) vm_map_remove( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + vm_map_round_page(src_addr + len, + VM_MAP_PAGE_MASK(src_map)), + (VM_MAP_REMOVE_INTERRUPTIBLE | + VM_MAP_REMOVE_WAIT_FOR_KWIRE | + (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)); } *copy_result = copy; return KERN_SUCCESS; @@ -6931,7 +7475,8 @@ vm_map_copyout_kernel_buffer( vm_map_t map, vm_map_address_t *addr, /* IN/OUT */ vm_map_copy_t copy, - boolean_t overwrite) + boolean_t overwrite, + boolean_t consume_on_success) { kern_return_t kr = KERN_SUCCESS; thread_t thread = current_thread(); @@ -6944,7 +7489,8 @@ vm_map_copyout_kernel_buffer( *addr = 0; kr = vm_map_enter(map, addr, - vm_map_round_page(copy->size), + vm_map_round_page(copy->size, + VM_MAP_PAGE_MASK(map)), (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, VM_OBJECT_NULL, @@ -6998,16 +7544,22 @@ vm_map_copyout_kernel_buffer( /* * Deallocate the space we allocated in the target map. */ - (void) vm_map_remove(map, - vm_map_trunc_page(*addr), - vm_map_round_page(*addr + - vm_map_round_page(copy->size)), - VM_MAP_NO_FLAGS); + (void) vm_map_remove( + map, + vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page((*addr + + vm_map_round_page(copy->size, + VM_MAP_PAGE_MASK(map))), + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS); *addr = 0; } } else { /* copy was successful, dicard the copy structure */ - kfree(copy, copy->cpy_kalloc_size); + if (consume_on_success) { + kfree(copy, copy->cpy_kalloc_size); + } } return kr; @@ -7030,6 +7582,49 @@ MACRO_BEGIN \ zfree(vm_map_copy_zone, copy); \ MACRO_END +void +vm_map_copy_remap( + vm_map_t map, + vm_map_entry_t where, + vm_map_copy_t copy, + vm_map_offset_t adjustment, + vm_prot_t cur_prot, + vm_prot_t max_prot, + vm_inherit_t inheritance) +{ + vm_map_entry_t copy_entry, new_entry; + + for (copy_entry = vm_map_copy_first_entry(copy); + copy_entry != vm_map_copy_to_entry(copy); + copy_entry = copy_entry->vme_next) { + /* get a new 
VM map entry for the map */ + new_entry = vm_map_entry_create(map, + !map->hdr.entries_pageable); + /* copy the "copy entry" to the new entry */ + vm_map_entry_copy(new_entry, copy_entry); + /* adjust "start" and "end" */ + new_entry->vme_start += adjustment; + new_entry->vme_end += adjustment; + /* clear some attributes */ + new_entry->inheritance = inheritance; + new_entry->protection = cur_prot; + new_entry->max_protection = max_prot; + new_entry->behavior = VM_BEHAVIOR_DEFAULT; + /* take an extra reference on the entry's "object" */ + if (new_entry->is_sub_map) { + vm_map_lock(new_entry->object.sub_map); + vm_map_reference(new_entry->object.sub_map); + vm_map_unlock(new_entry->object.sub_map); + } else { + vm_object_reference(new_entry->object.vm_object); + } + /* insert the new entry in the map */ + vm_map_store_entry_link(map, where, new_entry); + /* continue inserting the "copy entries" after the new entry */ + where = new_entry; + } +} + /* * Routine: vm_map_copyout * @@ -7040,18 +7635,35 @@ MACRO_END * If successful, consumes the copy object. * Otherwise, the caller is responsible for it. 
*/ + kern_return_t vm_map_copyout( vm_map_t dst_map, vm_map_address_t *dst_addr, /* OUT */ vm_map_copy_t copy) +{ + return vm_map_copyout_internal(dst_map, dst_addr, copy, + TRUE, /* consume_on_success */ + VM_PROT_DEFAULT, + VM_PROT_ALL, + VM_INHERIT_DEFAULT); +} + +kern_return_t +vm_map_copyout_internal( + vm_map_t dst_map, + vm_map_address_t *dst_addr, /* OUT */ + vm_map_copy_t copy, + boolean_t consume_on_success, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) { vm_map_size_t size; vm_map_size_t adjustment; vm_map_offset_t start; vm_object_offset_t vm_copy_start; vm_map_entry_t last; - register vm_map_entry_t entry; /* @@ -7074,8 +7686,10 @@ vm_map_copyout( vm_object_offset_t offset; offset = vm_object_trunc_page(copy->offset); - size = vm_map_round_page(copy->size + - (vm_map_size_t)(copy->offset - offset)); + size = vm_map_round_page((copy->size + + (vm_map_size_t)(copy->offset - + offset)), + VM_MAP_PAGE_MASK(dst_map)); *dst_addr = 0; kr = vm_map_enter(dst_map, dst_addr, size, (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, @@ -7086,7 +7700,8 @@ vm_map_copyout( return(kr); /* Account for non-pagealigned copy object */ *dst_addr += (vm_map_offset_t)(copy->offset - offset); - zfree(vm_map_copy_zone, copy); + if (consume_on_success) + zfree(vm_map_copy_zone, copy); return(KERN_SUCCESS); } @@ -7096,18 +7711,23 @@ vm_map_copyout( */ if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { - return(vm_map_copyout_kernel_buffer(dst_map, dst_addr, - copy, FALSE)); + return vm_map_copyout_kernel_buffer(dst_map, dst_addr, + copy, FALSE, + consume_on_success); } + /* * Find space for the data */ - vm_copy_start = vm_object_trunc_page(copy->offset); - size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size) + vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset, + VM_MAP_COPY_PAGE_MASK(copy)); + size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size, + VM_MAP_COPY_PAGE_MASK(copy)) - vm_copy_start; + StartAgain: ; 
vm_map_lock(dst_map); @@ -7118,6 +7738,8 @@ StartAgain: ; assert(first_free_is_valid(dst_map)); start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ? vm_map_min(dst_map) : last->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(dst_map)); } while (TRUE) { @@ -7144,6 +7766,23 @@ StartAgain: ; last = next; start = last->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(dst_map)); + } + + adjustment = start - vm_copy_start; + if (! consume_on_success) { + /* + * We're not allowed to consume "copy", so we'll have to + * copy its map entries into the destination map below. + * No need to re-allocate map entries from the correct + * (pageable or not) zone, since we'll get new map entries + * during the transfer. + * We'll also adjust the map entries's "start" and "end" + * during the transfer, to keep "copy"'s entries consistent + * with its "offset". + */ + goto after_adjustments; } /* @@ -7163,9 +7802,7 @@ StartAgain: ; /* * Find the zone that the copies were allocated from */ - old_zone = (copy->cpy_hdr.entries_pageable) - ? vm_map_entry_zone - : vm_map_kentry_zone; + entry = vm_map_copy_first_entry(copy); /* @@ -7179,13 +7816,14 @@ StartAgain: ; * Copy each entry. */ while (entry != vm_map_copy_to_entry(copy)) { - new = vm_map_copy_entry_create(copy); + new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); vm_map_entry_copy_full(new, entry); new->use_pmap = FALSE; /* clr address space specifics */ vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new); next = entry->vme_next; + old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone; zfree(old_zone, entry); entry = next; } @@ -7196,13 +7834,29 @@ StartAgain: ; * reset the region attributes. 
*/ - adjustment = start - vm_copy_start; for (entry = vm_map_copy_first_entry(copy); entry != vm_map_copy_to_entry(copy); entry = entry->vme_next) { + if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) { + /* + * We're injecting this copy entry into a map that + * has the standard page alignment, so clear + * "map_aligned" (which might have been inherited + * from the original map entry). + */ + entry->map_aligned = FALSE; + } + entry->vme_start += adjustment; entry->vme_end += adjustment; + if (entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, + VM_MAP_PAGE_MASK(dst_map))); + assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, + VM_MAP_PAGE_MASK(dst_map))); + } + entry->inheritance = VM_INHERIT_DEFAULT; entry->protection = VM_PROT_DEFAULT; entry->max_protection = VM_PROT_ALL; @@ -7273,7 +7927,7 @@ StartAgain: ; type_of_fault = DBG_CACHE_HIT_FAULT; vm_fault_enter(m, dst_map->pmap, va, prot, prot, - VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, + VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, &type_of_fault); vm_object_unlock(object); @@ -7284,6 +7938,8 @@ StartAgain: ; } } +after_adjustments: + /* * Correct the page alignment for the result */ @@ -7294,7 +7950,11 @@ StartAgain: ; * Update the hints and the map size */ - SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); + if (consume_on_success) { + SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); + } else { + SAVE_HINT_MAP_WRITE(dst_map, last); + } dst_map->size += size; @@ -7302,8 +7962,14 @@ StartAgain: ; * Link in the copy */ - vm_map_copy_insert(dst_map, last, copy); - + if (consume_on_success) { + vm_map_copy_insert(dst_map, last, copy); + } else { + vm_map_copy_remap(dst_map, last, copy, adjustment, + cur_protection, max_protection, + inheritance); + } + vm_map_unlock(dst_map); /* @@ -7423,8 +8089,10 @@ vm_map_copyin_common( /* * Compute (page aligned) start and end of region */ - src_start = vm_map_trunc_page(src_addr); - src_end = vm_map_round_page(src_end); + src_start = 
vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)); + src_end = vm_map_round_page(src_end, + VM_MAP_PAGE_MASK(src_map)); XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0); @@ -7441,13 +8109,25 @@ vm_map_copyin_common( copy->type = VM_MAP_COPY_ENTRY_LIST; copy->cpy_hdr.nentries = 0; copy->cpy_hdr.entries_pageable = TRUE; +#if 00 + copy->cpy_hdr.page_shift = src_map->hdr.page_shift; +#else + /* + * The copy entries can be broken down for a variety of reasons, + * so we can't guarantee that they will remain map-aligned... + * Will need to adjust the first copy_entry's "vme_start" and + * the last copy_entry's "vme_end" to be rounded to PAGE_MASK + * rather than the original map's alignment. + */ + copy->cpy_hdr.page_shift = PAGE_SHIFT; +#endif vm_map_store_init( &(copy->cpy_hdr) ); copy->offset = src_addr; copy->size = len; - new_entry = vm_map_copy_entry_create(copy); + new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); #define RETURN(x) \ MACRO_BEGIN \ @@ -7569,7 +8249,7 @@ vm_map_copyin_common( version.main_timestamp = src_map->timestamp; vm_map_unlock(src_map); - new_entry = vm_map_copy_entry_create(copy); + new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); vm_map_lock(src_map); if ((version.main_timestamp + 1) != src_map->timestamp) { @@ -7788,8 +8468,12 @@ vm_map_copyin_common( ((src_entry->max_protection & VM_PROT_READ) == 0)) goto VerificationFailed; - if (src_entry->vme_end < new_entry->vme_end) - src_size = (new_entry->vme_end = src_entry->vme_end) - src_start; + if (src_entry->vme_end < new_entry->vme_end) { + assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end, + VM_MAP_COPY_PAGE_MASK(copy))); + new_entry->vme_end = src_entry->vme_end; + src_size = new_entry->vme_end - src_start; + } if ((src_entry->object.vm_object != src_object) || (src_entry->offset != src_offset) ) { @@ -7876,8 +8560,44 @@ vm_map_copyin_common( tmp_entry = 
src_entry->vme_next; if ((tmp_entry->vme_start != src_start) || - (tmp_entry == vm_map_to_entry(src_map))) + (tmp_entry == vm_map_to_entry(src_map))) { + + if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT && + (vm_map_round_page(src_entry->vme_end, + VM_MAP_PAGE_MASK(src_map)) == + src_end)) { + vm_map_entry_t last_copy_entry; + vm_map_offset_t adjustment; + + /* + * This is the last entry in the range we + * want and it happens to miss a few pages + * because it is not map-aligned (must have + * been imported from a differently-aligned + * map). + * Let's say we're done, but first we have + * to compensate for the alignment adjustment + * we're about to do before returning. + */ + + last_copy_entry = vm_map_copy_last_entry(copy); + assert(last_copy_entry != + vm_map_copy_to_entry(copy)); + adjustment = + (vm_map_round_page((copy->offset + + copy->size), + VM_MAP_PAGE_MASK(src_map)) - + vm_map_round_page((copy->offset + + copy->size), + PAGE_MASK)); + last_copy_entry->vme_end += adjustment; + last_copy_entry->map_aligned = FALSE; + /* ... and we're done */ + break; + } + RETURN(KERN_INVALID_ADDRESS); + } } /* @@ -7885,31 +8605,93 @@ vm_map_copyin_common( * copy was successful. */ if (src_destroy) { - (void) vm_map_delete(src_map, - vm_map_trunc_page(src_addr), - src_end, - (src_map == kernel_map) ? - VM_MAP_REMOVE_KUNWIRE : - VM_MAP_NO_FLAGS, - VM_MAP_NULL); + (void) vm_map_delete( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + src_end, + ((src_map == kernel_map) ? 
+ VM_MAP_REMOVE_KUNWIRE : + VM_MAP_NO_FLAGS), + VM_MAP_NULL); } else { /* fix up the damage we did in the base map */ - vm_map_simplify_range(src_map, - vm_map_trunc_page(src_addr), - vm_map_round_page(src_end)); + vm_map_simplify_range( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + vm_map_round_page(src_end, + VM_MAP_PAGE_MASK(src_map))); } vm_map_unlock(src_map); + if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) { + assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK); + + /* adjust alignment of first copy_entry's "vme_start" */ + tmp_entry = vm_map_copy_first_entry(copy); + if (tmp_entry != vm_map_copy_to_entry(copy)) { + vm_map_offset_t adjustment; + adjustment = + (vm_map_trunc_page(copy->offset, + PAGE_MASK) - + vm_map_trunc_page(copy->offset, + VM_MAP_PAGE_MASK(src_map))); + if (adjustment) { + assert(page_aligned(adjustment)); + assert(adjustment < VM_MAP_PAGE_SIZE(src_map)); + tmp_entry->vme_start += adjustment; + tmp_entry->offset += adjustment; + copy_addr += adjustment; + assert(tmp_entry->vme_start < tmp_entry->vme_end); + } + } + + /* adjust alignment of last copy_entry's "vme_end" */ + tmp_entry = vm_map_copy_last_entry(copy); + if (tmp_entry != vm_map_copy_to_entry(copy)) { + vm_map_offset_t adjustment; + adjustment = + (vm_map_round_page((copy->offset + + copy->size), + VM_MAP_PAGE_MASK(src_map)) - + vm_map_round_page((copy->offset + + copy->size), + PAGE_MASK)); + if (adjustment) { + assert(page_aligned(adjustment)); + assert(adjustment < VM_MAP_PAGE_SIZE(src_map)); + tmp_entry->vme_end -= adjustment; + assert(tmp_entry->vme_start < tmp_entry->vme_end); + } + } + } + /* Fix-up start and end points in copy. 
This is necessary */ /* when the various entries in the copy object were picked */ /* up from different sub-maps */ tmp_entry = vm_map_copy_first_entry(copy); while (tmp_entry != vm_map_copy_to_entry(copy)) { + assert(VM_MAP_PAGE_ALIGNED( + copy_addr + (tmp_entry->vme_end - + tmp_entry->vme_start), + VM_MAP_COPY_PAGE_MASK(copy))); + assert(VM_MAP_PAGE_ALIGNED( + copy_addr, + VM_MAP_COPY_PAGE_MASK(copy))); + + /* + * The copy_entries will be injected directly into the + * destination map and might not be "map aligned" there... + */ + tmp_entry->map_aligned = FALSE; + tmp_entry->vme_end = copy_addr + (tmp_entry->vme_end - tmp_entry->vme_start); tmp_entry->vme_start = copy_addr; + assert(tmp_entry->vme_start < tmp_entry->vme_end); copy_addr += tmp_entry->vme_end - tmp_entry->vme_start; tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next; } @@ -7920,6 +8702,78 @@ vm_map_copyin_common( #undef RETURN } +kern_return_t +vm_map_copy_extract( + vm_map_t src_map, + vm_map_address_t src_addr, + vm_map_size_t len, + vm_map_copy_t *copy_result, /* OUT */ + vm_prot_t *cur_prot, /* OUT */ + vm_prot_t *max_prot) +{ + vm_map_offset_t src_start, src_end; + vm_map_copy_t copy; + kern_return_t kr; + + /* + * Check for copies of zero bytes. + */ + + if (len == 0) { + *copy_result = VM_MAP_COPY_NULL; + return(KERN_SUCCESS); + } + + /* + * Check that the end address doesn't overflow + */ + src_end = src_addr + len; + if (src_end < src_addr) + return KERN_INVALID_ADDRESS; + + /* + * Compute (page aligned) start and end of region + */ + src_start = vm_map_trunc_page(src_addr, PAGE_MASK); + src_end = vm_map_round_page(src_end, PAGE_MASK); + + /* + * Allocate a header element for the list. + * + * Use the start and end in the header to + * remember the endpoints prior to rounding. 
+ */ + + copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + vm_map_copy_first_entry(copy) = + vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); + copy->type = VM_MAP_COPY_ENTRY_LIST; + copy->cpy_hdr.nentries = 0; + copy->cpy_hdr.entries_pageable = TRUE; + + vm_map_store_init(&copy->cpy_hdr); + + copy->offset = 0; + copy->size = len; + + kr = vm_map_remap_extract(src_map, + src_addr, + len, + FALSE, /* copy */ + &copy->cpy_hdr, + cur_prot, + max_prot, + VM_INHERIT_SHARE, + TRUE); /* pageable */ + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return kr; + } + + *copy_result = copy; + return KERN_SUCCESS; +} + /* * vm_map_copyin_object: * @@ -8107,7 +8961,7 @@ vm_map_fork_share( if (override_nx(old_map, old_entry->alias) && prot) prot |= VM_PROT_EXECUTE; - if (old_map->mapped) { + if (old_map->mapped_in_other_pmaps) { vm_object_pmap_protect( old_entry->object.vm_object, old_entry->offset, @@ -8156,7 +9010,8 @@ vm_map_fork_share( * Mark both entries as shared. */ - new_entry = vm_map_entry_create(new_map); + new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel + * map or descendants */ vm_map_entry_copy(new_entry, old_entry); old_entry->is_shared = TRUE; new_entry->is_shared = TRUE; @@ -8268,6 +9123,7 @@ vm_map_fork_copy( */ vm_map_t vm_map_fork( + ledger_t ledger, vm_map_t old_map) { pmap_t new_pmap; @@ -8278,17 +9134,13 @@ vm_map_fork( boolean_t src_needs_copy; boolean_t new_entry_needs_copy; - new_pmap = pmap_create((vm_map_size_t) 0, + new_pmap = pmap_create(ledger, (vm_map_size_t) 0, #if defined(__i386__) || defined(__x86_64__) old_map->pmap->pm_task_map != TASK_MAP_32BIT #else - 0 +#error Unknown architecture.
#endif ); -#if defined(__i386__) - if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED) - pmap_set_4GB_pagezero(new_pmap); -#endif vm_map_reference_swap(old_map); vm_map_lock(old_map); @@ -8297,6 +9149,8 @@ vm_map_fork( old_map->min_offset, old_map->max_offset, old_map->hdr.entries_pageable); + /* inherit the parent map's page size */ + vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map)); for ( old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map); @@ -8329,7 +9183,7 @@ vm_map_fork( goto slow_vm_map_fork_copy; } - new_entry = vm_map_entry_create(new_map); + new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */ vm_map_entry_copy(new_entry, old_entry); /* clear address space specifics */ new_entry->use_pmap = FALSE; @@ -8363,7 +9217,7 @@ vm_map_fork( (old_entry->vme_end - old_entry->vme_start), ((old_entry->is_shared - || old_map->mapped) + || old_map->mapped_in_other_pmaps) ? PMAP_NULL : old_map->pmap), old_entry->vme_start, @@ -8544,12 +9398,11 @@ submap_recurse: if (!mapped_needs_copy) { if (vm_map_lock_read_to_write(map)) { vm_map_lock_read(map); - /* XXX FBDP: entry still valid ? 
*/ - if(*real_map == entry->object.sub_map) - *real_map = map; + *real_map = map; goto RetryLookup; } vm_map_lock_read(entry->object.sub_map); + *var_map = entry->object.sub_map; cow_sub_map_parent = map; /* reset base to map before cow object */ /* this is the map which will accept */ @@ -8560,12 +9413,14 @@ submap_recurse: mapped_needs_copy = TRUE; } else { vm_map_lock_read(entry->object.sub_map); + *var_map = entry->object.sub_map; if((cow_sub_map_parent != map) && (*real_map != map)) vm_map_unlock(map); } } else { vm_map_lock_read(entry->object.sub_map); + *var_map = entry->object.sub_map; /* leave map locked if it is a target */ /* cow sub_map above otherwise, just */ /* follow the maps down to the object */ @@ -8575,8 +9430,7 @@ submap_recurse: vm_map_unlock_read(map); } - /* XXX FBDP: map has been unlocked, what protects "entry" !? */ - *var_map = map = entry->object.sub_map; + map = *var_map; /* calculate the offset in the submap for vaddr */ local_vaddr = (local_vaddr - entry->vme_start) + entry->offset; @@ -8685,7 +9539,7 @@ submap_recurse: prot = submap_entry->protection & ~VM_PROT_WRITE; - if (override_nx(map, submap_entry->alias) && prot) + if (override_nx(old_map, submap_entry->alias) && prot) prot |= VM_PROT_EXECUTE; vm_object_pmap_protect( @@ -8694,7 +9548,7 @@ submap_recurse: submap_entry->vme_end - submap_entry->vme_start, (submap_entry->is_shared - || map->mapped) ? + || map->mapped_in_other_pmaps) ? PMAP_NULL : map->pmap, submap_entry->vme_start, prot); @@ -8806,7 +9660,7 @@ submap_recurse: prot = entry->protection; - if (override_nx(map, entry->alias) && prot) { + if (override_nx(old_map, entry->alias) && prot) { /* * HACK -- if not a stack, then allow execution */ @@ -8925,6 +9779,7 @@ submap_recurse: fault_info->io_sync = FALSE; fault_info->cs_bypass = (entry->used_for_jit)? 
TRUE : FALSE; fault_info->mark_zf_absent = FALSE; + fault_info->batch_pmap_op = FALSE; } /* @@ -8996,6 +9851,7 @@ vm_map_region_recurse_64( vm_region_submap_info_64_t submap_info, /* IN/OUT */ mach_msg_type_number_t *count) /* IN/OUT */ { + mach_msg_type_number_t original_count; vm_region_extended_info_data_t extended; vm_map_entry_t tmp_entry; vm_map_offset_t user_address; @@ -9048,26 +9904,32 @@ vm_map_region_recurse_64( return KERN_INVALID_ARGUMENT; } - if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) { - if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { - /* - * "info" structure is not big enough and - * would overflow - */ - return KERN_INVALID_ARGUMENT; - } else { - look_for_pages = FALSE; - *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; - short_info = (vm_region_submap_short_info_64_t) submap_info; - submap_info = NULL; - } + + if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { + /* + * "info" structure is not big enough and + * would overflow + */ + return KERN_INVALID_ARGUMENT; + } + + original_count = *count; + + if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) { + *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; + look_for_pages = FALSE; + short_info = (vm_region_submap_short_info_64_t) submap_info; + submap_info = NULL; } else { look_for_pages = TRUE; - *count = VM_REGION_SUBMAP_INFO_COUNT_64; + *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64; short_info = NULL; + + if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) { + *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64; + } } - - + user_address = *address; user_max_depth = *nesting_depth; @@ -9279,7 +10141,7 @@ vm_map_region_recurse_64( // LP64todo: all the current tools are 32bit, obviously never worked for 64b // so probably should be a real 32b ID vs. ptr. 
// Current users just check for equality -#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p) +#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p)) if (look_for_pages) { submap_info->user_tag = curr_entry->alias; @@ -9307,6 +10169,7 @@ vm_map_region_recurse_64( extended.pages_swapped_out = 0; extended.pages_shared_now_private = 0; extended.pages_dirtied = 0; + extended.pages_reusable = 0; extended.external_pager = 0; extended.shadow_depth = 0; @@ -9325,7 +10188,7 @@ vm_map_region_recurse_64( curr_entry->vme_start)), range_end - range_start, &extended, - look_for_pages); + look_for_pages, VM_REGION_EXTENDED_INFO_COUNT); if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) { @@ -9352,6 +10215,10 @@ vm_map_region_recurse_64( submap_info->shadow_depth = extended.shadow_depth; submap_info->share_mode = extended.share_mode; submap_info->ref_count = extended.ref_count; + + if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) { + submap_info->pages_reusable = extended.pages_reusable; + } } else { short_info->external_pager = extended.external_pager; short_info->shadow_depth = extended.shadow_depth; @@ -9488,14 +10355,18 @@ vm_map_region( return(KERN_SUCCESS); } case VM_REGION_EXTENDED_INFO: - { - vm_region_extended_info_t extended; - if (*count < VM_REGION_EXTENDED_INFO_COUNT) return(KERN_INVALID_ARGUMENT); + /*fallthru*/ + case VM_REGION_EXTENDED_INFO__legacy: + if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) + return KERN_INVALID_ARGUMENT; + + { + vm_region_extended_info_t extended; + mach_msg_type_number_t original_count; extended = (vm_region_extended_info_t) info; - *count = VM_REGION_EXTENDED_INFO_COUNT; vm_map_lock_read(map); @@ -9519,7 +10390,15 @@ vm_map_region( extended->external_pager = 0; extended->shadow_depth = 0; - vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE); + original_count = *count; + if (flavor == VM_REGION_EXTENDED_INFO__legacy) { + 
*count = VM_REGION_EXTENDED_INFO_COUNT__legacy; + } else { + extended->pages_reusable = 0; + *count = VM_REGION_EXTENDED_INFO_COUNT; + } + + vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count); if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) extended->share_mode = SM_PRIVATE; @@ -9644,9 +10523,9 @@ vm_map_region_top_walk( if (ref_count == 1 || (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { top->share_mode = SM_PRIVATE; - top->private_pages_resident = - OBJ_RESIDENT_COUNT(obj, - entry_size); + top->private_pages_resident = + OBJ_RESIDENT_COUNT(obj, + entry_size); } else { top->share_mode = SM_SHARED; top->shared_pages_resident = @@ -9657,7 +10536,7 @@ vm_map_region_top_walk( top->ref_count = ref_count; } /* XXX K64: obj_id will be truncated */ - top->obj_id = (unsigned int) (uintptr_t)obj; + top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj); vm_object_unlock(obj); } @@ -9671,7 +10550,8 @@ vm_map_region_walk( vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended, - boolean_t look_for_pages) + boolean_t look_for_pages, + mach_msg_type_number_t count) { register struct vm_object *obj, *tmp_obj; register vm_map_offset_t last_offset; @@ -9710,10 +10590,11 @@ vm_map_region_walk( if (look_for_pages) { for (last_offset = offset + range; offset < last_offset; - offset += PAGE_SIZE_64, va += PAGE_SIZE) - vm_map_region_look_for_page(map, va, obj, - offset, ref_count, - 0, extended); + offset += PAGE_SIZE_64, va += PAGE_SIZE) { + vm_map_region_look_for_page(map, va, obj, + offset, ref_count, + 0, extended, count); + } } else { shadow_object = obj->shadow; shadow_depth = 0; @@ -9805,15 +10686,14 @@ vm_map_region_look_for_page( vm_object_offset_t offset, int max_refcnt, int depth, - vm_region_extended_info_t extended) + vm_region_extended_info_t extended, + mach_msg_type_number_t count) { register vm_page_t p; register 
vm_object_t shadow; register int ref_count; vm_object_t caller_object; -#if MACH_PAGEMAP kern_return_t kr; -#endif shadow = object->shadow; caller_object = object; @@ -9827,11 +10707,16 @@ vm_map_region_look_for_page( if (shadow && (max_refcnt == 1)) extended->pages_shared_now_private++; - if (!p->fictitious && + if (!p->fictitious && (p->dirty || pmap_is_modified(p->phys_page))) extended->pages_dirtied++; + else if (count >= VM_REGION_EXTENDED_INFO_COUNT) { + if (p->reusable || p->object->all_reusable) { + extended->pages_reusable++; + } + } - extended->pages_resident++; + extended->pages_resident++; if(object != caller_object) vm_object_unlock(object); @@ -9849,36 +10734,49 @@ vm_map_region_look_for_page( return; } - } else if (object->internal && - object->alive && - !object->terminating && - object->pager_ready) { - - memory_object_t pager; - - vm_object_paging_begin(object); - pager = object->pager; - vm_object_unlock(object); + } else +#endif /* MACH_PAGEMAP */ + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (VM_COMPRESSOR_PAGER_STATE_GET(object, + offset) + == VM_EXTERNAL_STATE_EXISTS) { + /* the pager has that page */ + extended->pages_swapped_out++; + if (object != caller_object) + vm_object_unlock(object); + return; + } + } else { + memory_object_t pager; - kr = memory_object_data_request( - pager, - offset + object->paging_offset, - 0, /* just poke the pager */ - VM_PROT_READ, - NULL); + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); - vm_object_lock(object); - vm_object_paging_end(object); + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); - if (kr == KERN_SUCCESS) { - /* the pager has that page */ - extended->pages_swapped_out++; - if (object != caller_object) - vm_object_unlock(object); - return; + 
vm_object_lock(object); + vm_object_paging_end(object); + + if (kr == KERN_SUCCESS) { + /* the pager has that page */ + extended->pages_swapped_out++; + if (object != caller_object) + vm_object_unlock(object); + return; + } } } -#endif /* MACH_PAGEMAP */ if (shadow) { vm_object_lock(shadow); @@ -9977,6 +10875,7 @@ vm_map_simplify_entry( prev_entry->vme_start)) == this_entry->offset) && + (prev_entry->map_aligned == this_entry->map_aligned) && (prev_entry->inheritance == this_entry->inheritance) && (prev_entry->protection == this_entry->protection) && (prev_entry->max_protection == this_entry->max_protection) && @@ -9999,7 +10898,11 @@ vm_map_simplify_entry( (prev_entry->is_shared == FALSE) && (this_entry->is_shared == FALSE) ) { - _vm_map_store_entry_unlink(&map->hdr, prev_entry); + vm_map_store_entry_unlink(map, prev_entry); + assert(prev_entry->vme_start < this_entry->vme_end); + if (prev_entry->map_aligned) + assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start, + VM_MAP_PAGE_MASK(map))); this_entry->vme_start = prev_entry->vme_start; this_entry->offset = prev_entry->offset; if (prev_entry->is_sub_map) { @@ -10046,8 +10949,10 @@ vm_map_simplify_range( return; } - start = vm_map_trunc_page(start); - end = vm_map_round_page(end); + start = vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (!vm_map_lookup_entry(map, start, &entry)) { /* "start" is not mapped and "entry" ends before "start" */ @@ -10344,6 +11249,7 @@ vm_map_willneed( fault_info.io_sync = FALSE; fault_info.cs_bypass = FALSE; fault_info.mark_zf_absent = FALSE; + fault_info.batch_pmap_op = FALSE; /* * The MADV_WILLNEED operation doesn't require any changes to the @@ -10490,6 +11396,28 @@ vm_map_entry_is_reusable( { vm_object_t object; + switch (entry->alias) { + case VM_MEMORY_MALLOC: + case VM_MEMORY_MALLOC_SMALL: + case VM_MEMORY_MALLOC_LARGE: + case VM_MEMORY_REALLOC: + case VM_MEMORY_MALLOC_TINY: + case VM_MEMORY_MALLOC_LARGE_REUSABLE: + case 
VM_MEMORY_MALLOC_LARGE_REUSED: + /* + * This is a malloc() memory region: check if it's still + * in its original state and can be re-used for more + * malloc() allocations. + */ + break; + default: + /* + * Not a malloc() memory region: let the caller decide if + * it's re-usable. + */ + return TRUE; + } + if (entry->is_shared || entry->is_sub_map || entry->in_transition || @@ -10498,7 +11426,7 @@ vm_map_entry_is_reusable( entry->inheritance != VM_INHERIT_DEFAULT || entry->no_cache || entry->permanent || - entry->superpage_size != 0 || + entry->superpage_size != FALSE || entry->zero_wired_pages || entry->wired_count != 0 || entry->user_wired_count != 0) { @@ -10509,7 +11437,22 @@ vm_map_entry_is_reusable( if (object == VM_OBJECT_NULL) { return TRUE; } - if (object->ref_count == 1 && + if ( +#if 0 + /* + * Let's proceed even if the VM object is potentially + * shared. + * We check for this later when processing the actual + * VM pages, so the contents will be safe if shared. + * + * But we can still mark this memory region as "reusable" to + * acknowledge that the caller did let us know that the memory + * could be re-used and should not be penalized for holding + * on to it. This allows its "resident size" to not include + * the reusable range. 
+ */ + object->ref_count == 1 && +#endif object->wired_page_count == 0 && object->copy == VM_OBJECT_NULL && object->shadow == VM_OBJECT_NULL && @@ -10584,6 +11527,11 @@ vm_map_reuse_pages( object = entry->object.vm_object; if (object != VM_OBJECT_NULL) { + /* tell pmap to not count this range as "reusable" */ + pmap_reusable(map->pmap, + MAX(start, entry->vme_start), + MIN(end, entry->vme_end), + FALSE); vm_object_lock(object); vm_object_reuse_pages(object, start_offset, end_offset, TRUE); @@ -10679,6 +11627,11 @@ vm_map_reusable_pages( else kill_pages = -1; if (kill_pages != -1) { + /* tell pmap to count this range as "reusable" */ + pmap_reusable(map->pmap, + MAX(start, entry->vme_start), + MIN(end, entry->vme_end), + TRUE); vm_object_deactivate_pages(object, start_offset, end_offset - start_offset, @@ -10757,370 +11710,97 @@ vm_map_can_reuse( } - -#include -#if MACH_KDB -#include -#include - -#define printf db_printf - -/* - * Forward declarations for internal functions. - */ -extern void vm_map_links_print( - struct vm_map_links *links); - -extern void vm_map_header_print( - struct vm_map_header *header); - -extern void vm_map_entry_print( - vm_map_entry_t entry); - -extern void vm_follow_entry( - vm_map_entry_t entry); - -extern void vm_follow_map( - vm_map_t map); - -/* - * vm_map_links_print: [ debug ] - */ -void -vm_map_links_print( - struct vm_map_links *links) -{ - iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n", - links->prev, - links->next, - (unsigned long long)links->start, - (unsigned long long)links->end); -} - -/* - * vm_map_header_print: [ debug ] - */ -void -vm_map_header_print( - struct vm_map_header *header) -{ - vm_map_links_print(&header->links); - iprintf("nentries = %08X, %sentries_pageable\n", - header->nentries, - (header->entries_pageable ? 
"" : "!")); -} - -/* - * vm_follow_entry: [ debug ] - */ -void -vm_follow_entry( - vm_map_entry_t entry) -{ - int shadows; - - iprintf("map entry %08X\n", entry); - - db_indent += 2; - - shadows = vm_follow_object(entry->object.vm_object); - iprintf("Total objects : %d\n",shadows); - - db_indent -= 2; -} - /* - * vm_map_entry_print: [ debug ] + * Routine: vm_map_entry_insert + * + * Descritpion: This routine inserts a new vm_entry in a locked map. */ -void -vm_map_entry_print( - register vm_map_entry_t entry) +vm_map_entry_t +vm_map_entry_insert( + vm_map_t map, + vm_map_entry_t insp_entry, + vm_map_offset_t start, + vm_map_offset_t end, + vm_object_t object, + vm_object_offset_t offset, + boolean_t needs_copy, + boolean_t is_shared, + boolean_t in_transition, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_behavior_t behavior, + vm_inherit_t inheritance, + unsigned wired_count, + boolean_t no_cache, + boolean_t permanent, + unsigned int superpage_size, + boolean_t clear_map_aligned) { - static const char *inheritance_name[4] = - { "share", "copy", "none", "?"}; - static const char *behavior_name[4] = - { "dflt", "rand", "seqtl", "rseqntl" }; - - iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next); - - db_indent += 2; - - vm_map_links_print(&entry->links); + vm_map_entry_t new_entry; - iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n", - (unsigned long long)entry->vme_start, - (unsigned long long)entry->vme_end, - entry->protection, - entry->max_protection, - inheritance_name[(entry->inheritance & 0x3)]); + assert(insp_entry != (vm_map_entry_t)0); - iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n", - behavior_name[(entry->behavior & 0x3)], - entry->wired_count, - entry->user_wired_count); - iprintf("%sin_transition, %sneeds_wakeup\n", - (entry->in_transition ? "" : "!"), - (entry->needs_wakeup ? 
"" : "!")); + new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable); - if (entry->is_sub_map) { - iprintf("submap = %08X - offset = %016llX\n", - entry->object.sub_map, - (unsigned long long)entry->offset); + if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) { + new_entry->map_aligned = TRUE; } else { - iprintf("object = %08X offset = %016llX - ", - entry->object.vm_object, - (unsigned long long)entry->offset); - printf("%sis_shared, %sneeds_copy\n", - (entry->is_shared ? "" : "!"), - (entry->needs_copy ? "" : "!")); + new_entry->map_aligned = FALSE; + } + if (clear_map_aligned && + ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) { + new_entry->map_aligned = FALSE; } - db_indent -= 2; -} + new_entry->vme_start = start; + new_entry->vme_end = end; + assert(page_aligned(new_entry->vme_start)); + assert(page_aligned(new_entry->vme_end)); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, + VM_MAP_PAGE_MASK(map))); + if (new_entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, + VM_MAP_PAGE_MASK(map))); + } + assert(new_entry->vme_start < new_entry->vme_end); -/* - * vm_follow_map: [ debug ] - */ -void -vm_follow_map( - vm_map_t map) -{ - register vm_map_entry_t entry; + new_entry->object.vm_object = object; + new_entry->offset = offset; + new_entry->is_shared = is_shared; + new_entry->is_sub_map = FALSE; + new_entry->needs_copy = needs_copy; + new_entry->in_transition = in_transition; + new_entry->needs_wakeup = FALSE; + new_entry->inheritance = inheritance; + new_entry->protection = cur_protection; + new_entry->max_protection = max_protection; + new_entry->behavior = behavior; + new_entry->wired_count = wired_count; + new_entry->user_wired_count = 0; + new_entry->use_pmap = FALSE; + new_entry->alias = 0; + new_entry->zero_wired_pages = FALSE; + new_entry->no_cache = no_cache; + new_entry->permanent = permanent; + if (superpage_size) + new_entry->superpage_size = TRUE; + else + new_entry->superpage_size = FALSE; + new_entry->used_for_jit = 
FALSE; - iprintf("task map %08X\n", map); + /* + * Insert the new entry into the list. + */ - db_indent += 2; + vm_map_store_entry_link(map, insp_entry, new_entry); + map->size += end - start; - for (entry = vm_map_first_entry(map); - entry && entry != vm_map_to_entry(map); - entry = entry->vme_next) { - vm_follow_entry(entry); - } + /* + * Update the free space hint and the lookup hint. + */ - db_indent -= 2; -} - -/* - * vm_map_print: [ debug ] - */ -void -vm_map_print( - db_addr_t inmap) -{ - register vm_map_entry_t entry; - vm_map_t map; -#if TASK_SWAPPER - char *swstate; -#endif /* TASK_SWAPPER */ - - map = (vm_map_t)(long) - inmap; /* Make sure we have the right type */ - - iprintf("task map %08X\n", map); - - db_indent += 2; - - vm_map_header_print(&map->hdr); - - iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n", - map->pmap, - map->size, - map->ref_count, - map->hint, - map->first_free); - - iprintf("%swait_for_space, %swiring_required, timestamp = %d\n", - (map->wait_for_space ? "" : "!"), - (map->wiring_required ? "" : "!"), - map->timestamp); - -#if TASK_SWAPPER - switch (map->sw_state) { - case MAP_SW_IN: - swstate = "SW_IN"; - break; - case MAP_SW_OUT: - swstate = "SW_OUT"; - break; - default: - swstate = "????"; - break; - } - iprintf("res = %d, sw_state = %s\n", map->res_count, swstate); -#endif /* TASK_SWAPPER */ - - for (entry = vm_map_first_entry(map); - entry && entry != vm_map_to_entry(map); - entry = entry->vme_next) { - vm_map_entry_print(entry); - } - - db_indent -= 2; -} - -/* - * Routine: vm_map_copy_print - * Purpose: - * Pretty-print a copy object for ddb. 
- */ - -void -vm_map_copy_print( - db_addr_t incopy) -{ - vm_map_copy_t copy; - vm_map_entry_t entry; - - copy = (vm_map_copy_t)(long) - incopy; /* Make sure we have the right type */ - - printf("copy object 0x%x\n", copy); - - db_indent += 2; - - iprintf("type=%d", copy->type); - switch (copy->type) { - case VM_MAP_COPY_ENTRY_LIST: - printf("[entry_list]"); - break; - - case VM_MAP_COPY_OBJECT: - printf("[object]"); - break; - - case VM_MAP_COPY_KERNEL_BUFFER: - printf("[kernel_buffer]"); - break; - - default: - printf("[bad type]"); - break; - } - printf(", offset=0x%llx", (unsigned long long)copy->offset); - printf(", size=0x%x\n", copy->size); - - switch (copy->type) { - case VM_MAP_COPY_ENTRY_LIST: - vm_map_header_print(&copy->cpy_hdr); - for (entry = vm_map_copy_first_entry(copy); - entry && entry != vm_map_copy_to_entry(copy); - entry = entry->vme_next) { - vm_map_entry_print(entry); - } - break; - - case VM_MAP_COPY_OBJECT: - iprintf("object=0x%x\n", copy->cpy_object); - break; - - case VM_MAP_COPY_KERNEL_BUFFER: - iprintf("kernel buffer=0x%x", copy->cpy_kdata); - printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size); - break; - - } - - db_indent -=2; -} - -/* - * db_vm_map_total_size(map) [ debug ] - * - * return the total virtual size (in bytes) of the map - */ -vm_map_size_t -db_vm_map_total_size( - db_addr_t inmap) -{ - vm_map_entry_t entry; - vm_map_size_t total; - vm_map_t map; - - map = (vm_map_t)(long) - inmap; /* Make sure we have the right type */ - - total = 0; - for (entry = vm_map_first_entry(map); - entry != vm_map_to_entry(map); - entry = entry->vme_next) { - total += entry->vme_end - entry->vme_start; - } - - return total; -} - -#endif /* MACH_KDB */ - -/* - * Routine: vm_map_entry_insert - * - * Descritpion: This routine inserts a new vm_entry in a locked map. 
- */ -vm_map_entry_t -vm_map_entry_insert( - vm_map_t map, - vm_map_entry_t insp_entry, - vm_map_offset_t start, - vm_map_offset_t end, - vm_object_t object, - vm_object_offset_t offset, - boolean_t needs_copy, - boolean_t is_shared, - boolean_t in_transition, - vm_prot_t cur_protection, - vm_prot_t max_protection, - vm_behavior_t behavior, - vm_inherit_t inheritance, - unsigned wired_count, - boolean_t no_cache, - boolean_t permanent, - unsigned int superpage_size) -{ - vm_map_entry_t new_entry; - - assert(insp_entry != (vm_map_entry_t)0); - - new_entry = vm_map_entry_create(map); - - new_entry->vme_start = start; - new_entry->vme_end = end; - assert(page_aligned(new_entry->vme_start)); - assert(page_aligned(new_entry->vme_end)); - - new_entry->object.vm_object = object; - new_entry->offset = offset; - new_entry->is_shared = is_shared; - new_entry->is_sub_map = FALSE; - new_entry->needs_copy = needs_copy; - new_entry->in_transition = in_transition; - new_entry->needs_wakeup = FALSE; - new_entry->inheritance = inheritance; - new_entry->protection = cur_protection; - new_entry->max_protection = max_protection; - new_entry->behavior = behavior; - new_entry->wired_count = wired_count; - new_entry->user_wired_count = 0; - new_entry->use_pmap = FALSE; - new_entry->alias = 0; - new_entry->zero_wired_pages = FALSE; - new_entry->no_cache = no_cache; - new_entry->permanent = permanent; - new_entry->superpage_size = superpage_size; - new_entry->used_for_jit = FALSE; - - /* - * Insert the new entry into the list. - */ - - vm_map_store_entry_link(map, insp_entry, new_entry); - map->size += end - start; - - /* - * Update the free space hint and the lookup hint. 
- */ - - SAVE_HINT_MAP_WRITE(map, new_entry); - return new_entry; + SAVE_HINT_MAP_WRITE(map, new_entry); + return new_entry; } /* @@ -11156,7 +11836,8 @@ vm_map_remap_extract( boolean_t new_entry_needs_copy; assert(map != VM_MAP_NULL); - assert(size != 0 && size == vm_map_round_page(size)); + assert(size != 0); + assert(size == vm_map_round_page(size, PAGE_MASK)); assert(inheritance == VM_INHERIT_NONE || inheritance == VM_INHERIT_COPY || inheritance == VM_INHERIT_SHARE); @@ -11164,8 +11845,9 @@ vm_map_remap_extract( /* * Compute start and end of region. */ - src_start = vm_map_trunc_page(addr); - src_end = vm_map_round_page(src_start + size); + src_start = vm_map_trunc_page(addr, PAGE_MASK); + src_end = vm_map_round_page(src_start + size, PAGE_MASK); + /* * Initialize map_header. @@ -11174,6 +11856,7 @@ vm_map_remap_extract( map_header->links.prev = (struct vm_map_entry *)&map_header->links; map_header->nentries = 0; map_header->entries_pageable = pageable; + map_header->page_shift = PAGE_SHIFT; vm_map_store_init( map_header ); @@ -11249,7 +11932,7 @@ vm_map_remap_extract( if (override_nx(map, src_entry->alias) && prot) prot |= VM_PROT_EXECUTE; - if(map->mapped) { + if(map->mapped_in_other_pmaps) { vm_object_pmap_protect( src_entry->object.vm_object, src_entry->offset, @@ -11282,12 +11965,15 @@ vm_map_remap_extract( offset = src_entry->offset + (src_start - src_entry->vme_start); - new_entry = _vm_map_entry_create(map_header); + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy(new_entry, src_entry); new_entry->use_pmap = FALSE; /* clr address space specifics */ + new_entry->map_aligned = FALSE; + new_entry->vme_start = map_address; new_entry->vme_end = map_address + tmp_size; + assert(new_entry->vme_start < new_entry->vme_end); new_entry->inheritance = inheritance; new_entry->offset = offset; @@ -11296,6 +11982,14 @@ vm_map_remap_extract( */ RestartCopy: if (!copy) { + /* + * Cannot allow an entry describing a JIT + * 
region to be shared across address spaces. + */ + if (src_entry->used_for_jit == TRUE) { + result = KERN_INVALID_ARGUMENT; + break; + } src_entry->is_shared = TRUE; new_entry->is_shared = TRUE; if (!(new_entry->is_sub_map)) @@ -11331,7 +12025,7 @@ vm_map_remap_extract( offset, entry_size, ((src_entry->is_shared - || map->mapped) ? + || map->mapped_in_other_pmaps) ? PMAP_NULL : map->pmap), src_entry->vme_start, prot); @@ -11444,7 +12138,11 @@ vm_map_remap_extract( src_entry = new_entry) { new_entry = src_entry->vme_next; _vm_map_store_entry_unlink(map_header, src_entry); - vm_object_deallocate(src_entry->object.vm_object); + if (src_entry->is_sub_map) { + vm_map_deallocate(src_entry->object.sub_map); + } else { + vm_object_deallocate(src_entry->object.vm_object); + } _vm_map_entry_dispose(map_header, src_entry); } } @@ -11481,6 +12179,7 @@ vm_map_remap( vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL; vm_map_entry_t new_entry; struct vm_map_header map_header; + vm_map_offset_t offset_in_mapping; if (target_map == VM_MAP_NULL) return KERN_INVALID_ARGUMENT; @@ -11496,15 +12195,39 @@ vm_map_remap( return KERN_INVALID_ARGUMENT; } - size = vm_map_round_page(size); + /* + * If the user is requesting that we return the address of the + * first byte of the data (rather than the base of the page), + * then we use different rounding semantics: specifically, + * we assume that (memory_address, size) describes a region + * all of whose pages we must cover, rather than a base to be truncated + * down and a size to be added to that base. So we figure out + * the highest page that the requested region includes and make + * sure that the size will cover it. + * + * The key example we're worried about it is of the form: + * + * memory_address = 0x1ff0, size = 0x20 + * + * With the old semantics, we round down the memory_address to 0x1000 + * and round up the size to 0x1000, resulting in our covering *only* + * page 0x1000. 
With the new semantics, we'd realize that the region covers + * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page + * 0x1000 and page 0x2000 in the region we remap. + */ + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK); + size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK); + } else { + size = vm_map_round_page(size, PAGE_MASK); + } result = vm_map_remap_extract(src_map, memory_address, size, copy, &map_header, cur_protection, max_protection, inheritance, - target_map->hdr. - entries_pageable); + target_map->hdr.entries_pageable); if (result != KERN_SUCCESS) { return result; @@ -11514,7 +12237,8 @@ vm_map_remap( * Allocate/check a range of free virtual address * space for the target */ - *address = vm_map_trunc_page(*address); + *address = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); vm_map_lock(target_map); result = vm_map_remap_range_allocate(target_map, address, size, mask, flags, &insp_entry); @@ -11527,6 +12251,7 @@ vm_map_remap( if (result == KERN_SUCCESS) { entry->vme_start += *address; entry->vme_end += *address; + assert(!entry->map_aligned); vm_map_store_entry_link(target_map, insp_entry, entry); insp_entry = entry; } else { @@ -11554,6 +12279,15 @@ vm_map_remap( if (result == KERN_SUCCESS && target_map->wiring_required) result = vm_map_wire(target_map, *address, *address + size, *cur_protection, TRUE); + + /* + * If requested, return the address of the data pointed to by the + * request, rather than the base of the resulting page. 
+ */ + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + *address += offset_in_mapping; + } + return result; } @@ -11616,6 +12350,8 @@ StartAgain: ; start = tmp_entry->vme_end; entry = tmp_entry; } + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); } /* @@ -11634,6 +12370,8 @@ StartAgain: ; */ end = ((start + mask) & ~mask); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (end < start) return(KERN_NO_SPACE); start = end; @@ -11718,11 +12456,12 @@ StartAgain: ; */ zap_map = vm_map_create(PMAP_NULL, start, - end - start, + end, map->hdr.entries_pageable); if (zap_map == VM_MAP_NULL) { return KERN_RESOURCE_SHORTAGE; } + vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map)); kr = vm_map_delete(map, start, end, VM_MAP_REMOVE_SAVE_ENTRIES, @@ -11993,6 +12732,7 @@ vm_map_purgable_control( vm_object_lock(object); +#if 00 if (entry->offset != 0 || entry->vme_end - entry->vme_start != object->vo_size) { /* @@ -12003,6 +12743,7 @@ vm_map_purgable_control( vm_object_unlock(object); return KERN_INVALID_ARGUMENT; } +#endif vm_map_unlock_read(map); @@ -12057,7 +12798,6 @@ vm_map_page_info( boolean_t top_object; int disposition; int ref_count; - vm_object_id_t object_id; vm_page_info_basic_t basic_info; int depth; vm_map_offset_t offset_in_page; @@ -12080,13 +12820,12 @@ vm_map_page_info( disposition = 0; ref_count = 0; - object_id = 0; top_object = TRUE; depth = 0; retval = KERN_SUCCESS; offset_in_page = offset & PAGE_MASK; - offset = vm_map_trunc_page(offset); + offset = vm_map_trunc_page(offset, PAGE_MASK); vm_map_lock_read(map); @@ -12155,12 +12894,21 @@ vm_map_page_info( } } else #endif - { - if (object->internal && - object->alive && - !object->terminating && - object->pager_ready) { - + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (VM_COMPRESSOR_PAGER_STATE_GET( + object, + offset) + == VM_EXTERNAL_STATE_EXISTS) { + 
/* the pager has that page */ + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + break; + } + } else { memory_object_t pager; vm_object_paging_begin(object); @@ -12256,7 +13004,8 @@ done: basic_info = (vm_page_info_basic_t) info; basic_info->disposition = disposition; basic_info->ref_count = ref_count; - basic_info->object_id = (vm_object_id_t) (uintptr_t) object; + basic_info->object_id = (vm_object_id_t) (uintptr_t) + VM_KERNEL_ADDRPERM(object); basic_info->offset = (memory_object_offset_t) offset + offset_in_page; basic_info->depth = depth; @@ -12328,8 +13077,12 @@ vm_map_msync( /* * align address and size on page boundaries */ - size = vm_map_round_page(address + size) - vm_map_trunc_page(address); - address = vm_map_trunc_page(address); + size = (vm_map_round_page(address + size, + VM_MAP_PAGE_MASK(map)) - + vm_map_trunc_page(address, + VM_MAP_PAGE_MASK(map))); + address = vm_map_trunc_page(address, + VM_MAP_PAGE_MASK(map)); if (map == VM_MAP_NULL) return(KERN_INVALID_TASK); @@ -12346,7 +13099,10 @@ vm_map_msync( vm_map_lock(map); if (!vm_map_lookup_entry(map, - vm_map_trunc_page(address), &entry)) { + vm_map_trunc_page( + address, + VM_MAP_PAGE_MASK(map)), + &entry)) { vm_map_size_t skip; @@ -12516,6 +13272,8 @@ vm_map_msync( if (pager == MEMORY_OBJECT_NULL) { vm_object_unlock(object); vm_object_deallocate(object); + msync_req_free(new_msr); + new_msr = NULL; continue; } @@ -12659,45 +13417,35 @@ vm_object_t convert_port_entry_to_object( ipc_port_t port) { - vm_object_t object; + vm_object_t object = VM_OBJECT_NULL; vm_named_entry_t named_entry; - uint32_t try_failed_count = 0; - - if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { - while(TRUE) { - ip_lock(port); - if(ip_active(port) && (ip_kotype(port) - == IKOT_NAMED_ENTRY)) { - named_entry = - (vm_named_entry_t)port->ip_kobject; - if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { - ip_unlock(port); - - try_failed_count++; - mutex_pause(try_failed_count); - continue; - } - named_entry->ref_count++; 
- lck_mtx_unlock(&(named_entry)->Lock); + uint32_t try_failed_count = 0; + + if (IP_VALID(port) && + (ip_kotype(port) == IKOT_NAMED_ENTRY)) { + try_again: + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_NAMED_ENTRY)) { + named_entry = (vm_named_entry_t)port->ip_kobject; + if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { ip_unlock(port); - if ((!named_entry->is_sub_map) && - (!named_entry->is_pager) && - (named_entry->protection - & VM_PROT_WRITE)) { - object = named_entry->backing.object; - } else { - mach_destroy_memory_entry(port); - return (vm_object_t)NULL; - } - vm_object_reference(named_entry->backing.object); - mach_destroy_memory_entry(port); - break; + try_failed_count++; + mutex_pause(try_failed_count); + goto try_again; + } + named_entry->ref_count++; + lck_mtx_unlock(&(named_entry)->Lock); + ip_unlock(port); + if (!(named_entry->is_sub_map) && + !(named_entry->is_pager) && + !(named_entry->is_copy) && + (named_entry->protection & VM_PROT_WRITE)) { + object = named_entry->backing.object; + vm_object_reference(object); } - else - return (vm_object_t)NULL; + mach_destroy_memory_entry(port); } - } else { - return (vm_object_t)NULL; } return object; @@ -12820,6 +13568,12 @@ vm_compute_max_offset(unsigned is64) return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS); } +uint64_t +vm_map_get_max_aslr_slide_pages(vm_map_t map) +{ + return (1 << (vm_map_is_64bit(map) ? 16 : 8)); +} + boolean_t vm_map_is_64bit( vm_map_t map) @@ -12828,8 +13582,9 @@ vm_map_is_64bit( } boolean_t -vm_map_has_4GB_pagezero( - vm_map_t map) +vm_map_has_hard_pagezero( + vm_map_t map, + vm_map_offset_t pagezero_size) { /* * XXX FBDP @@ -12841,30 +13596,54 @@ vm_map_has_4GB_pagezero( * VM map is being torn down, and when a new map is created via * load_machfile()/execve(). 
*/ - return (map->min_offset >= 0x100000000ULL); + return (map->min_offset >= pagezero_size); } void vm_map_set_4GB_pagezero(vm_map_t map) { -#if defined(__i386__) - pmap_set_4GB_pagezero(map->pmap); -#else #pragma unused(map) -#endif } void vm_map_clear_4GB_pagezero(vm_map_t map) { -#if defined(__i386__) - pmap_clear_4GB_pagezero(map->pmap); -#else #pragma unused(map) -#endif } +/* + * Raise a VM map's maximun offset. + */ +kern_return_t +vm_map_raise_max_offset( + vm_map_t map, + vm_map_offset_t new_max_offset) +{ + kern_return_t ret; + + vm_map_lock(map); + ret = KERN_INVALID_ADDRESS; + + if (new_max_offset >= map->max_offset) { + if (!vm_map_is_64bit(map)) { + if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) { + map->max_offset = new_max_offset; + ret = KERN_SUCCESS; + } + } else { + if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) { + map->max_offset = new_max_offset; + ret = KERN_SUCCESS; + } + } + } + + vm_map_unlock(map); + return ret; +} + + /* * Raise a VM map's minimum offset. * To strictly enforce "page zero" reservation. @@ -12876,7 +13655,8 @@ vm_map_raise_min_offset( { vm_map_entry_t first_entry; - new_min_offset = vm_map_round_page(new_min_offset); + new_min_offset = vm_map_round_page(new_min_offset, + VM_MAP_PAGE_MASK(map)); vm_map_lock(map); @@ -12931,6 +13711,29 @@ void vm_map_switch_protect(vm_map_t map, vm_map_unlock(map); } +/* + * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately. + * phys_footprint is a composite limit consisting of iokit + physmem, so we need to + * bump both counters. 
+ */ +void +vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes) +{ + pmap_t pmap = vm_map_pmap(map); + + ledger_credit(pmap->ledger, task_ledgers.iokit_mem, bytes); + ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes); +} + +void +vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes) +{ + pmap_t pmap = vm_map_pmap(map); + + ledger_debit(pmap->ledger, task_ledgers.iokit_mem, bytes); + ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes); +} + /* Add (generate) code signature for memory range */ #if CONFIG_DYNAMIC_CODE_SIGNING kern_return_t vm_map_sign(vm_map_t map, @@ -13011,7 +13814,7 @@ kern_return_t vm_map_sign(vm_map_t map, /* Pull the dirty status from the pmap, since we cleared the * wpmapped bit */ if ((refmod & VM_MEM_MODIFIED) && !m->dirty) { - m->dirty = TRUE; + SET_PAGE_DIRTY(m, FALSE); } /* On to the next page */ @@ -13031,6 +13834,7 @@ kern_return_t vm_map_freeze_walk( unsigned int *wired_count, unsigned int *clean_count, unsigned int *dirty_count, + unsigned int dirty_budget, boolean_t *has_shared) { vm_map_entry_t entry; @@ -13052,7 +13856,7 @@ kern_return_t vm_map_freeze_walk( continue; } - vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL); + default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL); *purgeable_count += purgeable; *wired_count += wired; @@ -13062,6 +13866,14 @@ kern_return_t vm_map_freeze_walk( if (shared) { *has_shared = TRUE; } + + /* Adjust pageout budget and finish up if reached */ + if (dirty_budget) { + dirty_budget -= dirty; + if (dirty_budget == 0) { + break; + } + } } vm_map_unlock_read(map); @@ -13075,31 +13887,16 @@ kern_return_t vm_map_freeze( unsigned int *wired_count, unsigned int *clean_count, unsigned int *dirty_count, + unsigned int dirty_budget, boolean_t *has_shared) { - vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL; - vm_object_t compact_object = VM_OBJECT_NULL; - 
vm_object_offset_t offset = 0x0; - kern_return_t kr = KERN_SUCCESS; - void *default_freezer_toc = NULL; - boolean_t cleanup = FALSE; + vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL; + kern_return_t kr = KERN_SUCCESS; + boolean_t default_freezer_active = TRUE; *purgeable_count = *wired_count = *clean_count = *dirty_count = 0; *has_shared = FALSE; - /* Create our compact object */ - compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS)); - if (!compact_object) { - kr = KERN_FAILURE; - goto done; - } - - default_freezer_toc = default_freezer_mapping_create(compact_object, offset); - if (!default_freezer_toc) { - kr = KERN_FAILURE; - goto done; - } - /* * We need the exclusive lock here so that we can * block any page faults or lookups while we are @@ -13107,99 +13904,306 @@ kern_return_t vm_map_freeze( */ vm_map_lock(map); - if (map->default_freezer_toc != NULL){ - /* - * This map has already been frozen. - */ - cleanup = TRUE; - kr = KERN_SUCCESS; - goto done; + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + default_freezer_active = FALSE; } - - /* Get a mapping in place for the freezing about to commence */ - map->default_freezer_toc = default_freezer_toc; - - vm_object_lock(compact_object); - + + if (default_freezer_active) { + if (map->default_freezer_handle == NULL) { + map->default_freezer_handle = default_freezer_handle_allocate(); + } + + if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) { + /* + * Can happen if default_freezer_handle passed in is NULL + * Or, a table has already been allocated and associated + * with this handle, i.e. the map is already frozen. 
+ */ + goto done; + } + } + for (entry2 = vm_map_first_entry(map); entry2 != vm_map_to_entry(map); entry2 = entry2->vme_next) { vm_object_t src_object = entry2->object.vm_object; - /* If eligible, scan the entry, moving eligible pages over to our parent object */ if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) { - unsigned int purgeable, clean, dirty, wired; - boolean_t shared; - - vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, - src_object, compact_object, &default_freezer_toc, &offset); - - *purgeable_count += purgeable; - *wired_count += wired; - *clean_count += clean; - *dirty_count += dirty; + /* If eligible, scan the entry, moving eligible pages over to our parent object */ + if (default_freezer_active) { + unsigned int purgeable, clean, dirty, wired; + boolean_t shared; + + default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, + src_object, map->default_freezer_handle); + + *purgeable_count += purgeable; + *wired_count += wired; + *clean_count += clean; + *dirty_count += dirty; + + /* Adjust pageout budget and finish up if reached */ + if (dirty_budget) { + dirty_budget -= dirty; + if (dirty_budget == 0) { + break; + } + } - if (shared) { - *has_shared = TRUE; + if (shared) { + *has_shared = TRUE; + } + } else { + /* + * To the compressor. 
+ */ + if (entry2->object.vm_object->internal == TRUE) { + vm_object_pageout(entry2->object.vm_object); + } } } } - vm_object_unlock(compact_object); - - /* Finally, throw out the pages to swap */ - vm_object_pageout(compact_object); + if (default_freezer_active) { + /* Finally, throw out the pages to swap */ + default_freezer_pageout(map->default_freezer_handle); + } done: vm_map_unlock(map); - - /* Unwind if there was a failure */ - if ((cleanup) || (KERN_SUCCESS != kr)) { - if (default_freezer_toc){ - default_freezer_mapping_free(&map->default_freezer_toc, TRUE); - } - if (compact_object){ - vm_object_deallocate(compact_object); - } - } return kr; } -__private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** ); - -void +kern_return_t vm_map_thaw( vm_map_t map) { - void **default_freezer_toc; - vm_object_t compact_object; + kern_return_t kr = KERN_SUCCESS; + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + /* + * We will on-demand thaw in the presence of the compressed pager. + */ + return kr; + } vm_map_lock(map); - if (map->default_freezer_toc == NULL){ + if (map->default_freezer_handle == NULL) { /* * This map is not in a frozen state. */ + kr = KERN_FAILURE; goto out; } - - default_freezer_toc = &(map->default_freezer_toc); - - compact_object = default_freezer_get_compact_vm_object(default_freezer_toc); - - /* Bring the pages back in */ - vm_object_pagein(compact_object); - - /* Shift pages back to their original objects */ - vm_object_unpack(compact_object, default_freezer_toc); - vm_object_deallocate(compact_object); - - map->default_freezer_toc = NULL; - + kr = default_freezer_unpack(map->default_freezer_handle); out: vm_map_unlock(map); + + return kr; } #endif + +/* + * vm_map_entry_should_cow_for_true_share: + * + * Determines if the map entry should be clipped and setup for copy-on-write + * to avoid applying "true_share" to a large VM object when only a subset is + * targeted. 
+ * + * For now, we target only the map entries created for the Objective C + * Garbage Collector, which initially have the following properties: + * - alias == VM_MEMORY_MALLOC + * - wired_count == 0 + * - !needs_copy + * and a VM object with: + * - internal + * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC + * - !true_share + * - vo_size == ANON_CHUNK_SIZE + */ +boolean_t +vm_map_entry_should_cow_for_true_share( + vm_map_entry_t entry) +{ + vm_object_t object; + + if (entry->is_sub_map) { + /* entry does not point at a VM object */ + return FALSE; + } + + if (entry->needs_copy) { + /* already set for copy_on_write: done! */ + return FALSE; + } + + if (entry->alias != VM_MEMORY_MALLOC) { + /* not tagged as an ObjectiveC's Garbage Collector entry */ + return FALSE; + } + + if (entry->wired_count) { + /* wired: can't change the map entry... */ + return FALSE; + } + + object = entry->object.vm_object; + + if (object == VM_OBJECT_NULL) { + /* no object yet... */ + return FALSE; + } + + if (!object->internal) { + /* not an internal object */ + return FALSE; + } + + if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { + /* not the default copy strategy */ + return FALSE; + } + + if (object->true_share) { + /* already true_share: too late to avoid it */ + return FALSE; + } + + if (object->vo_size != ANON_CHUNK_SIZE) { + /* not an object created for the ObjC Garbage Collector */ + return FALSE; + } + + /* + * All the criteria match: we have a large object being targeted for "true_share". + * To limit the adverse side-effects linked with "true_share", tell the caller to + * try and avoid setting up the entire object for "true_share" by clipping the + * targeted range and setting it up for copy-on-write. 
+ */ + return TRUE; +} + +vm_map_offset_t +vm_map_round_page_mask( + vm_map_offset_t offset, + vm_map_offset_t mask) +{ + return VM_MAP_ROUND_PAGE(offset, mask); +} + +vm_map_offset_t +vm_map_trunc_page_mask( + vm_map_offset_t offset, + vm_map_offset_t mask) +{ + return VM_MAP_TRUNC_PAGE(offset, mask); +} + +int +vm_map_page_shift( + vm_map_t map) +{ + return VM_MAP_PAGE_SHIFT(map); +} + +int +vm_map_page_size( + vm_map_t map) +{ + return VM_MAP_PAGE_SIZE(map); +} + +int +vm_map_page_mask( + vm_map_t map) +{ + return VM_MAP_PAGE_MASK(map); +} + +kern_return_t +vm_map_set_page_shift( + vm_map_t map, + int pageshift) +{ + if (map->hdr.nentries != 0) { + /* too late to change page size */ + return KERN_FAILURE; + } + + map->hdr.page_shift = pageshift; + + return KERN_SUCCESS; +} + +kern_return_t +vm_map_query_volatile( + vm_map_t map, + mach_vm_size_t *volatile_virtual_size_p, + mach_vm_size_t *volatile_resident_size_p, + mach_vm_size_t *volatile_pmap_size_p) +{ + mach_vm_size_t volatile_virtual_size; + mach_vm_size_t volatile_resident_count; + mach_vm_size_t volatile_pmap_count; + mach_vm_size_t resident_count; + vm_map_entry_t entry; + vm_object_t object; + + /* map should be locked by caller */ + + volatile_virtual_size = 0; + volatile_resident_count = 0; + volatile_pmap_count = 0; + + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = entry->vme_next) { + if (entry->is_sub_map) { + continue; + } + if (! (entry->protection & VM_PROT_WRITE)) { + continue; + } + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + continue; + } + if (object->purgable != VM_PURGABLE_VOLATILE) { + continue; + } + if (entry->offset != 0) { + /* + * If the map entry has been split and the object now + * appears several times in the VM map, we don't want + * to count the object's resident_page_count more than + * once. We count it only for the first one, starting + * at offset 0 and ignore the other VM map entries. 
+ */ + continue; + } + resident_count = object->resident_page_count; + if ((entry->offset / PAGE_SIZE) >= resident_count) { + resident_count = 0; + } else { + resident_count -= (entry->offset / PAGE_SIZE); + } + + volatile_virtual_size += entry->vme_end - entry->vme_start; + volatile_resident_count += resident_count; + volatile_pmap_count += pmap_query_resident(map->pmap, + entry->vme_start, + entry->vme_end); + } + + /* map is still locked on return */ + + *volatile_virtual_size_p = volatile_virtual_size; + *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE; + *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE; + + return KERN_SUCCESS; +}