X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..813fb2f63a553c957e917ede5f119b021d6ce391:/osfmk/vm/vm_shared_region.c

diff --git a/osfmk/vm/vm_shared_region.c b/osfmk/vm/vm_shared_region.c
index 50632a9a0..e984f2b00 100644
--- a/osfmk/vm/vm_shared_region.c
+++ b/osfmk/vm/vm_shared_region.c
@@ -100,6 +100,9 @@
 #include
 #include
+#include
+
+#include
 #include
 #include
@@ -112,12 +115,22 @@
 /* "dyld" uses this to figure out what the kernel supports */
 int shared_region_version = 3;
 
-/* should local (non-chroot) shared regions persist when no task uses them ? */
-int shared_region_persistence = 1; /* yes by default */
-
 /* trace level, output is sent to the system log file */
 int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
 
+/* should local (non-chroot) shared regions persist when no task uses them ? */
+int shared_region_persistence = 0; /* no by default */
+
+/* delay before reclaiming an unused shared region */
+int shared_region_destroy_delay = 120; /* in seconds */
+
+/*
+ * Only one cache gets to slide on Desktop, since we can't
+ * tear down slide info properly today and the desktop actually
+ * produces lots of shared caches.
+ */
+boolean_t shared_region_completed_slide = FALSE;
+
 /* this lock protects all the shared region data structures */
 lck_grp_t *vm_shared_region_lck_grp;
 lck_mtx_t vm_shared_region_lock;
@@ -140,6 +153,16 @@ static vm_shared_region_t vm_shared_region_create(
 	boolean_t is_64bit);
 static void vm_shared_region_destroy(vm_shared_region_t shared_region);
 
+static void vm_shared_region_timeout(thread_call_param_t param0,
+	thread_call_param_t param1);
+
+static int __commpage_setup = 0;
+#if defined(__i386__) || defined(__x86_64__)
+static int __system_power_source = 1; /* init to extrnal power source */
+static void post_sys_powersource_internal(int i, int internal);
+#endif /* __i386__ || __x86_64__ */
+
+
 /*
  * Initialize the module...
*/ @@ -175,7 +198,7 @@ vm_shared_region_get( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> get(%p)\n", - task)); + (void *)VM_KERNEL_ADDRPERM(task))); task_lock(task); vm_shared_region_lock(); @@ -189,7 +212,8 @@ vm_shared_region_get( SHARED_REGION_TRACE_DEBUG( ("shared_region: get(%p) <- %p\n", - task, shared_region)); + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(shared_region))); return shared_region; } @@ -208,11 +232,12 @@ vm_shared_region_base_address( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> base_address(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); SHARED_REGION_TRACE_DEBUG( ("shared_region: base_address(%p) <- 0x%llx\n", - shared_region, (long long)shared_region->sr_base_address)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (long long)shared_region->sr_base_address)); return shared_region->sr_base_address; } @@ -230,11 +255,12 @@ vm_shared_region_size( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> size(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); SHARED_REGION_TRACE_DEBUG( ("shared_region: size(%p) <- 0x%llx\n", - shared_region, (long long)shared_region->sr_size)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (long long)shared_region->sr_size)); return shared_region->sr_size; } @@ -252,14 +278,50 @@ vm_shared_region_mem_entry( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> mem_entry(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); SHARED_REGION_TRACE_DEBUG( ("shared_region: mem_entry(%p) <- %p\n", - shared_region, shared_region->sr_mem_entry)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (void *)VM_KERNEL_ADDRPERM(shared_region->sr_mem_entry))); return shared_region->sr_mem_entry; } +uint32_t +vm_shared_region_get_slide( + vm_shared_region_t shared_region) +{ + SHARED_REGION_TRACE_DEBUG( + ("shared_region: -> vm_shared_region_get_slide(%p)\n", + (void *)VM_KERNEL_ADDRPERM(shared_region))); + assert(shared_region->sr_ref_count > 1); + SHARED_REGION_TRACE_DEBUG( + ("shared_region: vm_shared_region_get_slide(%p) <- %u\n", + (void *)VM_KERNEL_ADDRPERM(shared_region), + shared_region->sr_slide_info.slide)); + + /* 0 if we haven't slid */ + assert(shared_region->sr_slide_info.slide_object != NULL || + shared_region->sr_slide_info.slide == 0); + + return shared_region->sr_slide_info.slide; +} + +vm_shared_region_slide_info_t +vm_shared_region_get_slide_info( + vm_shared_region_t shared_region) +{ + SHARED_REGION_TRACE_DEBUG( + ("shared_region: -> vm_shared_region_get_slide_info(%p)\n", + (void *)VM_KERNEL_ADDRPERM(shared_region))); + assert(shared_region->sr_ref_count > 1); + SHARED_REGION_TRACE_DEBUG( + ("shared_region: vm_shared_region_get_slide_info(%p) <- %p\n", + (void *)VM_KERNEL_ADDRPERM(shared_region), + (void *)VM_KERNEL_ADDRPERM(&shared_region->sr_slide_info))); + return &shared_region->sr_slide_info; +} + /* * Set the shared region the process should use. 
* A NULL new shared region means that we just want to release the old @@ -276,7 +338,8 @@ vm_shared_region_set( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> set(%p, %p)\n", - task, new_shared_region)); + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(new_shared_region))); task_lock(task); vm_shared_region_lock(); @@ -298,7 +361,9 @@ vm_shared_region_set( SHARED_REGION_TRACE_DEBUG( ("shared_region: set(%p) <- old=%p new=%p\n", - task, old_shared_region, new_shared_region)); + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(old_shared_region), + (void *)VM_KERNEL_ADDRPERM(new_shared_region))); } /* @@ -319,7 +384,8 @@ vm_shared_region_lookup( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> lookup(root=%p,cpu=%d,64bit=%d)\n", - root_dir, cputype, is_64bit)); + + (void *)VM_KERNEL_ADDRPERM(root_dir), cputype, is_64bit)); shared_region = NULL; new_shared_region = NULL; @@ -375,7 +441,9 @@ done: SHARED_REGION_TRACE_DEBUG( ("shared_region: lookup(root=%p,cpu=%d,64bit=%d) <- %p\n", - root_dir, cputype, is_64bit, shared_region)); + (void *)VM_KERNEL_ADDRPERM(root_dir), + cputype, is_64bit, + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 0); return shared_region; @@ -389,18 +457,33 @@ static void vm_shared_region_reference_locked( vm_shared_region_t shared_region) { -#if DEBUG - lck_mtx_assert(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED); -#endif + LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED); SHARED_REGION_TRACE_DEBUG( ("shared_region: -> reference_locked(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 0); shared_region->sr_ref_count++; + + if (shared_region->sr_timer_call != NULL) { + boolean_t cancelled; + + /* cancel and free any pending timeout */ + cancelled = thread_call_cancel(shared_region->sr_timer_call); + if (cancelled) { + thread_call_free(shared_region->sr_timer_call); + shared_region->sr_timer_call = NULL; + /* release the reference held by the cancelled timer */ + shared_region->sr_ref_count--; + } else { + /* the timer will drop the reference and free itself */ + } + } + SHARED_REGION_TRACE_DEBUG( ("shared_region: reference_locked(%p) <- %d\n", - shared_region, shared_region->sr_ref_count)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + shared_region->sr_ref_count)); } /* @@ -413,7 +496,7 @@ vm_shared_region_deallocate( { SHARED_REGION_TRACE_DEBUG( ("shared_region: -> deallocate(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); vm_shared_region_lock(); @@ -425,6 +508,9 @@ vm_shared_region_deallocate( * can persist or not based on the "shared_region_persistence" * sysctl. * Make sure that this one complies. + * + * See comments in vm_shared_region_slide() for notes about + * shared regions we have slid (which are not torn down currently). */ if (shared_region_persistence && !shared_region->sr_persists) { @@ -444,26 +530,77 @@ vm_shared_region_deallocate( shared_region->sr_ref_count--; SHARED_REGION_TRACE_DEBUG( ("shared_region: deallocate(%p): ref now %d\n", - shared_region, shared_region->sr_ref_count)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + shared_region->sr_ref_count)); if (shared_region->sr_ref_count == 0) { - assert(! shared_region->sr_mapping_in_progress); - /* remove it from the queue first, so no one can find it... */ - queue_remove(&vm_shared_region_queue, - shared_region, - vm_shared_region_t, - sr_q); - vm_shared_region_unlock(); - /* ... 
and destroy it */ - vm_shared_region_destroy(shared_region); - shared_region = NULL; + uint64_t deadline; + + assert(!shared_region->sr_slid); + + if (shared_region->sr_timer_call == NULL) { + /* hold one reference for the timer */ + assert(! shared_region->sr_mapping_in_progress); + shared_region->sr_ref_count++; + + /* set up the timer */ + shared_region->sr_timer_call = thread_call_allocate( + (thread_call_func_t) vm_shared_region_timeout, + (thread_call_param_t) shared_region); + + /* schedule the timer */ + clock_interval_to_deadline(shared_region_destroy_delay, + 1000 * 1000 * 1000, + &deadline); + thread_call_enter_delayed(shared_region->sr_timer_call, + deadline); + + SHARED_REGION_TRACE_DEBUG( + ("shared_region: deallocate(%p): armed timer\n", + (void *)VM_KERNEL_ADDRPERM(shared_region))); + + vm_shared_region_unlock(); + } else { + /* timer expired: let go of this shared region */ + + /* + * We can't properly handle teardown of a slid object today. + */ + assert(!shared_region->sr_slid); + + /* + * Remove it from the queue first, so no one can find + * it... + */ + queue_remove(&vm_shared_region_queue, + shared_region, + vm_shared_region_t, + sr_q); + vm_shared_region_unlock(); + + /* ... and destroy it */ + vm_shared_region_destroy(shared_region); + shared_region = NULL; + } } else { vm_shared_region_unlock(); } SHARED_REGION_TRACE_DEBUG( ("shared_region: deallocate(%p) <-\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); +} + +void +vm_shared_region_timeout( + thread_call_param_t param0, + __unused thread_call_param_t param1) +{ + vm_shared_region_t shared_region; + + shared_region = (vm_shared_region_t) param0; + + vm_shared_region_deallocate(shared_region); } /* @@ -479,13 +616,14 @@ vm_shared_region_create( vm_named_entry_t mem_entry; ipc_port_t mem_entry_port; vm_shared_region_t shared_region; + vm_shared_region_slide_info_t si; vm_map_t sub_map; mach_vm_offset_t base_address, pmap_nesting_start; mach_vm_size_t size, pmap_nesting_size; SHARED_REGION_TRACE_DEBUG( ("shared_region: -> create(root=%p,cpu=%d,64bit=%d)\n", - root_dir, cputype, is_64bit)); + (void *)VM_KERNEL_ADDRPERM(root_dir), cputype, is_64bit)); base_address = 0; size = 0; @@ -538,14 +676,6 @@ vm_shared_region_create( pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC; pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC; break; -#ifdef CPU_TYPE_ARM - case CPU_TYPE_ARM: - base_address = SHARED_REGION_BASE_ARM; - size = SHARED_REGION_SIZE_ARM; - pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM; - pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM; - break; -#endif /* CPU_TYPE_ARM */ default: SHARED_REGION_TRACE_ERROR( ("shared_region: create: unknown cpu type %d\n", @@ -553,7 +683,6 @@ vm_shared_region_create( kfree(shared_region, sizeof (*shared_region)); shared_region = NULL; goto done; - } } @@ -570,7 +699,7 @@ vm_shared_region_create( } /* create a VM sub map and its pmap */ - sub_map = vm_map_create(pmap_create(0, is_64bit), + sub_map = vm_map_create(pmap_create(NULL, 0, is_64bit), 0, size, TRUE); if (sub_map == VM_MAP_NULL) { @@ -583,6 +712,9 @@ vm_shared_region_create( goto done; } + assert(!sub_map->disable_vmentry_reuse); + sub_map->is_nested_map = TRUE; + /* make the memory entry point to the VM sub map */ mem_entry->is_sub_map = TRUE; mem_entry->backing.map = sub_map; @@ -603,26 +735,44 @@ vm_shared_region_create( queue_init(&shared_region->sr_q); shared_region->sr_mapping_in_progress = FALSE; + shared_region->sr_slide_in_progress = FALSE; shared_region->sr_persists = FALSE; + 
shared_region->sr_slid = FALSE; + shared_region->sr_timer_call = NULL; shared_region->sr_first_mapping = (mach_vm_offset_t) -1; /* grab a reference for the caller */ shared_region->sr_ref_count = 1; + /* And set up slide info */ + si = &shared_region->sr_slide_info; + si->start = 0; + si->end = 0; + si->slide = 0; + si->slide_object = NULL; + si->slide_info_size = 0; + si->slide_info_entry = NULL; + done: if (shared_region) { SHARED_REGION_TRACE_INFO( ("shared_region: create(root=%p,cpu=%d,64bit=%d," "base=0x%llx,size=0x%llx) <- " "%p mem=(%p,%p) map=%p pmap=%p\n", - root_dir, cputype, is_64bit, (long long)base_address, - (long long)size, shared_region, - mem_entry_port, mem_entry, sub_map, sub_map->pmap)); + (void *)VM_KERNEL_ADDRPERM(root_dir), + cputype, is_64bit, (long long)base_address, + (long long)size, + (void *)VM_KERNEL_ADDRPERM(shared_region), + (void *)VM_KERNEL_ADDRPERM(mem_entry_port), + (void *)VM_KERNEL_ADDRPERM(mem_entry), + (void *)VM_KERNEL_ADDRPERM(sub_map), + (void *)VM_KERNEL_ADDRPERM(sub_map->pmap))); } else { SHARED_REGION_TRACE_INFO( ("shared_region: create(root=%p,cpu=%d,64bit=%d," "base=0x%llx,size=0x%llx) <- NULL", - root_dir, cputype, is_64bit, (long long)base_address, + (void *)VM_KERNEL_ADDRPERM(root_dir), + cputype, is_64bit, (long long)base_address, (long long)size)); } return shared_region; @@ -641,18 +791,20 @@ vm_shared_region_destroy( SHARED_REGION_TRACE_INFO( ("shared_region: -> destroy(%p) (root=%p,cpu=%d,64bit=%d)\n", - shared_region, - shared_region->sr_root_dir, + (void *)VM_KERNEL_ADDRPERM(shared_region), + (void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir), shared_region->sr_cpu_type, shared_region->sr_64bit)); assert(shared_region->sr_ref_count == 0); assert(!shared_region->sr_persists); + assert(!shared_region->sr_slid); mem_entry = (vm_named_entry_t) shared_region->sr_mem_entry->ip_kobject; assert(mem_entry->is_sub_map); assert(!mem_entry->internal); assert(!mem_entry->is_pager); + assert(!mem_entry->is_copy); map = mem_entry->backing.map; /* @@ -681,11 +833,30 @@ vm_shared_region_destroy( mem_entry = NULL; shared_region->sr_mem_entry = IPC_PORT_NULL; + if (shared_region->sr_timer_call) { + thread_call_free(shared_region->sr_timer_call); + } + +#if 0 + /* + * If slid, free those resources. We'll want this eventually, + * but can't handle it properly today. + */ + si = &shared_region->sr_slide_info; + if (si->slide_info_entry) { + kmem_free(kernel_map, + (vm_offset_t) si->slide_info_entry, + (vm_size_t) si->slide_info_size); + vm_object_deallocate(si->slide_object); + } +#endif + /* release the shared region structure... 
*/ kfree(shared_region, sizeof (*shared_region)); + SHARED_REGION_TRACE_DEBUG( ("shared_region: destroy(%p) <-\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); shared_region = NULL; } @@ -704,7 +875,7 @@ vm_shared_region_start_address( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> start_address(%p)\n", - shared_region)); + (void *)VM_KERNEL_ADDRPERM(shared_region))); assert(shared_region->sr_ref_count > 1); vm_shared_region_lock(); @@ -738,10 +909,105 @@ vm_shared_region_start_address( SHARED_REGION_TRACE_DEBUG( ("shared_region: start_address(%p) <- 0x%llx\n", - shared_region, (long long)shared_region->sr_base_address)); + (void *)VM_KERNEL_ADDRPERM(shared_region), + (long long)shared_region->sr_base_address)); return kr; } + +void +vm_shared_region_undo_mappings( + vm_map_t sr_map, + mach_vm_offset_t sr_base_address, + struct shared_file_mapping_np *mappings, + unsigned int mappings_count) +{ + unsigned int j = 0; + vm_shared_region_t shared_region = NULL; + boolean_t reset_shared_region_state = FALSE; + + shared_region = vm_shared_region_get(current_task()); + if (shared_region == NULL) { + printf("Failed to undo mappings because of NULL shared region.\n"); + return; + } + + + if (sr_map == NULL) { + ipc_port_t sr_handle; + vm_named_entry_t sr_mem_entry; + + vm_shared_region_lock(); + assert(shared_region->sr_ref_count > 1); + + while (shared_region->sr_mapping_in_progress) { + /* wait for our turn... */ + vm_shared_region_sleep(&shared_region->sr_mapping_in_progress, + THREAD_UNINT); + } + assert(! shared_region->sr_mapping_in_progress); + assert(shared_region->sr_ref_count > 1); + /* let others know we're working in this shared region */ + shared_region->sr_mapping_in_progress = TRUE; + + vm_shared_region_unlock(); + + reset_shared_region_state = TRUE; + + /* no need to lock because this data is never modified... */ + sr_handle = shared_region->sr_mem_entry; + sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject; + sr_map = sr_mem_entry->backing.map; + sr_base_address = shared_region->sr_base_address; + } + /* + * Undo the mappings we've established so far. + */ + for (j = 0; j < mappings_count; j++) { + kern_return_t kr2; + + if (mappings[j].sfm_size == 0) { + /* + * We didn't establish this + * mapping, so nothing to undo. + */ + continue; + } + SHARED_REGION_TRACE_INFO( + ("shared_region: mapping[%d]: " + "address:0x%016llx " + "size:0x%016llx " + "offset:0x%016llx " + "maxprot:0x%x prot:0x%x: " + "undoing...\n", + j, + (long long)mappings[j].sfm_address, + (long long)mappings[j].sfm_size, + (long long)mappings[j].sfm_file_offset, + mappings[j].sfm_max_prot, + mappings[j].sfm_init_prot)); + kr2 = mach_vm_deallocate( + sr_map, + (mappings[j].sfm_address - + sr_base_address), + mappings[j].sfm_size); + assert(kr2 == KERN_SUCCESS); + } + + if (reset_shared_region_state) { + vm_shared_region_lock(); + assert(shared_region->sr_ref_count > 1); + assert(shared_region->sr_mapping_in_progress); + /* we're done working on that shared region */ + shared_region->sr_mapping_in_progress = FALSE; + thread_wakeup((event_t) &shared_region->sr_mapping_in_progress); + vm_shared_region_unlock(); + reset_shared_region_state = FALSE; + } + + vm_shared_region_deallocate(shared_region); +} + /* * Establish some mappings of a file in the shared region. 
* This is used by "dyld" via the shared_region_map_np() system call @@ -759,7 +1025,10 @@ vm_shared_region_map_file( struct shared_file_mapping_np *mappings, memory_object_control_t file_control, memory_object_size_t file_size, - void *root_dir) + void *root_dir, + uint32_t slide, + user_addr_t slide_start, + user_addr_t slide_size) { kern_return_t kr; vm_object_t file_object; @@ -769,7 +1038,14 @@ vm_shared_region_map_file( mach_vm_offset_t sr_base_address; unsigned int i; mach_port_t map_port; - mach_vm_offset_t target_address; + vm_map_offset_t target_address; + vm_object_t object; + vm_object_size_t obj_size; + struct shared_file_mapping_np *mapping_to_slide = NULL; + mach_vm_offset_t first_mapping = (mach_vm_offset_t) -1; + vm_map_offset_t lowest_unnestable_addr = 0; + + kr = KERN_SUCCESS; @@ -813,8 +1089,9 @@ vm_shared_region_map_file( SHARED_REGION_TRACE_DEBUG( ("shared_region: -> map(%p,%d,%p,%p,0x%llx)\n", - shared_region, mappings_count, mappings, - file_control, file_size)); + (void *)VM_KERNEL_ADDRPERM(shared_region), mappings_count, + (void *)VM_KERNEL_ADDRPERM(mappings), + (void *)VM_KERNEL_ADDRPERM(file_control), file_size)); /* get the VM object associated with the file to be mapped */ file_object = memory_object_control_to_vm_object(file_control); @@ -837,66 +1114,204 @@ vm_shared_region_map_file( map_port = MACH_PORT_NULL; } else { /* file-backed memory */ - map_port = (ipc_port_t) file_object->pager; + __IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager); + } + + if (mappings[i].sfm_init_prot & VM_PROT_SLIDE) { + /* + * This is the mapping that needs to be slid. + */ + if (mapping_to_slide != NULL) { + SHARED_REGION_TRACE_INFO( + ("shared_region: mapping[%d]: " + "address:0x%016llx size:0x%016llx " + "offset:0x%016llx " + "maxprot:0x%x prot:0x%x " + "will not be slid as only one such mapping is allowed...\n", + i, + (long long)mappings[i].sfm_address, + (long long)mappings[i].sfm_size, + (long long)mappings[i].sfm_file_offset, + mappings[i].sfm_max_prot, + mappings[i].sfm_init_prot)); + } else { + mapping_to_slide = &mappings[i]; + } } /* mapping's address is relative to the shared region base */ target_address = mappings[i].sfm_address - sr_base_address; - /* establish that mapping, OK if it's to "already" there */ - kr = vm_map_enter_mem_object( - sr_map, - &target_address, - vm_map_round_page(mappings[i].sfm_size), - 0, - VM_FLAGS_FIXED | VM_FLAGS_ALREADY, - map_port, - mappings[i].sfm_file_offset, - TRUE, - mappings[i].sfm_init_prot & VM_PROT_ALL, - mappings[i].sfm_max_prot & VM_PROT_ALL, - VM_INHERIT_DEFAULT); - if (kr == KERN_MEMORY_PRESENT) { - /* this exact mapping was already there: that's fine */ - SHARED_REGION_TRACE_INFO( - ("shared_region: mapping[%d]: " - "address:0x%016llx size:0x%016llx " - "offset:0x%016llx " - "maxprot:0x%x prot:0x%x already mapped...\n", - i, - (long long)mappings[i].sfm_address, - (long long)mappings[i].sfm_size, - (long long)mappings[i].sfm_file_offset, - mappings[i].sfm_max_prot, - mappings[i].sfm_init_prot)); - kr = KERN_SUCCESS; - } else if (kr != KERN_SUCCESS) { - /* this mapping failed ! */ + /* establish that mapping, OK if it's "already" there */ + if (map_port == MACH_PORT_NULL) { + /* + * We want to map some anonymous memory in a + * shared region. + * We have to create the VM object now, so that it + * can be mapped "copy-on-write". 
+ */ + obj_size = vm_map_round_page(mappings[i].sfm_size, + VM_MAP_PAGE_MASK(sr_map)); + object = vm_object_allocate(obj_size); + if (object == VM_OBJECT_NULL) { + kr = KERN_RESOURCE_SHORTAGE; + } else { + kr = vm_map_enter( + sr_map, + &target_address, + vm_map_round_page(mappings[i].sfm_size, + VM_MAP_PAGE_MASK(sr_map)), + 0, + VM_FLAGS_FIXED | VM_FLAGS_ALREADY, + object, + 0, + TRUE, + mappings[i].sfm_init_prot & VM_PROT_ALL, + mappings[i].sfm_max_prot & VM_PROT_ALL, + VM_INHERIT_DEFAULT); + } + } else { + object = VM_OBJECT_NULL; /* no anonymous memory here */ + kr = vm_map_enter_mem_object( + sr_map, + &target_address, + vm_map_round_page(mappings[i].sfm_size, + VM_MAP_PAGE_MASK(sr_map)), + 0, + VM_FLAGS_FIXED | VM_FLAGS_ALREADY, + map_port, + mappings[i].sfm_file_offset, + TRUE, + mappings[i].sfm_init_prot & VM_PROT_ALL, + mappings[i].sfm_max_prot & VM_PROT_ALL, + VM_INHERIT_DEFAULT); + + } + + if (kr == KERN_SUCCESS) { + /* + * Record the first (chronologically) successful + * mapping in this shared region. + * We're protected by "sr_mapping_in_progress" here, + * so no need to lock "shared_region". + */ + if (first_mapping == (mach_vm_offset_t) -1) { + first_mapping = target_address; + } + + /* + * Record the lowest writable address in this + * sub map, to log any unexpected unnesting below + * that address (see log_unnest_badness()). + */ + if ((mappings[i].sfm_init_prot & VM_PROT_WRITE) && + sr_map->is_nested_map && + (lowest_unnestable_addr == 0 || + (target_address < lowest_unnestable_addr))) { + lowest_unnestable_addr = target_address; + } + } else { + if (map_port == MACH_PORT_NULL) { + /* + * Get rid of the VM object we just created + * but failed to map. + */ + vm_object_deallocate(object); + object = VM_OBJECT_NULL; + } + if (kr == KERN_MEMORY_PRESENT) { + /* + * This exact mapping was already there: + * that's fine. + */ + SHARED_REGION_TRACE_INFO( + ("shared_region: mapping[%d]: " + "address:0x%016llx size:0x%016llx " + "offset:0x%016llx " + "maxprot:0x%x prot:0x%x " + "already mapped...\n", + i, + (long long)mappings[i].sfm_address, + (long long)mappings[i].sfm_size, + (long long)mappings[i].sfm_file_offset, + mappings[i].sfm_max_prot, + mappings[i].sfm_init_prot)); + /* + * We didn't establish this mapping ourselves; + * let's reset its size, so that we do not + * attempt to undo it if an error occurs later. + */ + mappings[i].sfm_size = 0; + kr = KERN_SUCCESS; + } else { + /* this mapping failed ! 
*/ + SHARED_REGION_TRACE_ERROR( + ("shared_region: mapping[%d]: " + "address:0x%016llx size:0x%016llx " + "offset:0x%016llx " + "maxprot:0x%x prot:0x%x failed 0x%x\n", + i, + (long long)mappings[i].sfm_address, + (long long)mappings[i].sfm_size, + (long long)mappings[i].sfm_file_offset, + mappings[i].sfm_max_prot, + mappings[i].sfm_init_prot, + kr)); + + vm_shared_region_undo_mappings(sr_map, sr_base_address, mappings, i); + break; + } + + } + + } + + if (kr == KERN_SUCCESS && + slide_size != 0 && + mapping_to_slide != NULL) { + kr = vm_shared_region_slide(slide, + mapping_to_slide->sfm_file_offset, + mapping_to_slide->sfm_size, + slide_start, + slide_size, + file_control); + if (kr != KERN_SUCCESS) { SHARED_REGION_TRACE_ERROR( - ("shared_region: mapping[%d]: " - "address:0x%016llx size:0x%016llx " - "offset:0x%016llx " - "maxprot:0x%x prot:0x%x failed 0x%x\n", - i, - (long long)mappings[i].sfm_address, - (long long)mappings[i].sfm_size, - (long long)mappings[i].sfm_file_offset, - mappings[i].sfm_max_prot, - mappings[i].sfm_init_prot, + ("shared_region: region_slide(" + "slide:0x%x start:0x%016llx " + "size:0x%016llx) failed 0x%x\n", + slide, + (long long)slide_start, + (long long)slide_size, kr)); - break; + vm_shared_region_undo_mappings(sr_map, + sr_base_address, + mappings, + mappings_count); } + } - /* we're protected by "sr_mapping_in_progress" */ - if (shared_region->sr_first_mapping == (mach_vm_offset_t) -1) { - shared_region->sr_first_mapping = target_address; + if (kr == KERN_SUCCESS) { + /* adjust the map's "lowest_unnestable_start" */ + lowest_unnestable_addr &= ~(pmap_nesting_size_min-1); + if (lowest_unnestable_addr != + sr_map->lowest_unnestable_start) { + vm_map_lock(sr_map); + sr_map->lowest_unnestable_start = + lowest_unnestable_addr; + vm_map_unlock(sr_map); } } vm_shared_region_lock(); assert(shared_region->sr_ref_count > 1); assert(shared_region->sr_mapping_in_progress); + /* set "sr_first_mapping"; dyld uses it to validate the shared cache */ + if (kr == KERN_SUCCESS && + shared_region->sr_first_mapping == (mach_vm_offset_t) -1) { + shared_region->sr_first_mapping = first_mapping; + } /* we're done working on that shared region */ shared_region->sr_mapping_in_progress = FALSE; thread_wakeup((event_t) &shared_region->sr_mapping_in_progress); @@ -905,8 +1320,9 @@ vm_shared_region_map_file( done: SHARED_REGION_TRACE_DEBUG( ("shared_region: map(%p,%d,%p,%p,0x%llx) <- 0x%x \n", - shared_region, mappings_count, mappings, - file_control, file_size, kr)); + (void *)VM_KERNEL_ADDRPERM(shared_region), mappings_count, + (void *)VM_KERNEL_ADDRPERM(mappings), + (void *)VM_KERNEL_ADDRPERM(file_control), file_size, kr)); return kr; } @@ -920,6 +1336,7 @@ kern_return_t vm_shared_region_enter( struct _vm_map *map, struct task *task, + boolean_t is_64bit, void *fsroot, cpu_type_t cpu) { @@ -930,14 +1347,14 @@ vm_shared_region_enter( vm_map_offset_t sr_pmap_nesting_start; vm_map_size_t sr_pmap_nesting_size; ipc_port_t sr_handle; - boolean_t is_64bit; - - is_64bit = task_has_64BitAddr(task); + vm_prot_t cur_prot, max_prot; SHARED_REGION_TRACE_DEBUG( ("shared_region: -> " "enter(map=%p,task=%p,root=%p,cpu=%d,64bit=%d)\n", - map, task, fsroot, cpu, is_64bit)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit)); /* lookup (create if needed) the shared region for this environment */ shared_region = vm_shared_region_lookup(fsroot, cpu, is_64bit); @@ -947,7 +1364,9 @@ vm_shared_region_enter( ("shared_region: -> " 
"enter(map=%p,task=%p,root=%p,cpu=%d,64bit=%d): " "lookup failed !\n", - map, task, fsroot, cpu, is_64bit)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit)); //panic("shared_region_enter: lookup failed\n"); return KERN_FAILURE; } @@ -963,6 +1382,18 @@ vm_shared_region_enter( sr_pmap_nesting_start = shared_region->sr_pmap_nesting_start; sr_pmap_nesting_size = shared_region->sr_pmap_nesting_size; + cur_prot = VM_PROT_READ; +#if __x86_64__ + /* + * XXX BINARY COMPATIBILITY + * java6 apparently needs to modify some code in the + * dyld shared cache and needs to be allowed to add + * write access... + */ + max_prot = VM_PROT_ALL; +#else /* __x86_64__ */ + max_prot = VM_PROT_READ; +#endif /* __x86_64__ */ /* * Start mapping the shared region's VM sub map into the task's VM map. */ @@ -981,24 +1412,30 @@ vm_shared_region_enter( sr_handle, sr_offset, TRUE, - VM_PROT_READ, - VM_PROT_ALL, + cur_prot, + max_prot, VM_INHERIT_SHARE); if (kr != KERN_SUCCESS) { SHARED_REGION_TRACE_ERROR( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu, is_64bit, (long long)target_address, - (long long)mapping_size, sr_handle, kr)); + (long long)mapping_size, + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); goto done; } SHARED_REGION_TRACE_DEBUG( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, (long long)target_address, (long long)mapping_size, - sr_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); sr_offset += mapping_size; sr_size -= mapping_size; } @@ -1026,24 +1463,30 @@ vm_shared_region_enter( sr_handle, sr_offset, TRUE, - VM_PROT_READ, - VM_PROT_ALL, + cur_prot, + max_prot, VM_INHERIT_SHARE); if (kr != KERN_SUCCESS) { SHARED_REGION_TRACE_ERROR( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu, is_64bit, (long long)target_address, - (long long)mapping_size, sr_handle, kr)); + (long long)mapping_size, + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); goto done; } SHARED_REGION_TRACE_DEBUG( ("shared_region: enter(%p,%p,%p,%d,%d): " "nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, (long long)target_address, (long long)mapping_size, - sr_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); } if (sr_size > 0) { /* and there's some left to be mapped without pmap-nesting */ @@ -1058,24 +1501,30 @@ vm_shared_region_enter( sr_handle, sr_offset, TRUE, - VM_PROT_READ, - VM_PROT_ALL, + cur_prot, + max_prot, VM_INHERIT_SHARE); if (kr != KERN_SUCCESS) { SHARED_REGION_TRACE_ERROR( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), + cpu, is_64bit, (long long)target_address, - (long long)mapping_size, sr_handle, kr)); + (long 
long)mapping_size, + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); goto done; } SHARED_REGION_TRACE_DEBUG( ("shared_region: enter(%p,%p,%p,%d,%d): " "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n", - map, task, fsroot, cpu, is_64bit, + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, (long long)target_address, (long long)mapping_size, - sr_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); sr_offset += mapping_size; sr_size -= mapping_size; } @@ -1084,10 +1533,533 @@ vm_shared_region_enter( done: SHARED_REGION_TRACE_DEBUG( ("shared_region: enter(%p,%p,%p,%d,%d) <- 0x%x\n", - map, task, fsroot, cpu, is_64bit, kr)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), + (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, kr)); + return kr; +} + +#define SANE_SLIDE_INFO_SIZE (2560*1024) /*Can be changed if needed*/ +struct vm_shared_region_slide_info slide_info; + +kern_return_t +vm_shared_region_sliding_valid(uint32_t slide) +{ + kern_return_t kr = KERN_SUCCESS; + vm_shared_region_t sr = vm_shared_region_get(current_task()); + + /* No region yet? we're fine. */ + if (sr == NULL) { + return kr; + } + + if ((sr->sr_slid == TRUE) && slide) { + if (slide != vm_shared_region_get_slide_info(sr)->slide) { + printf("Only one shared region can be slid\n"); + kr = KERN_FAILURE; + } else { + /* + * Request for sliding when we've + * already done it with exactly the + * same slide value before. + * This isn't wrong technically but + * we don't want to slide again and + * so we return this value. + */ + kr = KERN_INVALID_ARGUMENT; + } + } + vm_shared_region_deallocate(sr); return kr; } +kern_return_t +vm_shared_region_slide_init( + vm_shared_region_t sr, + mach_vm_size_t slide_info_size, + mach_vm_offset_t start, + mach_vm_size_t size, + uint32_t slide, + memory_object_control_t sr_file_control) +{ + kern_return_t kr = KERN_SUCCESS; + vm_object_t object = VM_OBJECT_NULL; + vm_object_offset_t offset = 0; + vm_shared_region_slide_info_t si = vm_shared_region_get_slide_info(sr); + vm_offset_t slide_info_entry; + + vm_map_t map = NULL, cur_map = NULL; + boolean_t is_map_locked = FALSE; + + assert(sr->sr_slide_in_progress); + assert(!sr->sr_slid); + assert(si->slide_object == NULL); + assert(si->slide_info_entry == NULL); + + if (slide_info_size > SANE_SLIDE_INFO_SIZE) { + printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size); + kr = KERN_FAILURE; + return kr; + } + + kr = kmem_alloc(kernel_map, + (vm_offset_t *) &slide_info_entry, + (vm_size_t) slide_info_size, VM_KERN_MEMORY_OSFMK); + if (kr != KERN_SUCCESS) { + return kr; + } + + if (sr_file_control != MEMORY_OBJECT_CONTROL_NULL) { + + object = memory_object_control_to_vm_object(sr_file_control); + vm_object_reference(object); + offset = start; + + vm_object_lock(object); + } else { + /* + * Remove this entire "else" block and all "map" references + * once we get rid of the shared_region_slide_np() + * system call. 
+ */ + vm_map_entry_t entry = VM_MAP_ENTRY_NULL; + map = current_map(); + vm_map_lock_read(map); + is_map_locked = TRUE; + Retry: + cur_map = map; + if(!vm_map_lookup_entry(map, start, &entry)) { + kr = KERN_INVALID_ARGUMENT; + } else { + vm_object_t shadow_obj = VM_OBJECT_NULL; + + if (entry->is_sub_map == TRUE) { + map = VME_SUBMAP(entry); + start -= entry->vme_start; + start += VME_OFFSET(entry); + vm_map_lock_read(map); + vm_map_unlock_read(cur_map); + goto Retry; + } else { + object = VME_OBJECT(entry); + offset = ((start - entry->vme_start) + + VME_OFFSET(entry)); + } + + vm_object_lock(object); + while (object->shadow != VM_OBJECT_NULL) { + shadow_obj = object->shadow; + vm_object_lock(shadow_obj); + vm_object_unlock(object); + object = shadow_obj; + } + } + } + + if (object->internal == TRUE) { + kr = KERN_INVALID_ADDRESS; + } else if (object->object_slid) { + /* Can only be slid once */ + printf("%s: found vm_object %p already slid?\n", __FUNCTION__, object); + kr = KERN_FAILURE; + } else { + + si->slide_info_entry = (vm_shared_region_slide_info_entry_t)slide_info_entry; + si->slide_info_size = slide_info_size; + si->slide_object = object; + si->start = offset; + si->end = si->start + size; + si->slide = slide; + + /* + * If we want to have this region get deallocated/freed + * then we will have to make sure that we msync(..MS_INVALIDATE..) + * the pages associated with this shared region. Those pages would + * have been slid with an older slide value. + */ + + /* + * Pointers in object are held without references; they + * are disconnected at the time that we destroy the + * shared region, and since the shared region holds + * a reference on the object, no references in the other + * direction are required. + */ + object->object_slid = TRUE; + object->vo_slide_info = si; + } + + vm_object_unlock(object); + if (is_map_locked == TRUE) { + vm_map_unlock_read(map); + } + + if (kr != KERN_SUCCESS) { + kmem_free(kernel_map, slide_info_entry, slide_info_size); + } + return kr; +} + +void* +vm_shared_region_get_slide_info_entry(vm_shared_region_t sr) { + return (void*)sr->sr_slide_info.slide_info_entry; +} + +static kern_return_t +vm_shared_region_slide_sanity_check_v1(vm_shared_region_slide_info_entry_v1_t s_info) +{ + uint32_t pageIndex=0; + uint16_t entryIndex=0; + uint16_t *toc = NULL; + + toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset); + for (;pageIndex < s_info->toc_count; pageIndex++) { + + entryIndex = (uint16_t)(toc[pageIndex]); + + if (entryIndex >= s_info->entry_count) { + printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count); + return KERN_FAILURE; + } + + } + return KERN_SUCCESS; +} + +static kern_return_t +vm_shared_region_slide_sanity_check_v2(vm_shared_region_slide_info_entry_v2_t s_info, mach_vm_size_t slide_info_size) +{ + if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) { + return KERN_FAILURE; + } + + /* Ensure that the slide info doesn't reference any data outside of its bounds. */ + + uint32_t page_starts_count = s_info->page_starts_count; + uint32_t page_extras_count = s_info->page_extras_count; + mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count; + if (num_trailing_entries < page_starts_count) { + return KERN_FAILURE; + } + + /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. 
*/ + mach_vm_size_t trailing_size = num_trailing_entries << 1; + if (trailing_size >> 1 != num_trailing_entries) { + return KERN_FAILURE; + } + + mach_vm_size_t required_size = sizeof(*s_info) + trailing_size; + if (required_size < sizeof(*s_info)) { + return KERN_FAILURE; + } + + if (required_size > slide_info_size) { + return KERN_FAILURE; + } + + return KERN_SUCCESS; +} + +kern_return_t +vm_shared_region_slide_sanity_check(vm_shared_region_t sr) +{ + vm_shared_region_slide_info_t si; + vm_shared_region_slide_info_entry_t s_info; + kern_return_t kr; + + si = vm_shared_region_get_slide_info(sr); + s_info = si->slide_info_entry; + + kr = mach_vm_protect(kernel_map, + (mach_vm_offset_t)(vm_offset_t)s_info, + (mach_vm_size_t) si->slide_info_size, + TRUE, VM_PROT_READ); + if (kr != KERN_SUCCESS) { + panic("vm_shared_region_slide_sanity_check: vm_protect() error 0x%x\n", kr); + } + + if (s_info->version == 1) { + kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1); + } else if (s_info->version == 2) { + kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, si->slide_info_size); + } else { + goto fail; + } + if (kr != KERN_SUCCESS) { + goto fail; + } + + return KERN_SUCCESS; +fail: + if (si->slide_info_entry != NULL) { + kmem_free(kernel_map, + (vm_offset_t) si->slide_info_entry, + (vm_size_t) si->slide_info_size); + + vm_object_lock(si->slide_object); + si->slide_object->object_slid = FALSE; + si->slide_object->vo_slide_info = NULL; + vm_object_unlock(si->slide_object); + + vm_object_deallocate(si->slide_object); + si->slide_object = NULL; + si->start = 0; + si->end = 0; + si->slide = 0; + si->slide_info_entry = NULL; + si->slide_info_size = 0; + } + return KERN_FAILURE; +} + +static kern_return_t +vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex) +{ + uint16_t *toc = NULL; + slide_info_entry_toc_t bitmap = NULL; + uint32_t i=0, j=0; + uint8_t b = 0; + uint32_t slide = si->slide; + int is_64 = task_has_64BitAddr(current_task()); + + vm_shared_region_slide_info_entry_v1_t s_info = &si->slide_info_entry->v1; + toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset); + + if (pageIndex >= s_info->toc_count) { + printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count); + } else { + uint16_t entryIndex = (uint16_t)(toc[pageIndex]); + slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset); + + if (entryIndex >= s_info->entry_count) { + printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count); + } else { + bitmap = &slide_info_entries[entryIndex]; + + for(i=0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) { + b = bitmap->entry[i]; + if (b!=0) { + for (j=0; j <8; ++j) { + if (b & (1 <delta_mask); + const uint32_t value_mask = ~delta_mask; + const uint32_t value_add = (uint32_t)(s_info->value_add); + const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2; + + uint32_t page_offset = start_offset; + uint32_t delta = 1; + + while (delta != 0 && page_offset <= last_page_offset) { + uint8_t *loc; + uint32_t value; + + loc = page_content + page_offset; + memcpy(&value, loc, sizeof(value)); + delta = (value & delta_mask) >> delta_shift; + value &= value_mask; + + if (value != 0) { + value += value_add; + value += slide_amount; + } + memcpy(loc, &value, sizeof(value)); + page_offset += delta; + } + + /* If the offset went past the end of the page, then the slide data is invalid. 
*/ + if (page_offset > last_page_offset) { + return KERN_FAILURE; + } + return KERN_SUCCESS; +} + +static kern_return_t +rebase_chain_64( + uint8_t *page_content, + uint16_t start_offset, + uint32_t slide_amount, + vm_shared_region_slide_info_entry_v2_t s_info) +{ + const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t); + + const uint64_t delta_mask = s_info->delta_mask; + const uint64_t value_mask = ~delta_mask; + const uint64_t value_add = s_info->value_add; + const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2; + + uint32_t page_offset = start_offset; + uint32_t delta = 1; + + while (delta != 0 && page_offset <= last_page_offset) { + uint8_t *loc; + uint64_t value; + + loc = page_content + page_offset; + memcpy(&value, loc, sizeof(value)); + delta = (uint32_t)((value & delta_mask) >> delta_shift); + value &= value_mask; + + if (value != 0) { + value += value_add; + value += slide_amount; + } + memcpy(loc, &value, sizeof(value)); + page_offset += delta; + } + + if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) { + /* If a pointer straddling the page boundary needs to be adjusted, then + * add the slide to the lower half. The encoding guarantees that the upper + * half on the next page will need no masking. + * + * This assumes a little-endian machine and that the region being slid + * never crosses a 4 GB boundary. */ + + uint8_t *loc = page_content + page_offset; + uint32_t value; + + memcpy(&value, loc, sizeof(value)); + value += slide_amount; + memcpy(loc, &value, sizeof(value)); + } else if (page_offset > last_page_offset) { + return KERN_FAILURE; + } + + return KERN_SUCCESS; +} + +static kern_return_t +rebase_chain( + boolean_t is_64, + uint32_t pageIndex, + uint8_t *page_content, + uint16_t start_offset, + uint32_t slide_amount, + vm_shared_region_slide_info_entry_v2_t s_info) +{ + kern_return_t kr; + if (is_64) { + kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info); + } else { + kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info); + } + + if (kr != KERN_SUCCESS) { + printf("vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n", + pageIndex, start_offset, slide_amount); + } + return kr; +} + +static kern_return_t +vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex) +{ + vm_shared_region_slide_info_entry_v2_t s_info = &si->slide_info_entry->v2; + const uint32_t slide_amount = si->slide; + + /* The high bits of the delta_mask field are nonzero precisely when the shared + * cache is 64-bit. 
*/ + const boolean_t is_64 = (s_info->delta_mask >> 32) != 0; + + const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset); + const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset); + + uint8_t *page_content = (uint8_t *)vaddr; + uint16_t page_entry; + + if (pageIndex >= s_info->page_starts_count) { + printf("vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n", + pageIndex, s_info->page_starts_count); + return KERN_FAILURE; + } + page_entry = page_starts[pageIndex]; + + if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) { + return KERN_SUCCESS; + } + + if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) { + uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE; + uint16_t info; + + do { + uint16_t page_start_offset; + kern_return_t kr; + + if (chain_index >= s_info->page_extras_count) { + printf("vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n", + chain_index, s_info->page_extras_count); + return KERN_FAILURE; + } + info = page_extras[chain_index]; + page_start_offset = (info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT; + + kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info); + if (kr != KERN_SUCCESS) { + return KERN_FAILURE; + } + + chain_index++; + } while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END)); + } else { + const uint32_t page_start_offset = page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT; + kern_return_t kr; + + kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info); + if (kr != KERN_SUCCESS) { + return KERN_FAILURE; + } + } + + return KERN_SUCCESS; +} + +kern_return_t +vm_shared_region_slide_page(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex) +{ + if (si->slide_info_entry->version == 1) { + return vm_shared_region_slide_page_v1(si, vaddr, pageIndex); + } else { + return vm_shared_region_slide_page_v2(si, vaddr, pageIndex); + } +} + /******************************************************************************/ /* Comm page support */ /******************************************************************************/ @@ -1099,6 +2071,17 @@ vm_named_entry_t commpage64_entry = NULL; vm_map_t commpage32_map = VM_MAP_NULL; vm_map_t commpage64_map = VM_MAP_NULL; +ipc_port_t commpage_text32_handle = IPC_PORT_NULL; +ipc_port_t commpage_text64_handle = IPC_PORT_NULL; +vm_named_entry_t commpage_text32_entry = NULL; +vm_named_entry_t commpage_text64_entry = NULL; +vm_map_t commpage_text32_map = VM_MAP_NULL; +vm_map_t commpage_text64_map = VM_MAP_NULL; + +user32_addr_t commpage_text32_location = (user32_addr_t) _COMM_PAGE32_TEXT_START; +user64_addr_t commpage_text64_location = (user64_addr_t) _COMM_PAGE64_TEXT_START; + +#if defined(__i386__) || defined(__x86_64__) /* * Create a memory entry, VM submap and pmap for one commpage. 
*/ @@ -1120,7 +2103,7 @@ _vm_commpage_init( if (kr != KERN_SUCCESS) { panic("_vm_commpage_init: could not allocate mem_entry"); } - new_map = vm_map_create(pmap_create(0, FALSE), 0, size, TRUE); + new_map = vm_map_create(pmap_create(NULL, 0, 0), 0, size, TRUE); if (new_map == VM_MAP_NULL) { panic("_vm_commpage_init: could not allocate VM map"); } @@ -1133,7 +2116,43 @@ _vm_commpage_init( SHARED_REGION_TRACE_DEBUG( ("commpage: _init(0x%llx) <- %p\n", - (long long)size, *handlep)); + (long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep))); +} +#endif + + +/* + *Initialize the comm text pages at boot time + */ + extern u_int32_t random(void); + void +vm_commpage_text_init(void) +{ + SHARED_REGION_TRACE_DEBUG( + ("commpage text: ->init()\n")); +#if defined(__i386__) || defined(__x86_64__) + /* create the 32 bit comm text page */ + unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */ + _vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH); + commpage_text32_entry = (vm_named_entry_t) commpage_text32_handle->ip_kobject; + commpage_text32_map = commpage_text32_entry->backing.map; + commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset); + /* XXX if (cpu_is_64bit_capable()) ? */ + /* create the 64-bit comm page */ + offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding upto 2Mb range */ + _vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH); + commpage_text64_entry = (vm_named_entry_t) commpage_text64_handle->ip_kobject; + commpage_text64_map = commpage_text64_entry->backing.map; + commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset); + + commpage_text_populate(); +#else +#error Unknown architecture. 
+#endif /* __i386__ || __x86_64__ */ + /* populate the routines in here */ + SHARED_REGION_TRACE_DEBUG( + ("commpage text: init() <-\n")); + } /* @@ -1145,6 +2164,7 @@ vm_commpage_init(void) SHARED_REGION_TRACE_DEBUG( ("commpage: -> init()\n")); +#if defined(__i386__) || defined(__x86_64__) /* create the 32-bit comm page */ _vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH); commpage32_entry = (vm_named_entry_t) commpage32_handle->ip_kobject; @@ -1156,8 +2176,16 @@ vm_commpage_init(void) commpage64_entry = (vm_named_entry_t) commpage64_handle->ip_kobject; commpage64_map = commpage64_entry->backing.map; +#endif /* __i386__ || __x86_64__ */ + /* populate them according to this specific platform */ commpage_populate(); + __commpage_setup = 1; +#if defined(__i386__) || defined(__x86_64__) + if (__system_power_source == 0) { + post_sys_powersource_internal(0, 1); + } +#endif /* __i386__ || __x86_64__ */ SHARED_REGION_TRACE_DEBUG( ("commpage: init() <-\n")); @@ -1170,37 +2198,34 @@ vm_commpage_init(void) kern_return_t vm_commpage_enter( vm_map_t map, - task_t task) + task_t task, + boolean_t is64bit) { - ipc_port_t commpage_handle; - vm_map_offset_t commpage_address, objc_address; - vm_map_size_t commpage_size, objc_size; + ipc_port_t commpage_handle, commpage_text_handle; + vm_map_offset_t commpage_address, objc_address, commpage_text_address; + vm_map_size_t commpage_size, objc_size, commpage_text_size; int vm_flags; kern_return_t kr; SHARED_REGION_TRACE_DEBUG( ("commpage: -> enter(%p,%p)\n", - map, task)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task))); + commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH; /* the comm page is likely to be beyond the actual end of the VM map */ vm_flags = VM_FLAGS_FIXED | VM_FLAGS_BEYOND_MAX; /* select the appropriate comm page for this task */ - assert(! (task_has_64BitAddr(task) ^ vm_map_is_64bit(map))); - if (task_has_64BitAddr(task)) { -#ifdef __ppc__ - /* - * PPC51: ppc64 is limited to 51-bit addresses. - * Memory above that limit is handled specially at the - * pmap level, so do not interfere. - */ - vm_flags |= VM_FLAGS_NO_PMAP_CHECK; -#endif /* __ppc__ */ + assert(! 
(is64bit ^ vm_map_is_64bit(map))); + if (is64bit) { commpage_handle = commpage64_handle; commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS; commpage_size = _COMM_PAGE64_AREA_LENGTH; objc_size = _COMM_PAGE64_OBJC_SIZE; objc_address = _COMM_PAGE64_OBJC_BASE; + commpage_text_handle = commpage_text64_handle; + commpage_text_address = (vm_map_offset_t) commpage_text64_location; } else { commpage_handle = commpage32_handle; commpage_address = @@ -1208,6 +2233,8 @@ vm_commpage_enter( commpage_size = _COMM_PAGE32_AREA_LENGTH; objc_size = _COMM_PAGE32_OBJC_SIZE; objc_address = _COMM_PAGE32_OBJC_BASE; + commpage_text_handle = commpage_text32_handle; + commpage_text_address = (vm_map_offset_t) commpage_text32_location; } if ((commpage_address & (pmap_nesting_size_min - 1)) == 0 && @@ -1215,7 +2242,6 @@ vm_commpage_enter( /* the commpage is properly aligned or sized for pmap-nesting */ vm_flags |= VM_MAKE_TAG(VM_MEMORY_SHARED_PMAP); } - /* map the comm page in the task's address space */ assert(commpage_handle != IPC_PORT_NULL); kr = vm_map_enter_mem_object( @@ -1227,15 +2253,41 @@ vm_commpage_enter( commpage_handle, 0, FALSE, - VM_PROT_READ|VM_PROT_EXECUTE, - VM_PROT_READ|VM_PROT_EXECUTE, + VM_PROT_READ, + VM_PROT_READ, VM_INHERIT_SHARE); if (kr != KERN_SUCCESS) { SHARED_REGION_TRACE_ERROR( ("commpage: enter(%p,0x%llx,0x%llx) " "commpage %p mapping failed 0x%x\n", - map, (long long)commpage_address, - (long long)commpage_size, commpage_handle, kr)); + (void *)VM_KERNEL_ADDRPERM(map), + (long long)commpage_address, + (long long)commpage_size, + (void *)VM_KERNEL_ADDRPERM(commpage_handle), kr)); + } + + /* map the comm text page in the task's address space */ + assert(commpage_text_handle != IPC_PORT_NULL); + kr = vm_map_enter_mem_object( + map, + &commpage_text_address, + commpage_text_size, + 0, + vm_flags, + commpage_text_handle, + 0, + FALSE, + VM_PROT_READ|VM_PROT_EXECUTE, + VM_PROT_READ|VM_PROT_EXECUTE, + VM_INHERIT_SHARE); + if (kr != KERN_SUCCESS) { + SHARED_REGION_TRACE_ERROR( + ("commpage text: enter(%p,0x%llx,0x%llx) " + "commpage text %p mapping failed 0x%x\n", + (void *)VM_KERNEL_ADDRPERM(map), + (long long)commpage_text_address, + (long long)commpage_text_size, + (void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr)); } /* @@ -1259,13 +2311,160 @@ vm_commpage_enter( SHARED_REGION_TRACE_ERROR( ("commpage: enter(%p,0x%llx,0x%llx) " "objc mapping failed 0x%x\n", - map, (long long)objc_address, + (void *)VM_KERNEL_ADDRPERM(map), + (long long)objc_address, (long long)objc_size, kr)); } } SHARED_REGION_TRACE_DEBUG( ("commpage: enter(%p,%p) <- 0x%x\n", - map, task, kr)); + (void *)VM_KERNEL_ADDRPERM(map), + (void *)VM_KERNEL_ADDRPERM(task), kr)); return kr; } + +int +vm_shared_region_slide(uint32_t slide, + mach_vm_offset_t entry_start_address, + mach_vm_size_t entry_size, + mach_vm_offset_t slide_start, + mach_vm_size_t slide_size, + memory_object_control_t sr_file_control) +{ + void *slide_info_entry = NULL; + int error; + vm_shared_region_t sr; + + SHARED_REGION_TRACE_DEBUG( + ("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n", + slide, entry_start_address, entry_size, slide_start, slide_size)); + + sr = vm_shared_region_get(current_task()); + if (sr == NULL) { + printf("%s: no shared region?\n", __FUNCTION__); + SHARED_REGION_TRACE_DEBUG( + ("vm_shared_region_slide: <- %d (no shared region)\n", + KERN_FAILURE)); + return KERN_FAILURE; + } + + /* + * Protect from concurrent access. 
+ */ + vm_shared_region_lock(); + while(sr->sr_slide_in_progress) { + vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT); + } + if (sr->sr_slid + || shared_region_completed_slide + ) { + vm_shared_region_unlock(); + + vm_shared_region_deallocate(sr); + printf("%s: shared region already slid?\n", __FUNCTION__); + SHARED_REGION_TRACE_DEBUG( + ("vm_shared_region_slide: <- %d (already slid)\n", + KERN_FAILURE)); + return KERN_FAILURE; + } + + sr->sr_slide_in_progress = TRUE; + vm_shared_region_unlock(); + + if((error = vm_shared_region_slide_init(sr, slide_size, entry_start_address, entry_size, slide, sr_file_control))) { + printf("slide_info initialization failed with kr=%d\n", error); + goto done; + } + + slide_info_entry = vm_shared_region_get_slide_info_entry(sr); + if (slide_info_entry == NULL){ + error = KERN_FAILURE; + } else { + error = copyin((user_addr_t)slide_start, + slide_info_entry, + (vm_size_t)slide_size); + if (error) { + error = KERN_INVALID_ADDRESS; + } + } + if (error) { + goto done; + } + + if (vm_shared_region_slide_sanity_check(sr) != KERN_SUCCESS) { + error = KERN_INVALID_ARGUMENT; + printf("Sanity Check failed for slide_info\n"); + } else { +#if DEBUG + printf("Succesfully init slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n", + (void*)(uintptr_t)entry_start_address, + (unsigned long)entry_size, + (unsigned long)slide_size); +#endif + } +done: + vm_shared_region_lock(); + + assert(sr->sr_slide_in_progress); + assert(sr->sr_slid == FALSE); + sr->sr_slide_in_progress = FALSE; + thread_wakeup(&sr->sr_slide_in_progress); + + if (error == KERN_SUCCESS) { + sr->sr_slid = TRUE; + + /* + * We don't know how to tear down a slid shared region today, because + * we would have to invalidate all the pages that have been slid + * atomically with respect to anyone mapping the shared region afresh. + * Therefore, take a dangling reference to prevent teardown. + */ + sr->sr_ref_count++; + shared_region_completed_slide = TRUE; + } + vm_shared_region_unlock(); + + vm_shared_region_deallocate(sr); + + SHARED_REGION_TRACE_DEBUG( + ("vm_shared_region_slide: <- %d\n", + error)); + + return error; +} + +/* + * This is called from powermanagement code to let kernel know the current source of power. + * 0 if it is external source (connected to power ) + * 1 if it is internal power source ie battery + */ +void +#if defined(__i386__) || defined(__x86_64__) +post_sys_powersource(int i) +#else +post_sys_powersource(__unused int i) +#endif +{ +#if defined(__i386__) || defined(__x86_64__) + post_sys_powersource_internal(i, 0); +#endif /* __i386__ || __x86_64__ */ +} + + +#if defined(__i386__) || defined(__x86_64__) +static void +post_sys_powersource_internal(int i, int internal) +{ + if (internal == 0) + __system_power_source = i; + + if (__commpage_setup != 0) { + if (__system_power_source != 0) + commpage_set_spin_count(0); + else + commpage_set_spin_count(MP_SPIN_TRIES); + } +} +#endif /* __i386__ || __x86_64__ */ +
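
The headline behavioral change in this diff is that an unreferenced shared region is no longer destroyed on the spot: vm_shared_region_deallocate() arms a thread_call that fires after shared_region_destroy_delay seconds and simply re-enters deallocate, while vm_shared_region_reference_locked() cancels a pending timer (and drops the reference it held) if the region is reused in time. Below is a minimal, user-space sketch of that reference-count-plus-timer dance; struct region, the fake integer clock and every function name here are inventions for illustration, not kernel API, and the kernel additionally copes with a timer that has already fired and can no longer be cancelled.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define DESTROY_DELAY 120                 /* mirrors shared_region_destroy_delay */

struct region {
    int  ref_count;
    bool timer_exists;                    /* stands in for sr_timer_call != NULL */
    int  destroy_deadline;                /* fake clock, in "seconds" */
};

/* Reusing the region cancels a pending reclaim, as in
 * vm_shared_region_reference_locked(). */
static void
region_retain(struct region *r)
{
    r->ref_count++;
    if (r->timer_exists) {
        r->timer_exists = false;
        r->ref_count--;                   /* drop the reference the timer held */
    }
}

/* Dropping the last reference arms a timer instead of destroying,
 * as in vm_shared_region_deallocate(). */
static void
region_release(struct region *r, int now)
{
    r->ref_count--;
    if (r->ref_count > 0)
        return;
    if (!r->timer_exists) {
        r->ref_count++;                   /* the timer keeps the region alive */
        r->timer_exists = true;
        r->destroy_deadline = now + DESTROY_DELAY;
        printf("armed reclaim timer, deadline t=%d\n", r->destroy_deadline);
    } else {
        printf("timer expired with no users: destroying at t=%d\n", now);
        free(r);
    }
}

/* The timer callback just drops the timer's reference, exactly like
 * vm_shared_region_timeout() calling vm_shared_region_deallocate(). */
static void
region_timer_fired(struct region *r, int now)
{
    region_release(r, now);
}

int
main(void)
{
    struct region *r = calloc(1, sizeof(*r));
    r->ref_count = 1;                     /* the caller's reference */

    region_release(r, 0);                 /* last user gone: timer armed    */
    region_retain(r);                     /* region reused: timer cancelled */
    region_release(r, 50);                /* gone again: timer re-armed     */
    region_timer_fired(r, 50 + DESTROY_DELAY); /* nobody came back: freed   */
    return 0;
}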
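
vm_shared_region_map_file() now distinguishes the mappings it established from ones that were already present: a KERN_MEMORY_PRESENT result zeroes that entry's sfm_size, so the rollback in vm_shared_region_undo_mappings() only deallocates entries 0..i-1 with a non-zero size. A toy stand-alone illustration of that bookkeeping follows; establish(), the failing index and the addresses are made up, only the skip-zero-size convention mirrors the diff.

#include <stdint.h>
#include <stdio.h>

/* Trimmed-down stand-in for struct shared_file_mapping_np. */
struct mapping_demo {
    uint64_t sfm_address;
    uint64_t sfm_size;
};

/* Pretend to establish mapping i; index 1 is "already there", index 3 fails. */
static int
establish(struct mapping_demo *m, unsigned i)
{
    if (i == 3)
        return -1;                        /* hard failure */
    if (i == 1) {
        /* Equivalent of KERN_MEMORY_PRESENT: not ours, so zero the size
         * and the undo pass below will skip it. */
        m->sfm_size = 0;
    }
    printf("mapped  [%u] addr=0x%llx size=0x%llx\n",
        i, (unsigned long long)m->sfm_address, (unsigned long long)m->sfm_size);
    return 0;
}

/* Mirror of vm_shared_region_undo_mappings(): tear down the first
 * mappings_count entries, skipping the ones we never established. */
static void
undo_mappings(struct mapping_demo *mappings, unsigned mappings_count)
{
    for (unsigned j = 0; j < mappings_count; j++) {
        if (mappings[j].sfm_size == 0)
            continue;
        printf("undoing [%u] addr=0x%llx size=0x%llx\n",
            j, (unsigned long long)mappings[j].sfm_address,
            (unsigned long long)mappings[j].sfm_size);
    }
}

int
main(void)
{
    struct mapping_demo mappings[] = {
        { 0x7fff80000000ULL, 0x1000 },
        { 0x7fff80001000ULL, 0x1000 },
        { 0x7fff80002000ULL, 0x1000 },
        { 0x7fff80003000ULL, 0x1000 },
    };
    unsigned i;

    for (i = 0; i < 4; i++) {
        if (establish(&mappings[i], i) != 0)
            break;
    }
    /* Only the mappings attempted so far (0..i-1) are rolled back. */
    undo_mappings(mappings, i);
    return 0;
}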
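
Part of the v1 page-slide loop in vm_shared_region_slide_page_v1() was lost in this rendering (the text breaks off at "if (b & (1 <"). The surviving structure, a per-page table of contents selecting a slide_info_entry_toc_t bitmap that is walked byte by byte and bit by bit, suggests the following reading: each set bit marks one 4-byte slot in the page that gets the slide added. The stand-alone sketch below assumes that slot addressing and a 4 KB page; NUM_SLIDING_BITMAPS_PER_PAGE here is a local guess, not the kernel's definition.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE_4K 4096
/* Assumed: one bit per 4-byte slot, so 4096 / 4 / 8 = 128 bitmap bytes. */
#define NUM_SLIDING_BITMAPS_PER_PAGE (PAGE_SIZE_4K / sizeof(uint32_t) / 8)

/* Add the slide to every 4-byte slot whose bit is set in the page bitmap. */
static void
slide_page_v1_demo(uint8_t *page, const uint8_t *bitmap, uint32_t slide)
{
    for (unsigned i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; i++) {
        uint8_t b = bitmap[i];
        if (b == 0)
            continue;
        for (unsigned j = 0; j < 8; j++) {
            if (b & (1u << j)) {
                uint32_t value;
                size_t offset = (i * 8 + j) * sizeof(uint32_t);
                memcpy(&value, page + offset, sizeof(value));
                value += slide;
                memcpy(page + offset, &value, sizeof(value));
            }
        }
    }
}

int
main(void)
{
    uint8_t page[PAGE_SIZE_4K] = { 0 };
    uint8_t bitmap[NUM_SLIDING_BITMAPS_PER_PAGE] = { 0 };
    uint32_t ptr = 0x1000;

    memcpy(page + 8, &ptr, sizeof(ptr)); /* pretend word #2 of the page is a pointer */
    bitmap[0] = 1u << 2;                 /* ...and mark it in the bitmap */

    slide_page_v1_demo(page, bitmap, 0x4000);
    memcpy(&ptr, page + 8, sizeof(ptr));
    printf("0x%08x\n", ptr);             /* prints 0x00005000 */
    return 0;
}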
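
vm_shared_region_slide_sanity_check_v2() refuses to trust the user-supplied slide info until it has verified that the page_starts and page_extras arrays of uint16_t entries fit inside slide_info_size, checking every intermediate computation for wrap-around; the "scale by sizeof(uint16_t)" comment refers to the shift-then-shift-back idiom. A compact stand-alone version with the same shape follows; the struct below is a placeholder, not the real header layout, and note that with 32-bit counts and 64-bit sizes the wrap checks cannot actually fire, so they are purely defensive here as in the kernel.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the v2 slide info header; only the sizes matter. */
struct slide_info_v2_demo {
    uint32_t page_size;
    uint32_t page_starts_offset;
    uint32_t page_starts_count;
    uint32_t page_extras_offset;
    uint32_t page_extras_count;
    uint64_t delta_mask;
    uint64_t value_add;
};

static bool
v2_bounds_ok(const struct slide_info_v2_demo *s, uint64_t slide_info_size)
{
    uint64_t num_trailing_entries =
        (uint64_t)s->page_starts_count + s->page_extras_count;
    if (num_trailing_entries < s->page_starts_count)
        return false;                     /* addition wrapped */

    /* Scale by sizeof(uint16_t); shifting back must round-trip. */
    uint64_t trailing_size = num_trailing_entries << 1;
    if (trailing_size >> 1 != num_trailing_entries)
        return false;                     /* shift lost bits */

    uint64_t required_size = sizeof(*s) + trailing_size;
    if (required_size < sizeof(*s))
        return false;                     /* addition wrapped */

    return required_size <= slide_info_size;
}

int
main(void)
{
    struct slide_info_v2_demo s = { .page_starts_count = 1024,
                                    .page_extras_count = 16 };
    printf("fits in 4096: %d\n", v2_bounds_ok(&s, 4096)); /* 1 */
    printf("fits in 2048: %d\n", v2_bounds_ok(&s, 2048)); /* 0 */
    return 0;
}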
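
The v2 format slides a page by walking chains of pointers: rebase_chain_32()/rebase_chain_64() read a word, use the bits under delta_mask (scaled by delta_shift, i.e. 4-byte units) as the distance to the next pointer in the same page, add value_add plus the slide to the remaining bits, and stop when the delta is zero or the chain runs off the page. The runnable sketch below replays the 32-bit walk on a synthetic page; demo_delta_mask, demo_value_add and the page layout are invented for the demo, only the walk itself mirrors the diff.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE_FOR_SR_SLIDE 4096

/* Demo parameters, NOT the real dyld values: the top byte of each stored
 * word links to the next pointer in the chain (in 4-byte units) and the
 * low 24 bits hold the unslid value. */
static const uint32_t demo_delta_mask = 0xFF000000u;
static const uint32_t demo_value_add  = 0;

static int
rebase_chain_32_demo(uint8_t *page_content, uint16_t start_offset,
    uint32_t slide_amount)
{
    const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
    const uint32_t value_mask = ~demo_delta_mask;
    const uint32_t delta_shift = __builtin_ctz(demo_delta_mask) - 2;

    uint32_t page_offset = start_offset;
    uint32_t delta = 1;

    while (delta != 0 && page_offset <= last_page_offset) {
        uint32_t value;

        memcpy(&value, page_content + page_offset, sizeof(value));
        delta = (value & demo_delta_mask) >> delta_shift; /* bytes to next pointer */
        value &= value_mask;                              /* strip the chain link  */
        if (value != 0)
            value += demo_value_add + slide_amount;       /* apply the slide       */
        memcpy(page_content + page_offset, &value, sizeof(value));
        page_offset += delta;                             /* delta == 0 ends it    */
    }
    /* The kernel additionally fails the page if the chain ran past its end. */
    return (page_offset > last_page_offset) ? -1 : 0;
}

int
main(void)
{
    uint8_t page[PAGE_SIZE_FOR_SR_SLIDE] = { 0 };
    uint32_t first, second, a, b;

    /* Two chained pointers: the first links 2 four-byte units (8 bytes)
     * ahead; the second has delta 0 and terminates the chain. */
    first  = (2u << 24) | 0x00001000u;
    second = (0u << 24) | 0x00002000u;
    memcpy(page + 0x10, &first, sizeof(first));
    memcpy(page + 0x18, &second, sizeof(second));

    rebase_chain_32_demo(page, 0x10, 0x4000);

    memcpy(&a, page + 0x10, sizeof(a));
    memcpy(&b, page + 0x18, sizeof(b));
    printf("0x%08x 0x%08x\n", a, b);      /* prints 0x00005000 0x00006000 */
    return 0;
}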
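
vm_commpage_text_init() places the executable text commpage at a randomized, page-aligned offset inside a small window: offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT, added to _COMM_PAGE32_TEXT_START (and similarly for the 64-bit page). The arithmetic is no more than the following sketch; the base address and range here are placeholders, not the real _COMM_PAGE* or _PFZ* constants.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define PAGE_SHIFT_DEMO   12              /* 4 KB pages */
#define SLIDE_RANGE_DEMO  0x10            /* number of candidate pages (placeholder) */
#define TEXT_BASE_DEMO    0xfffe0000u     /* placeholder for _COMM_PAGE32_TEXT_START */

int
main(void)
{
    srand((unsigned)time(NULL));

    /* Page-aligned slide within the window, as in vm_commpage_text_init(). */
    uint32_t offset = ((uint32_t)rand() % SLIDE_RANGE_DEMO) << PAGE_SHIFT_DEMO;
    uint32_t text_location = TEXT_BASE_DEMO + offset;

    printf("text commpage at 0x%08x (slide 0x%05x)\n", text_location, offset);
    return 0;
}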