X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/cb3231590a3c94ab4375e2228bd5e86b0cf1ad7e..a991bd8d3e7fe02dbca0644054bab73c5b75324a:/bsd/vm/vm_unix.c?ds=sidebyside diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index b9626cc5e..9421ee0f9 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2018 Apple Inc. All rights reserved. + * Copyright (c) 2000-2020 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -65,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +81,7 @@ #include #include #include +#include #if NECP #include #endif /* NECP */ @@ -99,19 +102,19 @@ #include #include +#include #if CONFIG_MACF #include #endif +#include + #if CONFIG_CSR #include #endif /* CONFIG_CSR */ #include -int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t); -int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); - #if VM_MAP_DEBUG_APPLE_PROTECT SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, ""); #endif /* VM_MAP_DEBUG_APPLE_PROTECT */ @@ -150,6 +153,9 @@ SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG extern int vm_region_footprint; SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, ""); + +#endif /* DEVELOPMENT || DEBUG */ + static int sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS { @@ -176,25 +182,53 @@ sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS } SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", ""); -#endif /* DEVELOPMENT || DEBUG */ +static int 
+sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2, oidp) + int error = 0; + int value; + + value = (1 << thread_self_region_page_shift()); + error = SYSCTL_OUT(req, &value, sizeof(int)); + if (error) { + return error; + } + if (!req->newptr) { + return 0; + } + + error = SYSCTL_IN(req, &value, sizeof(int)); + if (error) { + return error; + } + + if (value != 0 && value != 4096 && value != 16384) { + return EINVAL; + } + +#if !__ARM_MIXED_PAGE_SIZE__ + if (value != vm_map_page_size(current_map())) { + return EINVAL; + } +#endif /* !__ARM_MIXED_PAGE_SIZE__ */ + + thread_self_region_page_shift_set(bit_first(value)); + return 0; +} +SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", ""); -#if CONFIG_EMBEDDED #if DEVELOPMENT || DEBUG extern int panic_on_unsigned_execute; SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, ""); #endif /* DEVELOPMENT || DEBUG */ -extern int log_executable_mem_entry; extern int cs_executable_create_upl; -extern int cs_executable_mem_entry; extern int cs_executable_wire; -SYSCTL_INT(_vm, OID_AUTO, log_executable_mem_entry, CTLFLAG_RD | CTLFLAG_LOCKED, &log_executable_mem_entry, 0, ""); SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, ""); -SYSCTL_INT(_vm, OID_AUTO, cs_executable_mem_entry, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_mem_entry, 0, ""); SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, ""); -#endif /* CONFIG_EMBEDDED */ #if DEVELOPMENT || DEBUG extern int radar_20146450; @@ -235,6 +269,10 @@ SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse #if DEVELOPMENT || DEBUG extern unsigned long pmap_asid_flushes; SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | 
CTLFLAG_LOCKED, &pmap_asid_flushes, ""); +extern unsigned long pmap_asid_hits; +SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, ""); +extern unsigned long pmap_asid_misses; +SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, ""); #endif #endif /* __arm__ || __arm64__ */ @@ -328,13 +366,22 @@ int shared_region_unnest_log_count_threshold = 5; * Shared cache path enforcement. */ -#ifndef CONFIG_EMBEDDED +#if XNU_TARGET_OS_OSX + +#if defined (__x86_64__) static int scdir_enforce = 1; -static char scdir_path[] = "/var/db/dyld/"; -#else +#else /* defined (__x86_64__) */ +static int scdir_enforce = 0; /* AOT caches live elsewhere */ +#endif /* defined (__x86_64__) */ + +static char scdir_path[] = "/System/Library/dyld/"; + +#else /* XNU_TARGET_OS_OSX */ + static int scdir_enforce = 0; static char scdir_path[] = "/System/Library/Caches/com.apple.dyld/"; -#endif + +#endif /* XNU_TARGET_OS_OSX */ #ifndef SECURE_KERNEL static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS @@ -473,7 +520,7 @@ vsunlock( for (vaddr = vm_map_trunc_page(addr, PAGE_MASK); vaddr < vm_map_round_page(addr + len, PAGE_MASK); vaddr += PAGE_SIZE) { - paddr = pmap_extract(pmap, vaddr); + paddr = pmap_find_phys(pmap, vaddr); pg = PHYS_TO_VM_PAGE(paddr); vm_page_set_modified(pg); } @@ -676,7 +723,7 @@ pid_for_task( AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK); AUDIT_ARG(mach_port1, t); - t1 = port_name_to_task_inspect(t); + t1 = port_name_to_task_name(t); if (t1 == TASK_NULL) { err = KERN_FAILURE; @@ -827,8 +874,10 @@ task_for_pid( task_t task = TASK_NULL; mach_port_name_t tret = MACH_PORT_NULL; ipc_port_t tfpport = MACH_PORT_NULL; - void * sright; - int error = 0; + void * sright = NULL; + int error = 0; + boolean_t is_current_proc = FALSE; + struct proc_ident pident = {0}; AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID); AUDIT_ARG(pid, pid); @@ -854,6 +903,8 @@ task_for_pid( error = KERN_FAILURE; goto tfpout; } + pident = 
proc_ident(p); + is_current_proc = (p == current_proc()); #if CONFIG_AUDIT AUDIT_ARG(process, p); @@ -869,21 +920,27 @@ task_for_pid( goto tfpout; } + /* + * Grab a task reference and drop the proc reference as the proc ref + * shouldn't be held accross upcalls. + */ + task = p->task; + task_reference(task); + + proc_rele(p); + p = PROC_NULL; + #if CONFIG_MACF - error = mac_proc_check_get_task(kauth_cred_get(), p); + error = mac_proc_check_get_task(kauth_cred_get(), &pident); if (error) { error = KERN_FAILURE; goto tfpout; } #endif - /* Grab a task reference since the proc ref might be dropped if an upcall to task access server is made */ - task = p->task; - task_reference(task); - /* If we aren't root and target's task access port is set... */ if (!kauth_cred_issuser(kauth_cred_get()) && - p != current_proc() && + !is_current_proc && (task_get_task_access_port(task, &tfpport) == 0) && (tfpport != IPC_PORT_NULL)) { if (tfpport == IPC_PORT_DEAD) { @@ -891,14 +948,6 @@ task_for_pid( goto tfpout; } - /* - * Drop the proc_find proc ref before making an upcall - * to taskgated, since holding a proc_find - * ref while making an upcall can cause deadlock. 
- */ - proc_rele(p); - p = PROC_NULL; - /* Call up to the task access server */ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); @@ -1000,15 +1049,22 @@ task_name_for_pid( || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) { if (p->task != TASK_NULL) { + struct proc_ident pident = proc_ident(p); + + task_t task = p->task; + task_reference(p->task); + proc_rele(p); + p = PROC_NULL; #if CONFIG_MACF - error = mac_proc_check_get_task_name(kauth_cred_get(), p); + error = mac_proc_check_get_task_name(kauth_cred_get(), &pident); if (error) { - task_deallocate(p->task); + task_deallocate(task); goto noperm; } #endif - sright = (void *)convert_task_name_to_port(p->task); + sright = (void *)convert_task_name_to_port(task); + task = NULL; tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task())); } else { @@ -1041,6 +1097,256 @@ tnfpout: return error; } +/* + * Routine: task_inspect_for_pid + * Purpose: + * Get the task inspect port for another "process", named by its + * process ID on the same host as "target_task". 
+ */ +int +task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret) +{ + mach_port_name_t target_tport = args->target_tport; + int pid = args->pid; + user_addr_t task_addr = args->t; + + proc_t proc = PROC_NULL; + task_t t1 = TASK_NULL; + task_inspect_t task_insp = TASK_INSPECT_NULL; + mach_port_name_t tret = MACH_PORT_NULL; + ipc_port_t tfpport = MACH_PORT_NULL; + int error = 0; + void *sright = NULL; + boolean_t is_current_proc = FALSE; + struct proc_ident pident = {0}; + + /* Disallow inspect port for kernel_task */ + if (pid == 0) { + (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); + return EPERM; + } + + t1 = port_name_to_task(target_tport); + if (t1 == TASK_NULL) { + (void) copyout((char *) &t1, task_addr, sizeof(mach_port_name_t)); + return EINVAL; + } + + proc = proc_find(pid); + if (proc == PROC_NULL) { + error = ESRCH; + goto tifpout; + } + pident = proc_ident(proc); + is_current_proc = (proc == current_proc()); + + if (!(task_for_pid_posix_check(proc))) { + error = EPERM; + goto tifpout; + } + + task_insp = proc->task; + if (task_insp == TASK_INSPECT_NULL) { + goto tifpout; + } + + /* + * Grab a task reference and drop the proc reference before making any upcalls. + */ + task_reference(task_insp); + + proc_rele(proc); + proc = PROC_NULL; + + /* + * For now, it performs the same set of permission checks as task_for_pid. This + * will be addressed in rdar://problem/53478660 + */ +#if CONFIG_MACF + error = mac_proc_check_get_task(kauth_cred_get(), &pident); + if (error) { + error = EPERM; + goto tifpout; + } +#endif + + /* If we aren't root and target's task access port is set... 
*/ + if (!kauth_cred_issuser(kauth_cred_get()) && + !is_current_proc && + (task_get_task_access_port(task_insp, &tfpport) == 0) && + (tfpport != IPC_PORT_NULL)) { + if (tfpport == IPC_PORT_DEAD) { + error = EACCES; + goto tifpout; + } + + + /* Call up to the task access server */ + error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); + + if (error != MACH_MSG_SUCCESS) { + if (error == MACH_RCV_INTERRUPTED) { + error = EINTR; + } else { + error = EPERM; + } + goto tifpout; + } + } + + /* Check if the task has been corpsified */ + if (is_corpsetask(task_insp)) { + error = EACCES; + goto tifpout; + } + + /* could be IP_NULL, consumes a ref */ + sright = (void*) convert_task_inspect_to_port(task_insp); + task_insp = TASK_INSPECT_NULL; + tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task())); + +tifpout: + task_deallocate(t1); + (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); + if (proc != PROC_NULL) { + proc_rele(proc); + } + if (tfpport != IPC_PORT_NULL) { + ipc_port_release_send(tfpport); + } + if (task_insp != TASK_INSPECT_NULL) { + task_deallocate(task_insp); + } + + *ret = error; + return error; +} + +/* + * Routine: task_read_for_pid + * Purpose: + * Get the task read port for another "process", named by its + * process ID on the same host as "target_task". 
+ */ +int +task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret) +{ + mach_port_name_t target_tport = args->target_tport; + int pid = args->pid; + user_addr_t task_addr = args->t; + + proc_t proc = PROC_NULL; + task_t t1 = TASK_NULL; + task_read_t task_read = TASK_READ_NULL; + mach_port_name_t tret = MACH_PORT_NULL; + ipc_port_t tfpport = MACH_PORT_NULL; + int error = 0; + void *sright = NULL; + boolean_t is_current_proc = FALSE; + struct proc_ident pident = {0}; + + /* Disallow read port for kernel_task */ + if (pid == 0) { + (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); + return EPERM; + } + + t1 = port_name_to_task(target_tport); + if (t1 == TASK_NULL) { + (void) copyout((char *) &t1, task_addr, sizeof(mach_port_name_t)); + return EINVAL; + } + + proc = proc_find(pid); + if (proc == PROC_NULL) { + error = ESRCH; + goto trfpout; + } + pident = proc_ident(proc); + is_current_proc = (proc == current_proc()); + + if (!(task_for_pid_posix_check(proc))) { + error = EPERM; + goto trfpout; + } + + task_read = proc->task; + if (task_read == TASK_INSPECT_NULL) { + goto trfpout; + } + + /* + * Grab a task reference and drop the proc reference before making any upcalls. + */ + task_reference(task_read); + + proc_rele(proc); + proc = PROC_NULL; + + /* + * For now, it performs the same set of permission checks as task_for_pid. This + * will be addressed in rdar://problem/53478660 + */ +#if CONFIG_MACF + error = mac_proc_check_get_task(kauth_cred_get(), &pident); + if (error) { + error = EPERM; + goto trfpout; + } +#endif + + /* If we aren't root and target's task access port is set... 
*/ + if (!kauth_cred_issuser(kauth_cred_get()) && + !is_current_proc && + (task_get_task_access_port(task_read, &tfpport) == 0) && + (tfpport != IPC_PORT_NULL)) { + if (tfpport == IPC_PORT_DEAD) { + error = EACCES; + goto trfpout; + } + + + /* Call up to the task access server */ + error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); + + if (error != MACH_MSG_SUCCESS) { + if (error == MACH_RCV_INTERRUPTED) { + error = EINTR; + } else { + error = EPERM; + } + goto trfpout; + } + } + + /* Check if the task has been corpsified */ + if (is_corpsetask(task_read)) { + error = EACCES; + goto trfpout; + } + + /* could be IP_NULL, consumes a ref */ + sright = (void*) convert_task_read_to_port(task_read); + task_read = TASK_READ_NULL; + tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task())); + +trfpout: + task_deallocate(t1); + (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); + if (proc != PROC_NULL) { + proc_rele(proc); + } + if (tfpport != IPC_PORT_NULL) { + ipc_port_release_send(tfpport); + } + if (task_read != TASK_READ_NULL) { + task_deallocate(task_read); + } + + *ret = error; + return error; +} + kern_return_t pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) { @@ -1050,14 +1356,6 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) int error = 0; mach_port_t tfpport = MACH_PORT_NULL; -#if CONFIG_MACF - error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND); - if (error) { - error = EPERM; - goto out; - } -#endif - if (pid == 0) { error = EPERM; goto out; @@ -1075,6 +1373,14 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) goto out; } +#if CONFIG_MACF + error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND); + if (error) { + error = EPERM; + goto out; + } +#endif + target = targetproc->task; #ifndef CONFIG_EMBEDDED if (target != TASK_NULL) { @@ -1145,7 +1451,8 @@ 
debug_control_port_for_pid(struct debug_control_port_for_pid_args *args) ipc_port_t tfpport = MACH_PORT_NULL; ipc_port_t sright = NULL; int error = 0; - + boolean_t is_current_proc = FALSE; + struct proc_ident pident = {0}; AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID); AUDIT_ARG(pid, pid); @@ -1165,12 +1472,13 @@ debug_control_port_for_pid(struct debug_control_port_for_pid_args *args) return KERN_FAILURE; } - p = proc_find(pid); if (p == PROC_NULL) { error = KERN_FAILURE; goto tfpout; } + pident = proc_ident(p); + is_current_proc = (p == current_proc()); #if CONFIG_AUDIT AUDIT_ARG(process, p); @@ -1186,14 +1494,18 @@ debug_control_port_for_pid(struct debug_control_port_for_pid_args *args) goto tfpout; } - /* Grab a task reference since the proc ref might be dropped if an upcall to task access server is made */ + /* + * Grab a task reference and drop the proc reference before making any upcalls. + */ task = p->task; task_reference(task); + proc_rele(p); + p = PROC_NULL; if (!IOTaskHasEntitlement(current_task(), DEBUG_PORT_ENTITLEMENT)) { #if CONFIG_MACF - error = mac_proc_check_get_task(kauth_cred_get(), p); + error = mac_proc_check_get_task(kauth_cred_get(), &pident); if (error) { error = KERN_FAILURE; goto tfpout; @@ -1202,7 +1514,7 @@ debug_control_port_for_pid(struct debug_control_port_for_pid_args *args) /* If we aren't root and target's task access port is set... */ if (!kauth_cred_issuser(kauth_cred_get()) && - p != current_proc() && + !is_current_proc && (task_get_task_access_port(task, &tfpport) == 0) && (tfpport != IPC_PORT_NULL)) { if (tfpport == IPC_PORT_DEAD) { @@ -1210,13 +1522,6 @@ debug_control_port_for_pid(struct debug_control_port_for_pid_args *args) goto tfpout; } - /* - * Drop the proc_find proc ref before making an upcall - * to taskgated, since holding a proc_find - * ref while making an upcall can cause deadlock. 
- */ - proc_rele(p); - p = PROC_NULL; /* Call up to the task access server */ error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); @@ -1276,14 +1581,6 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) int error = 0; mach_port_t tfpport = MACH_PORT_NULL; -#if CONFIG_MACF - error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME); - if (error) { - error = EPERM; - goto out; - } -#endif - if (pid == 0) { error = EPERM; goto out; @@ -1301,6 +1598,14 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) goto out; } +#if CONFIG_MACF + error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME); + if (error) { + error = EPERM; + goto out; + } +#endif + target = targetproc->task; #ifndef CONFIG_EMBEDDED if (target != TASK_NULL) { @@ -1329,11 +1634,11 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) } #endif -#if CONFIG_EMBEDDED +#if !XNU_TARGET_OS_OSX #if SOCKETS resume_proc_sockets(targetproc); #endif /* SOCKETS */ -#endif /* CONFIG_EMBEDDED */ +#endif /* !XNU_TARGET_OS_OSX */ task_reference(target); @@ -1387,14 +1692,6 @@ pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret #pragma unused(pid) #else -#if CONFIG_MACF - error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE); - if (error) { - error = EPERM; - goto out; - } -#endif - /* * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check(). 
*/ @@ -1413,6 +1710,15 @@ pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret } } +#if CONFIG_MACF + //Note that targetproc may be null + error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE); + if (error) { + error = EPERM; + goto out; + } +#endif + if (pid == -2) { vm_pageout_anonymous_pages(); } else if (pid == -1) { @@ -1437,8 +1743,7 @@ out: int networking_memstatus_callout(proc_t p, uint32_t status) { - struct filedesc *fdp; - int i; + struct fileproc *fp; /* * proc list lock NOT held @@ -1449,19 +1754,13 @@ networking_memstatus_callout(proc_t p, uint32_t status) LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); proc_fdlock(p); - fdp = p->p_fd; - for (i = 0; i < fdp->fd_nfiles; i++) { - struct fileproc *fp; - fp = fdp->fd_ofiles[i]; - if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0) { - continue; - } - switch (FILEGLOB_DTYPE(fp->f_fglob)) { + fdt_foreach(fp, p) { + switch (FILEGLOB_DTYPE(fp->fp_glob)) { #if NECP case DTYPE_NETPOLICY: necp_fd_memstatus(p, status, - (struct necp_fd_data *)fp->f_fglob->fg_data); + (struct necp_fd_data *)fp->fp_glob->fg_data); break; #endif /* NECP */ default: @@ -1480,27 +1779,20 @@ networking_defunct_callout(proc_t p, void *arg) struct pid_shutdown_sockets_args *args = arg; int pid = args->pid; int level = args->level; - struct filedesc *fdp; - int i; + struct fileproc *fp; proc_fdlock(p); - fdp = p->p_fd; - for (i = 0; i < fdp->fd_nfiles; i++) { - struct fileproc *fp = fdp->fd_ofiles[i]; - struct fileglob *fg; - if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0) { - continue; - } + fdt_foreach(fp, p) { + struct fileglob *fg = fp->fp_glob; - fg = fp->f_fglob; switch (FILEGLOB_DTYPE(fg)) { case DTYPE_SOCKET: { struct socket *so = (struct socket *)fg->fg_data; if (p->p_pid == pid || so->last_pid == pid || ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) { /* Call networking stack with socket and level */ - (void) socket_defunct(p, so, level); + 
(void)socket_defunct(p, so, level); } break; } @@ -1537,14 +1829,6 @@ pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args * goto out; } -#if CONFIG_MACF - error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS); - if (error) { - error = EPERM; - goto out; - } -#endif - targetproc = proc_find(pid); if (targetproc == PROC_NULL) { error = ESRCH; @@ -1557,6 +1841,14 @@ pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args * goto out; } +#if CONFIG_MACF + error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS); + if (error) { + error = EPERM; + goto out; + } +#endif + proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, networking_defunct_callout, args, NULL, NULL); @@ -1652,7 +1944,7 @@ shared_region_check_np( { vm_shared_region_t shared_region; mach_vm_offset_t start_address = 0; - int error; + int error = 0; kern_return_t kr; SHARED_REGION_TRACE_DEBUG( @@ -1665,16 +1957,27 @@ shared_region_check_np( shared_region = vm_shared_region_get(current_task()); if (shared_region != NULL) { /* retrieve address of its first mapping... */ - kr = vm_shared_region_start_address(shared_region, - &start_address); + kr = vm_shared_region_start_address(shared_region, &start_address); if (kr != KERN_SUCCESS) { error = ENOMEM; } else { +#if __has_feature(ptrauth_calls) + /* + * Remap any section of the shared library that + * has authenticated pointers into private memory. + */ + if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) { + error = ENOMEM; + } +#endif /* __has_feature(ptrauth_calls) */ + /* ... 
and give it to the caller */ - error = copyout(&start_address, - (user_addr_t) uap->start_address, - sizeof(start_address)); - if (error) { + if (error == 0) { + error = copyout(&start_address, + (user_addr_t) uap->start_address, + sizeof(start_address)); + } + if (error != 0) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] " "check_np(0x%llx) " @@ -1701,386 +2004,598 @@ shared_region_check_np( } -int -shared_region_copyin_mappings( - struct proc *p, - user_addr_t user_mappings, - unsigned int mappings_count, - struct shared_file_mapping_np *mappings) +static int +shared_region_copyin( + struct proc *p, + user_addr_t user_addr, + unsigned int count, + unsigned int element_size, + void *kernel_data) { int error = 0; - vm_size_t mappings_size = 0; + vm_size_t size = count * element_size; - /* get the list of mappings the caller wants us to establish */ - mappings_size = (vm_size_t) (mappings_count * sizeof(mappings[0])); - error = copyin(user_mappings, - mappings, - mappings_size); + error = copyin(user_addr, kernel_data, size); if (error) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(): " - "copyin(0x%llx, %d) failed (error=%d)\n", + "copyin(0x%llx, %ld) failed (error=%d)\n", (void *)VM_KERNEL_ADDRPERM(current_thread()), p->p_pid, p->p_comm, - (uint64_t)user_mappings, mappings_count, error)); + (uint64_t)user_addr, (long)size, error)); } return error; } + +#define _SR_FILE_MAPPINGS_MAX_FILES 2 + +/* forward declaration */ +__attribute__((noinline)) +static void shared_region_map_and_slide_cleanup( + struct proc *p, + uint32_t files_count, + struct _sr_file_mappings *sr_file_mappings, + struct vm_shared_region *shared_region, + struct vnode *scdir_vp); + /* - * shared_region_map_np() - * - * This system call is intended for dyld. - * - * dyld uses this to map a shared cache file into a shared region. - * This is usually done only the first time a shared cache is needed. 
- * Subsequent processes will just use the populated shared region without - * requiring any further setup. + * Setup part of _shared_region_map_and_slide(). + * It had to be broken out of _shared_region_map_and_slide() to + * prevent compiler inlining from blowing out the stack. */ -int -_shared_region_map_and_slide( - struct proc *p, - int fd, - uint32_t mappings_count, - struct shared_file_mapping_np *mappings, - uint32_t slide, - user_addr_t slide_start, - user_addr_t slide_size) +__attribute__((noinline)) +static int +shared_region_map_and_slide_setup( + struct proc *p, + uint32_t files_count, + struct shared_file_np *files, + uint32_t mappings_count, + struct shared_file_mapping_slide_np *mappings, + struct _sr_file_mappings **sr_file_mappings, + struct vm_shared_region **shared_region_ptr, + struct vnode **scdir_vp, + struct vnode *rdir_vp) { - int error; - kern_return_t kr; - struct fileproc *fp; - struct vnode *vp, *root_vp, *scdir_vp; + int error = 0; + struct _sr_file_mappings *srfmp; + uint32_t mappings_next; struct vnode_attr va; off_t fs; - memory_object_size_t file_size; #if CONFIG_MACF vm_prot_t maxprot = VM_PROT_ALL; #endif - memory_object_control_t file_control; - struct vm_shared_region *shared_region; uint32_t i; + struct vm_shared_region *shared_region; SHARED_REGION_TRACE_DEBUG( ("shared_region: %p [%d(%s)] -> map\n", (void *)VM_KERNEL_ADDRPERM(current_thread()), p->p_pid, p->p_comm)); - shared_region = NULL; - fp = NULL; - vp = NULL; - scdir_vp = NULL; - - /* get file structure from file descriptor */ - error = fp_lookup(p, fd, &fp, 0); - if (error) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map: " - "fd=%d lookup failed (error=%d)\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, fd, error)); + if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) { + error = E2BIG; goto done; } - - /* make sure we're attempting to map a vnode */ - if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { - 
SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map: " - "fd=%d not a vnode (type=%d)\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - fd, FILEGLOB_DTYPE(fp->f_fglob))); + if (files_count == 0) { error = EINVAL; goto done; } - - /* we need at least read permission on the file */ - if (!(fp->f_fglob->fg_flag & FREAD)) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map: " - "fd=%d not readable\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, fd)); - error = EPERM; + *sr_file_mappings = kheap_alloc(KHEAP_TEMP, files_count * sizeof(struct _sr_file_mappings), Z_WAITOK); + if (*sr_file_mappings == NULL) { + error = ENOMEM; goto done; } + bzero(*sr_file_mappings, files_count * sizeof(struct _sr_file_mappings)); + mappings_next = 0; + for (i = 0; i < files_count; i++) { + srfmp = &(*sr_file_mappings)[i]; + srfmp->fd = files[i].sf_fd; + srfmp->mappings_count = files[i].sf_mappings_count; + srfmp->mappings = &mappings[mappings_next]; + mappings_next += srfmp->mappings_count; + if (mappings_next > mappings_count) { + error = EINVAL; + goto done; + } + srfmp->slide = files[i].sf_slide; + } - /* get vnode from file structure */ - error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data); - if (error) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map: " - "fd=%d getwithref failed (error=%d)\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, fd, error)); - goto done; + if (scdir_enforce) { + /* get vnode for scdir_path */ + error = vnode_lookup(scdir_path, 0, scdir_vp, vfs_context_current()); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)]: " + "vnode_lookup(%s) failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + scdir_path, error)); + goto done; + } } - vp = (struct vnode *) fp->f_fglob->fg_data; - /* make sure the vnode is a regular file */ - if (vp->v_type != VREG) { + /* get the process's 
shared region (setup in vm_map_exec()) */ + shared_region = vm_shared_region_trim_and_get(current_task()); + *shared_region_ptr = shared_region; + if (shared_region == NULL) { SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "not a file (type=%d)\n", + ("shared_region: %p [%d(%s)] map(): " + "no shared region\n", (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), - vp->v_name, vp->v_type)); + p->p_pid, p->p_comm)); error = EINVAL; goto done; } -#if CONFIG_MACF - /* pass in 0 for the offset argument because AMFI does not need the offset - * of the shared cache */ - error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), - fp->f_fglob, VM_PROT_ALL, MAP_FILE, 0, &maxprot); - if (error) { - goto done; - } -#endif /* MAC */ - - /* The calling process cannot be chroot-ed. */ - root_vp = p->p_fd->fd_rdir; - if (root_vp == NULL) { - root_vp = rootvnode; - } else { + /* + * Check the shared region matches the current root + * directory of this process. Deny the mapping to + * avoid tainting the shared region with something that + * doesn't quite belong into it. + */ + struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region); + if (sr_vnode != NULL ? 
rdir_vp != sr_vnode : rdir_vp != rootvnode) { SHARED_REGION_TRACE_ERROR( - ("calling process [%d(%s)] is chroot-ed, permission denied\n", - p->p_pid, p->p_comm)); + ("shared_region: map(%p) root_dir mismatch\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()))); error = EPERM; goto done; } - /* The shared cache file must be owned by root */ - VATTR_INIT(&va); - VATTR_WANTED(&va, va_uid); - VATTR_WANTED(&va, va_flags); - error = vnode_getattr(vp, &va, vfs_context_current()); - if (error) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "vnode_getattr(%p) failed (error=%d)\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, - (void *)VM_KERNEL_ADDRPERM(vp), error)); - goto done; - } - if (va.va_uid != 0) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "owned by uid=%d instead of 0\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), - vp->v_name, va.va_uid)); - error = EPERM; - goto done; - } -#if CONFIG_CSR - if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0 && - !(va.va_flags & SF_RESTRICTED)) { + for (srfmp = &(*sr_file_mappings)[0]; + srfmp < &(*sr_file_mappings)[files_count]; + srfmp++) { + if (srfmp->mappings_count == 0) { + /* no mappings here... 
*/ + continue; + } + + /* get file structure from file descriptor */ + error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map: " + "fd=%d lookup failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, srfmp->fd, error)); + goto done; + } + + /* we need at least read permission on the file */ + if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map: " + "fd=%d not readable\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, srfmp->fd)); + error = EPERM; + goto done; + } + + /* get vnode from file structure */ + error = vnode_getwithref((vnode_t) srfmp->fp->fp_glob->fg_data); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map: " + "fd=%d getwithref failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, srfmp->fd, error)); + goto done; + } + srfmp->vp = (struct vnode *) srfmp->fp->fp_glob->fg_data; + + /* make sure the vnode is a regular file */ + if (srfmp->vp->v_type != VREG) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "not a file (type=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name, srfmp->vp->v_type)); + error = EINVAL; + goto done; + } + +#if CONFIG_MACF + /* pass in 0 for the offset argument because AMFI does not need the offset + * of the shared cache */ + error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), + srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE, 0, &maxprot); + if (error) { + goto done; + } +#endif /* MAC */ + +#if XNU_TARGET_OS_OSX && defined(__arm64__) /* - * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and - * the shared cache file is NOT SIP-protected, so reject the - * mapping request + * Check if the shared cache is in the 
trust cache; + * if so, we can skip the root ownership check. */ - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'), " - "vnode is not SIP-protected. \n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, (void *)VM_KERNEL_ADDRPERM(vp), - vp->v_name)); - error = EPERM; - goto done; - } -#else - /* Devices without SIP/ROSP need to make sure that the shared cache is on the root volume. */ - if (vp->v_mount != root_vp->v_mount) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "not on process's root volume\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); - error = EPERM; - goto done; - } -#endif /* CONFIG_CSR */ +#if DEVELOPMENT || DEBUG + /* + * Skip both root ownership and trust cache check if + * enforcement is disabled. + */ + if (!cs_system_enforcement()) { + goto after_root_check; + } +#endif /* DEVELOPMENT || DEBUG */ + struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0); + if (blob == NULL) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "missing CS blob\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name)); + goto root_check; + } + const uint8_t *cdhash = csblob_get_cdhash(blob); + if (cdhash == NULL) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "missing cdhash\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name)); + goto root_check; + } + uint32_t result = pmap_lookup_in_static_trust_cache(cdhash); + boolean_t in_trust_cache = result & (TC_LOOKUP_FOUND << TC_LOOKUP_RESULT_SHIFT); + if (!in_trust_cache) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "not in trust cache\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void 
*)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name)); + goto root_check; + } + goto after_root_check; +root_check: +#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */ - if (scdir_enforce) { - /* get vnode for scdir_path */ - error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current()); + /* The shared cache file must be owned by root */ + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + error = vnode_getattr(srfmp->vp, &va, vfs_context_current()); if (error) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "vnode_lookup(%s) failed (error=%d)\n", + "vnode_getattr(%p) failed (error=%d)\n", (void *)VM_KERNEL_ADDRPERM(current_thread()), p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, - scdir_path, error)); + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + error)); goto done; } - - /* ensure parent is scdir_vp */ - if (vnode_parent(vp) != scdir_vp) { + if (va.va_uid != 0) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "shared cache file not in %s\n", + "owned by uid=%d instead of 0\n", (void *)VM_KERNEL_ADDRPERM(current_thread()), p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), - vp->v_name, scdir_path)); + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name, va.va_uid)); error = EPERM; goto done; } - } - /* get vnode size */ - error = vnode_size(vp, &fs, vfs_context_current()); - if (error) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "vnode_size(%p) failed (error=%d)\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, - (void *)VM_KERNEL_ADDRPERM(vp), error)); - goto done; - } - file_size = fs; +#if XNU_TARGET_OS_OSX && defined(__arm64__) +after_root_check: +#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */ - /* get the file's memory object handle */ - file_control = ubc_getobject(vp, UBC_HOLDOBJECT); - if 
(file_control == MEMORY_OBJECT_CONTROL_NULL) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "no memory object\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); - error = EINVAL; - goto done; - } - - /* check that the mappings are properly covered by code signatures */ - if (!cs_system_enforcement()) { - /* code signing is not enforced: no need to check */ - } else { - for (i = 0; i < mappings_count; i++) { - if (mappings[i].sfm_init_prot & VM_PROT_ZF) { - /* zero-filled mapping: not backed by the file */ - continue; +#if CONFIG_CSR + if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) { + VATTR_INIT(&va); + VATTR_WANTED(&va, va_flags); + error = vnode_getattr(srfmp->vp, &va, vfs_context_current()); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "vnode_getattr(%p) failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + error)); + goto done; } - if (ubc_cs_is_range_codesigned(vp, - mappings[i].sfm_file_offset, - mappings[i].sfm_size)) { - /* this mapping is fully covered by code signatures */ - continue; + + if (!(va.va_flags & SF_RESTRICTED)) { + /* + * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and + * the shared cache file is NOT SIP-protected, so reject the + * mapping request + */ + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'), " + "vnode is not SIP-protected. \n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name)); + error = EPERM; + goto done; } + } +#else /* CONFIG_CSR */ + /* Devices without SIP/ROSP need to make sure that the shared cache is on the root volume. 
*/ + + assert(rdir_vp != NULL); + if (srfmp->vp->v_mount != rdir_vp->v_mount) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] " - "is not code-signed\n", + "not on process's root volume\n", (void *)VM_KERNEL_ADDRPERM(current_thread()), p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, - i, mappings_count, - mappings[i].sfm_address, - mappings[i].sfm_size, - mappings[i].sfm_file_offset, - mappings[i].sfm_max_prot, - mappings[i].sfm_init_prot)); - error = EINVAL; + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name)); + error = EPERM; goto done; } - } +#endif /* CONFIG_CSR */ - /* get the process's shared region (setup in vm_map_exec()) */ - shared_region = vm_shared_region_trim_and_get(current_task()); - if (shared_region == NULL) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "no shared region\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); - error = EINVAL; - goto done; - } + if (scdir_enforce) { + /* ensure parent is scdir_vp */ + assert(*scdir_vp != NULL); + if (vnode_parent(srfmp->vp) != *scdir_vp) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "shared cache file not in %s\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name, scdir_path)); + error = EPERM; + goto done; + } + } - /* map the file into that shared region's submap */ - kr = vm_shared_region_map_file(shared_region, - mappings_count, - mappings, - file_control, - file_size, - (void *) p->p_fd->fd_rdir, - slide, - slide_start, - slide_size); - if (kr != KERN_SUCCESS) { - SHARED_REGION_TRACE_ERROR( - ("shared_region: %p [%d(%s)] map(%p:'%s'): " - "vm_shared_region_map_file() failed kr=0x%x\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm, - (void 
*)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr)); - switch (kr) { - case KERN_INVALID_ADDRESS: - error = EFAULT; - break; - case KERN_PROTECTION_FAILURE: - error = EPERM; - break; - case KERN_NO_SPACE: - error = ENOMEM; - break; - case KERN_FAILURE: - case KERN_INVALID_ARGUMENT: - default: + /* get vnode size */ + error = vnode_size(srfmp->vp, &fs, vfs_context_current()); + if (error) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "vnode_size(%p) failed (error=%d)\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), error)); + goto done; + } + srfmp->file_size = fs; + + /* get the file's memory object handle */ + srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT); + if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "no memory object\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name)); error = EINVAL; - break; + goto done; + } + + /* check that the mappings are properly covered by code signatures */ + if (!cs_system_enforcement()) { + /* code signing is not enforced: no need to check */ + } else { + for (i = 0; i < srfmp->mappings_count; i++) { + if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) { + /* zero-filled mapping: not backed by the file */ + continue; + } + if (ubc_cs_is_range_codesigned(srfmp->vp, + srfmp->mappings[i].sms_file_offset, + srfmp->mappings[i].sms_size)) { + /* this mapping is fully covered by code signatures */ + continue; + } + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(%p:'%s'): " + "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] " + "is not code-signed\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + (void *)VM_KERNEL_ADDRPERM(srfmp->vp), + srfmp->vp->v_name, + i, 
srfmp->mappings_count, + srfmp->mappings[i].sms_address, + srfmp->mappings[i].sms_size, + srfmp->mappings[i].sms_file_offset, + srfmp->mappings[i].sms_max_prot, + srfmp->mappings[i].sms_init_prot)); + error = EINVAL; + goto done; + } } - goto done; } +done: + if (error != 0) { + shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region, *scdir_vp); + *sr_file_mappings = NULL; + *shared_region_ptr = NULL; + *scdir_vp = NULL; + } + return error; +} - error = 0; +/* + * shared_region_map_np() + * + * This system call is intended for dyld. + * + * dyld uses this to map a shared cache file into a shared region. + * This is usually done only the first time a shared cache is needed. + * Subsequent processes will just use the populated shared region without + * requiring any further setup. + */ +static int +_shared_region_map_and_slide( + struct proc *p, + uint32_t files_count, + struct shared_file_np *files, + uint32_t mappings_count, + struct shared_file_mapping_slide_np *mappings) +{ + int error = 0; + kern_return_t kr = KERN_SUCCESS; + struct _sr_file_mappings *sr_file_mappings = NULL; + struct vnode *scdir_vp = NULL; + struct vnode *rdir_vp = NULL; + struct vm_shared_region *shared_region = NULL; - vnode_lock_spin(vp); + /* + * Get a reference to the current proc's root dir. + * Need this to prevent racing with chroot. + */ + proc_fdlock(p); + rdir_vp = p->p_fd->fd_rdir; + if (rdir_vp == NULL) { + rdir_vp = rootvnode; + } + assert(rdir_vp != NULL); + vnode_get(rdir_vp); + proc_fdunlock(p); - vp->v_flag |= VSHARED_DYLD; + /* + * Turn files, mappings into sr_file_mappings and other setup. 
+ */ + error = shared_region_map_and_slide_setup(p, files_count, + files, mappings_count, mappings, + &sr_file_mappings, &shared_region, &scdir_vp, rdir_vp); + if (error != 0) { + vnode_put(rdir_vp); + return error; + } - vnode_unlock(vp); + /* map the file(s) into that shared region's submap */ + kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings); + if (kr != KERN_SUCCESS) { + SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): " + "vm_shared_region_map_file() failed kr=0x%x\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, kr)); + } - /* update the vnode's access time */ - if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) { - VATTR_INIT(&va); - nanotime(&va.va_access_time); - VATTR_SET_ACTIVE(&va, va_access_time); - vnode_setattr(vp, &va, vfs_context_current()); + /* convert kern_return_t to errno */ + switch (kr) { + case KERN_SUCCESS: + error = 0; + break; + case KERN_INVALID_ADDRESS: + error = EFAULT; + break; + case KERN_PROTECTION_FAILURE: + error = EPERM; + break; + case KERN_NO_SPACE: + error = ENOMEM; + break; + case KERN_FAILURE: + case KERN_INVALID_ARGUMENT: + default: + error = EINVAL; + break; } - if (p->p_flag & P_NOSHLIB) { - /* signal that this process is now using split libraries */ + /* + * Mark that this process is now using split libraries. + */ + if (error == 0 && (p->p_flag & P_NOSHLIB)) { OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag); } -done: - if (vp != NULL) { - /* - * release the vnode... 
- * ubc_map() still holds it for us in the non-error case - */ - (void) vnode_put(vp); - vp = NULL; - } - if (fp != NULL) { - /* release the file descriptor */ - fp_drop(p, fd, fp, 0); - fp = NULL; + vnode_put(rdir_vp); + shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region, scdir_vp); + + SHARED_REGION_TRACE_DEBUG( + ("shared_region: %p [%d(%s)] <- map\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); + + return error; +} + +/* + * Clean up part of _shared_region_map_and_slide() + * It had to be broken out of _shared_region_map_and_slide() to + * prevent compiler inlining from blowing out the stack. + */ +__attribute__((noinline)) +static void +shared_region_map_and_slide_cleanup( + struct proc *p, + uint32_t files_count, + struct _sr_file_mappings *sr_file_mappings, + struct vm_shared_region *shared_region, + struct vnode *scdir_vp) +{ + struct _sr_file_mappings *srfmp; + struct vnode_attr va; + + if (sr_file_mappings != NULL) { + for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) { + if (srfmp->vp != NULL) { + vnode_lock_spin(srfmp->vp); + srfmp->vp->v_flag |= VSHARED_DYLD; + vnode_unlock(srfmp->vp); + + /* update the vnode's access time */ + if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) { + VATTR_INIT(&va); + nanotime(&va.va_access_time); + VATTR_SET_ACTIVE(&va, va_access_time); + vnode_setattr(srfmp->vp, &va, vfs_context_current()); + } + +#if NAMEDSTREAMS + /* + * If the shared cache is compressed, it may + * have a namedstream vnode instantiated for + * for it. That namedstream vnode will also + * have to be marked with VSHARED_DYLD. + */ + if (vnode_hasnamedstreams(srfmp->vp)) { + vnode_t svp; + if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME, + NS_OPEN, 0, vfs_context_kernel()) == 0) { + vnode_lock_spin(svp); + svp->v_flag |= VSHARED_DYLD; + vnode_unlock(svp); + vnode_put(svp); + } + } +#endif /* NAMEDSTREAMS */ + /* + * release the vnode... 
+ * ubc_map() still holds it for us in the non-error case + */ + (void) vnode_put(srfmp->vp); + srfmp->vp = NULL; + } + if (srfmp->fp != NULL) { + /* release the file descriptor */ + fp_drop(p, srfmp->fd, srfmp->fp, 0); + srfmp->fp = NULL; + } + } + kheap_free(KHEAP_TEMP, sr_file_mappings, files_count * sizeof(*sr_file_mappings)); } + if (scdir_vp != NULL) { (void)vnode_put(scdir_vp); scdir_vp = NULL; @@ -2089,34 +2604,30 @@ done: if (shared_region != NULL) { vm_shared_region_deallocate(shared_region); } +} - SHARED_REGION_TRACE_DEBUG( - ("shared_region: %p [%d(%s)] <- map\n", - (void *)VM_KERNEL_ADDRPERM(current_thread()), - p->p_pid, p->p_comm)); - return error; -} +#define SFM_MAX 1024 /* max mapping structs allowed to pass in */ +/* + * This interface is used by dyld to map shared caches which are + * for any architecture which doesn't have run time support of pointer + * authentication. Note dyld could also use the new ...map_and_slide_2_np() + * call for this case, however, it just doesn't do that yet. + */ int shared_region_map_and_slide_np( - struct proc *p, - struct shared_region_map_and_slide_np_args *uap, - __unused int *retvalp) + struct proc *p, + struct shared_region_map_and_slide_np_args *uap, + __unused int *retvalp) { - struct shared_file_mapping_np *mappings; - unsigned int mappings_count = uap->count; - kern_return_t kr = KERN_SUCCESS; - uint32_t slide = uap->slide; - -#define SFM_MAX_STACK 8 - struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK]; - - /* Is the process chrooted?? 
*/ - if (p->p_fd->fd_rdir != NULL) { - kr = EINVAL; - goto done; - } + unsigned int mappings_count = uap->count; + unsigned int m; + uint32_t slide = uap->slide; + struct shared_file_np shared_files[1]; + struct shared_file_mapping_np legacy_mapping; + struct shared_file_mapping_slide_np *mappings = NULL; + kern_return_t kr = KERN_SUCCESS; if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) { if (kr == KERN_INVALID_ARGUMENT) { @@ -2138,32 +2649,199 @@ shared_region_map_and_slide_np( p->p_pid, p->p_comm)); kr = 0; /* no mappings: we're done ! */ goto done; - } else if (mappings_count <= SFM_MAX_STACK) { - mappings = &stack_mappings[0]; + } else if (mappings_count <= SFM_MAX) { + mappings = kheap_alloc(KHEAP_TEMP, + mappings_count * sizeof(mappings[0]), Z_WAITOK); + if (mappings == NULL) { + kr = KERN_RESOURCE_SHORTAGE; + goto done; + } + } else { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(): " + "too many mappings (%d) max %d\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + mappings_count, SFM_MAX)); + kr = KERN_FAILURE; + goto done; + } + + /* + * Read in the mappings and translate to new format. 
+ */ + for (m = 0; m < mappings_count; ++m) { + user_addr_t from_uaddr = uap->mappings + (m * sizeof(struct shared_file_mapping_np)); + kr = shared_region_copyin(p, from_uaddr, 1, sizeof(legacy_mapping), &legacy_mapping); + if (kr != 0) { + goto done; + } + mappings[m].sms_address = legacy_mapping.sfm_address; + mappings[m].sms_size = legacy_mapping.sfm_size; + mappings[m].sms_file_offset = legacy_mapping.sfm_file_offset; + mappings[m].sms_max_prot = legacy_mapping.sfm_max_prot; + mappings[m].sms_init_prot = legacy_mapping.sfm_init_prot; + mappings[m].sms_slide_size = uap->slide_size; + mappings[m].sms_slide_start = uap->slide_start; + } + + bzero(shared_files, sizeof(shared_files)); + shared_files[0].sf_fd = uap->fd; + shared_files[0].sf_mappings_count = mappings_count; + shared_files[0].sf_slide = slide; + + kr = _shared_region_map_and_slide(p, + 1, /* # of files to map */ + &shared_files[0], /* files to map */ + mappings_count, + mappings); + +done: + if (mappings != NULL) { + kheap_free(KHEAP_TEMP, mappings, mappings_count * sizeof(mappings[0])); + mappings = NULL; + } + return kr; +} + +/* + * This interface for setting up shared region mappings is what dyld + * uses for shared caches that have __AUTH sections. All other shared + * caches use the non _2 version. + * + * The slide used for shared regions setup using this interface is done differently + * from the old interface. The slide value passed in the shared_files_np represents + * a max value. The kernel will choose a random value based on that, then use it + * for all shared regions. 
+ */ +#define SLIDE_AMOUNT_MASK ~PAGE_MASK + +int +shared_region_map_and_slide_2_np( + struct proc *p, + struct shared_region_map_and_slide_2_np_args *uap, + __unused int *retvalp) +{ + unsigned int files_count; + struct shared_file_np *shared_files = NULL; + unsigned int mappings_count; + struct shared_file_mapping_slide_np *mappings = NULL; + kern_return_t kr = KERN_SUCCESS; + boolean_t should_slide_mappings = TRUE; + + files_count = uap->files_count; + mappings_count = uap->mappings_count; + + + if (files_count == 0) { + SHARED_REGION_TRACE_INFO( + ("shared_region: %p [%d(%s)] map(): " + "no files\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); + kr = 0; /* no files to map: we're done ! */ + goto done; + } else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) { + shared_files = kheap_alloc(KHEAP_TEMP, + files_count * sizeof(shared_files[0]), Z_WAITOK); + if (shared_files == NULL) { + kr = KERN_RESOURCE_SHORTAGE; + goto done; + } } else { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(): " - "too many mappings (%d)\n", + "too many files (%d) max %d\n", (void *)VM_KERNEL_ADDRPERM(current_thread()), p->p_pid, p->p_comm, - mappings_count)); + files_count, _SR_FILE_MAPPINGS_MAX_FILES)); kr = KERN_FAILURE; goto done; } - if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) { + if (mappings_count == 0) { + SHARED_REGION_TRACE_INFO( + ("shared_region: %p [%d(%s)] map(): " + "no mappings\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); + kr = 0; /* no mappings: we're done ! 
*/ + goto done; + } else if (mappings_count <= SFM_MAX) { + mappings = kheap_alloc(KHEAP_TEMP, + mappings_count * sizeof(mappings[0]), Z_WAITOK); + if (mappings == NULL) { + kr = KERN_RESOURCE_SHORTAGE; + goto done; + } + } else { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(): " + "too many mappings (%d) max %d\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm, + mappings_count, SFM_MAX)); + kr = KERN_FAILURE; goto done; } + kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files); + if (kr != KERN_SUCCESS) { + goto done; + } - kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings, - slide, - uap->slide_start, uap->slide_size); + kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings); if (kr != KERN_SUCCESS) { - return kr; + goto done; } + if (should_slide_mappings) { + uint32_t max_slide = shared_files[0].sf_slide; + uint32_t random_val; + uint32_t slide_amount; + + if (max_slide != 0) { + read_random(&random_val, sizeof random_val); + slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK); + } else { + slide_amount = 0; + } + + /* + * Fix up the mappings to reflect the desired slide. 
+ */ + unsigned int f; + unsigned int m = 0; + unsigned int i; + for (f = 0; f < files_count; ++f) { + shared_files[f].sf_slide = slide_amount; + for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) { + if (m >= mappings_count) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: %p [%d(%s)] map(): " + "mapping count argument was too small\n", + (void *)VM_KERNEL_ADDRPERM(current_thread()), + p->p_pid, p->p_comm)); + kr = KERN_FAILURE; + goto done; + } + mappings[m].sms_address += slide_amount; + if (mappings[m].sms_slide_size != 0) { + mappings[m].sms_slide_start += slide_amount; /* m is the global mapping index; i only counts within file f */ + } + } + } + } + kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings); done: + if (shared_files != NULL) { + kheap_free(KHEAP_TEMP, shared_files, files_count * sizeof(shared_files[0])); + shared_files = NULL; + } + if (mappings != NULL) { + kheap_free(KHEAP_TEMP, mappings, mappings_count * sizeof(mappings[0])); + mappings = NULL; + } return kr; } @@ -2208,6 +2886,15 @@ SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_kern_lpage_count, 0, "kernel used large pages"); #if DEVELOPMENT || DEBUG +#if __ARM_MIXED_PAGE_SIZE__ +static int vm_mixed_pagesize_supported = 1; +#else +static int vm_mixed_pagesize_supported = 0; +#endif /*__ARM_MIXED_PAGE_SIZE__ */ +SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize"); + + extern uint64_t get_pages_grabbed_count(void); static int @@ -2242,7 +2929,7 @@ SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | C &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated"); SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */ -#endif +#endif /* DEVELOPMENT || DEBUG */ extern int 
madvise_free_debug; SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED, @@ -2430,15 +3117,15 @@ kas_info(struct proc *p, struct kas_info_args *uap, int *retval __unused) { -#ifdef SECURE_KERNEL +#ifndef CONFIG_KAS_INFO (void)p; (void)uap; return ENOTSUP; -#else /* !SECURE_KERNEL */ +#else /* CONFIG_KAS_INFO */ int selector = uap->selector; user_addr_t valuep = uap->value; user_addr_t sizep = uap->size; - user_size_t size; + user_size_t size, rsize; int error; if (!kauth_cred_issuser(kauth_cred_get())) { @@ -2474,18 +3161,47 @@ kas_info(struct proc *p, return EINVAL; } - if (IS_64BIT_PROCESS(p)) { - user64_size_t size64 = (user64_size_t)size; - error = copyout(&size64, sizep, sizeof(size64)); - } else { - user32_size_t size32 = (user32_size_t)size; - error = copyout(&size32, sizep, sizeof(size32)); - } + error = copyout(&slide, valuep, sizeof(slide)); if (error) { return error; } + rsize = size; + } + break; + case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR: + { + uint32_t i; + kernel_mach_header_t *mh = &_mh_execute_header; + struct load_command *cmd; + cmd = (struct load_command*) &mh[1]; + uint64_t *bases; + rsize = mh->ncmds * sizeof(uint64_t); + + /* + * Return the size if no data was passed + */ + if (valuep == 0) { + break; + } + + if (rsize > size) { + return EINVAL; + } + + bases = kheap_alloc(KHEAP_TEMP, rsize, Z_WAITOK | Z_ZERO); + + for (i = 0; i < mh->ncmds; i++) { + if (cmd->cmd == LC_SEGMENT_KERNEL) { + __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd); + bases[i] = (uint64_t)sg->vmaddr; + } + cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize); + } + + error = copyout(bases, valuep, rsize); + + kheap_free(KHEAP_TEMP, bases, rsize); - error = copyout(&slide, valuep, sizeof(slide)); if (error) { return error; } @@ -2495,10 +3211,34 @@ kas_info(struct proc *p, return EINVAL; } - return 0; -#endif /* !SECURE_KERNEL */ + if (IS_64BIT_PROCESS(p)) { + user64_size_t size64 = 
(user64_size_t)rsize; + error = copyout(&size64, sizep, sizeof(size64)); + } else { + user32_size_t size32 = (user32_size_t)rsize; + error = copyout(&size32, sizep, sizeof(size32)); + } + + return error; +#endif /* CONFIG_KAS_INFO */ } +#if __has_feature(ptrauth_calls) +/* + * Generate a random pointer signing key that isn't 0. + */ +uint64_t +generate_jop_key(void) +{ + uint64_t key; + + do { + read_random(&key, sizeof key); + } while (key == 0); + return key; +} +#endif /* __has_feature(ptrauth_calls) */ + #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wcast-qual" @@ -2582,14 +3322,6 @@ SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full, SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, ""); -#if PMAP_CS -extern uint64_t vm_cs_defer_to_pmap_cs; -extern uint64_t vm_cs_defer_to_pmap_cs_not; -SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_pmap_cs, - CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_pmap_cs, ""); -SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_pmap_cs_not, - CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_pmap_cs_not, ""); -#endif /* PMAP_CS */ extern uint64_t shared_region_pager_copied; extern uint64_t shared_region_pager_slid; @@ -2603,6 +3335,9 @@ SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error, CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, ""); SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, ""); +extern int shared_region_destroy_delay; +SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay, + CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, ""); #if MACH_ASSERT extern int pmap_ledgers_panic_leeway; @@ -2615,3 +3350,74 @@ SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted, extern uint64_t vm_copied_on_read; SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, ""); + +extern int vm_shared_region_count; +extern int vm_shared_region_peak; 
+SYSCTL_INT(_vm, OID_AUTO, shared_region_count, + CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, shared_region_peak, + CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, ""); +#if DEVELOPMENT || DEBUG +extern unsigned int shared_region_pagers_resident_count; +SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count, + CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, ""); +extern unsigned int shared_region_pagers_resident_peak; +SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak, + CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, ""); +extern int shared_region_pager_count; +SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count, + CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, ""); +#if __has_feature(ptrauth_calls) +extern int shared_region_key_count; +SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count, + CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, ""); +extern int vm_shared_region_reslide_count; +SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count, + CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, ""); +#endif /* __has_feature(ptrauth_calls) */ +#endif /* DEVELOPMENT || DEBUG */ + +#if MACH_ASSERT +extern int debug4k_filter; +SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, ""); +extern int debug4k_panic_on_terminate; +SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, ""); +extern int debug4k_panic_on_exception; +SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, ""); +extern int debug4k_panic_on_misaligned_sharing; +SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, ""); +#endif /* MACH_ASSERT */ + +/* + * A sysctl which causes all existing shared regions to become stale. 
They + * will no longer be used by anything new and will be torn down as soon as + * the last existing user exits. A write of non-zero value causes that to happen. + * This should only be used by launchd, so we check that this is initproc. + */ +static int +shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + unsigned int value = 0; + int changed = 0; + int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed); + if (error || !changed) { + return error; + } + if (current_proc() != initproc) { + return EPERM; + } + + vm_shared_region_pivot(); + + return 0; +} + +SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot, + CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED, + 0, 0, shared_region_pivot, "I", ""); + +extern int vm_remap_old_path, vm_remap_new_path; +SYSCTL_INT(_vm, OID_AUTO, remap_old_path, + CTLFLAG_RD | CTLFLAG_LOCKED, &vm_remap_old_path, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, remap_new_path, + CTLFLAG_RD | CTLFLAG_LOCKED, &vm_remap_new_path, 0, "");