From: Apple Date: Tue, 29 Nov 2016 21:57:34 +0000 (+0000) Subject: xnu-3789.21.4.tar.gz X-Git-Tag: macos-10121^0 X-Git-Url: https://git.saurik.com/apple/xnu.git/commitdiff_plain/743345f9a4b36f7e2f9ba37691e70c50baecb56e xnu-3789.21.4.tar.gz --- diff --git a/.gitignore b/.gitignore index ddcc48045..b502c3239 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,10 @@ BUILD/ build/ .DS_Store +# vim turds +*~ +*.swp + # / /.remotebuild_credential /cscope.* diff --git a/EXTERNAL_HEADERS/corecrypto/cc_config.h b/EXTERNAL_HEADERS/corecrypto/cc_config.h index 2f78c45a6..464f32b18 100644 --- a/EXTERNAL_HEADERS/corecrypto/cc_config.h +++ b/EXTERNAL_HEADERS/corecrypto/cc_config.h @@ -237,6 +237,14 @@ #endif /* __has_include() */ #endif /* defined(__has_include) */ +// Disable FIPS key gen algorithm on userland and kext so that related POST +// is skipped and boot time is reduced +#if defined(TARGET_OS_BRIDGE) && TARGET_OS_BRIDGE && CC_KERNEL +#define CC_DISABLE_RSAKEYGEN 1 /* for iBridge */ +#else +#define CC_DISABLE_RSAKEYGEN 0 /* default */ +#endif + //- functions implemented in assembly ------------------------------------------ //this the list of corecrypto clients that use assembly and the clang compiler #if !(CC_XNU_KERNEL_AVAILABLE || CC_KERNEL || CC_USE_L4 || CC_IBOOT || CC_USE_SEPROM || CC_USE_S3) && !defined(_WIN32) && CORECRYPTO_DEBUG diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c index e6e995ac6..f2398a723 100644 --- a/bsd/dev/i386/systemcalls.c +++ b/bsd/dev/i386/systemcalls.c @@ -95,6 +95,7 @@ unix_syscall(x86_saved_state_t *state) struct uthread *uthread; x86_saved_state32_t *regs; boolean_t is_vfork; + pid_t pid; assert(is_saved_state32(state)); regs = saved_state32(state); @@ -180,6 +181,7 @@ unix_syscall(x86_saved_state_t *state) uthread->uu_rval[1] = 0; uthread->uu_flag |= UT_NOTCANCELPT; uthread->syscall_code = code; + pid = proc_pid(p); #ifdef JOE_DEBUG uthread->uu_iocount = 0; @@ -242,7 +244,7 @@ unix_syscall(x86_saved_state_t *state) 
if (__probable(!code_is_kdebug_trace(code))) KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); + error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0); if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) { pal_execve_return(thread); @@ -272,6 +274,7 @@ unix_syscall64(x86_saved_state_t *state) struct proc *p; struct uthread *uthread; x86_saved_state64_t *regs; + pid_t pid; assert(is_saved_state64(state)); regs = saved_state64(state); @@ -366,6 +369,7 @@ unix_syscall64(x86_saved_state_t *state) uthread->uu_rval[1] = 0; uthread->uu_flag |= UT_NOTCANCELPT; uthread->syscall_code = code; + pid = proc_pid(p); #ifdef JOE_DEBUG uthread->uu_iocount = 0; @@ -445,7 +449,7 @@ unix_syscall64(x86_saved_state_t *state) if (__probable(!code_is_kdebug_trace(code))) KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); + error, uthread->uu_rval[0], uthread->uu_rval[1], pid, 0); #if PROC_REF_DEBUG if (__improbable(uthread_get_proc_refcount(uthread))) { diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index 50270ee4c..1d2c04c9d 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -1140,7 +1140,7 @@ bsd_utaskbootstrap(void) ut = (struct uthread *)get_bsdthread_info(thread); ut->uu_sigmask = 0; act_set_astbsd(thread); - proc_clear_return_wait(initproc, thread); + task_clear_return_wait(get_threadtask(thread)); } static void diff --git a/bsd/kern/kern_cs.c b/bsd/kern/kern_cs.c index 6ff8c458b..214b041b7 100644 --- a/bsd/kern/kern_cs.c +++ b/bsd/kern/kern_cs.c @@ -534,6 +534,20 @@ csproc_get_platform_path(struct proc *p) return (csblob == NULL) ? 0 : csblob->csb_platform_path; } +/* + * Function: csproc_get_prod_signed + * + * Description: Returns 1 if process is not signed with a developer identity. 
+ * Note the inverted meaning from the cs_flag to make the error case safer. + * Will go away with rdar://problem/28322552. + */ +int +csproc_get_prod_signed(struct proc *p) +{ + return ((p->p_csflags & CS_DEV_CODE) == 0); +} + + /* * Function: csfg_get_platform_binary * @@ -637,6 +651,48 @@ out: return str; } +/* + * Function: csfg_get_prod_signed + * + * Description: Returns 1 if code is not signed with a developer identity. + * Note the inverted meaning from the cs_flag to make the error case safer. + * Will go away with rdar://problem/28322552. + */ +int +csfg_get_prod_signed(struct fileglob *fg) +{ + struct ubc_info *uip; + vnode_t vp; + int prod_signed = 0; + + if (FILEGLOB_DTYPE(fg) != DTYPE_VNODE) + return 0; + + vp = (struct vnode *)fg->fg_data; + if (vp == NULL) + return 0; + + vnode_lock(vp); + if (!UBCINFOEXISTS(vp)) + goto out; + + uip = vp->v_ubcinfo; + if (uip == NULL) + goto out; + + if (uip->cs_blobs == NULL) + goto out; + + /* It is OK to extract the flag from the first blob + because all blobs of a vnode must have the same cs_flags */ + prod_signed = (uip->cs_blobs->csb_flags & CS_DEV_CODE) == 0; +out: + vnode_unlock(vp); + + return prod_signed; +} + + uint32_t cs_entitlement_flags(struct proc *p) { diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index 7bc3c62a9..7102392b5 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -2904,7 +2904,8 @@ fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec * going to let them get the basic stat information. 
*/ if (xsecurity == USER_ADDR_NULL) { - error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, ctx); + error = vn_stat_noauth((vnode_t)data, sbptr, NULL, isstat64, ctx, + fp->f_fglob->fg_cred); } else { error = vn_stat((vnode_t)data, sbptr, &fsec, isstat64, ctx); } diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index dd0390022..6d72e54a3 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -94,8 +94,6 @@ #include #include -#include - #include #include "net/net_str_id.h" @@ -4825,9 +4823,6 @@ knote_fdfind(struct kqueue *kq, * * The knote may have already been detached from * (or not yet attached to) its source object. - * - * should be called at spl == 0, since we don't want to hold spl - * while calling fdrop and free. */ static void knote_drop(struct knote *kn, __unused struct proc *ctxp) diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index 295c001c5..dc4c83eae 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -175,9 +175,17 @@ static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL; #endif /* support for child creation in exec after vfork */ -thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalition, proc_t child_proc, int inherit_memory, int is64bit); +thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalition, proc_t child_proc, int inherit_memory, int is64bit, int in_exec); void vfork_exit(proc_t p, int rv); extern void proc_apply_task_networkbg_internal(proc_t, thread_t); +extern void task_set_did_exec_flag(task_t task); +extern void task_clear_exec_copy_flag(task_t task); +proc_t proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread); +boolean_t task_is_active(task_t); +boolean_t thread_is_active(thread_t thread); +void thread_copy_resource_info(thread_t dst_thread, thread_t src_thread); +void *ipc_importance_exec_switch_task(task_t old_task, task_t new_task); +extern void ipc_importance_release(void *elem); /* * Mach things for which 
prototypes are unavailable from Mach headers @@ -798,12 +806,13 @@ exec_mach_imgact(struct image_params *imgp) thread_t thread; struct uthread *uthread; vm_map_t old_map = VM_MAP_NULL; - vm_map_t map; + vm_map_t map = VM_MAP_NULL; load_return_t lret; load_result_t load_result; struct _posix_spawnattr *psa = NULL; int spawn = (imgp->ip_flags & IMGPF_SPAWN); int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC); + int exec = (imgp->ip_flags & IMGPF_EXEC); os_reason_t exec_failure_reason = OS_REASON_NULL; /* @@ -896,24 +905,21 @@ grade: * obtained indirectly from the image_params vfs_context_t, is the * new child process. */ - if (vfexec || spawn) { - if (vfexec) { - imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT)); - if (imgp->ip_new_thread == NULL) { - error = ENOMEM; - goto bad; - } + if (vfexec) { + imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT), FALSE); + /* task and thread ref returned, will be released in __mac_execve */ + if (imgp->ip_new_thread == NULL) { + error = ENOMEM; + goto bad; } - - /* reset local idea of thread, uthread, task */ - thread = imgp->ip_new_thread; - uthread = get_bsdthread_info(thread); - task = new_task = get_threadtask(thread); - map = get_task_map(task); - } else { - map = VM_MAP_NULL; } + + /* reset local idea of thread, uthread, task */ + thread = imgp->ip_new_thread; + uthread = get_bsdthread_info(thread); + task = new_task = get_threadtask(thread); + /* * Load the Mach-O file. 
* @@ -962,7 +968,7 @@ grade: */ if (load_result.csflags & CS_VALID) { imgp->ip_csflags |= load_result.csflags & - (CS_VALID|CS_SIGNED| + (CS_VALID|CS_SIGNED|CS_DEV_CODE| CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV| CS_ENTITLEMENTS_VALIDATED|CS_DYLD_PLATFORM| CS_ENTITLEMENT_FLAGS| @@ -995,9 +1001,7 @@ grade: */ error = exec_handle_sugid(imgp); if (error) { - if (spawn || !vfexec) { - vm_map_deallocate(map); - } + vm_map_deallocate(map); KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_SUGID_FAILURE, 0, 0); @@ -1012,10 +1016,8 @@ grade: * each leaves us responsible for the old_map reference. That lets us get * off the pmap associated with it, and then we can release it. */ - if (!vfexec) { - old_map = swap_task_map(task, thread, map, !spawn); - vm_map_deallocate(old_map); - } + old_map = swap_task_map(task, thread, map, !spawn); + vm_map_deallocate(old_map); lret = activate_exec_state(task, p, thread, &load_result); if (lret != KERN_SUCCESS) { @@ -1057,9 +1059,7 @@ grade: goto badtoolate; } - if (vfexec || spawn) { - old_map = vm_map_switch(get_task_map(task)); - } + old_map = vm_map_switch(get_task_map(task)); if (load_result.unixproc) { user_addr_t ap; @@ -1071,8 +1071,7 @@ grade: ap = p->user_stack; error = exec_copyout_strings(imgp, &ap); if (error) { - if (vfexec || spawn) - vm_map_switch(old_map); + vm_map_switch(old_map); KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_STRINGS, 0, 0); @@ -1092,8 +1091,7 @@ grade: error = copyoutptr(load_result.mach_header, ap, new_ptr_size); if (error) { - if (vfexec || spawn) - vm_map_switch(old_map); + vm_map_switch(old_map); KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXITREASON_CREATE) | DBG_FUNC_NONE, p->p_pid, OS_REASON_EXEC, EXEC_EXIT_REASON_COPYOUT_DYNLINKER, 0, 0); @@ -1107,9 +1105,7 @@ grade: /* Avoid immediate 
VM faults back into kernel */ exec_prefault_data(p, imgp, &load_result); - if (vfexec || spawn) { - vm_map_switch(old_map); - } + vm_map_switch(old_map); /* Stop profiling */ stopprofclock(p); @@ -1155,25 +1151,25 @@ grade: strncmp(p->p_name, "testCamera", sizeof (p->p_name)) == 0) { - task_set_could_use_secluded_mem(p->task, TRUE); + task_set_could_use_secluded_mem(task, TRUE); } else { - task_set_could_use_secluded_mem(p->task, FALSE); + task_set_could_use_secluded_mem(task, FALSE); } if (strncmp(p->p_name, "mediaserverd", sizeof (p->p_name)) == 0) { - task_set_could_also_use_secluded_mem(p->task, TRUE); + task_set_could_also_use_secluded_mem(task, TRUE); } } #endif /* CONFIG_SECLUDED_MEMORY */ - pal_dbg_set_task_name( p->task ); + pal_dbg_set_task_name( task ); #if DEVELOPMENT || DEBUG /* * Update the pid an proc name for importance base if any */ - task_importance_update_owner_info(p->task); + task_importance_update_owner_info(task); #endif memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid)); @@ -1190,17 +1186,10 @@ grade: */ kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); - if (vfexec || spawn) { - KERNEL_DEBUG_CONSTANT1(TRACE_DATA_EXEC | DBG_FUNC_NONE, - p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread)); - KERNEL_DEBUG_CONSTANT1(TRACE_STRING_EXEC | DBG_FUNC_NONE, - dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread)); - } else { - KERNEL_DEBUG_CONSTANT(TRACE_DATA_EXEC | DBG_FUNC_NONE, - p->p_pid ,0,0,0,0); - KERNEL_DEBUG_CONSTANT(TRACE_STRING_EXEC | DBG_FUNC_NONE, - dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0); - } + KERNEL_DEBUG_CONSTANT1(TRACE_DATA_EXEC | DBG_FUNC_NONE, + p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread)); + KERNEL_DEBUG_CONSTANT1(TRACE_STRING_EXEC | DBG_FUNC_NONE, + dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread)); } /* @@ -1213,7 +1202,7 @@ grade: proc_lock(p); p->p_stat = SSTOP; proc_unlock(p); - (void) task_suspend_internal(p->task); + (void) task_suspend_internal(task); } } 
@@ -1251,6 +1240,11 @@ badtoolate: assert(exec_failure_reason != OS_REASON_NULL); psignal_with_reason(p, SIGKILL, exec_failure_reason); exec_failure_reason = OS_REASON_NULL; + + if (exec) { + /* Terminate the exec copy task */ + task_terminate_internal(task); + } } /* We can't stop this system call at this point, so just pretend we succeeded */ @@ -1267,12 +1261,6 @@ done: proc_knote(p, NOTE_EXEC); } - /* Drop extra references for cases where we don't expect the caller to clean up */ - if (vfexec || (spawn && error == 0)) { - task_deallocate(new_task); - thread_deallocate(thread); - } - if (load_result.threadstate) { kfree(load_result.threadstate, load_result.threadstate_sz); load_result.threadstate = NULL; @@ -1479,18 +1467,6 @@ encapsulated_binary: KAUTH_FILEOP_EXEC, (uintptr_t)ndp->ni_vp, 0); } - - if (error == 0) { - /* - * Reset atm context from task - */ - task_atm_reset(p->task); - - /* - * Reset old bank context from task - */ - task_bank_reset(p->task); - } bad: proc_transend(p, 0); @@ -1604,9 +1580,11 @@ exec_handle_port_actions(struct image_params *imgp, boolean_t * portwatch_presen ipc_port_t * portwatch_ports) { _posix_spawn_port_actions_t pacts = imgp->ip_px_spa; +#if CONFIG_AUDIT proc_t p = vfs_context_proc(imgp->ip_vfs_context); +#endif _ps_port_action_t *act = NULL; - task_t task = p->task; + task_t task = get_threadtask(imgp->ip_new_thread); ipc_port_t port = NULL; errno_t ret = 0; int i; @@ -1647,7 +1625,7 @@ exec_handle_port_actions(struct image_params *imgp, boolean_t * portwatch_presen break; #if CONFIG_AUDIT case PSPA_AU_SESSION: - ret = audit_session_spawnjoin(p, port); + ret = audit_session_spawnjoin(p, task, port); break; #endif case PSPA_IMP_WATCHPORTS: @@ -2122,50 +2100,6 @@ out: } #endif -void -proc_set_return_wait(proc_t p) -{ - proc_lock(p); - p->p_lflag |= P_LRETURNWAIT; - proc_unlock(p); -} - -void -proc_clear_return_wait(proc_t p, thread_t child_thread) -{ - proc_lock(p); - - p->p_lflag &= ~P_LRETURNWAIT; - if (p->p_lflag & 
P_LRETURNWAITER) { - wakeup(&p->p_lflag); - } - - proc_unlock(p); - - (void)thread_resume(child_thread); -} - -void -proc_wait_to_return() -{ - proc_t p; - - p = current_proc(); - proc_lock(p); - - if (p->p_lflag & P_LRETURNWAIT) { - p->p_lflag |= P_LRETURNWAITER; - do { - msleep(&p->p_lflag, &p->p_mlock, 0, - "thread_check_setup_complete", NULL); - } while (p->p_lflag & P_LRETURNWAIT); - p->p_lflag &= ~P_LRETURNWAITER; - } - - proc_unlock(p); - thread_bootstrap_return(); -} - /* * posix_spawn * @@ -2218,7 +2152,10 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) boolean_t exec_done = FALSE; int portwatch_count = 0; ipc_port_t * portwatch_ports = NULL; - vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports); + vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports); + task_t new_task = NULL; + boolean_t should_release_proc_ref = FALSE; + void *inherit = NULL; #if CONFIG_PERSONAS struct _posix_spawn_persona_info *px_persona = NULL; #endif @@ -2485,7 +2422,11 @@ do_fork1: * caller's persona (if it exists) */ error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal); + /* returns a thread and task reference */ + if (error == 0) { + new_task = get_threadtask(imgp->ip_new_thread); + } #if CONFIG_COALITIONS /* set the roles of this task within each given coalition */ if (error == 0) { @@ -2527,6 +2468,42 @@ do_fork1: } #endif /* 0 */ #endif /* CONFIG_PERSONAS */ + } else { + /* + * For execve case, create a new task and thread + * which points to current_proc. The current_proc will point + * to the new task after image activation and proc ref drain. + * + * proc (current_proc) <----- old_task (current_task) + * ^ | ^ + * | | | + * | ---------------------------------- + * | + * --------- new_task (task marked as TF_EXEC_COPY) + * + * After image activation, the proc will point to the new task + * and would look like following. 
+ * + * proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC) + * ^ | + * | | + * | ----------> new_task + * | | + * ----------------- + * + * During exec any transition from new_task -> proc is fine, but don't allow + * transition from proc->task, since it will modify old_task. + */ + imgp->ip_new_thread = fork_create_child(current_task(), + NULL, p, FALSE, p->p_flag & P_LP64, TRUE); + /* task and thread ref returned by fork_create_child */ + if (imgp->ip_new_thread == NULL) { + error = ENOMEM; + goto bad; + } + + new_task = get_threadtask(imgp->ip_new_thread); + imgp->ip_flags |= IMGPF_EXEC; } if (spawn_no_exec) { @@ -2541,17 +2518,9 @@ do_fork1: } assert(p != NULL); - /* By default, the thread everyone plays with is the parent */ - context.vc_thread = current_thread(); + context.vc_thread = imgp->ip_new_thread; context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */ - /* - * However, if we're not in the setexec case, redirect the context - * to the newly created process instead - */ - if (spawn_no_exec) - context.vc_thread = imgp->ip_new_thread; - /* * Post fdcopy(), pre exec_handle_sugid() - this is where we want * to handle the file_actions. Since vfork() also ends up setting @@ -2729,6 +2698,12 @@ do_fork1: */ error = exec_activate_image(imgp); + if (error == 0 && !spawn_no_exec) { + p = proc_exec_switch_task(p, current_task(), new_task, imgp->ip_new_thread); + /* proc ref returned */ + should_release_proc_ref = TRUE; + } + if (error == 0) { /* process completed the exec */ exec_done = TRUE; @@ -2747,18 +2722,8 @@ do_fork1: * until after the image is activated. */ if (!error && imgp->ip_px_sa != NULL) { - thread_t child_thread = current_thread(); - uthread_t child_uthread = uthread; - - /* - * If we created a new child thread, then the thread and - * uthread are different than the current ones; otherwise, - * we leave them, since we are in the exec case instead. 
- */ - if (spawn_no_exec) { - child_thread = imgp->ip_new_thread; - child_uthread = get_bsdthread_info(child_thread); - } + thread_t child_thread = imgp->ip_new_thread; + uthread_t child_uthread = get_bsdthread_info(child_thread); /* * Mask a list of signals, instead of them being unmasked, if @@ -2892,9 +2857,6 @@ bad: /* notify only if it has not failed due to FP Key error */ if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0) proc_knote(p, NOTE_EXEC); - } else if (error == 0) { - /* reset the importance attribute from our previous life */ - task_importance_reset(p->task); } if (error == 0) { @@ -2907,7 +2869,7 @@ bad: error = proc_transstart(p, 0, 0); if (error == 0) { - task_bank_init(p->task); + task_bank_init(get_threadtask(imgp->ip_new_thread)); proc_transend(p, 0); } } @@ -2929,11 +2891,22 @@ bad: portwatch_ports, portwatch_count); } + /* + * Need to transfer pending watch port boosts to the new task while still making + * sure that the old task remains in the importance linkage. Create an importance + * linkage from old task to new task, then switch the task importance base + * of old task and new task. After the switch the port watch boost will be + * boosting the new task and new task will be donating importance to old task. + */ + if (error == 0 && task_did_exec(current_task())) { + inherit = ipc_importance_exec_switch_task(current_task(), get_threadtask(imgp->ip_new_thread)); + } + /* Apply the main thread qos */ if (error == 0) { - thread_t main_thread = (imgp->ip_new_thread != NULL) ? 
imgp->ip_new_thread : current_thread(); + thread_t main_thread = imgp->ip_new_thread; - task_set_main_thread_qos(p->task, main_thread); + task_set_main_thread_qos(get_threadtask(imgp->ip_new_thread), main_thread); } /* @@ -3042,9 +3015,23 @@ bad: } } - if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) + if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) { (*dtrace_proc_waitfor_hook)(p); + } #endif + /* + * exec-success dtrace probe fired, clear bsd_info from + * old task if it did exec. + */ + if (task_did_exec(current_task())) { + set_bsdtask_info(current_task(), NULL); + } + + /* clear bsd_info from new task and terminate it if exec failed */ + if (new_task != NULL && task_is_exec_copy(new_task)) { + set_bsdtask_info(new_task, NULL); + task_terminate_internal(new_task); + } /* Return to both the parent and the child? */ if (imgp != NULL && spawn_no_exec) { @@ -3069,37 +3056,165 @@ bad: p->exit_thread = current_thread(); proc_unlock(p); exit1(p, 1, (int *)NULL); - proc_clear_return_wait(p, imgp->ip_new_thread); - if (exec_done == FALSE) { - task_deallocate(get_threadtask(imgp->ip_new_thread)); - thread_deallocate(imgp->ip_new_thread); - } } else { /* someone is doing it for us; just skip it */ proc_unlock(p); - proc_clear_return_wait(p, imgp->ip_new_thread); } - } else { - - /* - * Return to the child - * - * Note: the image activator earlier dropped the - * task/thread references to the newly spawned - * process; this is OK, since we still have suspended - * queue references on them, so we should be fine - * with the delayed resume of the thread here. - */ - proc_clear_return_wait(p, imgp->ip_new_thread); } } + + /* + * Do not terminate the current task, if proc_exec_switch_task did not + * switch the tasks, terminating the current task without the switch would + * result in losing the SIGKILL status. 
+ */ + if (task_did_exec(current_task())) { + /* Terminate the current task, since exec will start in new task */ + task_terminate_internal(current_task()); + } + + /* Release the thread ref returned by fork_create_child/fork1 */ + if (imgp != NULL && imgp->ip_new_thread) { + /* wake up the new thread */ + task_clear_return_wait(get_threadtask(imgp->ip_new_thread)); + thread_deallocate(imgp->ip_new_thread); + imgp->ip_new_thread = NULL; + } + + /* Release the ref returned by fork_create_child/fork1 */ + if (new_task) { + task_deallocate(new_task); + new_task = NULL; + } + + if (should_release_proc_ref) { + proc_rele(p); + } + if (bufp != NULL) { FREE(bufp, M_TEMP); } + + if (inherit != NULL) { + ipc_importance_release(inherit); + } return(error); } +/* + * proc_exec_switch_task + * + * Parameters: p proc + * old_task task before exec + * new_task task after exec + * new_thread thread in new task + * + * Returns: proc. + * + * Note: The function will switch the task pointer of proc + * from old task to new task. The switch needs to happen + * after draining all proc refs and inside a proc translock. + * In the case of failure to switch the task, which might happen + * if the process received a SIGKILL or jetsam killed it, it will make + * sure that the new tasks terminates. User proc ref returned + * to caller. + * + * This function is called after point of no return, in the case + * failure to switch, it will terminate the new task and swallow the + * error and let the terminated process complete exec and die. + */ +proc_t +proc_exec_switch_task(proc_t p, task_t old_task, task_t new_task, thread_t new_thread) +{ + int error = 0; + boolean_t task_active; + boolean_t proc_active; + boolean_t thread_active; + thread_t old_thread = current_thread(); + + /* + * Switch the task pointer of proc to new task. + * Before switching the task, wait for proc_refdrain. + * After the switch happens, the proc can disappear, + * take a ref before it disappears. 
+ */ + p = proc_refdrain_with_refwait(p, TRUE); + /* extra proc ref returned to the caller */ + + assert(get_threadtask(new_thread) == new_task); + task_active = task_is_active(new_task); + + /* Take the proc_translock to change the task ptr */ + proc_lock(p); + proc_active = !(p->p_lflag & P_LEXIT); + + /* Check if the current thread is not aborted due to SIGKILL */ + thread_active = thread_is_active(old_thread); + + /* + * Do not switch the task if the new task or proc is already terminated + * as a result of error in exec past point of no return + */ + if (proc_active && task_active && thread_active) { + error = proc_transstart(p, 1, 0); + if (error == 0) { + uthread_t new_uthread = get_bsdthread_info(new_thread); + uthread_t old_uthread = get_bsdthread_info(current_thread()); + + /* + * bsd_info of old_task will get cleared in execve and posix_spawn + * after firing exec-success/error dtrace probe. + */ + p->task = new_task; + + /* Copy the signal state, dtrace state and set bsd ast on new thread */ + act_set_astbsd(new_thread); + new_uthread->uu_siglist = old_uthread->uu_siglist; + new_uthread->uu_sigwait = old_uthread->uu_sigwait; + new_uthread->uu_sigmask = old_uthread->uu_sigmask; + new_uthread->uu_oldmask = old_uthread->uu_oldmask; + new_uthread->uu_vforkmask = old_uthread->uu_vforkmask; + new_uthread->uu_exit_reason = old_uthread->uu_exit_reason; +#if CONFIG_DTRACE + new_uthread->t_dtrace_sig = old_uthread->t_dtrace_sig; + new_uthread->t_dtrace_stop = old_uthread->t_dtrace_stop; + new_uthread->t_dtrace_resumepid = old_uthread->t_dtrace_resumepid; + assert(new_uthread->t_dtrace_scratch == NULL); + new_uthread->t_dtrace_scratch = old_uthread->t_dtrace_scratch; + + old_uthread->t_dtrace_sig = 0; + old_uthread->t_dtrace_stop = 0; + old_uthread->t_dtrace_resumepid = 0; + old_uthread->t_dtrace_scratch = NULL; +#endif + /* Copy the resource accounting info */ + thread_copy_resource_info(new_thread, current_thread()); + + /* Clear the exit reason and signal state 
on old thread */ + old_uthread->uu_exit_reason = NULL; + old_uthread->uu_siglist = 0; + + /* Add the new uthread to proc uthlist and remove the old one */ + TAILQ_INSERT_TAIL(&p->p_uthlist, new_uthread, uu_list); + TAILQ_REMOVE(&p->p_uthlist, old_uthread, uu_list); + + task_set_did_exec_flag(old_task); + task_clear_exec_copy_flag(new_task); + + proc_transend(p, 1); + } + } + + proc_unlock(p); + proc_refwake(p); + + if (error != 0 || !task_active || !proc_active || !thread_active) { + task_terminate_internal(new_task); + } + + return p; +} /* * execve @@ -3177,6 +3292,11 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) int is_64 = IS_64BIT_PROCESS(p); struct vfs_context context; struct uthread *uthread; + task_t new_task = NULL; + boolean_t should_release_proc_ref = FALSE; + boolean_t exec_done = FALSE; + boolean_t in_vfexec = FALSE; + void *inherit = NULL; context.vc_thread = current_thread(); context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */ @@ -3208,6 +3328,45 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) uthread = get_bsdthread_info(current_thread()); if (uthread->uu_flag & UT_VFORK) { imgp->ip_flags |= IMGPF_VFORK_EXEC; + in_vfexec = TRUE; + } else { + imgp->ip_flags |= IMGPF_EXEC; + + /* + * For execve case, create a new task and thread + * which points to current_proc. The current_proc will point + * to the new task after image activation and proc ref drain. + * + * proc (current_proc) <----- old_task (current_task) + * ^ | ^ + * | | | + * | ---------------------------------- + * | + * --------- new_task (task marked as TF_EXEC_COPY) + * + * After image activation, the proc will point to the new task + * and would look like following. 
+ * + * proc (current_proc) <----- old_task (current_task, marked as TPF_DID_EXEC) + * ^ | + * | | + * | ----------> new_task + * | | + * ----------------- + * + * During exec any transition from new_task -> proc is fine, but don't allow + * transition from proc->task, since it will modify old_task. + */ + imgp->ip_new_thread = fork_create_child(current_task(), + NULL, p, FALSE, p->p_flag & P_LP64, TRUE); + /* task and thread ref returned by fork_create_child */ + if (imgp->ip_new_thread == NULL) { + error = ENOMEM; + goto exit_with_error; + } + + new_task = get_threadtask(imgp->ip_new_thread); + context.vc_thread = imgp->ip_new_thread; } #if CONFIG_MACF @@ -3221,6 +3380,21 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) #endif error = exec_activate_image(imgp); + /* thread and task ref returned for vfexec case */ + + if (imgp->ip_new_thread != NULL) { + /* + * task reference might be returned by exec_activate_image + * for vfexec. + */ + new_task = get_threadtask(imgp->ip_new_thread); + } + + if (!error && !in_vfexec) { + p = proc_exec_switch_task(p, current_task(), new_task, imgp->ip_new_thread); + /* proc ref returned */ + should_release_proc_ref = TRUE; + } kauth_cred_unref(&context.vc_ucred); @@ -3228,7 +3402,10 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) if (error == -1) error = ENOEXEC; - if (error == 0) { + if (!error) { + exec_done = TRUE; + assert(imgp->ip_new_thread != NULL); + exec_resettextvp(p, imgp); error = check_for_signature(p, imgp); } @@ -3257,23 +3434,18 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) * aren't loaded until subsequent calls (including exec_resettextvp). 
*/ error = proc_transstart(p, 0, 0); - - if (!error) { - task_bank_init(p->task); - proc_transend(p, 0); - } } if (!error) { + task_bank_init(get_threadtask(imgp->ip_new_thread)); + proc_transend(p, 0); + /* Sever any extant thread affinity */ thread_affinity_exec(current_thread()); - thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread(); + thread_t main_thread = imgp->ip_new_thread; - task_set_main_thread_qos(p->task, main_thread); - - /* reset task importance */ - task_importance_reset(p->task); + task_set_main_thread_qos(new_task, main_thread); DTRACE_PROC(exec__success); @@ -3282,18 +3454,77 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) (*dtrace_proc_waitfor_hook)(p); #endif - if (imgp->ip_flags & IMGPF_VFORK_EXEC) { + if (in_vfexec) { vfork_return(p, retval, p->p_pid); - proc_clear_return_wait(p, imgp->ip_new_thread); } } else { DTRACE_PROC1(exec__failure, int, error); } exit_with_error: + + /* + * exec-success dtrace probe fired, clear bsd_info from + * old task if it did exec. + */ + if (task_did_exec(current_task())) { + set_bsdtask_info(current_task(), NULL); + } + + /* clear bsd_info from new task and terminate it if exec failed */ + if (new_task != NULL && task_is_exec_copy(new_task)) { + set_bsdtask_info(new_task, NULL); + task_terminate_internal(new_task); + } + + /* + * Need to transfer pending watch port boosts to the new task while still making + * sure that the old task remains in the importance linkage. Create an importance + * linkage from old task to new task, then switch the task importance base + * of old task and new task. After the switch the port watch boost will be + * boosting the new task and new task will be donating importance to old task. 
+ */ + if (error == 0 && task_did_exec(current_task())) { + inherit = ipc_importance_exec_switch_task(current_task(), get_threadtask(imgp->ip_new_thread)); + } + + if (imgp != NULL) { + /* + * Do not terminate the current task, if proc_exec_switch_task did not + * switch the tasks, terminating the current task without the switch would + * result in losing the SIGKILL status. + */ + if (task_did_exec(current_task())) { + /* Terminate the current task, since exec will start in new task */ + task_terminate_internal(current_task()); + } + + /* Release the thread ref returned by fork_create_child */ + if (imgp->ip_new_thread) { + /* wake up the new exec thread */ + task_clear_return_wait(get_threadtask(imgp->ip_new_thread)); + thread_deallocate(imgp->ip_new_thread); + imgp->ip_new_thread = NULL; + } + } + + /* Release the ref returned by fork_create_child */ + if (new_task) { + task_deallocate(new_task); + new_task = NULL; + } + + if (should_release_proc_ref) { + proc_rele(p); + } + if (bufp != NULL) { FREE(bufp, M_TEMP); } + + if (inherit != NULL) { + ipc_importance_release(inherit); + } return(error); } @@ -4126,6 +4357,7 @@ exec_handle_sugid(struct image_params *imgp) int leave_sugid_clear = 0; int mac_reset_ipc = 0; int error = 0; + task_t task = NULL; #if CONFIG_MACF int mac_transition, disjoint_cred = 0; int label_update_return = 0; @@ -4319,7 +4551,8 @@ handle_mac_transition: * a setuid exec to be able to access/control the * task/thread after. */ - ipc_task_reset(p->task); + ipc_task_reset((imgp->ip_new_thread != NULL) ? + get_threadtask(imgp->ip_new_thread) : p->task); ipc_thread_reset((imgp->ip_new_thread != NULL) ? 
imgp->ip_new_thread : current_thread()); } @@ -4457,7 +4690,13 @@ handle_mac_transition: /* Update the process' identity version and set the security token */ p->p_idversion++; - set_security_token(p); + + if (imgp->ip_new_thread != NULL) { + task = get_threadtask(imgp->ip_new_thread); + } else { + task = p->task; + } + set_security_token_task_internal(p, task); return(error); } diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 78e50cc5e..c0689a316 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -150,12 +150,13 @@ void thread_set_parent(thread_t parent, int pid); extern void act_thread_catt(void *ctx); void thread_set_child(thread_t child, int pid); void *act_thread_csave(void); +extern boolean_t task_is_exec_copy(task_t); thread_t cloneproc(task_t, coalition_t *, proc_t, int, int); proc_t forkproc(proc_t); void forkproc_free(proc_t); -thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t child, int inherit_memory, int is64bit); +thread_t fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t child, int inherit_memory, int is64bit, int in_exec); void proc_vfork_begin(proc_t parent_proc); void proc_vfork_end(proc_t parent_proc); @@ -324,7 +325,7 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) * Parameters: parent_proc parent process of the process being * child_threadp pointer to location to receive the * Mach thread_t of the child process - * breated + * created * kind kind of creation being requested * coalitions if spawn, the set of coalitions the * child process should join, or NULL to @@ -724,6 +725,8 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval) * is64bit TRUE, if the child being created will * be associated with a 64 bit process * rather than a 32 bit process + * in_exec TRUE, if called from execve or posix spawn set exec + * FALSE, if called from fork or vfexec * * Note: This code is called in the fork() case, from the execve() 
call * graph, if implementing an execve() following a vfork(), from @@ -742,7 +745,7 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval) * in this case, 'inherit_memory' MUST be FALSE. */ thread_t -fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t child_proc, int inherit_memory, int is64bit) +fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t child_proc, int inherit_memory, int is64bit, int in_exec) { thread_t child_thread = NULL; task_t child_task; @@ -753,7 +756,8 @@ fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t chi parent_coalitions, inherit_memory, is64bit, - TF_NONE, + TF_LRETURNWAIT | TF_LRETURNWAITER, /* All created threads will wait in task_wait_to_return */ + in_exec ? TPF_EXEC_COPY : TPF_NONE, /* Mark the task exec copy if in execve */ &child_task); if (result != KERN_SUCCESS) { printf("%s: task_create_internal failed. Code: %d\n", @@ -761,8 +765,13 @@ fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t chi goto bad; } - /* Set the child process task to the new task */ - child_proc->task = child_task; + if (!in_exec) { + /* + * Set the child process task to the new task if not in exec, + * will set the task for exec case in proc_exec_switch_task after image activation. 
+ */ + child_proc->task = child_task; + } /* Set child task process to child proc */ set_bsdtask_info(child_task, child_proc); @@ -784,8 +793,15 @@ fork_create_child(task_t parent_task, coalition_t *parent_coalitions, proc_t chi if (child_proc->p_nice != 0) resetpriority(child_proc); - /* Create a new thread for the child process */ - result = thread_create_with_continuation(child_task, &child_thread, (thread_continue_t)proc_wait_to_return); + /* + * Create a new thread for the child process + * The new thread is waiting on the event triggered by 'task_clear_return_wait' + */ + result = thread_create_waiting(child_task, + (thread_continue_t)task_wait_to_return, + task_get_return_wait_event(child_task), + &child_thread); + if (result != KERN_SUCCESS) { printf("%s: thread_create failed. Code: %d\n", __func__, result); @@ -873,7 +889,7 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) #endif /* "Return" to the child */ - proc_clear_return_wait(child_proc, child_thread); + task_clear_return_wait(get_threadtask(child_thread)); /* drop the extra references we got during the creation */ if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) { @@ -939,7 +955,7 @@ cloneproc(task_t parent_task, coalition_t *parent_coalitions, proc_t parent_proc goto bad; } - child_thread = fork_create_child(parent_task, parent_coalitions, child_proc, inherit_memory, parent_proc->p_flag & P_LP64); + child_thread = fork_create_child(parent_task, parent_coalitions, child_proc, inherit_memory, parent_proc->p_flag & P_LP64, FALSE); if (child_thread == NULL) { /* @@ -1317,7 +1333,6 @@ retry: */ proc_signalstart(child_proc, 0); proc_transstart(child_proc, 0, 0); - proc_set_return_wait(child_proc); child_proc->p_pcaction = 0; @@ -1522,11 +1537,17 @@ uthread_alloc(task_t task, thread_t thread, int noinherit) uth->uu_sigmask = uth_parent->uu_sigmask; } uth->uu_context.vc_thread = thread; - TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list); + /* + * Do not add the 
uthread to proc uthlist for exec copy task, + * since they do not hold a ref on proc. + */ + if (!task_is_exec_copy(task)) { + TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list); + } proc_unlock(p); #if CONFIG_DTRACE - if (p->p_dtrace_ptss_pages != NULL) { + if (p->p_dtrace_ptss_pages != NULL && !task_is_exec_copy(task)) { uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p); } #endif @@ -1639,9 +1660,13 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) /* * Remove the thread from the process list and * transfer [appropriate] pending signals to the process. + * Do not remove the uthread from proc uthlist for exec + * copy task, since they does not have a ref on proc and + * would not have been added to the list. */ - if (get_bsdtask_info(task) == p) { + if (get_bsdtask_info(task) == p && !task_is_exec_copy(task)) { proc_lock(p); + TAILQ_REMOVE(&p->p_uthlist, uth, uu_list); p->p_siglist |= (uth->uu_siglist & execmask & (~p->p_sigignore | sigcantmask)); proc_unlock(p); @@ -1649,7 +1674,7 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) #if CONFIG_DTRACE struct dtrace_ptss_page_entry *tmpptr = uth->t_dtrace_scratch; uth->t_dtrace_scratch = NULL; - if (tmpptr != NULL) { + if (tmpptr != NULL && !task_is_exec_copy(task)) { dtrace_ptss_release_entry(p, tmpptr); } #endif diff --git a/bsd/kern/kern_memorystatus.c b/bsd/kern/kern_memorystatus.c index 236b02d5c..b9f736ce0 100644 --- a/bsd/kern/kern_memorystatus.c +++ b/bsd/kern/kern_memorystatus.c @@ -2549,7 +2549,6 @@ memorystatus_update_idle_priority_locked(proc_t p) { * explicitly because it won't be going through the demotion paths * that take care to apply the limits appropriately. 
*/ - assert((p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) == 0); memorystatus_update_priority_locked(p, priority, false, true); } else { diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index 0d719cb4d..3621eeaff 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -472,8 +472,22 @@ proc_ref_locked(proc_t p) /* if process still in creation return failure */ if ((p == PROC_NULL) || ((p->p_listflag & P_LIST_INCREATE) != 0)) return (PROC_NULL); - /* do not return process marked for termination */ - if ((p->p_stat != SZOMB) && ((p->p_listflag & P_LIST_EXITED) == 0) && ((p->p_listflag & (P_LIST_DRAINWAIT | P_LIST_DRAIN | P_LIST_DEAD)) == 0)) { +retry: + /* + * Do not return process marked for termination + * or proc_refdrain called without ref wait. + * Wait for proc_refdrain_with_refwait to complete if + * process in refdrain and refwait flag is set. + */ + if ((p->p_stat != SZOMB) && + ((p->p_listflag & P_LIST_EXITED) == 0) && + ((p->p_listflag & P_LIST_DEAD) == 0) && + (((p->p_listflag & (P_LIST_DRAIN | P_LIST_DRAINWAIT)) == 0) || + ((p->p_listflag & P_LIST_REFWAIT) != 0))) { + if ((p->p_listflag & P_LIST_REFWAIT) != 0) { + msleep(&p->p_listflag, proc_list_mlock, 0, "proc_refwait", 0) ; + goto retry; + } p->p_refcount++; #if PROC_REF_DEBUG record_procref(p, 1); @@ -549,20 +563,59 @@ proc_drop_zombref(proc_t p) void proc_refdrain(proc_t p) { + proc_refdrain_with_refwait(p, FALSE); +} +proc_t +proc_refdrain_with_refwait(proc_t p, boolean_t get_ref_and_allow_wait) +{ + boolean_t initexec = FALSE; proc_list_lock(); p->p_listflag |= P_LIST_DRAIN; - while (p->p_refcount) { + if (get_ref_and_allow_wait) { + /* + * All the calls to proc_ref_locked will wait + * for the flag to get cleared before returning a ref. 
+ */ + p->p_listflag |= P_LIST_REFWAIT; + if (p == initproc) { + initexec = TRUE; + } + } + + /* Do not wait in ref drain for launchd exec */ + while (p->p_refcount && !initexec) { p->p_listflag |= P_LIST_DRAINWAIT; msleep(&p->p_refcount, proc_list_mlock, 0, "proc_refdrain", 0) ; } + p->p_listflag &= ~P_LIST_DRAIN; - p->p_listflag |= P_LIST_DEAD; + if (!get_ref_and_allow_wait) { + p->p_listflag |= P_LIST_DEAD; + } else { + /* Return a ref to the caller */ + p->p_refcount++; +#if PROC_REF_DEBUG + record_procref(p, 1); +#endif + } proc_list_unlock(); + if (get_ref_and_allow_wait) { + return (p); + } + return NULL; +} +void +proc_refwake(proc_t p) +{ + proc_list_lock(); + p->p_listflag &= ~P_LIST_REFWAIT; + wakeup(&p->p_listflag); + proc_list_unlock(); } proc_t diff --git a/bsd/kern/kern_prot.c b/bsd/kern/kern_prot.c index df5fea45a..9d825afcb 100644 --- a/bsd/kern/kern_prot.c +++ b/bsd/kern/kern_prot.c @@ -1984,12 +1984,25 @@ setlogin(proc_t p, struct setlogin_args *uap, __unused int32_t *retval) */ int set_security_token(proc_t p) +{ + return set_security_token_task_internal(p, p->task); +} + +/* + * Set the secrity token of the task with current euid and eguid + * The function takes a proc and a task, where proc->task might point to a + * different task if called from exec. + */ + +int +set_security_token_task_internal(proc_t p, void *t) { security_token_t sec_token; audit_token_t audit_token; kauth_cred_t my_cred; posix_cred_t my_pcred; host_priv_t host_priv; + task_t task = t; /* * Don't allow a vfork child to override the parent's token settings @@ -1997,7 +2010,7 @@ set_security_token(proc_t p) * suffer along using the parent's token until the exec(). It's all * undefined behavior anyway, right? 
*/ - if (p->task == current_task()) { + if (task == current_task()) { uthread_t uthread; uthread = (uthread_t)get_bsdthread_info(current_thread()); if (uthread->uu_flag & UT_VFORK) @@ -2045,11 +2058,11 @@ set_security_token(proc_t p) /* * Update the pid an proc name for importance base if any */ - task_importance_update_owner_info(p->task); + task_importance_update_owner_info(task); #endif return (host_security_set_task_token(host_security_self(), - p->task, + task, sec_token, audit_token, host_priv) != KERN_SUCCESS); diff --git a/bsd/kern/kern_sig.c b/bsd/kern/kern_sig.c index 7fa64636b..82dc644e5 100644 --- a/bsd/kern/kern_sig.c +++ b/bsd/kern/kern_sig.c @@ -126,6 +126,8 @@ extern int thread_enable_fpe(thread_t act, int onoff); extern thread_t port_name_to_thread(mach_port_name_t port_name); extern kern_return_t get_signalact(task_t , thread_t *, int); extern unsigned int get_useraddr(void); +extern boolean_t task_did_exec(task_t task); +extern boolean_t task_is_exec_copy(task_t task); /* * --- @@ -3319,6 +3321,11 @@ bsd_ast(thread_t thread) if (p == NULL) return; + /* don't run bsd ast on exec copy or exec'ed tasks */ + if (task_did_exec(current_task()) || task_is_exec_copy(current_task())) { + return; + } + if ((p->p_flag & P_OWEUPC) && (p->p_flag & P_PROFIL)) { pc = get_useraddr(); addupc_task(p, pc, 1); diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c index e4179c09e..091f66b46 100644 --- a/bsd/kern/kpi_socketfilter.c +++ b/bsd/kern/kpi_socketfilter.c @@ -93,6 +93,28 @@ static void sflt_detach_locked(struct socket_filter_entry *entry); #pragma mark -- Internal State Management -- +__private_extern__ int +sflt_permission_check(struct inpcb *inp) +{ + + /* + * All these permissions only apply to the co-processor interface, + * so ignore IPv4. + */ + if (!(inp->inp_vflag & INP_IPV6)) { + return (0); + } + /* Sockets that have this entitlement bypass socket filters. 
*/ + if (INP_INTCOPROC_ALLOWED(inp)) { + return (1); + } + if ((inp->inp_flags & INP_BOUND_IF) && + IFNET_IS_INTCOPROC(inp->inp_boundifp)) { + return (1); + } + return (0); +} + __private_extern__ void sflt_init(void) { @@ -275,6 +297,9 @@ sflt_attach_locked(struct socket *so, struct socket_filter *filter, int error = 0; struct socket_filter_entry *entry = NULL; + if (sflt_permission_check(sotoinpcb(so))) + return (0); + if (filter == NULL) return (ENOENT); @@ -551,7 +576,7 @@ sflt_notify_after_register(struct socket *so, sflt_event_t event, __private_extern__ int sflt_ioctl(struct socket *so, u_long cmd, caddr_t data) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -600,7 +625,7 @@ sflt_ioctl(struct socket *so, u_long cmd, caddr_t data) __private_extern__ int sflt_bind(struct socket *so, const struct sockaddr *nam) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -649,7 +674,7 @@ sflt_bind(struct socket *so, const struct sockaddr *nam) __private_extern__ int sflt_listen(struct socket *so) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -699,7 +724,7 @@ __private_extern__ int sflt_accept(struct socket *head, struct socket *so, const struct sockaddr *local, const struct sockaddr *remote) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -748,7 +773,7 @@ sflt_accept(struct socket *head, struct socket *so, __private_extern__ int sflt_getsockname(struct socket *so, struct sockaddr **local) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -797,7 +822,7 @@ sflt_getsockname(struct socket *so, 
struct sockaddr **local) __private_extern__ int sflt_getpeername(struct socket *so, struct sockaddr **remote) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -846,7 +871,7 @@ sflt_getpeername(struct socket *so, struct sockaddr **remote) __private_extern__ int sflt_connectin(struct socket *so, const struct sockaddr *remote) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -945,7 +970,7 @@ sflt_connectout(struct socket *so, const struct sockaddr *nam) struct sockaddr *sa; int error; - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); /* @@ -960,7 +985,7 @@ sflt_connectout(struct socket *so, const struct sockaddr *nam) if (error != 0) return (error); - /* + /* * If the address was modified, copy it back */ if (bcmp(sa, nam, nam->sa_len) != 0) { @@ -978,7 +1003,7 @@ sflt_connectxout(struct socket *so, struct sockaddr_list **dst_sl0) int modified = 0; int error = 0; - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); /* make a copy as sflt_connectout() releases socket lock */ @@ -1010,7 +1035,7 @@ sflt_connectxout(struct socket *so, struct sockaddr_list **dst_sl0) if (error != 0) break; - /* + /* * If the address was modified, copy it back */ if (bcmp(se->se_addr, sa, se->se_addr->sa_len) != 0) { @@ -1037,7 +1062,7 @@ sflt_connectxout(struct socket *so, struct sockaddr_list **dst_sl0) __private_extern__ int sflt_setsockopt(struct socket *so, struct sockopt *sopt) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -1086,7 +1111,7 @@ sflt_setsockopt(struct socket *so, struct sockopt *sopt) __private_extern__ int sflt_getsockopt(struct socket *so, struct sockopt *sopt) { - if 
(so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -1136,7 +1161,7 @@ __private_extern__ int sflt_data_out(struct socket *so, const struct sockaddr *to, mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; @@ -1197,7 +1222,7 @@ __private_extern__ int sflt_data_in(struct socket *so, const struct sockaddr *from, mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags) { - if (so->so_filt == NULL) + if (so->so_filt == NULL || sflt_permission_check(sotoinpcb(so))) return (0); struct socket_filter_entry *entry; diff --git a/bsd/kern/mach_loader.c b/bsd/kern/mach_loader.c index 225550f2a..b5ba3831a 100644 --- a/bsd/kern/mach_loader.c +++ b/bsd/kern/mach_loader.c @@ -316,14 +316,12 @@ load_machfile( off_t file_offset = imgp->ip_arch_offset; off_t macho_size = imgp->ip_arch_size; off_t file_size = imgp->ip_vattr->va_data_size; - vm_map_t new_map = *mapp; pmap_t pmap = 0; /* protected by create_map */ vm_map_t map; load_result_t myresult; load_return_t lret; - boolean_t create_map = FALSE; boolean_t enforce_hard_pagezero = TRUE; - int spawn = (imgp->ip_flags & IMGPF_SPAWN); + int in_exec = (imgp->ip_flags & IMGPF_EXEC); task_t task = current_task(); proc_t p = current_proc(); mach_vm_offset_t aslr_offset = 0; @@ -334,38 +332,21 @@ load_machfile( return(LOAD_BADMACHO); } - if (new_map == VM_MAP_NULL) { - create_map = TRUE; - } - result->is64bit = ((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT); - /* - * If we are spawning, we have created backing objects for the process - * already, which include non-lazily creating the task map. So we - * are going to switch out the task map with one appropriate for the - * bitness of the image being loaded. 
- */ - if (spawn) { - create_map = TRUE; + task_t ledger_task; + if (imgp->ip_new_thread) { + ledger_task = get_threadtask(imgp->ip_new_thread); + } else { + ledger_task = task; } - - if (create_map) { - task_t ledger_task; - if (imgp->ip_new_thread) { - ledger_task = get_threadtask(imgp->ip_new_thread); - } else { - ledger_task = task; - } - pmap = pmap_create(get_task_ledger(ledger_task), - (vm_map_size_t) 0, - result->is64bit); - map = vm_map_create(pmap, - 0, - vm_compute_max_offset(result->is64bit), - TRUE); - } else - map = new_map; + pmap = pmap_create(get_task_ledger(ledger_task), + (vm_map_size_t) 0, + result->is64bit); + map = vm_map_create(pmap, + 0, + vm_compute_max_offset(result->is64bit), + TRUE); #if (__ARM_ARCH_7K__ >= 2) && defined(PLATFORM_WatchOS) /* enforce 16KB alignment for watch targets with new ABI */ @@ -419,9 +400,7 @@ load_machfile( NULL, imgp); if (lret != LOAD_SUCCESS) { - if (create_map) { - vm_map_deallocate(map); /* will lose pmap reference too */ - } + vm_map_deallocate(map); /* will lose pmap reference too */ return(lret); } @@ -439,55 +418,57 @@ load_machfile( if (enforce_hard_pagezero && (vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) { { - if (create_map) { - vm_map_deallocate(map); /* will lose pmap reference too */ - } + vm_map_deallocate(map); /* will lose pmap reference too */ return (LOAD_BADMACHO); } } vm_commit_pagezero_status(map); - if (create_map) { + /* + * If this is an exec, then we are going to destroy the old + * task, and it's correct to halt it; if it's spawn, the + * task is not yet running, and it makes no sense. + */ + if (in_exec) { /* - * If this is an exec, then we are going to destroy the old - * task, and it's correct to halt it; if it's spawn, the - * task is not yet running, and it makes no sense. + * Mark the task as halting and start the other + * threads towards terminating themselves. 
Then + * make sure any threads waiting for a process + * transition get informed that we are committed to + * this transition, and then finally complete the + * task halting (wait for threads and then cleanup + * task resources). + * + * NOTE: task_start_halt() makes sure that no new + * threads are created in the task during the transition. + * We need to mark the workqueue as exiting before we + * wait for threads to terminate (at the end of which + * we no longer have a prohibition on thread creation). + * + * Finally, clean up any lingering workqueue data structures + * that may have been left behind by the workqueue threads + * as they exited (and then clean up the work queue itself). */ - if (!spawn) { - /* - * Mark the task as halting and start the other - * threads towards terminating themselves. Then - * make sure any threads waiting for a process - * transition get informed that we are committed to - * this transition, and then finally complete the - * task halting (wait for threads and then cleanup - * task resources). - * - * NOTE: task_start_halt() makes sure that no new - * threads are created in the task during the transition. - * We need to mark the workqueue as exiting before we - * wait for threads to terminate (at the end of which - * we no longer have a prohibition on thread creation). - * - * Finally, clean up any lingering workqueue data structures - * that may have been left behind by the workqueue threads - * as they exited (and then clean up the work queue itself). 
- */ - kret = task_start_halt(task); - if (kret != KERN_SUCCESS) { - vm_map_deallocate(map); /* will lose pmap reference too */ - return (LOAD_FAILURE); - } - proc_transcommit(p, 0); - workqueue_mark_exiting(p); - task_complete_halt(task); - workqueue_exit(p); - kqueue_dealloc(p->p_wqkqueue); - p->p_wqkqueue = NULL; + kret = task_start_halt(task); + if (kret != KERN_SUCCESS) { + vm_map_deallocate(map); /* will lose pmap reference too */ + return (LOAD_FAILURE); } - *mapp = map; + proc_transcommit(p, 0); + workqueue_mark_exiting(p); + task_complete_halt(task); + workqueue_exit(p); + kqueue_dealloc(p->p_wqkqueue); + p->p_wqkqueue = NULL; + /* + * Roll up accounting info to new task. The roll up is done after + * task_complete_halt to make sure the thread accounting info is + * rolled up to current_task. + */ + task_rollup_accounting_info(get_threadtask(thread), task); } + *mapp = map; return(LOAD_SUCCESS); } diff --git a/bsd/kern/policy_check.c b/bsd/kern/policy_check.c index 86b5049c3..0cd3827fe 100644 --- a/bsd/kern/policy_check.c +++ b/bsd/kern/policy_check.c @@ -118,7 +118,7 @@ common_hook(void) return rv; } -#if (MAC_POLICY_OPS_VERSION != 45) +#if (MAC_POLICY_OPS_VERSION != 46) # error "struct mac_policy_ops doesn't match definition in mac_policy.h" #endif /* @@ -280,12 +280,12 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(exc_action_label_init) CHECK_SET_HOOK(exc_action_label_update) - .mpo_reserved17 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved18 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved19 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved20 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved21 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved22 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved1 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved2 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved3 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved4 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved5 
= (mpo_reserved_hook_t *)common_hook, + .mpo_reserved6 = (mpo_reserved_hook_t *)common_hook, CHECK_SET_HOOK(posixsem_check_create) CHECK_SET_HOOK(posixsem_check_open) @@ -363,7 +363,7 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(system_check_settime) CHECK_SET_HOOK(system_check_swapoff) CHECK_SET_HOOK(system_check_swapon) - .mpo_reserved31 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved7 = (mpo_reserved_hook_t *)common_hook, CHECK_SET_HOOK(sysvmsg_label_associate) CHECK_SET_HOOK(sysvmsg_label_destroy) @@ -396,9 +396,9 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_HOOK(sysvshm_label_init) CHECK_SET_HOOK(sysvshm_label_recycle) - .mpo_reserved23 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved24 = (mpo_reserved_hook_t *)common_hook, - .mpo_reserved25 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved8 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved9 = (mpo_reserved_hook_t *)common_hook, + CHECK_SET_HOOK(vnode_check_getattr) CHECK_SET_HOOK(mount_check_snapshot_create) CHECK_SET_HOOK(mount_check_snapshot_delete) CHECK_SET_HOOK(vnode_check_clone) diff --git a/bsd/kern/trace_codes b/bsd/kern/trace_codes index 3b70a29c9..f2fc63870 100644 --- a/bsd/kern/trace_codes +++ b/bsd/kern/trace_codes @@ -329,6 +329,7 @@ 0x1700034 PMAP_flush_kernel_TLBS 0x1700038 PMAP_flush_delayed_TLBS 0x170003c PMAP_flush_TLBS_TO +0x1800000 MACH_CLOCK_EPOCH_CHANGE 0x1900000 MP_TLB_FLUSH 0x1900004 MP_CPUS_CALL 0x1900008 MP_CPUS_CALL_LOCAL @@ -1176,22 +1177,32 @@ 0x900808c wq_workqueue_exit 0xa000100 P_CS_Read 0xa000110 P_CS_Write -0xa000180 P_CS_ReadDone -0xa000190 P_CS_WriteDone +0xa000104 P_CS_ReadDone +0xa000114 P_CS_WriteDone 0xa000200 P_CS_ReadChunk 0xa000210 P_CS_WriteChunk -0xa000280 P_CS_ReadChunkDone -0xa000290 P_CS_WriteChunkDone -0xa000300 P_CS_ReadCrypto -0xa000310 P_CS_WriteCrypto -0xa000500 P_CS_Originated_Read -0xa000510 P_CS_Originated_Write -0xa000580 P_CS_Originated_ReadDone -0xa000590 P_CS_Originated_WriteDone -0xa000900 
P_CS_MetaRead -0xa000910 P_CS_MetaWrite -0xa000980 P_CS_MetaReadDone -0xa000990 P_CS_MetaWriteDone +0xa000204 P_CS_ReadChunkDone +0xa000214 P_CS_WriteChunkDone +0xa000300 P_CS_ReadMeta +0xa000310 P_CS_WriteMeta +0xa000304 P_CS_ReadMetaDone +0xa000314 P_CS_WriteMetaDone +0xa000400 P_CS_ReadCrypto +0xa000410 P_CS_WriteCrypto +0xa000404 P_CS_ReadCryptoDone +0xa000414 P_CS_WriteCryptoDone +0xa000500 P_CS_TransformRead +0xa000510 P_CS_TransformWrite +0xa000504 P_CS_TransformReadDone +0xa000514 P_CS_TransformWriteDone +0xa000600 P_CS_MigrationRead +0xa000610 P_CS_MigrationWrite +0xa000604 P_CS_MigrationReadDone +0xa000614 P_CS_MigrationWriteDone +0xa000700 P_CS_DirectRead +0xa000710 P_CS_DirectWrite +0xa000704 P_CS_DirectReadDone +0xa000714 P_CS_DirectWriteDone 0xa008000 P_CS_SYNC_DISK 0xa008004 P_CS_WaitForBuffer 0xa008008 P_CS_NoBuffer diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index c8fc70ca3..1f694df67 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -4884,13 +4884,14 @@ sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) sizeof(optval)); if (error != 0) goto out; - if (optval != 0) { + if (optval != 0 && + inp_get_intcoproc_allowed(sotoinpcb(so)) == FALSE) { error = soopt_cred_check(so, PRIV_NET_RESTRICTED_INTCOPROC); if (error == 0) inp_set_intcoproc_allowed( sotoinpcb(so)); - } else + } else if (optval == 0) inp_clear_intcoproc_allowed(sotoinpcb(so)); break; diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c index 7505ec207..84fb0898f 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -1678,10 +1678,15 @@ dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter, filter->filt_cookie = if_filter->iff_cookie; filter->filt_name = if_filter->iff_name; filter->filt_protocol = if_filter->iff_protocol; - filter->filt_input = if_filter->iff_input; - filter->filt_output = if_filter->iff_output; - filter->filt_event = if_filter->iff_event; - filter->filt_ioctl = if_filter->iff_ioctl; + /* + * Do not install filter 
callbacks for internal coproc interface + */ + if (!IFNET_IS_INTCOPROC(ifp)) { + filter->filt_input = if_filter->iff_input; + filter->filt_output = if_filter->iff_output; + filter->filt_event = if_filter->iff_event; + filter->filt_ioctl = if_filter->iff_ioctl; + } filter->filt_detached = if_filter->iff_detached; lck_mtx_lock(&ifp->if_flt_lock); diff --git a/bsd/net/if.c b/bsd/net/if.c index a43d31f54..053db0860 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -718,6 +718,8 @@ if_functional_type(struct ifnet *ifp, bool exclude_delegate) (ifp->if_type == IFT_CELLULAR)) || (!exclude_delegate && IFNET_IS_CELLULAR(ifp))) { ret = IFRTYPE_FUNCTIONAL_CELLULAR; + } else if (IFNET_IS_INTCOPROC(ifp)) { + ret = IFRTYPE_FUNCTIONAL_INTCOPROC; } else if ((exclude_delegate && (ifp->if_family == IFNET_FAMILY_ETHERNET || ifp->if_family == IFNET_FAMILY_FIREWIRE)) || @@ -881,9 +883,9 @@ ifa_ifwithaddr(const struct sockaddr *addr) struct ifaddr *result = NULL; ifnet_head_lock_shared(); - + result = ifa_ifwithaddr_locked(addr); - + ifnet_head_done(); return (result); @@ -982,17 +984,17 @@ ifa_ifwithaddr_scoped_locked(const struct sockaddr *addr, unsigned int ifscope) } return (result); -} - +} + struct ifaddr * ifa_ifwithaddr_scoped(const struct sockaddr *addr, unsigned int ifscope) { struct ifaddr *result = NULL; ifnet_head_lock_shared(); - + result = ifa_ifwithaddr_scoped_locked(addr, ifscope); - + ifnet_head_done(); return (result); @@ -1270,9 +1272,7 @@ link_rtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa) * up/down state and updating the rest of the flags. */ __private_extern__ void -if_updown( - struct ifnet *ifp, - int up) +if_updown( struct ifnet *ifp, int up) { int i; struct ifaddr **ifa; @@ -1308,6 +1308,14 @@ if_updown( /* Drop the lock to notify addresses and route */ ifnet_lock_done(ifp); + + IFCQ_LOCK(ifq); + if_qflush(ifp, 1); + + /* Inform all transmit queues about the new link state */ + ifnet_update_sndq(ifq, up ? 
CLASSQ_EV_LINK_UP : CLASSQ_EV_LINK_DOWN); + IFCQ_UNLOCK(ifq); + if (ifnet_get_address_list(ifp, &ifa) == 0) { for (i = 0; ifa[i] != 0; i++) { pfctlinput(up ? PRC_IFUP : PRC_IFDOWN, ifa[i]->ifa_addr); @@ -1316,14 +1324,6 @@ if_updown( } rt_ifmsg(ifp); - if (!up) - if_qflush(ifp, 0); - - /* Inform all transmit queues about the new link state */ - IFCQ_LOCK(ifq); - ifnet_update_sndq(ifq, up ? CLASSQ_EV_LINK_UP : CLASSQ_EV_LINK_DOWN); - IFCQ_UNLOCK(ifq); - /* Aquire the lock to clear the changing flag */ ifnet_lock_exclusive(ifp); ifp->if_eflags &= ~IFEF_UPDOWNCHANGE; diff --git a/bsd/net/if.h b/bsd/net/if.h index 6954fc003..9d93391f4 100644 --- a/bsd/net/if.h +++ b/bsd/net/if.h @@ -467,7 +467,8 @@ struct ifreq { #define IFRTYPE_FUNCTIONAL_WIFI_INFRA 3 #define IFRTYPE_FUNCTIONAL_WIFI_AWDL 4 #define IFRTYPE_FUNCTIONAL_CELLULAR 5 -#define IFRTYPE_FUNCTIONAL_LAST 5 +#define IFRTYPE_FUNCTIONAL_INTCOPROC 6 +#define IFRTYPE_FUNCTIONAL_LAST 6 u_int32_t ifru_expensive; u_int32_t ifru_2kcl; struct { diff --git a/bsd/net/kext_net.h b/bsd/net/kext_net.h index 32b5ab6a7..e9c9a6a6a 100644 --- a/bsd/net/kext_net.h +++ b/bsd/net/kext_net.h @@ -46,9 +46,11 @@ struct socket; struct sockopt; +struct inpcb; /* Private, internal implementation functions */ extern void sflt_init(void); +extern int sflt_permission_check(struct inpcb *inp); extern void sflt_initsock(struct socket *so); extern void sflt_termsock(struct socket *so); extern errno_t sflt_attach_internal(struct socket *so, sflt_handle handle); diff --git a/bsd/net/necp.c b/bsd/net/necp.c index 9ada6fe8a..86cfe97eb 100644 --- a/bsd/net/necp.c +++ b/bsd/net/necp.c @@ -339,6 +339,7 @@ static bool necp_is_addr_in_subnet(struct sockaddr *addr, struct sockaddr *subne static int necp_addr_compare(struct sockaddr *sa1, struct sockaddr *sa2, int check_port); static bool necp_buffer_compare_with_bit_prefix(u_int8_t *p1, u_int8_t *p2, u_int32_t bits); static bool necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, 
struct inpcb *inp, struct mbuf *packet); +static bool necp_is_intcoproc(struct inpcb *inp, struct mbuf *packet); struct necp_uuid_id_mapping { LIST_ENTRY(necp_uuid_id_mapping) chain; @@ -6190,6 +6191,19 @@ necp_socket_is_connected(struct inpcb *inp) return (inp->inp_socket->so_state & (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING)); } +static inline bool +necp_socket_bypass(struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, struct inpcb *inp) +{ + + if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + return (true); + } else if (necp_is_intcoproc(inp, NULL)) { + return (true); + } + + return (false); +} + necp_kernel_policy_id necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int32_t override_bound_interface) { @@ -6233,8 +6247,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local inp->inp_policyresult.flowhash = 0; inp->inp_policyresult.results.filter_control_unit = 0; inp->inp_policyresult.results.route_rule_id = 0; - if (necp_pass_loopback > 0 && - necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + if (necp_socket_bypass(override_local_addr, override_remote_addr, inp)) { inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_PASS; } else { inp->inp_policyresult.results.result = NECP_KERNEL_POLICY_RESULT_DROP; @@ -6244,8 +6257,7 @@ necp_socket_find_policy_match(struct inpcb *inp, struct sockaddr *override_local } // Check for loopback exception - if (necp_pass_loopback > 0 && - necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + if (necp_socket_bypass(override_local_addr, override_remote_addr, inp)) { // Mark socket as a pass inp->inp_policyresult.policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; inp->inp_policyresult.policy_gencount = 0; @@ -6564,6 +6576,21 @@ 
necp_ip_output_find_policy_match_locked(necp_kernel_policy_id socket_policy_id, return (matched_policy); } +static inline bool +necp_output_bypass(struct mbuf *packet) +{ + if (necp_pass_loopback > 0 && necp_is_loopback(NULL, NULL, NULL, packet)) { + return (true); + } + if (necp_pass_keepalives > 0 && necp_get_is_keepalive_from_packet(packet)) { + return (true); + } + if (necp_is_intcoproc(NULL, packet)) { + return (true); + } + return (false); +} + necp_kernel_policy_id necp_ip_output_find_policy_match(struct mbuf *packet, int flags, struct ip_out_args *ipoa, necp_kernel_policy_result *result, necp_kernel_policy_result_parameter *result_parameter) { @@ -6599,10 +6626,7 @@ necp_ip_output_find_policy_match(struct mbuf *packet, int flags, struct ip_out_a if (necp_drop_all_order > 0) { matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; if (result) { - if ((necp_pass_loopback > 0 && - necp_is_loopback(NULL, NULL, NULL, packet)) || - (necp_pass_keepalives > 0 && - necp_get_is_keepalive_from_packet(packet))) { + if (necp_output_bypass(packet)) { *result = NECP_KERNEL_POLICY_RESULT_PASS; } else { *result = NECP_KERNEL_POLICY_RESULT_DROP; @@ -6614,10 +6638,7 @@ necp_ip_output_find_policy_match(struct mbuf *packet, int flags, struct ip_out_a } // Check for loopback exception - if ((necp_pass_loopback > 0 && - necp_is_loopback(NULL, NULL, NULL, packet)) || - (necp_pass_keepalives > 0 && - necp_get_is_keepalive_from_packet(packet))) { + if (necp_output_bypass(packet)) { matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; if (result) { *result = NECP_KERNEL_POLICY_RESULT_PASS; @@ -6741,10 +6762,7 @@ necp_ip6_output_find_policy_match(struct mbuf *packet, int flags, struct ip6_out if (necp_drop_all_order > 0) { matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; if (result) { - if ((necp_pass_loopback > 0 && - necp_is_loopback(NULL, NULL, NULL, packet)) || - (necp_pass_keepalives > 0 && - necp_get_is_keepalive_from_packet(packet))) { + if (necp_output_bypass(packet)) { *result 
= NECP_KERNEL_POLICY_RESULT_PASS; } else { *result = NECP_KERNEL_POLICY_RESULT_DROP; @@ -6756,10 +6774,7 @@ necp_ip6_output_find_policy_match(struct mbuf *packet, int flags, struct ip6_out } // Check for loopback exception - if ((necp_pass_loopback > 0 && - necp_is_loopback(NULL, NULL, NULL, packet)) || - (necp_pass_keepalives > 0 && - necp_get_is_keepalive_from_packet(packet))) { + if (necp_output_bypass(packet)) { matched_policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; if (result) { *result = NECP_KERNEL_POLICY_RESULT_PASS; @@ -7376,8 +7391,7 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr if (necp_kernel_socket_policies_count == 0 || (!(inp->inp_flags2 & INP2_WANT_APP_POLICY) && necp_kernel_socket_policies_non_app_count == 0)) { if (necp_drop_all_order > 0) { - if (necp_pass_loopback > 0 && - necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + if (necp_socket_bypass(override_local_addr, override_remote_addr, inp)) { allowed_to_receive = TRUE; } else { allowed_to_receive = FALSE; @@ -7421,8 +7435,7 @@ necp_socket_is_allowed_to_send_recv_internal(struct inpcb *inp, struct sockaddr } // Check for loopback exception - if (necp_pass_loopback > 0 && - necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + if (necp_socket_bypass(override_local_addr, override_remote_addr, inp)) { allowed_to_receive = TRUE; goto done; } @@ -7927,3 +7940,23 @@ necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, stru return (FALSE); } + +static bool +necp_is_intcoproc(struct inpcb *inp, struct mbuf *packet) +{ + + if (inp != NULL) { + return (sflt_permission_check(inp) ? 
true : false); + } + if (packet != NULL) { + struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *); + if ((ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION && + IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst) && + ip6->ip6_dst.s6_addr32[2] == ntohl(0xaede48ff) && + ip6->ip6_dst.s6_addr32[3] == ntohl(0xfe334455)) { + return (true); + } + } + + return (false); +} diff --git a/bsd/net/ntstat.c b/bsd/net/ntstat.c index 35a02c0b3..14f523535 100644 --- a/bsd/net/ntstat.c +++ b/bsd/net/ntstat.c @@ -271,6 +271,7 @@ nstat_ifnet_to_flags( flags |= NSTAT_IFNET_IS_LOOPBACK; break; case IFRTYPE_FUNCTIONAL_WIRED: + case IFRTYPE_FUNCTIONAL_INTCOPROC: flags |= NSTAT_IFNET_IS_WIRED; break; case IFRTYPE_FUNCTIONAL_WIFI_INFRA: diff --git a/bsd/net/pf_ioctl.c b/bsd/net/pf_ioctl.c index 15bfb0724..517ee6856 100644 --- a/bsd/net/pf_ioctl.c +++ b/bsd/net/pf_ioctl.c @@ -2855,7 +2855,7 @@ pf_delete_rule_by_ticket(struct pfioc_rule *pr, u_int32_t req_dev) } if (rule == NULL) return (ENOENT); - else + else i--; if (strcmp(rule->owner, pr->rule.owner)) @@ -2882,7 +2882,7 @@ delete_rule: if (rule == NULL) panic("%s: rule not found!", __func__); - /* + /* * if reqest device != rule's device, bail : * with error if ticket matches; * without error if ticket doesn't match (i.e. its just cleanup) @@ -2905,7 +2905,7 @@ delete_rule: ++delete_ruleset->rules[i].inactive.ticket; goto delete_rule; } else { - /* + /* * process deleting rule only if device that added the * rule matches device that issued the request */ @@ -2937,7 +2937,7 @@ pf_delete_rule_by_owner(char *owner, u_int32_t req_dev) ruleset = &pf_main_ruleset; while (rule) { next = TAILQ_NEXT(rule, entries); - /* + /* * process deleting rule only if device that added the * rule matches device that issued the request */ @@ -4857,6 +4857,10 @@ pf_af_hook(struct ifnet *ifp, struct mbuf **mppn, struct mbuf **mp, net_thread_marks_t marks; struct ifnet * pf_ifp = ifp; + /* Always allow traffic on co-processor interfaces. 
*/ + if (ifp && IFNET_IS_INTCOPROC(ifp)) + return (0); + marks = net_thread_marks_push(NET_THREAD_HELD_PF); if (marks != net_thread_marks_none) { diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index 4bb25be28..c20961c22 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -133,6 +133,8 @@ static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ static boolean_t inpcb_fast_timer_on = FALSE; static boolean_t intcoproc_unrestricted = FALSE; +extern char *proc_best_name(proc_t); + /* * If the total number of gc reqs is above a threshold, schedule * garbage collect timer sooner @@ -1303,7 +1305,7 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, if (ia == NULL) ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); error = ((ia == NULL) ? ENETUNREACH : 0); - + if (apn_fallback_required(proc, inp->inp_socket, (void *)nam)) apn_fallback_trigger(proc); @@ -3341,8 +3343,9 @@ inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp) ret = _inp_restricted_recv(inp, ifp); if (ret == TRUE && log_restricted) { - printf("pid %d is unable to receive packets on %s\n", - current_proc()->p_pid, ifp->if_xname); + printf("pid %d (%s) is unable to receive packets on %s\n", + current_proc()->p_pid, proc_best_name(current_proc()), + ifp->if_xname); } return (ret); } @@ -3388,8 +3391,9 @@ inp_restricted_send(struct inpcb *inp, struct ifnet *ifp) ret = _inp_restricted_send(inp, ifp); if (ret == TRUE && log_restricted) { - printf("pid %d is unable to transmit packets on %s\n", - current_proc()->p_pid, ifp->if_xname); + printf("pid %d (%s) is unable to transmit packets on %s\n", + current_proc()->p_pid, proc_best_name(current_proc()), + ifp->if_xname); } return (ret); } diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c index f2f8af9be..23e5aa560 100644 --- a/bsd/netinet6/esp_input.c +++ b/bsd/netinet6/esp_input.c @@ -122,6 +122,7 @@ #include #include +#include #include #define DBG_LAYER_BEG 
NETDBG_CODE(DBG_NETIPSEC, 1) @@ -335,7 +336,7 @@ esp4_input(struct mbuf *m, int off) goto bad; } - if (bcmp(sum0, sum, siz) != 0) { + if (cc_cmp_safe(siz, sum0, sum)) { ipseclog((LOG_WARNING, "auth fail in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); IPSEC_STAT_INCREMENT(ipsecstat.in_espauthfail); @@ -433,8 +434,8 @@ noreplaycheck: IPSEC_STAT_INCREMENT(ipsecstat.in_inval); KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0); goto bad; - } - if (memcmp(saved_icv, tag, algo->icvlen)) { + } + if (cc_cmp_safe(algo->icvlen, saved_icv, tag)) { ipseclog((LOG_ERR, "packet decryption ICV mismatch\n")); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0); @@ -932,7 +933,7 @@ esp6_input(struct mbuf **mp, int *offp, int proto) goto bad; } - if (bcmp(sum0, sum, siz) != 0) { + if (cc_cmp_safe(siz, sum0, sum)) { ipseclog((LOG_WARNING, "auth fail in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); IPSEC_STAT_INCREMENT(ipsec6stat.in_espauthfail); @@ -1026,8 +1027,8 @@ noreplaycheck: IPSEC_STAT_INCREMENT(ipsecstat.in_inval); KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0); goto bad; - } - if (memcmp(saved_icv, tag, algo->icvlen)) { + } + if (cc_cmp_safe(algo->icvlen, saved_icv, tag)) { ipseclog((LOG_ERR, "packet decryption ICV mismatch\n")); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); KERNEL_DEBUG(DBG_FNC_DECRYPT | DBG_FUNC_END, 1,0,0,0,0); diff --git a/bsd/netinet6/icmp6.c b/bsd/netinet6/icmp6.c index cc5f1af53..23a9178d3 100644 --- a/bsd/netinet6/icmp6.c +++ b/bsd/netinet6/icmp6.c @@ -164,7 +164,7 @@ static const char *icmp6_redirect_diag(struct in6_addr *, static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); -static int ni6_addrs(struct icmp6_nodeinfo *, +static int ni6_addrs(struct icmp6_nodeinfo *, struct ifnet **, char *); static int 
ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int); @@ -752,7 +752,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) if (!icmp6_nodeinfo) break; //### LD 10/20 Check fbsd differences here. Not sure we're more advanced or not. - /* By RFC 4620 refuse to answer queries from global scope addresses */ + /* By RFC 4620 refuse to answer queries from global scope addresses */ if ((icmp6_nodeinfo & 8) != 8 && in6_addrscope(&ip6->ip6_src) == IPV6_ADDR_SCOPE_GLOBAL) break; @@ -2036,7 +2036,7 @@ icmp6_rip6_input(struct mbuf **mp, int off) rip6src.sin6_family = AF_INET6; rip6src.sin6_len = sizeof(struct sockaddr_in6); rip6src.sin6_addr = ip6->ip6_src; - if (sa6_recoverscope(&rip6src, TRUE)) + if (sa6_recoverscope(&rip6src, TRUE)) return (IPPROTO_DONE); lck_rw_lock_shared(ripcbinfo.ipi_lock); @@ -2136,7 +2136,8 @@ icmp6_reflect(struct mbuf *m, size_t off) struct nd_ifinfo *ndi = NULL; u_int32_t oflow; struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, - IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0, + IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR | + IP6OAF_INTCOPROC_ALLOWED | IP6OAF_AWDL_UNRESTRICTED, 0, SO_TC_UNSPEC, _NET_SERVICE_TYPE_UNSPEC }; if (!(m->m_pkthdr.pkt_flags & PKTF_LOOP) && m->m_pkthdr.rcvif != NULL) { @@ -2897,7 +2898,7 @@ icmp6_ctloutput(struct socket *so, struct sockopt *sopt) } if (optlen == 0) { - /* According to RFC 3542, an installed filter can be + /* According to RFC 3542, an installed filter can be * cleared by issuing a setsockopt for ICMP6_FILTER * with a zero length. 
*/ diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c index c8fda8efd..ec32485cb 100644 --- a/bsd/netinet6/ip6_input.c +++ b/bsd/netinet6/ip6_input.c @@ -1140,7 +1140,7 @@ injectit: /* * Call IP filter */ - if (!TAILQ_EMPTY(&ipv6_filters)) { + if (!TAILQ_EMPTY(&ipv6_filters) && !IFNET_IS_INTCOPROC(inifp)) { ipf_ref(); TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) { if (seen == 0) { diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c index 973c9dcfa..8096be24f 100644 --- a/bsd/netinet6/ip6_output.c +++ b/bsd/netinet6/ip6_output.c @@ -804,7 +804,9 @@ skip_ipsec: in6_delayed_cksum_offset(m, 0, optlen, nxt0); #endif /* IPSEC */ - if (!TAILQ_EMPTY(&ipv6_filters)) { + if (!TAILQ_EMPTY(&ipv6_filters) && + !((flags & IPV6_OUTARGS) && + (ip6oa->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED))) { struct ipfilter *filter; int seen = (inject_filter_ref == NULL); int fixscope = 0; diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c index 7b9df74b4..7431b6562 100644 --- a/bsd/nfs/nfs_serv.c +++ b/bsd/nfs/nfs_serv.c @@ -85,10 +85,13 @@ #include #include #include +#include #include #include +#include + #include #include @@ -99,6 +102,11 @@ #include #include +#if CONFIG_MACF +#include +#include +#endif + #if NFSSERVER /* @@ -547,6 +555,34 @@ nfsrv_setattr( if (!error) error = nfsrv_authorize(vp, NULL, action, ctx, nxo, 0); +#if CONFIG_MACF + if (!error) { + /* chown case */ + if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) { + error = mac_vnode_check_setowner(ctx, vp, + VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1, + VATTR_IS_ACTIVE(vap, va_gid) ? 
vap->va_gid : -1); + } + /* chmod case */ + if (!error && VATTR_IS_ACTIVE(vap, va_mode)) { + error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode); + } + /* truncate case */ + if (!error && VATTR_IS_ACTIVE(vap, va_data_size)) { + /* NOTE: File has not been open for NFS case, so NOCRED for filecred */ + error = mac_vnode_check_truncate(ctx, NOCRED, vp); + } + /* set utimes case */ + if (!error && (VATTR_IS_ACTIVE(vap, va_access_time) || VATTR_IS_ACTIVE(vap, va_modify_time))) { + struct timespec current_time; + nanotime(&current_time); + + error = mac_vnode_check_setutimes(ctx, vp, + VATTR_IS_ACTIVE(vap, va_access_time) ? vap->va_access_time : current_time, + VATTR_IS_ACTIVE(vap, va_modify_time) ? vap->va_modify_time : current_time); + } + } +#endif /* set the new attributes */ if (!error) error = vnode_setattr(vp, vap, ctx); @@ -1256,6 +1292,21 @@ nfsrv_write( error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx, nxo, 1); nfsmerr_if(error); +#if CONFIG_MACF + if (!error) { + error = mac_vnode_check_open(ctx, vp, FWRITE); + if (error) { + error = EACCES; + } else { + /* XXXab: Do we need to do this?! 
*/ + error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp); + if (error) + error = EACCES; + } + } + nfsmerr_if(error); +#endif + if (len > 0) { for (mcount=0, m=nmreq->nmc_mcur; m; m = mbuf_next(m)) if (mbuf_len(m) > 0) @@ -1844,6 +1895,8 @@ nfsrv_create( ni.ni_op = OP_LINK; #endif ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + ni.ni_cnd.cn_ndp = &ni; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); if (!error) { error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); @@ -1953,6 +2006,12 @@ nfsrv_create( if (!error) error = vnode_authattr_new(dvp, vap, 0, ctx); + if (!error) { + error = vn_authorize_create(dvp, &ni.ni_cnd, vap, ctx, NULL); + if (error) + error = EACCES; + } + if (vap->va_type == VREG || vap->va_type == VSOCK) { if (!error) @@ -2052,6 +2111,14 @@ nfsrv_create( vnode_put(dvp); +#if CONFIG_MACF + if (!error && VATTR_IS_ACTIVE(vap, va_data_size)) { + /* NOTE: File has not been open for NFS case, so NOCRED for filecred */ + error = mac_vnode_check_truncate(ctx, NOCRED, vp); + if (error) + error = EACCES; + } +#endif if (!error && VATTR_IS_ACTIVE(vap, va_data_size)) { error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx, nxo, 0); @@ -2172,6 +2239,7 @@ nfsrv_mknod( ni.ni_op = OP_LINK; #endif ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + ni.ni_cnd.cn_ndp = &ni; error = nfsm_chain_get_path_namei(nmreq, len, &ni); if (!error) { error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); @@ -2249,7 +2317,11 @@ nfsrv_mknod( /* validate new-file security information */ if (!error) error = vnode_authattr_new(dvp, vap, 0, ctx); - + if (!error) { + error = vn_authorize_create(dvp, &ni.ni_cnd, vap, ctx, NULL); + if (error) + error = EACCES; + } if (error) goto out1; @@ -2401,6 +2473,7 @@ nfsrv_remove( ni.ni_op = OP_UNLINK; #endif ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + ni.ni_cnd.cn_ndp = &ni; error = nfsm_chain_get_path_namei(nmreq, len, &ni); if (!error) { error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); @@ -2436,6 +2509,12 @@ 
nfsrv_remove( else error = nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx, nxo, 0); + if (!error) { + error = vn_authorize_unlink(dvp, vp, &ni.ni_cnd, ctx, NULL); + if (error) + error = EACCES; + } + if (!error) { #if CONFIG_FSE char *path = NULL; @@ -2589,6 +2668,7 @@ retry: frompath = NULL; fromni.ni_cnd.cn_pnlen = MAXPATHLEN; fromni.ni_cnd.cn_flags |= HASBUF; + fromni.ni_cnd.cn_ndp = &fromni; error = nfsrv_namei(nd, ctx, &fromni, &fnfh, &fdirp, &fnx, &fnxo); if (error) @@ -2624,6 +2704,7 @@ retry: topath = NULL; toni.ni_cnd.cn_pnlen = MAXPATHLEN; toni.ni_cnd.cn_flags |= HASBUF; + toni.ni_cnd.cn_ndp = &toni; if (fvtype == VDIR) toni.ni_cnd.cn_flags |= WILLBEDIR; @@ -2743,6 +2824,12 @@ retry: ((error = nfsrv_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx, tnxo, 0)) != 0)) goto auth_exit; + if (!error && + ((error = vn_authorize_rename(fdvp, fvp, &fromni.ni_cnd , tdvp, tvp, &toni.ni_cnd , ctx, NULL)) != 0)) { + if (error) + error = EACCES; + goto auth_exit; + } /* XXX more checks? */ auth_exit: @@ -3193,6 +3280,13 @@ nfsrv_link( else error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0); +#if CONFIG_MACF + if (!error) { + error = mac_vnode_check_link(ctx, dvp, vp, &ni.ni_cnd); + if (error) + error = EACCES; + } +#endif if (!error) error = VNOP_LINK(vp, dvp, &ni.ni_cnd, ctx); @@ -3315,6 +3409,8 @@ nfsrv_symlink( ni.ni_op = OP_LINK; #endif ni.ni_cnd.cn_flags = LOCKPARENT; + ni.ni_flag = 0; + ni.ni_cnd.cn_ndp = &ni; error = nfsm_chain_get_path_namei(nmreq, len, &ni); if (!error) { error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); @@ -3386,6 +3482,11 @@ nfsrv_symlink( /* validate given attributes */ if (!error) error = vnode_authattr_new(dvp, vap, 0, ctx); + if (!error) { + error = vn_authorize_create(dvp, &ni.ni_cnd, vap, ctx, NULL); + if (error) + error = EACCES; + } if (!error) error = VNOP_SYMLINK(dvp, &vp, &ni.ni_cnd, vap, linkdata, ctx); @@ -3531,7 +3632,8 @@ nfsrv_mkdir( #if CONFIG_TRIGGERS ni.ni_op = OP_LINK; #endif - ni.ni_cnd.cn_flags = 
LOCKPARENT; + ni.ni_cnd.cn_flags = LOCKPARENT | WILLBEDIR; + ni.ni_cnd.cn_ndp = &ni; error = nfsm_chain_get_path_namei(nmreq, len, &ni); if (!error) { error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); @@ -3617,6 +3719,12 @@ nfsrv_mkdir( if (error) error = EPERM; + if(!error) { + error = vn_authorize_mkdir(dvp, &ni.ni_cnd, vap, ctx, NULL); + if (error) + error = EACCES; + } + if (!error) error = VNOP_MKDIR(dvp, &vp, &ni.ni_cnd, vap, ctx); @@ -3742,6 +3850,7 @@ nfsrv_rmdir( ni.ni_op = OP_UNLINK; #endif ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + ni.ni_cnd.cn_ndp = &ni; error = nfsm_chain_get_path_namei(nmreq, len, &ni); if (!error) { error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); @@ -3785,6 +3894,12 @@ nfsrv_rmdir( error = EBUSY; if (!error) error = nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx, nxo, 0); + if (!error) { + error = vn_authorize_rmdir(dvp, vp, &ni.ni_cnd, ctx, NULL); + if (error) + error = EACCES; + } + if (!error) { #if CONFIG_FSE char *path = NULL; diff --git a/bsd/security/audit/audit.h b/bsd/security/audit/audit.h index 67803a185..79136bbb7 100644 --- a/bsd/security/audit/audit.h +++ b/bsd/security/audit/audit.h @@ -267,7 +267,7 @@ void audit_session_ref(kauth_cred_t cred); void audit_session_unref(kauth_cred_t cred); void audit_session_procnew(proc_t p); void audit_session_procexit(proc_t p); -int audit_session_spawnjoin(proc_t p, ipc_port_t port); +int audit_session_spawnjoin(proc_t p, task_t task, ipc_port_t port); void audit_sdev_submit(au_id_t auid, au_asid_t asid, void *record, u_int record_len); diff --git a/bsd/security/audit/audit_session.c b/bsd/security/audit/audit_session.c index 639920f5e..7b6236eda 100644 --- a/bsd/security/audit/audit_session.c +++ b/bsd/security/audit/audit_session.c @@ -386,6 +386,8 @@ SYSCTL_QUAD(_audit_session, OID_AUTO, member_clear_sflags_mask, CTLFLAG_RW | CTL &audit_session_member_clear_sflags_mask, "Audit session flags clearable by a session member"); +extern int 
set_security_token_task_internal(proc_t p, void *task); + #define AUDIT_SESSION_DEBUG 0 #if AUDIT_SESSION_DEBUG /* @@ -1376,7 +1378,7 @@ done: } static int -audit_session_join_internal(proc_t p, ipc_port_t port, au_asid_t *new_asid) +audit_session_join_internal(proc_t p, task_t task, ipc_port_t port, au_asid_t *new_asid) { auditinfo_addr_t *new_aia_p, *old_aia_p; kauth_cred_t my_cred = NULL; @@ -1424,7 +1426,7 @@ audit_session_join_internal(proc_t p, ipc_port_t port, au_asid_t *new_asid) proc_ucred_unlock(p); /* Propagate the change from the process to the Mach task. */ - set_security_token(p); + set_security_token_task_internal(p, task); /* Decrement the process count of the former session. */ audit_dec_procount(AU_SENTRY_PTR(old_aia_p)); @@ -1450,11 +1452,11 @@ done: * ESRCH Invalid calling process/cred. */ int -audit_session_spawnjoin(proc_t p, ipc_port_t port) +audit_session_spawnjoin(proc_t p, task_t task, ipc_port_t port) { au_asid_t new_asid; - return (audit_session_join_internal(p, port, &new_asid)); + return (audit_session_join_internal(p, task, port, &new_asid)); } /* @@ -1488,7 +1490,7 @@ audit_session_join(proc_t p, struct audit_session_join_args *uap, *ret_asid = AU_DEFAUDITSID; err = EINVAL; } else - err = audit_session_join_internal(p, port, ret_asid); + err = audit_session_join_internal(p, p->task, port, ret_asid); return (err); } diff --git a/bsd/sys/codesign.h b/bsd/sys/codesign.h index ccd1c3aa7..ca225ce58 100644 --- a/bsd/sys/codesign.h +++ b/bsd/sys/codesign.h @@ -56,7 +56,8 @@ #define CS_PLATFORM_PATH 0x8000000 /* platform binary by the fact of path (osx only) */ #define CS_DEBUGGED 0x10000000 /* process is currently or has previously been debugged and allowed to run with invalid pages */ #define CS_SIGNED 0x20000000 /* process has a signature (may have gone invalid) */ - +#define CS_DEV_CODE 0x40000000 /* code is dev signed, cannot be loaded into prod signed code (will go away with rdar://problem/28322552) */ + #define CS_ENTITLEMENT_FLAGS 
(CS_GET_TASK_ALLOW | CS_INSTALLER) /* MAC flags used by F_ADDFILESIGS_* */ @@ -245,10 +246,12 @@ void csblob_entitlements_dictionary_set(struct cs_blob *csblob, void const char * csproc_get_teamid(struct proc *); const char * csvnode_get_teamid(struct vnode *, off_t); int csproc_get_platform_binary(struct proc *); +int csproc_get_prod_signed(struct proc *); const char * csfg_get_teamid(struct fileglob *); int csfg_get_path(struct fileglob *, char *, int *); int csfg_get_platform_binary(struct fileglob *); uint8_t * csfg_get_cdhash(struct fileglob *, uint64_t, size_t *); +int csfg_get_prod_signed(struct fileglob *); extern int cs_debug; diff --git a/bsd/sys/imgact.h b/bsd/sys/imgact.h index f20dfb2d4..fc23e70e9 100644 --- a/bsd/sys/imgact.h +++ b/bsd/sys/imgact.h @@ -133,5 +133,6 @@ struct image_params { #define IMGPF_DISABLE_ASLR 0x00000020 /* disable ASLR */ #define IMGPF_ALLOW_DATA_EXEC 0x00000040 /* forcibly disallow data execution */ #define IMGPF_VFORK_EXEC 0x00000080 /* vfork followed by exec */ +#define IMGPF_EXEC 0x00000100 /* exec */ #endif /* !_SYS_IMGACT */ diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index e44013e0a..42b8c92f1 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -572,6 +572,7 @@ extern void kdebug_reset(void); #define DBG_IOMDESC 6 /* Memory Descriptors */ #define DBG_IOPOWER 7 /* Power Managerment */ #define DBG_IOSERVICE 8 /* Matching etc. 
*/ +#define DBG_IOREGISTRY 9 /* Registry */ /* **** 9-32 reserved for internal IOKit usage **** */ @@ -774,6 +775,7 @@ extern void kdebug_reset(void); #define DBG_APP_APPKIT 0x0C #define DBG_APP_DFR 0x0E #define DBG_APP_SAMBA 0x80 +#define DBG_APP_EOSSUPPORT 0x81 /* Kernel Debug codes for Throttling (DBG_THROTTLE) */ #define OPEN_THROTTLE_WINDOW 0x1 diff --git a/bsd/sys/proc_internal.h b/bsd/sys/proc_internal.h index 6c00e20e0..4048a73a7 100644 --- a/bsd/sys/proc_internal.h +++ b/bsd/sys/proc_internal.h @@ -434,7 +434,8 @@ struct proc { #define P_LIST_INPGRP 0x00020000 /* process is in pgrp */ #define P_LIST_PGRPTRANS 0x00040000 /* pgrp is getting replaced */ #define P_LIST_PGRPTRWAIT 0x00080000 /* wait for pgrp replacement */ -#define P_LIST_EXITCOUNT 0x00100000 /* counted for process exit */ +#define P_LIST_EXITCOUNT 0x00100000 /* counted for process exit */ +#define P_LIST_REFWAIT 0x00200000 /* wait to take a ref */ /* local flags */ @@ -459,11 +460,9 @@ struct proc { #define P_LLIMWAIT 0x00040000 #define P_LWAITED 0x00080000 #define P_LINSIGNAL 0x00100000 -#define P_LRETURNWAIT 0x00200000 /* process is completing spawn/vfork-exec/fork */ #define P_LRAGE_VNODES 0x00400000 #define P_LREGISTER 0x00800000 /* thread start fns registered */ #define P_LVMRSRCOWNER 0x01000000 /* can handle the resource ownership of */ -#define P_LRETURNWAITER 0x02000000 /* thread is waiting on P_LRETURNWAIT being cleared */ #define P_LTERM_DECRYPTFAIL 0x04000000 /* process terminating due to key failure to decrypt */ #define P_LTERM_JETSAM 0x08000000 /* process is being jetsam'd */ #define P_JETSAM_VMPAGESHORTAGE 0x00000000 /* jetsam: lowest jetsam priority proc, killed due to vm page shortage */ @@ -713,6 +712,8 @@ extern void proc_reparentlocked(struct proc *child, struct proc * newparent, int extern proc_t proc_findinternal(int pid, int locked); extern proc_t proc_findthread(thread_t thread); extern void proc_refdrain(proc_t); +extern proc_t proc_refdrain_with_refwait(proc_t p, 
boolean_t get_ref_and_allow_wait); +extern void proc_refwake(proc_t p); extern void proc_childdrainlocked(proc_t); extern void proc_childdrainstart(proc_t); extern void proc_childdrainend(proc_t); @@ -767,10 +768,6 @@ extern lck_mtx_t * pthread_list_mlock; #endif /* PSYNCH */ struct uthread * current_uthread(void); -void proc_set_return_wait(struct proc *); -void proc_clear_return_wait(proc_t p, thread_t child_thread); -void proc_wait_to_return(void); - /* process iteration */ #define ALLPROC_FOREACH(var) \ diff --git a/bsd/sys/ucred.h b/bsd/sys/ucred.h index 7dcd85e18..dae092dac 100644 --- a/bsd/sys/ucred.h +++ b/bsd/sys/ucred.h @@ -149,6 +149,7 @@ __BEGIN_DECLS int crcmp(kauth_cred_t cr1, kauth_cred_t cr2); int suser(kauth_cred_t cred, u_short *acflag); int set_security_token(struct proc * p); +int set_security_token_task_internal(struct proc *p, void *task); void cru2x(kauth_cred_t cr, struct xucred *xcr); __END_DECLS #endif /* __APPLE_API_OBSOLETE */ diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h index 72da264c4..ee310f320 100644 --- a/bsd/sys/vnode.h +++ b/bsd/sys/vnode.h @@ -2021,8 +2021,10 @@ int vnode_getfromfd (vfs_context_t ctx, int fd, vnode_t *vpp); #ifdef BSD_KERNEL_PRIVATE /* Not in export list so can be private */ struct stat; -int vn_stat(struct vnode *vp, void * sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx); -int vn_stat_noauth(struct vnode *vp, void * sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx); +int vn_stat(struct vnode *vp, void * sb, kauth_filesec_t *xsec, int isstat64, + vfs_context_t ctx); +int vn_stat_noauth(struct vnode *vp, void * sb, kauth_filesec_t *xsec, int isstat64, + vfs_context_t ctx, struct ucred *file_cred); int vaccess(mode_t file_mode, uid_t uid, gid_t gid, mode_t acc_mode, kauth_cred_t cred); int check_mountedon(dev_t dev, enum vtype type, int *errorp); diff --git a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c index 86eabd93d..65f1560c7 100644 --- a/bsd/vfs/vfs_attrlist.c +++ 
b/bsd/vfs/vfs_attrlist.c @@ -907,17 +907,18 @@ getvolattrlist(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, } } -#if CONFIG_MACF - error = mac_mount_check_getattr(ctx, mnt, &vs); - if (error != 0) - goto out; -#endif VFS_DEBUG(ctx, vp, "ATTRLIST - calling to get %016llx with supported %016llx", vs.f_active, vs.f_supported); if ((error = vfs_getattr(mnt, &vs, ctx)) != 0) { VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: filesystem returned %d", error); goto out; } - +#if CONFIG_MACF + error = mac_mount_check_getattr(ctx, mnt, &vs); + if (error != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: MAC framework returned %d", error); + goto out; + } +#endif /* * Did we ask for something the filesystem doesn't support? */ @@ -1017,7 +1018,13 @@ getvolattrlist(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not fetch attributes from root vnode", vp); goto out; } - +#if CONFIG_MACF + error = mac_vnode_check_getattr(ctx, NOCRED, vp, &va); + if (error != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: MAC framework returned %d for root vnode", error); + goto out; + } +#endif if (VATTR_IS_ACTIVE(&va, va_encoding) && !VATTR_IS_SUPPORTED(&va, va_encoding)) { if (!return_valid || pack_invalid) @@ -2536,7 +2543,7 @@ out: static int getattrlist_internal(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, user_addr_t attributeBuffer, size_t bufferSize, uint64_t options, - enum uio_seg segflg, char* alt_name) + enum uio_seg segflg, char* alt_name, struct ucred *file_cred) { struct vnode_attr va; kauth_action_t action; @@ -2638,7 +2645,6 @@ getattrlist_internal(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, } - if (va.va_active != 0) { uint64_t va_active = va.va_active; @@ -2664,7 +2670,25 @@ getattrlist_internal(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: filesystem returned %d", error); goto out; } - +#if CONFIG_MACF + /* + * Give MAC polices a chance to reject or filter the + * attributes 
returned by the filesystem. Note that MAC + * policies are consulted *after* calling the filesystem + * because filesystems can return more attributes than + * were requested so policies wouldn't be authoritative + * is consulted beforehand. This also gives policies an + * opportunity to change the values of attributes + * retrieved. + */ + error = mac_vnode_check_getattr(ctx, file_cred, vp, &va); + if (error) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: MAC framework returned %d", error); + goto out; + } +#else + (void)file_cred; +#endif /* * If ATTR_CMN_NAME is not supported by filesystem and the @@ -2701,17 +2725,19 @@ fgetattrlist(proc_t p, struct fgetattrlist_args *uap, __unused int32_t *retval) vnode_t vp; int error; struct attrlist al; + struct fileproc *fp; ctx = vfs_context_current(); + vp = NULL; + fp = NULL; error = 0; if ((error = file_vnode(uap->fd, &vp)) != 0) return (error); - if ((error = vnode_getwithref(vp)) != 0) { - file_drop(uap->fd); - return(error); - } + if ((error = fp_lookup(p, uap->fd, &fp, 0)) != 0 || + (error = vnode_getwithref(vp)) != 0) + goto out; /* * Fetch the attribute request. @@ -2724,12 +2750,15 @@ fgetattrlist(proc_t p, struct fgetattrlist_args *uap, __unused int32_t *retval) error = getattrlist_internal(ctx, vp, &al, uap->attributeBuffer, uap->bufferSize, uap->options, (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : \ - UIO_USERSPACE32), NULL); + UIO_USERSPACE32), NULL, + fp->f_fglob->fg_cred); out: - file_drop(uap->fd); + if (fp) + fp_drop(p, uap->fd, fp, 0); if (vp) vnode_put(vp); + file_drop(uap->fd); return error; } @@ -2763,7 +2792,7 @@ getattrlistat_internal(vfs_context_t ctx, user_addr_t path, vp = nd.ni_vp; error = getattrlist_internal(ctx, vp, alp, attributeBuffer, - bufferSize, options, segflg, NULL); + bufferSize, options, segflg, NULL, NOCRED); /* Retain the namei reference until the getattrlist completes. 
*/ nameidone(&nd); @@ -3276,7 +3305,8 @@ readdirattr(vnode_t dvp, struct fd_vn_data *fvd, uio_t auio, error = getattrlist_internal(ctx, vp, &al, CAST_USER_ADDR_T(kern_attr_buf), kern_attr_buf_siz, options | FSOPT_REPORT_FULLSIZE, UIO_SYSSPACE, - CAST_DOWN_EXPLICIT(char *, name_buffer)); + CAST_DOWN_EXPLICIT(char *, name_buffer), + NOCRED); nameidone(&nd); diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 2b79b659c..2317401e6 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -5328,6 +5328,11 @@ vfs_update_vfsstat(mount_t mp, vfs_context_t ctx, __unused int eventtype) VFSATTR_WANTED(&va, f_ffree); VFSATTR_WANTED(&va, f_bsize); VFSATTR_WANTED(&va, f_fssubtype); + + if ((error = vfs_getattr(mp, &va, ctx)) != 0) { + KAUTH_DEBUG("STAT - filesystem returned error %d", error); + return(error); + } #if CONFIG_MACF if (eventtype == VFS_USER_EVENT) { error = mac_mount_check_getattr(ctx, mp, &va); @@ -5335,12 +5340,6 @@ vfs_update_vfsstat(mount_t mp, vfs_context_t ctx, __unused int eventtype) return (error); } #endif - - if ((error = vfs_getattr(mp, &va, ctx)) != 0) { - KAUTH_DEBUG("STAT - filesystem returned error %d", error); - return(error); - } - /* * Unpack into the per-mount structure. * diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index 1912d1179..27f61a448 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -1203,7 +1203,8 @@ error_out: * vnode_getattr:??? */ int -vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx) +vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, + vfs_context_t ctx, struct ucred *file_cred) { struct vnode_attr va; int error; @@ -1244,6 +1245,19 @@ vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat6 error = vnode_getattr(vp, &va, ctx); if (error) goto out; +#if CONFIG_MACF + /* + * Give MAC polices a chance to reject or filter the attributes + * returned by the filesystem. 
Note that MAC policies are consulted + * *after* calling the filesystem because filesystems can return more + * attributes than were requested so policies wouldn't be authoritative + * if consulted beforehand. This also gives policies an opportunity + * to change the values of attributes retrieved. + */ + error = mac_vnode_check_getattr(ctx, file_cred, vp, &va); + if (error) + goto out; +#endif /* * Copy from vattr table */ @@ -1388,7 +1402,7 @@ vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_con return(error); /* actual stat */ - return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx)); + return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx, NOCRED)); } diff --git a/config/MasterVersion b/config/MasterVersion index e239b6df5..4f558e748 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -16.0.0 +16.1.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file.
diff --git a/config/Private.exports b/config/Private.exports index 119402c9e..3df0d077a 100644 --- a/config/Private.exports +++ b/config/Private.exports @@ -112,9 +112,11 @@ _csblob_get_size _csfg_get_cdhash _csfg_get_path _csfg_get_platform_binary +_csfg_get_prod_signed _csfg_get_teamid _csproc_get_blob _csproc_get_platform_binary +_csproc_get_prod_signed _csproc_get_teamid _csvnode_get_blob _csvnode_get_teamid diff --git a/iokit/IOKit/IOTimeStamp.h b/iokit/IOKit/IOTimeStamp.h index ab589f0d0..1c7cf7e57 100644 --- a/iokit/IOKit/IOTimeStamp.h +++ b/iokit/IOKit/IOTimeStamp.h @@ -116,6 +116,7 @@ IOTimeStamp(uintptr_t csc, #define IODBG_MDESC(code) (KDBG_CODE(DBG_IOKIT, DBG_IOMDESC, code)) #define IODBG_POWER(code) (KDBG_CODE(DBG_IOKIT, DBG_IOPOWER, code)) #define IODBG_IOSERVICE(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSERVICE, code)) +#define IODBG_IOREGISTRY(code) (KDBG_CODE(DBG_IOKIT, DBG_IOREGISTRY, code)) /* IOKit specific codes - within each subclass */ @@ -201,5 +202,8 @@ IOTimeStamp(uintptr_t csc, #define IOSERVICE_TERM_UC_DEFER 25 /* 0x05080064 */ #define IOSERVICE_DETACH 26 /* 0x05080068 */ +/* DBG_IOKIT/DBG_IOREGISTRY codes */ +#define IOREGISTRYENTRY_NAME_STRING 1 /* 0x05090004 */ +#define IOREGISTRYENTRY_NAME 2 /* 0x05090008 */ #endif /* ! 
IOKIT_IOTIMESTAMP_H */ diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index ff6bcb6fd..2a6f8dd01 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -636,6 +636,7 @@ private: bool uuidPublished; // Pref: idle time before idle sleep + bool idleSleepEnabled; unsigned long sleepSlider; unsigned long idleSeconds; uint64_t autoWakeStart; diff --git a/iokit/Kernel/IOCPU.cpp b/iokit/Kernel/IOCPU.cpp index 01354d7cf..fac8021d5 100644 --- a/iokit/Kernel/IOCPU.cpp +++ b/iokit/Kernel/IOCPU.cpp @@ -163,7 +163,7 @@ IOCPURunPlatformHaltRestartActions(uint32_t message) extern "C" kern_return_t IOCPURunPlatformPanicActions(uint32_t message) { - if (!gActionQueues[kQueueHaltRestart].next) return (kIOReturnNotReady); + if (!gActionQueues[kQueuePanic].next) return (kIOReturnNotReady); return (iocpu_run_platform_actions(&gActionQueues[kQueuePanic], 0, 0U-1, (void *)(uintptr_t) message, NULL, NULL)); } diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c index 7c2d3931f..017d4d4f8 100644 --- a/iokit/Kernel/IOHibernateRestoreKernel.c +++ b/iokit/Kernel/IOHibernateRestoreKernel.c @@ -402,15 +402,15 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, { dst = pal_hib_map(DEST_COPY_AREA, dst); if (compressedSize != 4) WKdm_decompress_new((WK_word*) src, (WK_word*)(uintptr_t)dst, (WK_word*) &scratch[0], compressedSize); - else { - int i; - uint32_t *s, *d; - - s = src; - d = (uint32_t *)(uintptr_t)dst; + else + { + size_t i; + uint32_t s, *d; - for (i = 0; i < (int)(PAGE_SIZE / sizeof(int32_t)); i++) - *d++ = *s; + s = *src; + d = (uint32_t *)(uintptr_t)dst; + if (!s) bzero((void *) dst, PAGE_SIZE); + else for (i = 0; i < (PAGE_SIZE / sizeof(int32_t)); i++) *d++ = s; } } else diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h index 8aed5f278..630b39fb5 100644 --- a/iokit/Kernel/IOKitKernelInternal.h +++ 
b/iokit/Kernel/IOKitKernelInternal.h @@ -44,7 +44,7 @@ __BEGIN_DECLS #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) #define IOServiceTrace(csc, a, b, c, d) do { \ - if(kIOTraceIOService & gIOKitDebug) { \ + if(kIOTraceIOService & gIOKitTrace) { \ KERNEL_DEBUG_CONSTANT(IODBG_IOSERVICE(csc), a, b, c, d, 0); \ } \ } while(0) diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 6e9701b18..4ae1c904c 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -1075,6 +1075,7 @@ bool IOPMrootDomain::start( IOService * nub ) userDisabledAllSleep = false; systemBooting = true; + idleSleepEnabled = false; sleepSlider = 0; idleSleepTimerPending = false; wrangler = NULL; @@ -2065,7 +2066,7 @@ uint32_t IOPMrootDomain::getTimeToIdleSleep( void ) uint32_t minutesSinceUserInactive = 0; uint32_t sleepDelay = 0; - if (sleepSlider == 0) + if (!idleSleepEnabled) return 0xffffffff; if (userActivityTime) @@ -2777,7 +2778,7 @@ void IOPMrootDomain::systemDidNotSleep( void ) if (!wrangler) { - if (idleSeconds) + if (idleSleepEnabled) { // stay awake for at least idleSeconds startIdleSleepTimer(idleSeconds); @@ -2785,7 +2786,7 @@ void IOPMrootDomain::systemDidNotSleep( void ) } else { - if (sleepSlider && !userIsActive) + if (idleSleepEnabled && !userIsActive) { // Manually start the idle sleep timer besides waiting for // the user to become inactive. 
@@ -4342,7 +4343,6 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void ) bcopy(&gEarlySystemSleepParams, &params, sizeof(params)); params.sleepType = kIOPMSleepTypeAbortedSleep; params.ecWakeTimer = 1; - gIOHibernateMode = 0; if (standbyNixed) { resetTimers = true; @@ -5260,6 +5260,12 @@ void IOPMrootDomain::handleOurPowerChangeDone( if (lowBatteryCondition) { privateSleepSystem (kIOPMSleepReasonLowPower); } + else if ((fullWakeReason == kFullWakeReasonDisplayOn) && (!displayPowerOnRequested)) { + // Request for full wake is removed while system is waking up to full wake + DLOG("DisplayOn fullwake request is removed\n"); + handleDisplayPowerOn(); + } + } } @@ -5489,6 +5495,13 @@ void IOPMrootDomain::handleUpdatePowerClientForDisplayWrangler( evaluatePolicy( kStimulusLeaveUserActiveState ); } } + + if (newPowerState <= kWranglerPowerStateSleep) { + evaluatePolicy( kStimulusDisplayWranglerSleep ); + } + else if (newPowerState == kWranglerPowerStateMax) { + evaluatePolicy( kStimulusDisplayWranglerWake ); + } #endif } @@ -6231,12 +6244,12 @@ bool IOPMrootDomain::checkSystemCanSustainFullWake( void ) void IOPMrootDomain::adjustPowerState( bool sleepASAP ) { - DLOG("adjustPowerState ps %u, asap %d, slider %ld\n", - (uint32_t) getPowerState(), sleepASAP, sleepSlider); + DLOG("adjustPowerState ps %u, asap %d, idleSleepEnabled %d\n", + (uint32_t) getPowerState(), sleepASAP, idleSleepEnabled); ASSERT_GATED(); - if ((sleepSlider == 0) || !checkSystemSleepEnabled()) + if ((!idleSleepEnabled) || !checkSystemSleepEnabled()) { changePowerStateToPriv(ON_STATE); } @@ -6807,7 +6820,6 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) if (!wranglerAsleep) { // first transition to wrangler sleep or lower - wranglerAsleep = true; flags.bit.displaySleep = true; } break; @@ -6869,8 +6881,6 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) DLOG("idle time -> %ld secs (ena %d)\n", idleSeconds, (minutesToIdleSleep != 0)); - if (0x7fffffff == 
minutesToIdleSleep) - minutesToIdleSleep = idleSeconds; // How long to wait before sleeping the system once // the displays turns off is indicated by 'extraSleepDelay'. @@ -6880,11 +6890,15 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) else if ( minutesToIdleSleep == minutesToDisplayDim ) minutesDelta = 1; - if ((sleepSlider == 0) && (minutesToIdleSleep != 0)) - flags.bit.idleSleepEnabled = true; + if ((!idleSleepEnabled) && (minutesToIdleSleep != 0)) + idleSleepEnabled = flags.bit.idleSleepEnabled = true; - if ((sleepSlider != 0) && (minutesToIdleSleep == 0)) + if ((idleSleepEnabled) && (minutesToIdleSleep == 0)) { flags.bit.idleSleepDisabled = true; + idleSleepEnabled = false; + } + if (0x7fffffff == minutesToIdleSleep) + minutesToIdleSleep = idleSeconds; if (((minutesDelta != extraSleepDelay) || (userActivityTime != userActivityTime_prev)) && @@ -7064,7 +7078,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) DLOG("user inactive\n"); } - if (!userIsActive && sleepSlider) + if (!userIsActive && idleSleepEnabled) { startIdleSleepTimer(getTimeToIdleSleep()); } @@ -7079,10 +7093,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) if (!wrangler) { changePowerStateToPriv(ON_STATE); - if (idleSeconds) - { - startIdleSleepTimer( idleSeconds ); - } + startIdleSleepTimer( idleSeconds ); } else { @@ -7114,7 +7125,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) if (!wrangler) { changePowerStateToPriv(ON_STATE); - if (idleSeconds) + if (idleSleepEnabled) { // stay awake for at least idleSeconds startIdleSleepTimer(idleSeconds); diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index f3cfd8b0e..7dff66444 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -822,7 +822,8 @@ int PEHaltRestart(unsigned int type) } else if(type == kPEPanicRestartCPU || type == kPEPanicSync) { - IOCPURunPlatformPanicActions(type); + 
IOCPURunPlatformPanicActions(type); + PE_sync_panic_buffers(); } if (gIOPlatform) return gIOPlatform->haltRestart(type); diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp index 7af40d293..f07b42318 100644 --- a/iokit/Kernel/IORegistryEntry.cpp +++ b/iokit/Kernel/IORegistryEntry.cpp @@ -30,11 +30,14 @@ #include #include #include +#include #include #include +#include "IOKitKernelInternal.h" + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define super OSObject @@ -821,6 +824,19 @@ void IORegistryEntry::setName( const OSSymbol * name, else key = gIONameKey; + if (gIOKitTrace && reserved && reserved->fRegistryEntryID) + { + uint64_t str_id = 0; + uint64_t __unused regID = getRegistryEntryID(); + kernel_debug_string(IODBG_IOREGISTRY(IOREGISTRYENTRY_NAME_STRING), &str_id, name->getCStringNoCopy()); + KERNEL_DEBUG_CONSTANT(IODBG_IOREGISTRY(IOREGISTRYENTRY_NAME), + (uintptr_t) regID, + (uintptr_t) (regID >> 32), + (uintptr_t) str_id, + (uintptr_t) (str_id >> 32), + 0); + } + WLOCK; registryTable()->setObject( key, (OSObject *) name); UNLOCK; @@ -1668,6 +1684,7 @@ bool IORegistryEntry::attachToParent( IORegistryEntry * parent, OSArray * links; bool ret; bool needParent; + bool traceName = false; if( this == parent) return( false ); @@ -1675,7 +1692,10 @@ bool IORegistryEntry::attachToParent( IORegistryEntry * parent, WLOCK; if (!reserved->fRegistryEntryID) + { reserved->fRegistryEntryID = ++gIORegistryLastID; + traceName = (0 != gIOKitTrace); + } ret = makeLink( parent, kParentSetIndex, plane ); @@ -1686,6 +1706,19 @@ bool IORegistryEntry::attachToParent( IORegistryEntry * parent, UNLOCK; + if (traceName) + { + uint64_t str_id = 0; + uint64_t __unused regID = getRegistryEntryID(); + kernel_debug_string(IODBG_IOREGISTRY(IOREGISTRYENTRY_NAME_STRING), &str_id, getName()); + KERNEL_DEBUG_CONSTANT(IODBG_IOREGISTRY(IOREGISTRYENTRY_NAME), + (uintptr_t) regID, + (uintptr_t) (regID >> 32), + (uintptr_t) str_id, + 
(uintptr_t) (str_id >> 32), + 0); + } + PLOCK; // Mark any collections in the property list as immutable diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index 25021b3d0..e7fbb0802 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -188,11 +188,31 @@ do { \ #define kIOPMTardyAckPSCKey "IOPMTardyAckPowerStateChange" #define kPwrMgtKey "IOPowerManagement" -#define OUR_PMLog(t, a, b) do { \ - if (gIOKitDebug & kIOLogPower) \ - pwrMgt->pmPrint(t, a, b); \ - if (gIOKitTrace & kIOTracePowerMgmt) \ - pwrMgt->pmTrace(t, a, b); \ +#define OUR_PMLog(t, a, b) do { \ + if (pwrMgt) { \ + if (gIOKitDebug & kIOLogPower) \ + pwrMgt->pmPrint(t, a, b); \ + if (gIOKitTrace & kIOTracePowerMgmt) \ + pwrMgt->pmTrace(t, DBG_FUNC_NONE, a, b); \ + } \ + } while(0) + +#define OUR_PMLogFuncStart(t, a, b) do { \ + if (pwrMgt) { \ + if (gIOKitDebug & kIOLogPower) \ + pwrMgt->pmPrint(t, a, b); \ + if (gIOKitTrace & kIOTracePowerMgmt) \ + pwrMgt->pmTrace(t, DBG_FUNC_START, a, b); \ + } \ + } while(0) + +#define OUR_PMLogFuncEnd(t, a, b) do { \ + if (pwrMgt) { \ + if (gIOKitDebug & kIOLogPower) \ + pwrMgt->pmPrint(-t, a, b); \ + if (gIOKitTrace & kIOTracePowerMgmt) \ + pwrMgt->pmTrace(t, DBG_FUNC_END, a, b); \ + } \ } while(0) #define NS_TO_MS(nsec) ((int)((nsec) / 1000000ULL)) @@ -3961,13 +3981,13 @@ void IOService::driverSetPowerState( void ) if (assertPMDriverCall(&callEntry)) { - OUR_PMLog( kPMLogProgramHardware, (uintptr_t) this, powerState); + OUR_PMLogFuncStart(kPMLogProgramHardware, (uintptr_t) this, powerState); start_spindump_timer("SetState"); clock_get_uptime(&fDriverCallStartTime); result = fControllingDriver->setPowerState( powerState, this ); clock_get_uptime(&end); stop_spindump_timer(); - OUR_PMLog((UInt32) -kPMLogProgramHardware, (uintptr_t) this, (UInt32) result); + OUR_PMLogFuncEnd(kPMLogProgramHardware, (uintptr_t) this, (UInt32) result); deassertPMDriverCall(&callEntry); @@ -4043,23 +4063,23 @@ void 
IOService::driverInformPowerChange( void ) { if (fDriverCallReason == kDriverCallInformPreChange) { - OUR_PMLog(kPMLogInformDriverPreChange, (uintptr_t) this, powerState); + OUR_PMLogFuncStart(kPMLogInformDriverPreChange, (uintptr_t) this, powerState); start_spindump_timer("WillChange"); clock_get_uptime(&informee->startTime); result = driver->powerStateWillChangeTo(powerFlags, powerState, this); clock_get_uptime(&end); stop_spindump_timer(); - OUR_PMLog((UInt32)-kPMLogInformDriverPreChange, (uintptr_t) this, result); + OUR_PMLogFuncEnd(kPMLogInformDriverPreChange, (uintptr_t) this, result); } else { - OUR_PMLog(kPMLogInformDriverPostChange, (uintptr_t) this, powerState); + OUR_PMLogFuncStart(kPMLogInformDriverPostChange, (uintptr_t) this, powerState); start_spindump_timer("DidChange"); clock_get_uptime(&informee->startTime); result = driver->powerStateDidChangeTo(powerFlags, powerState, this); clock_get_uptime(&end); stop_spindump_timer(); - OUR_PMLog((UInt32)-kPMLogInformDriverPostChange, (uintptr_t) this, result); + OUR_PMLogFuncEnd(kPMLogInformDriverPostChange, (uintptr_t) this, result); } deassertPMDriverCall(&callEntry); @@ -8820,38 +8840,19 @@ void IOServicePM::pmPrint( void IOServicePM::pmTrace( uint32_t event, + uint32_t eventFunc, uintptr_t param1, uintptr_t param2 ) const { - const char * who = Name; - uint64_t regId = Owner->getRegistryEntryID(); - uintptr_t name = 0; - - static const uint32_t sStartStopBitField[] = - { 0x00000000, 0x00000040 }; // Only Program Hardware so far + uintptr_t nameAsArg = 0; - // Arcane formula from Hacker's Delight by Warren - // abs(x) = ((int) x >> 31) ^ (x + ((int) x >> 31)) - uint32_t sgnevent = ((int) event >> 31); - uint32_t absevent = sgnevent ^ (event + sgnevent); - uint32_t code = IODBG_POWER(absevent); + assert(event < KDBG_CODE_MAX); + assert((eventFunc & ~KDBG_FUNC_MASK) == 0); - uint32_t bit = 1 << (absevent & 0x1f); - if ((absevent < (sizeof(sStartStopBitField) * 8)) && - (sStartStopBitField[absevent >> 5] & 
bit)) - { - // Or in the START or END bits, Start = 1 & END = 2 - // If sgnevent == 0 then START - 0 => START - // else if sgnevent == -1 then START - -1 => END - code |= DBG_FUNC_START - sgnevent; - } - - // Copy the first characters of the name into an uintptr_t - for (uint32_t i = 0; (i < sizeof(uintptr_t) && who[i] != 0); i++) - { - ((char *) &name)[sizeof(uintptr_t) - i - 1] = who[i]; - } + // Copy the first characters of the name into an uintptr_t. + // NULL termination is not required. + strncpy((char*)&nameAsArg, Name, sizeof(nameAsArg)); - IOTimeStampConstant(code, name, (uintptr_t) regId, (uintptr_t)(OBFUSCATE(param1)), (uintptr_t)(OBFUSCATE(param2))); + IOTimeStampConstant(IODBG_POWER(event) | eventFunc, nameAsArg, (uintptr_t)Owner->getRegistryEntryID(), (uintptr_t)(OBFUSCATE(param1)), (uintptr_t)(OBFUSCATE(param2))); } diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h index 9fa90b563..f332c23ee 100644 --- a/iokit/Kernel/IOServicePMPrivate.h +++ b/iokit/Kernel/IOServicePMPrivate.h @@ -349,7 +349,7 @@ private: // PM log and trace void pmPrint( uint32_t event, uintptr_t param1, uintptr_t param2 ) const; - void pmTrace( uint32_t event, uintptr_t param1, uintptr_t param2 ) const; + void pmTrace( uint32_t event, uint32_t eventFunc, uintptr_t param1, uintptr_t param2 ) const; }; #define fOwner pwrMgt->Owner diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp index 6cd4737c5..e2b671727 100644 --- a/iokit/Kernel/IOUserClient.cpp +++ b/iokit/Kernel/IOUserClient.cpp @@ -2932,6 +2932,7 @@ kern_return_t is_io_registry_entry_get_property_bytes( } else if( (off = OSDynamicCast( OSNumber, obj ))) { offsetBytes = off->unsigned64BitValue(); len = off->numberOfBytes(); + if (len > sizeof(offsetBytes)) len = sizeof(offsetBytes); bytes = &offsetBytes; #ifdef __BIG_ENDIAN__ bytes = (const void *) diff --git a/libkdd/kcdata.h b/libkdd/kcdata.h deleted file mode 100644 index 3e1c76d31..000000000 --- a/libkdd/kcdata.h +++ 
/dev/null @@ -1,1061 +0,0 @@ -/* - * Copyright (c) 2015 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - - -/* - * - * THE KCDATA MANIFESTO - * - * Kcdata is a self-describing data serialization format. It is meant to get - * nested data structures out of xnu with minimum fuss, but also for that data - * to be easy to parse. It is also meant to allow us to add new fields and - * evolve the data format without breaking old parsers. - * - * Kcdata is a permanent data format suitable for long-term storage including - * in files. It is very important that we continue to be able to parse old - * versions of kcdata-based formats. To this end, there are several - * invariants you MUST MAINTAIN if you alter this file. 
- * - * * None of the magic numbers should ever be a byteswap of themselves or - * of any of the other magic numbers. - * - * * Never remove any type. - * - * * All kcdata structs must be packed, and must exclusively use fixed-size - * types. - * - * * Never change the definition of any type, except to add new fields to - * the end. - * - * * If you do add new fields to the end of a type, do not actually change - * the definition of the old structure. Instead, define a new structure - * with the new fields. See thread_snapshot_v3 as an example. This - * provides source compatibility for old readers, and also documents where - * the potential size cutoffs are. - * - * * If you change libkdd, or kcdata.py run the unit tests under libkdd. - * - * * If you add a type or extend an existing one, add a sample test to - * libkdd/tests so future changes to libkdd will always parse your struct - * correctly. - * - * For example to add a field to this: - * - * struct foobar { - * uint32_t baz; - * uint32_t quux; - * } __attribute__ ((packed)); - * - * Make it look like this: - * - * struct foobar { - * uint32_t baz; - * uint32_t quux; - * ///////// end version 1 of foobar. sizeof(struct foobar) was 8 //////// - * uint32_t frozzle; - * } __attribute__ ((packed)); - * - * If you are parsing kcdata formats, you MUST - * - * * Check the length field of each struct, including array elements. If the - * struct is longer than you expect, you must ignore the extra data. - * - * * Ignore any data types you do not understand. - * - * Additionally, we want to be as forward compatible as we can. Meaning old - * tools should still be able to use new data whenever possible. To this end, - * you should: - * - * * Try not to add new versions of types that supplant old ones. Instead - * extend the length of existing types or add supplemental types. - * - * * Try not to remove information from existing kcdata formats, unless - * removal was explicitly asked for. 
For example it is fine to add a - * stackshot flag to remove unwanted information, but you should not - * remove it from the default stackshot if the new flag is absent. - * - * * (TBD) If you do break old readers by removing information or - * supplanting old structs, then increase the major version number. - * - * - * - * The following is a description of the kcdata format. - * - * - * The format for data is setup in a generic format as follows - * - * Layout of data structure: - * - * | 8 - bytes | - * | type = MAGIC | LENGTH | - * | 0 | - * | type | size | - * | flags | - * | data | - * |___________data____________| - * | type | size | - * | flags | - * |___________data____________| - * | type = END | size=0 | - * | 0 | - * - * - * The type field describes what kind of data is passed. For example type = TASK_CRASHINFO_UUID means the following data is a uuid. - * These types need to be defined in task_corpses.h for easy consumption by userspace inspection tools. - * - * Some range of types is reserved for special types like ints, longs etc. A cool new functionality made possible with this - * extensible data format is that kernel can decide to put more information as required without requiring user space tools to - * re-compile to be compatible. The case of rusage struct versions could be introduced without breaking existing tools. - * - * Feature description: Generic data with description - * ------------------- - * Further more generic data with description is very much possible now. For example - * - * - kcdata_add_uint64_with_description(cdatainfo, 0x700, "NUM MACH PORTS"); - * - and more functions that allow adding description. - * The userspace tools can then look at the description and print the data even if they are not compiled with knowledge of the field apriori. - * - * Example data: - * 0000 57 f1 ad de 00 00 00 00 00 00 00 00 00 00 00 00 W............... - * 0010 01 00 00 00 00 00 00 00 30 00 00 00 00 00 00 00 ........0....... 
- * 0020 50 49 44 00 00 00 00 00 00 00 00 00 00 00 00 00 PID............. - * 0030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ - * 0040 9c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ - * 0050 01 00 00 00 00 00 00 00 30 00 00 00 00 00 00 00 ........0....... - * 0060 50 41 52 45 4e 54 20 50 49 44 00 00 00 00 00 00 PARENT PID...... - * 0070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ - * 0080 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ - * 0090 ed 58 91 f1 - * - * Feature description: Container markers for compound data - * ------------------ - * If a given kernel data type is complex and requires adding multiple optional fields inside a container - * object for a consumer to understand arbitrary data, we package it using container markers. - * - * For example, the stackshot code gathers information and describes the state of a given task with respect - * to many subsystems. It includes data such as io stats, vm counters, process names/flags and syscall counts. - * - * kcdata_add_container_marker(kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN, STACKSHOT_KCCONTAINER_TASK, task_uniqueid); - * // add multiple data, or add__with_description()s here - * - * kcdata_add_container_marker(kcdata_p, KCDATA_TYPE_CONTAINER_END, STACKSHOT_KCCONTAINER_TASK, task_uniqueid); - * - * Feature description: Custom Data formats on demand - * -------------------- - * With the self describing nature of format, the kernel provider can describe a data type (uniquely identified by a number) and use - * it in the buffer for sending data. The consumer can parse the type information and have knowledge of describing incoming data. - * Following is an example of how we can describe a kernel specific struct sample_disk_io_stats in buffer. 
- * - * struct sample_disk_io_stats { - * uint64_t disk_reads_count; - * uint64_t disk_reads_size; - * uint64_t io_priority_count[4]; - * uint64_t io_priority_size; - * } __attribute__ ((packed)); - * - * - * struct kcdata_subtype_descriptor disk_io_stats_def[] = { - * {KCS_SUBTYPE_FLAGS_NONE, KC_ST_UINT64, 0 * sizeof(uint64_t), sizeof(uint64_t), "disk_reads_count"}, - * {KCS_SUBTYPE_FLAGS_NONE, KC_ST_UINT64, 1 * sizeof(uint64_t), sizeof(uint64_t), "disk_reads_size"}, - * {KCS_SUBTYPE_FLAGS_ARRAY, KC_ST_UINT64, 2 * sizeof(uint64_t), KCS_SUBTYPE_PACK_SIZE(4, sizeof(uint64_t)), "io_priority_count"}, - * {KCS_SUBTYPE_FLAGS_ARRAY, KC_ST_UINT64, (2 + 4) * sizeof(uint64_t), sizeof(uint64_t), "io_priority_size"}, - * }; - * - * Now you can add this custom type definition into the buffer as - * kcdata_add_type_definition(kcdata_p, KCTYPE_SAMPLE_DISK_IO_STATS, "sample_disk_io_stats", - * &disk_io_stats_def[0], sizeof(disk_io_stats_def)/sizeof(struct kcdata_subtype_descriptor)); - * - */ - - -#ifndef _KCDATA_H_ -#define _KCDATA_H_ - -#include -#include -#include - -#define KCDATA_DESC_MAXLEN 32 /* including NULL byte at end */ - -#define KCDATA_FLAGS_STRUCT_PADDING_MASK 0xf -#define KCDATA_FLAGS_STRUCT_HAS_PADDING 0x80 - -/* - * kcdata aligns elements to 16 byte boundaries. - */ -#define KCDATA_ALIGNMENT_SIZE 0x10 - -struct kcdata_item { - uint32_t type; - uint32_t size; /* len(data) */ - /* flags. - * - * For structures: - * padding = flags & 0xf - * has_padding = (flags & 0x80) >> 7 - * - * has_padding is needed to disambiguate cases such as - * thread_snapshot_v2 and thread_snapshot_v3. Their - * respective sizes are 0x68 and 0x70, and thread_snapshot_v2 - * was emmitted by old kernels *before* we started recording - * padding. Since legacy thread_snapsht_v2 and modern - * thread_snapshot_v3 will both record 0 for the padding - * flags, we need some other bit which will be nonzero in the - * flags to disambiguate. 
- * - * This is why we hardcode a special case for - * STACKSHOT_KCTYPE_THREAD_SNAPSHOT into the iterator - * functions below. There is only a finite number of such - * hardcodings which will ever be needed. They can occur - * when: - * - * * We have a legacy structure that predates padding flags - * - * * which we want to extend without changing the kcdata type - * - * * by only so many bytes as would fit in the space that - * was previously unused padding. - * - * For containers: - * container_id = flags - * - * For arrays: - * element_count = flags & UINT32_MAX - * element_type = (flags >> 32) & UINT32_MAX - */ - uint64_t flags; - char data[]; /* must be at the end */ -}; - -typedef struct kcdata_item * kcdata_item_t; - -enum KCDATA_SUBTYPE_TYPES { KC_ST_CHAR = 1, KC_ST_INT8, KC_ST_UINT8, KC_ST_INT16, KC_ST_UINT16, KC_ST_INT32, KC_ST_UINT32, KC_ST_INT64, KC_ST_UINT64 }; -typedef enum KCDATA_SUBTYPE_TYPES kctype_subtype_t; - -/* - * A subtype description structure that defines - * how a compound data is laid out in memory. This - * provides on the fly definition of types and consumption - * by the parser. - */ -struct kcdata_subtype_descriptor { - uint8_t kcs_flags; -#define KCS_SUBTYPE_FLAGS_NONE 0x0 -#define KCS_SUBTYPE_FLAGS_ARRAY 0x1 -/* Force struct type even if only one element. - * - * Normally a kcdata_type_definition is treated as a structure if it has - * more than one subtype descriptor. Otherwise it is treated as a simple - * type. For example libkdd will represent a simple integer 42 as simply - * 42, but it will represent a structure containing an integer 42 as - * {"field_name": 42}.. - * - * If a kcdata_type_definition has only single subtype, then it will be - * treated as a structure iff KCS_SUBTYPE_FLAGS_STRUCT is set. If it has - * multiple subtypes, it will always be treated as a structure. - * - * KCS_SUBTYPE_FLAGS_MERGE has the opposite effect. 
If this flag is used then - * even if there are multiple elements, they will all be treated as individual - * properties of the parent dictionary. - */ -#define KCS_SUBTYPE_FLAGS_STRUCT 0x2 /* force struct type even if only one element */ -#define KCS_SUBTYPE_FLAGS_MERGE 0x4 /* treat as multiple elements of parents instead of struct */ - uint8_t kcs_elem_type; /* restricted to kctype_subtype_t */ - uint16_t kcs_elem_offset; /* offset in struct where data is found */ - uint32_t kcs_elem_size; /* size of element (or) packed state for array type */ - char kcs_name[KCDATA_DESC_MAXLEN]; /* max 31 bytes for name of field */ -}; - -typedef struct kcdata_subtype_descriptor * kcdata_subtype_descriptor_t; - -/* - * In case of array of basic c types in kctype_subtype_t, - * size is packed in lower 16 bits and - * count is packed in upper 16 bits of kcs_elem_size field. - */ -#define KCS_SUBTYPE_PACK_SIZE(e_count, e_size) (((e_count)&0xffffu) << 16 | ((e_size)&0xffffu)) - -static inline uint32_t -kcs_get_elem_size(kcdata_subtype_descriptor_t d) -{ - if (d->kcs_flags & KCS_SUBTYPE_FLAGS_ARRAY) { - /* size is composed as ((count &0xffff)<<16 | (elem_size & 0xffff)) */ - return (uint32_t)((d->kcs_elem_size & 0xffff) * ((d->kcs_elem_size & 0xffff0000)>>16)); - } - return d->kcs_elem_size; -} - -static inline uint32_t -kcs_get_elem_count(kcdata_subtype_descriptor_t d) -{ - if (d->kcs_flags & KCS_SUBTYPE_FLAGS_ARRAY) - return (d->kcs_elem_size >> 16) & 0xffff; - return 1; -} - -static inline int -kcs_set_elem_size(kcdata_subtype_descriptor_t d, uint32_t size, uint32_t count) -{ - if (count > 1) { - /* means we are setting up an array */ - if (size > 0xffff || count > 0xffff) - return -1; //invalid argument - d->kcs_elem_size = ((count & 0xffff) << 16 | (size & 0xffff)); - } - else - { - d->kcs_elem_size = size; - } - return 0; -} - -struct kcdata_type_definition { - uint32_t kct_type_identifier; - uint32_t kct_num_elements; - char kct_name[KCDATA_DESC_MAXLEN]; - struct 
kcdata_subtype_descriptor kct_elements[]; -}; - - -/* chunk type definitions. 0 - 0x7ff are reserved and defined here - * NOTE: Please update kcdata/libkdd/kcdtypes.c if you make any changes - * in STACKSHOT_KCTYPE_* types. - */ - -/* - * Types with description value. - * these will have KCDATA_DESC_MAXLEN-1 length string description - * and rest of kcdata_iter_size() - KCDATA_DESC_MAXLEN bytes as data - */ -#define KCDATA_TYPE_INVALID 0x0u -#define KCDATA_TYPE_STRING_DESC 0x1u -#define KCDATA_TYPE_UINT32_DESC 0x2u -#define KCDATA_TYPE_UINT64_DESC 0x3u -#define KCDATA_TYPE_INT32_DESC 0x4u -#define KCDATA_TYPE_INT64_DESC 0x5u -#define KCDATA_TYPE_BINDATA_DESC 0x6u - -/* - * Compound type definitions - */ -#define KCDATA_TYPE_ARRAY 0x11u /* Array of data OBSOLETE DONT USE THIS*/ -#define KCDATA_TYPE_TYPEDEFINTION 0x12u /* Meta type that describes a type on the fly. */ -#define KCDATA_TYPE_CONTAINER_BEGIN \ - 0x13u /* Container type which has corresponding CONTAINER_END header. \ - * KCDATA_TYPE_CONTAINER_BEGIN has type in the data segment. \ - * Both headers have (uint64_t) ID for matching up nested data. 
\ - */ -#define KCDATA_TYPE_CONTAINER_END 0x14u - -#define KCDATA_TYPE_ARRAY_PAD0 0x20u /* Array of data with 0 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD1 0x21u /* Array of data with 1 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD2 0x22u /* Array of data with 2 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD3 0x23u /* Array of data with 3 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD4 0x24u /* Array of data with 4 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD5 0x25u /* Array of data with 5 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD6 0x26u /* Array of data with 6 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD7 0x27u /* Array of data with 7 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD8 0x28u /* Array of data with 8 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PAD9 0x29u /* Array of data with 9 byte of padding*/ -#define KCDATA_TYPE_ARRAY_PADa 0x2au /* Array of data with a byte of padding*/ -#define KCDATA_TYPE_ARRAY_PADb 0x2bu /* Array of data with b byte of padding*/ -#define KCDATA_TYPE_ARRAY_PADc 0x2cu /* Array of data with c byte of padding*/ -#define KCDATA_TYPE_ARRAY_PADd 0x2du /* Array of data with d byte of padding*/ -#define KCDATA_TYPE_ARRAY_PADe 0x2eu /* Array of data with e byte of padding*/ -#define KCDATA_TYPE_ARRAY_PADf 0x2fu /* Array of data with f byte of padding*/ - -/* - * Generic data types that are most commonly used - */ -#define KCDATA_TYPE_LIBRARY_LOADINFO 0x30u /* struct dyld_uuid_info_32 */ -#define KCDATA_TYPE_LIBRARY_LOADINFO64 0x31u /* struct dyld_uuid_info_64 */ -#define KCDATA_TYPE_TIMEBASE 0x32u /* struct mach_timebase_info */ -#define KCDATA_TYPE_MACH_ABSOLUTE_TIME 0x33u /* uint64_t */ -#define KCDATA_TYPE_TIMEVAL 0x34u /* struct timeval64 */ -#define KCDATA_TYPE_USECS_SINCE_EPOCH 0x35u /* time in usecs uint64_t */ -#define KCDATA_TYPE_PID 0x36u /* int32_t */ -#define KCDATA_TYPE_PROCNAME 0x37u /* char * */ -#define KCDATA_TYPE_NESTED_KCDATA 0x38u /* nested kcdata buffer */ - -#define 
KCDATA_TYPE_BUFFER_END 0xF19158EDu - -/* MAGIC numbers defined for each class of chunked data - * - * To future-proof against big-endian arches, make sure none of these magic - * numbers are byteswaps of each other - */ - -#define KCDATA_BUFFER_BEGIN_CRASHINFO 0xDEADF157u /* owner: corpses/task_corpse.h */ - /* type-range: 0x800 - 0x8ff */ -#define KCDATA_BUFFER_BEGIN_STACKSHOT 0x59a25807u /* owner: sys/stackshot.h */ - /* type-range: 0x900 - 0x93f */ -#define KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT 0xDE17A59Au /* owner: sys/stackshot.h */ - /* type-range: 0x940 - 0x9ff */ -#define KCDATA_BUFFER_BEGIN_OS_REASON 0x53A20900u /* owner: sys/reason.h */ - /* type-range: 0x1000-0x103f */ -#define KCDATA_BUFFER_BEGIN_XNUPOST_CONFIG 0x1e21c09fu /* owner: osfmk/tests/kernel_tests.c */ - /* type-range: 0x1040-0x105f */ - -/* next type range number available 0x1060 */ -/**************** definitions for XNUPOST *********************/ -#define XNUPOST_KCTYPE_TESTCONFIG 0x1040 - -/**************** definitions for stackshot *********************/ - -/* This value must always match IO_NUM_PRIORITIES defined in thread_info.h */ -#define STACKSHOT_IO_NUM_PRIORITIES 4 -/* This value must always match MAXTHREADNAMESIZE used in bsd */ -#define STACKSHOT_MAX_THREAD_NAME_SIZE 64 - -/* - * NOTE: Please update kcdata/libkdd/kcdtypes.c if you make any changes - * in STACKSHOT_KCTYPE_* types. 
- */ -#define STACKSHOT_KCTYPE_IOSTATS 0x901u /* io_stats_snapshot */ -#define STACKSHOT_KCTYPE_GLOBAL_MEM_STATS 0x902u /* struct mem_and_io_snapshot */ -#define STACKSHOT_KCCONTAINER_TASK 0x903u -#define STACKSHOT_KCCONTAINER_THREAD 0x904u -#define STACKSHOT_KCTYPE_TASK_SNAPSHOT 0x905u /* task_snapshot_v2 */ -#define STACKSHOT_KCTYPE_THREAD_SNAPSHOT 0x906u /* thread_snapshot_v2, thread_snapshot_v3 */ -#define STACKSHOT_KCTYPE_DONATING_PIDS 0x907u /* int[] */ -#define STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO 0x908u /* same as KCDATA_TYPE_LIBRARY_LOADINFO64 */ -#define STACKSHOT_KCTYPE_THREAD_NAME 0x909u /* char[] */ -#define STACKSHOT_KCTYPE_KERN_STACKFRAME 0x90Au /* struct stack_snapshot_frame32 */ -#define STACKSHOT_KCTYPE_KERN_STACKFRAME64 0x90Bu /* struct stack_snapshot_frame64 */ -#define STACKSHOT_KCTYPE_USER_STACKFRAME 0x90Cu /* struct stack_snapshot_frame32 */ -#define STACKSHOT_KCTYPE_USER_STACKFRAME64 0x90Du /* struct stack_snapshot_frame64 */ -#define STACKSHOT_KCTYPE_BOOTARGS 0x90Eu /* boot args string */ -#define STACKSHOT_KCTYPE_OSVERSION 0x90Fu /* os version string */ -#define STACKSHOT_KCTYPE_KERN_PAGE_SIZE 0x910u /* kernel page size in uint32_t */ -#define STACKSHOT_KCTYPE_JETSAM_LEVEL 0x911u /* jetsam level in uint32_t */ -#define STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP 0x912u /* timestamp used for the delta stackshot */ - -#define STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT 0x940u /* task_delta_snapshot_v2 */ -#define STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT 0x941u /* thread_delta_snapshot_v2 */ - -#define STACKSHOT_KCTYPE_KERN_STACKLR 0x913u /* uint32_t */ -#define STACKSHOT_KCTYPE_KERN_STACKLR64 0x914u /* uint64_t */ -#define STACKSHOT_KCTYPE_USER_STACKLR 0x915u /* uint32_t */ -#define STACKSHOT_KCTYPE_USER_STACKLR64 0x916u /* uint64_t */ -#define STACKSHOT_KCTYPE_NONRUNNABLE_TIDS 0x917u /* uint64_t */ -#define STACKSHOT_KCTYPE_NONRUNNABLE_TASKS 0x918u /* uint64_t */ -#define STACKSHOT_KCTYPE_CPU_TIMES 0x919u /* struct stackshot_cpu_times */ -#define 
STACKSHOT_KCTYPE_STACKSHOT_DURATION 0x91au /* struct stackshot_duration */ -#define STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS 0x91bu /* struct stackshot_fault_stats */ -#define STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO 0x91cu /* kernelcache UUID -- same as KCDATA_TYPE_LIBRARY_LOADINFO64 */ - -struct stack_snapshot_frame32 { - uint32_t lr; - uint32_t sp; -}; - -struct stack_snapshot_frame64 { - uint64_t lr; - uint64_t sp; -}; - -struct dyld_uuid_info_32 { - uint32_t imageLoadAddress; /* base address image is mapped at */ - uuid_t imageUUID; -}; - -struct dyld_uuid_info_64 { - uint64_t imageLoadAddress; /* XXX image slide */ - uuid_t imageUUID; -}; - -struct dyld_uuid_info_64_v2 { - uint64_t imageLoadAddress; /* XXX image slide */ - uuid_t imageUUID; - /* end of version 1 of dyld_uuid_info_64. sizeof v1 was 24 */ - uint64_t imageSlidBaseAddress; /* slid base address of image */ -}; - -struct user32_dyld_uuid_info { - uint32_t imageLoadAddress; /* base address image is mapped into */ - uuid_t imageUUID; /* UUID of image */ -}; - -struct user64_dyld_uuid_info { - uint64_t imageLoadAddress; /* base address image is mapped into */ - uuid_t imageUUID; /* UUID of image */ -}; - -enum task_snapshot_flags { - kTaskRsrcFlagged = 0x4, // In the EXC_RESOURCE danger zone? 
- kTerminatedSnapshot = 0x8, - kPidSuspended = 0x10, // true for suspended task - kFrozen = 0x20, // true for hibernated task (along with pidsuspended) - kTaskDarwinBG = 0x40, - kTaskExtDarwinBG = 0x80, - kTaskVisVisible = 0x100, - kTaskVisNonvisible = 0x200, - kTaskIsForeground = 0x400, - kTaskIsBoosted = 0x800, - kTaskIsSuppressed = 0x1000, - kTaskIsTimerThrottled = 0x2000, /* deprecated */ - kTaskIsImpDonor = 0x4000, - kTaskIsLiveImpDonor = 0x8000, - kTaskIsDirty = 0x10000, - kTaskWqExceededConstrainedThreadLimit = 0x20000, - kTaskWqExceededTotalThreadLimit = 0x40000, - kTaskWqFlagsAvailable = 0x80000, - kTaskUUIDInfoFaultedIn = 0x100000, /* successfully faulted in some UUID info */ - kTaskUUIDInfoMissing = 0x200000, /* some UUID info was paged out */ - kTaskUUIDInfoTriedFault = 0x400000, /* tried to fault in UUID info */ - kTaskSharedRegionInfoUnavailable = 0x800000, /* shared region info unavailable */ -}; - -enum thread_snapshot_flags { - kHasDispatchSerial = 0x4, - kStacksPCOnly = 0x8, /* Stack traces have no frame pointers. 
*/ - kThreadDarwinBG = 0x10, /* Thread is darwinbg */ - kThreadIOPassive = 0x20, /* Thread uses passive IO */ - kThreadSuspended = 0x40, /* Thread is suspended */ - kThreadTruncatedBT = 0x80, /* Unmapped pages caused truncated backtrace */ - kGlobalForcedIdle = 0x100, /* Thread performs global forced idle */ - kThreadFaultedBT = 0x200, /* Some thread stack pages were faulted in as part of BT */ - kThreadTriedFaultBT = 0x400, /* We tried to fault in thread stack pages as part of BT */ - kThreadOnCore = 0x800, /* Thread was on-core when we entered debugger context */ - kThreadIdleWorker = 0x1000, /* Thread is an idle libpthread worker thread */ -}; - -struct mem_and_io_snapshot { - uint32_t snapshot_magic; - uint32_t free_pages; - uint32_t active_pages; - uint32_t inactive_pages; - uint32_t purgeable_pages; - uint32_t wired_pages; - uint32_t speculative_pages; - uint32_t throttled_pages; - uint32_t filebacked_pages; - uint32_t compressions; - uint32_t decompressions; - uint32_t compressor_size; - int32_t busy_buffer_count; - uint32_t pages_wanted; - uint32_t pages_reclaimed; - uint8_t pages_wanted_reclaimed_valid; // did mach_vm_pressure_monitor succeed? 
-} __attribute__((packed)); - -/* SS_TH_* macros are for ths_state */ -#define SS_TH_WAIT 0x01 /* queued for waiting */ -#define SS_TH_SUSP 0x02 /* stopped or requested to stop */ -#define SS_TH_RUN 0x04 /* running or on runq */ -#define SS_TH_UNINT 0x08 /* waiting uninteruptibly */ -#define SS_TH_TERMINATE 0x10 /* halted at termination */ -#define SS_TH_TERMINATE2 0x20 /* added to termination queue */ -#define SS_TH_IDLE 0x80 /* idling processor */ - -struct thread_snapshot_v2 { - uint64_t ths_thread_id; - uint64_t ths_wait_event; - uint64_t ths_continuation; - uint64_t ths_total_syscalls; - uint64_t ths_voucher_identifier; - uint64_t ths_dqserialnum; - uint64_t ths_user_time; - uint64_t ths_sys_time; - uint64_t ths_ss_flags; - uint64_t ths_last_run_time; - uint64_t ths_last_made_runnable_time; - uint32_t ths_state; - uint32_t ths_sched_flags; - int16_t ths_base_priority; - int16_t ths_sched_priority; - uint8_t ths_eqos; - uint8_t ths_rqos; - uint8_t ths_rqos_override; - uint8_t ths_io_tier; -} __attribute__((packed)); - -struct thread_snapshot_v3 { - uint64_t ths_thread_id; - uint64_t ths_wait_event; - uint64_t ths_continuation; - uint64_t ths_total_syscalls; - uint64_t ths_voucher_identifier; - uint64_t ths_dqserialnum; - uint64_t ths_user_time; - uint64_t ths_sys_time; - uint64_t ths_ss_flags; - uint64_t ths_last_run_time; - uint64_t ths_last_made_runnable_time; - uint32_t ths_state; - uint32_t ths_sched_flags; - int16_t ths_base_priority; - int16_t ths_sched_priority; - uint8_t ths_eqos; - uint8_t ths_rqos; - uint8_t ths_rqos_override; - uint8_t ths_io_tier; - uint64_t ths_thread_t; -} __attribute__((packed)); - -struct thread_delta_snapshot_v2 { - uint64_t tds_thread_id; - uint64_t tds_voucher_identifier; - uint64_t tds_ss_flags; - uint64_t tds_last_made_runnable_time; - uint32_t tds_state; - uint32_t tds_sched_flags; - int16_t tds_base_priority; - int16_t tds_sched_priority; - uint8_t tds_eqos; - uint8_t tds_rqos; - uint8_t tds_rqos_override; - uint8_t 
tds_io_tier; -} __attribute__ ((packed)); - -struct io_stats_snapshot -{ - /* - * I/O Statistics - * XXX: These fields must be together. - */ - uint64_t ss_disk_reads_count; - uint64_t ss_disk_reads_size; - uint64_t ss_disk_writes_count; - uint64_t ss_disk_writes_size; - uint64_t ss_io_priority_count[STACKSHOT_IO_NUM_PRIORITIES]; - uint64_t ss_io_priority_size[STACKSHOT_IO_NUM_PRIORITIES]; - uint64_t ss_paging_count; - uint64_t ss_paging_size; - uint64_t ss_non_paging_count; - uint64_t ss_non_paging_size; - uint64_t ss_data_count; - uint64_t ss_data_size; - uint64_t ss_metadata_count; - uint64_t ss_metadata_size; - /* XXX: I/O Statistics end */ - -} __attribute__ ((packed)); - -struct task_snapshot_v2 { - uint64_t ts_unique_pid; - uint64_t ts_ss_flags; - uint64_t ts_user_time_in_terminated_threads; - uint64_t ts_system_time_in_terminated_threads; - uint64_t ts_p_start_sec; - uint64_t ts_task_size; - uint64_t ts_max_resident_size; - uint32_t ts_suspend_count; - uint32_t ts_faults; - uint32_t ts_pageins; - uint32_t ts_cow_faults; - uint32_t ts_was_throttled; - uint32_t ts_did_throttle; - uint32_t ts_latency_qos; - int32_t ts_pid; - char ts_p_comm[32]; -} __attribute__ ((packed)); - -struct task_delta_snapshot_v2 { - uint64_t tds_unique_pid; - uint64_t tds_ss_flags; - uint64_t tds_user_time_in_terminated_threads; - uint64_t tds_system_time_in_terminated_threads; - uint64_t tds_task_size; - uint64_t tds_max_resident_size; - uint32_t tds_suspend_count; - uint32_t tds_faults; - uint32_t tds_pageins; - uint32_t tds_cow_faults; - uint32_t tds_was_throttled; - uint32_t tds_did_throttle; - uint32_t tds_latency_qos; -} __attribute__ ((packed)); - -struct stackshot_cpu_times { - uint64_t user_usec; - uint64_t system_usec; -} __attribute__((packed)); - -struct stackshot_duration { - uint64_t stackshot_duration; - uint64_t stackshot_duration_outer; -} __attribute__((packed)); - -struct stackshot_fault_stats { - uint32_t sfs_pages_faulted_in; /* number of pages faulted in using 
KDP fault path */ - uint64_t sfs_time_spent_faulting; /* MATUs spent faulting */ - uint64_t sfs_system_max_fault_time; /* MATUs fault time limit per stackshot */ - uint8_t sfs_stopped_faulting; /* we stopped decompressing because we hit the limit */ -} __attribute__((packed)); - -/**************** definitions for crashinfo *********************/ - -/* - * NOTE: Please update kcdata/libkdd/kcdtypes.c if you make any changes - * in TASK_CRASHINFO_* types. - */ - -/* FIXME some of these types aren't clean (fixed width, packed, and defined *here*) */ - -#define TASK_CRASHINFO_BEGIN KCDATA_BUFFER_BEGIN_CRASHINFO -#define TASK_CRASHINFO_STRING_DESC KCDATA_TYPE_STRING_DESC -#define TASK_CRASHINFO_UINT32_DESC KCDATA_TYPE_UINT32_DESC -#define TASK_CRASHINFO_UINT64_DESC KCDATA_TYPE_UINT64_DESC - -#define TASK_CRASHINFO_EXTMODINFO 0x801 -#define TASK_CRASHINFO_BSDINFOWITHUNIQID 0x802 /* struct proc_uniqidentifierinfo */ -#define TASK_CRASHINFO_TASKDYLD_INFO 0x803 -#define TASK_CRASHINFO_UUID 0x804 -#define TASK_CRASHINFO_PID 0x805 -#define TASK_CRASHINFO_PPID 0x806 -#define TASK_CRASHINFO_RUSAGE 0x807 /* struct rusage DEPRECATED do not use. 
- This struct has longs in it */ -#define TASK_CRASHINFO_RUSAGE_INFO 0x808 /* struct rusage_info_v3 from resource.h */ -#define TASK_CRASHINFO_PROC_NAME 0x809 /* char * */ -#define TASK_CRASHINFO_PROC_STARTTIME 0x80B /* struct timeval64 */ -#define TASK_CRASHINFO_USERSTACK 0x80C /* uint64_t */ -#define TASK_CRASHINFO_ARGSLEN 0x80D -#define TASK_CRASHINFO_EXCEPTION_CODES 0x80E /* mach_exception_data_t */ -#define TASK_CRASHINFO_PROC_PATH 0x80F /* string of len MAXPATHLEN */ -#define TASK_CRASHINFO_PROC_CSFLAGS 0x810 /* uint32_t */ -#define TASK_CRASHINFO_PROC_STATUS 0x811 /* char */ -#define TASK_CRASHINFO_UID 0x812 /* uid_t */ -#define TASK_CRASHINFO_GID 0x813 /* gid_t */ -#define TASK_CRASHINFO_PROC_ARGC 0x814 /* int */ -#define TASK_CRASHINFO_PROC_FLAGS 0x815 /* unsigned int */ -#define TASK_CRASHINFO_CPUTYPE 0x816 /* cpu_type_t */ -#define TASK_CRASHINFO_WORKQUEUEINFO 0x817 /* struct proc_workqueueinfo */ -#define TASK_CRASHINFO_RESPONSIBLE_PID 0x818 /* pid_t */ -#define TASK_CRASHINFO_DIRTY_FLAGS 0x819 /* int */ -#define TASK_CRASHINFO_CRASHED_THREADID 0x81A /* uint64_t */ -#define TASK_CRASHINFO_COALITION_ID 0x81B /* uint64_t */ -#define TASK_CRASHINFO_UDATA_PTRS 0x81C /* uint64_t */ -#define TASK_CRASHINFO_MEMORY_LIMIT 0x81D /* uint64_t */ - -#define TASK_CRASHINFO_END KCDATA_TYPE_BUFFER_END - -/**************** definitions for os reasons *********************/ - -#define EXIT_REASON_SNAPSHOT 0x1001 -#define EXIT_REASON_USER_DESC 0x1002 /* string description of reason */ -#define EXIT_REASON_USER_PAYLOAD 0x1003 /* user payload data */ -#define EXIT_REASON_CODESIGNING_INFO 0x1004 - -struct exit_reason_snapshot { - uint32_t ers_namespace; - uint64_t ers_code; - /* end of version 1 of exit_reason_snapshot. 
sizeof v1 was 12 */ - uint64_t ers_flags; -} __attribute__((packed)); - -#define EXIT_REASON_CODESIG_PATH_MAX 1024 - -struct codesigning_exit_reason_info { - uint64_t ceri_virt_addr; - uint64_t ceri_file_offset; - char ceri_pathname[EXIT_REASON_CODESIG_PATH_MAX]; - char ceri_filename[EXIT_REASON_CODESIG_PATH_MAX]; - uint64_t ceri_codesig_modtime_secs; - uint64_t ceri_codesig_modtime_nsecs; - uint64_t ceri_page_modtime_secs; - uint64_t ceri_page_modtime_nsecs; - uint8_t ceri_path_truncated; - uint8_t ceri_object_codesigned; - uint8_t ceri_page_codesig_validated; - uint8_t ceri_page_codesig_tainted; - uint8_t ceri_page_codesig_nx; - uint8_t ceri_page_wpmapped; - uint8_t ceri_page_slid; - uint8_t ceri_page_dirty; - uint32_t ceri_page_shadow_depth; -} __attribute__((packed)); - -#define EXIT_REASON_USER_DESC_MAX_LEN 1024 -#define EXIT_REASON_PAYLOAD_MAX_LEN 2048 -/**************** safe iterators *********************/ - -typedef struct kcdata_iter { - kcdata_item_t item; - void *end; -} kcdata_iter_t; - - -static inline -kcdata_iter_t kcdata_iter(void *buffer, unsigned long size) { - kcdata_iter_t iter; - iter.item = (kcdata_item_t) buffer; - iter.end = (void*) (((uintptr_t)buffer) + size); - return iter; -} - -static inline -kcdata_iter_t kcdata_iter_unsafe(void *buffer) __attribute__((deprecated)); - -static inline -kcdata_iter_t kcdata_iter_unsafe(void *buffer) { - kcdata_iter_t iter; - iter.item = (kcdata_item_t) buffer; - iter.end = (void*) (uintptr_t) ~0; - return iter; -} - -static const kcdata_iter_t kcdata_invalid_iter = { .item = 0, .end = 0 }; - -static inline -int kcdata_iter_valid(kcdata_iter_t iter) { - return - ( (uintptr_t)iter.item + sizeof(struct kcdata_item) <= (uintptr_t)iter.end ) && - ( (uintptr_t)iter.item + sizeof(struct kcdata_item) + iter.item->size <= (uintptr_t)iter.end); -} - - -static inline -kcdata_iter_t kcdata_iter_next(kcdata_iter_t iter) { - iter.item = (kcdata_item_t) (((uintptr_t)iter.item) + sizeof(struct kcdata_item) + 
(iter.item->size)); - return iter; -} - -static inline uint32_t -kcdata_iter_type(kcdata_iter_t iter) -{ - if ((iter.item->type & ~0xfu) == KCDATA_TYPE_ARRAY_PAD0) - return KCDATA_TYPE_ARRAY; - else - return iter.item->type; -} - -static inline uint32_t -kcdata_calc_padding(uint32_t size) -{ - /* calculate number of bits to add to size to get something divisible by 16 */ - return (-size) & 0xf; -} - -static inline uint32_t -kcdata_flags_get_padding(uint64_t flags) -{ - return flags & KCDATA_FLAGS_STRUCT_PADDING_MASK; -} - -/* see comment above about has_padding */ -static inline int -kcdata_iter_is_legacy_item(kcdata_iter_t iter, uint32_t legacy_size) -{ - uint32_t legacy_size_padded = legacy_size + kcdata_calc_padding(legacy_size); - return (iter.item->size == legacy_size_padded && - (iter.item->flags & (KCDATA_FLAGS_STRUCT_PADDING_MASK | KCDATA_FLAGS_STRUCT_HAS_PADDING)) == 0); - -} - -static inline uint32_t -kcdata_iter_size(kcdata_iter_t iter) -{ - uint32_t legacy_size = 0; - - switch (kcdata_iter_type(iter)) { - case KCDATA_TYPE_ARRAY: - case KCDATA_TYPE_CONTAINER_BEGIN: - return iter.item->size; - case STACKSHOT_KCTYPE_THREAD_SNAPSHOT: { - legacy_size = sizeof(struct thread_snapshot_v2); - if (kcdata_iter_is_legacy_item(iter, legacy_size)) { - return legacy_size; - } - - goto not_legacy; - } - case STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO: { - legacy_size = sizeof(struct dyld_uuid_info_64); - if (kcdata_iter_is_legacy_item(iter, legacy_size)) { - return legacy_size; - } - - goto not_legacy; - } -not_legacy: - default: - if (iter.item->size < kcdata_flags_get_padding(iter.item->flags)) - return 0; - else - return iter.item->size - kcdata_flags_get_padding(iter.item->flags); - } -} - -static inline uint64_t -kcdata_iter_flags(kcdata_iter_t iter) -{ - return iter.item->flags; -} - -static inline -void * kcdata_iter_payload(kcdata_iter_t iter) { - return &iter.item->data; -} - - -static inline -uint32_t kcdata_iter_array_elem_type(kcdata_iter_t iter) { - return 
(iter.item->flags >> 32) & UINT32_MAX; -} - -static inline -uint32_t kcdata_iter_array_elem_count(kcdata_iter_t iter) { - return (iter.item->flags) & UINT32_MAX; -} - -/* KCDATA_TYPE_ARRAY is ambiguous about the size of the array elements. Size is - * calculated as total_size / elements_count, but total size got padded out to a - * 16 byte alignment. New kernels will generate KCDATA_TYPE_ARRAY_PAD* instead - * to explicitly tell us how much padding was used. Here we have a fixed, never - * to be altered list of the sizes of array elements that were used before I - * discovered this issue. If you find a KCDATA_TYPE_ARRAY that is not one of - * these types, treat it as invalid data. */ - -static inline -uint32_t -kcdata_iter_array_size_switch(kcdata_iter_t iter) { - switch(kcdata_iter_array_elem_type(iter)) { - case KCDATA_TYPE_LIBRARY_LOADINFO: - return sizeof(struct dyld_uuid_info_32); - case KCDATA_TYPE_LIBRARY_LOADINFO64: - return sizeof(struct dyld_uuid_info_64); - case STACKSHOT_KCTYPE_KERN_STACKFRAME: - case STACKSHOT_KCTYPE_USER_STACKFRAME: - return sizeof(struct stack_snapshot_frame32); - case STACKSHOT_KCTYPE_KERN_STACKFRAME64: - case STACKSHOT_KCTYPE_USER_STACKFRAME64: - return sizeof(struct stack_snapshot_frame64); - case STACKSHOT_KCTYPE_DONATING_PIDS: - return sizeof(int32_t); - case STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT: - return sizeof(struct thread_delta_snapshot_v2); - // This one is only here to make some unit tests work. It should be OK to - // remove. 
- case TASK_CRASHINFO_CRASHED_THREADID: - return sizeof(uint64_t); - default: - return 0; - } -} - -static inline -int kcdata_iter_array_valid(kcdata_iter_t iter) { - if (!kcdata_iter_valid(iter)) - return 0; - if (kcdata_iter_type(iter) != KCDATA_TYPE_ARRAY) - return 0; - if (kcdata_iter_array_elem_count(iter) == 0) - return iter.item->size == 0; - if (iter.item->type == KCDATA_TYPE_ARRAY) { - uint32_t elem_size = kcdata_iter_array_size_switch(iter); - if (elem_size == 0) - return 0; - /* sizes get aligned to the nearest 16. */ - return - kcdata_iter_array_elem_count(iter) <= iter.item->size / elem_size && - iter.item->size % kcdata_iter_array_elem_count(iter) < 16; - } else { - return - (iter.item->type & 0xf) <= iter.item->size && - kcdata_iter_array_elem_count(iter) <= iter.item->size - (iter.item->type & 0xf) && - (iter.item->size - (iter.item->type & 0xf)) % kcdata_iter_array_elem_count(iter) == 0; - } -} - - -static inline -uint32_t kcdata_iter_array_elem_size(kcdata_iter_t iter) { - if (iter.item->type == KCDATA_TYPE_ARRAY) - return kcdata_iter_array_size_switch(iter); - if (kcdata_iter_array_elem_count(iter) == 0) - return 0; - return (iter.item->size - (iter.item->type & 0xf)) / kcdata_iter_array_elem_count(iter); -} - -static inline -int kcdata_iter_container_valid(kcdata_iter_t iter) { - return - kcdata_iter_valid(iter) && - kcdata_iter_type(iter) == KCDATA_TYPE_CONTAINER_BEGIN && - iter.item->size >= sizeof(uint32_t); -} - -static inline -uint32_t kcdata_iter_container_type(kcdata_iter_t iter) { - return * (uint32_t *) kcdata_iter_payload(iter); -} - -static inline -uint64_t kcdata_iter_container_id(kcdata_iter_t iter) { - return iter.item->flags; -} - - -#define KCDATA_ITER_FOREACH(iter) for(; kcdata_iter_valid(iter) && iter.item->type != KCDATA_TYPE_BUFFER_END; iter = kcdata_iter_next(iter)) -#define KCDATA_ITER_FOREACH_FAILED(iter) (!kcdata_iter_valid(iter) || (iter).item->type != KCDATA_TYPE_BUFFER_END) - -static inline -kcdata_iter_t 
-kcdata_iter_find_type(kcdata_iter_t iter, uint32_t type) -{ - KCDATA_ITER_FOREACH(iter) - { - if (kcdata_iter_type(iter) == type) - return iter; - } - return kcdata_invalid_iter; -} - -static inline -int kcdata_iter_data_with_desc_valid(kcdata_iter_t iter, uint32_t minsize) { - return - kcdata_iter_valid(iter) && - kcdata_iter_size(iter) >= KCDATA_DESC_MAXLEN + minsize && - ((char*)kcdata_iter_payload(iter))[KCDATA_DESC_MAXLEN-1] == 0; -} - -static inline -char *kcdata_iter_string(kcdata_iter_t iter, uint32_t offset) { - if (offset > kcdata_iter_size(iter)) { - return NULL; - } - uint32_t maxlen = kcdata_iter_size(iter) - offset; - char *s = ((char*)kcdata_iter_payload(iter)) + offset; - if (strnlen(s, maxlen) < maxlen) { - return s; - } else { - return NULL; - } -} - -static inline void kcdata_iter_get_data_with_desc(kcdata_iter_t iter, char **desc_ptr, void **data_ptr, uint32_t *size_ptr) { - if (desc_ptr) - *desc_ptr = (char *)kcdata_iter_payload(iter); - if (data_ptr) - *data_ptr = (void *)((uintptr_t)kcdata_iter_payload(iter) + KCDATA_DESC_MAXLEN); - if (size_ptr) - *size_ptr = kcdata_iter_size(iter) - KCDATA_DESC_MAXLEN; -} - -#endif diff --git a/libkdd/kcdata.h b/libkdd/kcdata.h new file mode 120000 index 000000000..f5573542b --- /dev/null +++ b/libkdd/kcdata.h @@ -0,0 +1 @@ +./osfmk/kern/kcdata.h \ No newline at end of file diff --git a/libkern/.clang-format b/libkern/.clang-format deleted file mode 100644 index cd99c24e5..000000000 --- a/libkern/.clang-format +++ /dev/null @@ -1,30 +0,0 @@ -# See top level .clang-format for explanation of options -AlignEscapedNewlinesLeft: true -AlignTrailingComments: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: true -AllowShortCaseLabelsOnASingleLine: true -AllowShortFunctionsOnASingleLine: None -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: false -AlwaysBreakBeforeMultilineStrings: true -BinPackArguments: 
true -BinPackParameters: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Allman -ColumnLimit: 132 -IndentCaseLabels: false -IndentWidth: 4 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -PointerAlignment: Middle -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -TabWidth: 4 -UseTab: Never diff --git a/libkern/.clang-format b/libkern/.clang-format new file mode 120000 index 000000000..f91369598 --- /dev/null +++ b/libkern/.clang-format @@ -0,0 +1 @@ +./iokit/.clang-format \ No newline at end of file diff --git a/libkern/c++/OSNumber.cpp b/libkern/c++/OSNumber.cpp index 5b3b31a91..36a9ded7e 100644 --- a/libkern/c++/OSNumber.cpp +++ b/libkern/c++/OSNumber.cpp @@ -53,6 +53,8 @@ bool OSNumber::init(unsigned long long inValue, unsigned int newNumberOfBits) { if (!super::init()) return false; + if (newNumberOfBits > 64) + return false; size = newNumberOfBits; value = (inValue & sizeMask); diff --git a/libsyscall/mach/.gitignore b/libsyscall/mach/.gitignore new file mode 100644 index 000000000..f718d68d2 --- /dev/null +++ b/libsyscall/mach/.gitignore @@ -0,0 +1,3 @@ +*.pbxuser +*.perspectivev3 +build/ diff --git a/osfmk/console/serial_console.c b/osfmk/console/serial_console.c index ec139794c..c1cd32b3a 100644 --- a/osfmk/console/serial_console.c +++ b/osfmk/console/serial_console.c @@ -243,8 +243,14 @@ _cnputs(char * c, int size) /* The console device output routines are assumed to be * non-reentrant. 
*/ +#ifdef __x86_64__ + uint32_t lock_timeout_ticks = UINT32_MAX; +#else + uint32_t lock_timeout_ticks = LockTimeOut; +#endif + mp_disable_preemption(); - if (!hw_lock_to(&cnputc_lock, LockTimeOut)) { + if (!hw_lock_to(&cnputc_lock, lock_timeout_ticks)) { /* If we timed out on the lock, and we're in the debugger, * copy lock data for debugging and break the lock. */ diff --git a/osfmk/corpses/corpse.c b/osfmk/corpses/corpse.c index 94b5a0973..8c7f28e42 100644 --- a/osfmk/corpses/corpse.c +++ b/osfmk/corpses/corpse.c @@ -467,6 +467,7 @@ task_generate_corpse_internal( TRUE, is64bit, t_flags, + TPF_NONE, &new_task); if (kr != KERN_SUCCESS) { goto error_task_generate_corpse; diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c index fce3396d0..955301c6d 100644 --- a/osfmk/i386/acpi.c +++ b/osfmk/i386/acpi.c @@ -386,12 +386,6 @@ acpi_idle_kernel(acpi_sleep_callback func, void *refcon) assert(cpu_number() == master_cpu); - /* - * Effectively set the boot cpu offline. - * This will stop further deadlines being set. - */ - cpu_datap(master_cpu)->cpu_running = FALSE; - /* Cancel any pending deadline */ setPop(0); while (lapic_is_interrupting(LAPIC_TIMER_VECTOR)) { @@ -440,7 +434,6 @@ acpi_idle_kernel(acpi_sleep_callback func, void *refcon) } acpi_wake_postrebase_abstime = mach_absolute_time(); assert(mach_absolute_time() >= acpi_idle_abstime); - cpu_datap(master_cpu)->cpu_running = TRUE; KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEEP_IDLE) | DBG_FUNC_END, diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h index ccad03ea7..6bf67c0d9 100644 --- a/osfmk/i386/pmap.h +++ b/osfmk/i386/pmap.h @@ -751,7 +751,9 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr #if defined(__x86_64__) #define PMAP_DEACTIVATE_MAP(map, thread, ccpu) \ +/* pmap_assert2((pmap_pcid_ncpus ? 
(pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu) == (get_cr3_raw() & 0xFFF)) : TRUE),"PCIDs: 0x%x, active PCID: 0x%x, CR3: 0x%lx, pmap_cr3: 0x%llx, kernel_cr3: 0x%llx, kernel pmap cr3: 0x%llx, CPU active PCID: 0x%x, CPU kernel PCID: 0x%x, specflags: 0x%x, pagezero: 0x%x", pmap_pcid_ncpus, pcid_for_pmap_cpu_tuple(map->pmap, thread, ccpu), get_cr3_raw(), map->pmap->pm_cr3, cpu_datap(ccpu)->cpu_kernel_cr3, kernel_pmap->pm_cr3, cpu_datap(ccpu)->cpu_active_pcid, cpu_datap(ccpu)->cpu_kernel_pcid, thread->machine.specFlags, map->pmap->pagezero_accessible); +*/ #else #define PMAP_DEACTIVATE_MAP(map, thread) #endif diff --git a/osfmk/ipc/flipc.c b/osfmk/ipc/flipc.c index ff0143cd0..1c4cfc563 100644 --- a/osfmk/ipc/flipc.c +++ b/osfmk/ipc/flipc.c @@ -318,7 +318,6 @@ flipc_cmd_ack(flipc_ack_msg_t fmsg, ip_lock(lport); ipc_mqueue_t lport_mq = &lport->ip_messages; - spl_t s = splsched(); imq_lock(lport_mq); assert(fport->peek_count >= msg_count); // Can't ack what we haven't peeked! @@ -330,7 +329,6 @@ flipc_cmd_ack(flipc_ack_msg_t fmsg, } imq_unlock(lport_mq); - splx(s); ip_unlock(lport); if (kick) @@ -461,10 +459,9 @@ flipc_msg_to_remote_node(mach_node_t to_node, port_mq->data.port.fport->peek_count++; /* Clean up outstanding prepost on port_mq. - * This also unlocks port_mq and restores spl. + * This also unlocks port_mq. 
*/ - spl_t spl = splsched(); - ipc_mqueue_release_peek_ref(port_mq, &spl); + ipc_mqueue_release_peek_ref(port_mq); assert(get_preemption_level()==0); /* DANGER: The code below must be allowed to allocate so it can't @@ -636,17 +633,15 @@ flipc_msg_ack(mach_node_t node, #if (0) mach_msg_return_t mmr; - spl_t s; ipc_mqueue_t ack_mqueue; ip_lock(ack_port); ack_mqueue = &ack_port->ip_messages; - s = splsched(); imq_lock(ack_mqueue); ip_unlock(ack_port); - /* ipc_mqueue_send() unlocks ack_mqueue and restores splx(s) */ - mmr = ipc_mqueue_send(ack_mqueue, kmsg, 0, 0, s); + /* ipc_mqueue_send() unlocks ack_mqueue */ + mmr = ipc_mqueue_send(ack_mqueue, kmsg, 0, 0); #else kern_return_t kr; kr = ipc_kmsg_send(kmsg, diff --git a/osfmk/ipc/ipc_importance.c b/osfmk/ipc/ipc_importance.c index 3c16ac82b..749a9c278 100644 --- a/osfmk/ipc/ipc_importance.c +++ b/osfmk/ipc/ipc_importance.c @@ -161,6 +161,14 @@ static zone_t ipc_importance_inherit_zone; static ipc_voucher_attr_control_t ipc_importance_control; +static boolean_t ipc_importance_task_check_transition(ipc_importance_task_t task_imp, + iit_update_type_t type, uint32_t delta); + +static void ipc_importance_task_propagate_assertion_locked(ipc_importance_task_t task_imp, + iit_update_type_t type, boolean_t update_task_imp); + +static ipc_importance_inherit_t ipc_importance_inherit_from_task(task_t from_task, task_t to_task); + /* * Routine: ipc_importance_kmsg_link * Purpose: @@ -277,8 +285,9 @@ ipc_importance_inherit_find( queue_iterate(&link_task->iit_inherits, inherit, ipc_importance_inherit_t, iii_inheritance) { - if (inherit->iii_to_task == to_task && inherit->iii_depth == depth) + if (inherit->iii_to_task == to_task && inherit->iii_depth == depth) { return inherit; + } } return III_NULL; } @@ -399,25 +408,63 @@ ipc_importance_release_locked(ipc_importance_elem_t elem) /* dropping an inherit element */ case IIE_TYPE_INHERIT: { - ipc_importance_inherit_t inherit; + ipc_importance_inherit_t inherit = 
(ipc_importance_inherit_t)elem; + ipc_importance_task_t to_task = inherit->iii_to_task; ipc_importance_elem_t from_elem; - ipc_importance_task_t to_task; - - inherit = (ipc_importance_inherit_t)elem; - to_task = inherit->iii_to_task; assert(IIT_NULL != to_task); - assert(!inherit->iii_donating); - - /* unlink and release the inherit */ assert(ipc_importance_task_is_any_receiver_type(to_task)); + + /* unlink the inherit from its source element */ from_elem = ipc_importance_inherit_unlink(inherit); assert(IIE_NULL != from_elem); + + /* + * The attribute might have pending external boosts if the attribute + * was given out during exec, drop them from the appropriate destination + * task. + * + * The attribute will not have any pending external boosts if the + * attribute was given out to voucher system since it would have been + * dropped by ipc_importance_release_value, but there is no way to + * detect that, thus if the attribute has a pending external boost, + * drop them from the appropriate destination task. + * + * The inherit attribute from exec and voucher system would not + * get deduped to each other, thus dropping the external boost + * from destination task at two different places will not have + * any unintended side effects.
+ */ + assert(inherit->iii_externcnt >= inherit->iii_externdrop); + if (inherit->iii_donating) { + uint32_t assertcnt = III_EXTERN(inherit); + + assert(ipc_importance_task_is_any_receiver_type(to_task)); + assert(to_task->iit_externcnt >= inherit->iii_externcnt); + assert(to_task->iit_externdrop >= inherit->iii_externdrop); + to_task->iit_externcnt -= inherit->iii_externcnt; + to_task->iit_externdrop -= inherit->iii_externdrop; + inherit->iii_externcnt = 0; + inherit->iii_externdrop = 0; + inherit->iii_donating = FALSE; + + /* adjust the internal assertions - and propagate as needed */ + if (ipc_importance_task_check_transition(to_task, IIT_UPDATE_DROP, assertcnt)) { + ipc_importance_task_propagate_assertion_locked(to_task, IIT_UPDATE_DROP, TRUE); + } + } else { + inherit->iii_externcnt = 0; + inherit->iii_externdrop = 0; + } + + /* release the reference on the source element */ ipc_importance_release_locked(from_elem); /* unlocked on return */ + /* release the reference on the destination task */ ipc_importance_task_release(to_task); + /* free the inherit */ zfree(ipc_importance_inherit_zone, inherit); break; } @@ -2077,6 +2124,60 @@ ipc_importance_disconnect_task(task_t task) task_deallocate(task); } +/* + * Routine: ipc_importance_exec_switch_task + * Purpose: + * Switch importance task base from old task to new task in exec. + * + * Create an ipc importance linkage from old task to new task, + * once the linkage is created, switch the importance task base + * from old task to new task. After the switch, the linkage will + * represent importance linkage from new task to old task with + * watch port importance inheritance linked to new task. + * Conditions: + * Nothing locked. + * Returns a reference on importance inherit.
+ */ +ipc_importance_inherit_t +ipc_importance_exec_switch_task( + task_t old_task, + task_t new_task) +{ + ipc_importance_inherit_t inherit = III_NULL; + ipc_importance_task_t old_task_imp = IIT_NULL; + ipc_importance_task_t new_task_imp = IIT_NULL; + + task_importance_reset(old_task); + + /* Create an importance linkage from old_task to new_task */ + inherit = ipc_importance_inherit_from_task(old_task, new_task); + if (inherit == III_NULL) { + return inherit; + } + + /* Switch task importance base from old task to new task */ + ipc_importance_lock(); + + old_task_imp = old_task->task_imp_base; + new_task_imp = new_task->task_imp_base; + + old_task_imp->iit_task = new_task; + new_task_imp->iit_task = old_task; + + old_task->task_imp_base = new_task_imp; + new_task->task_imp_base = old_task_imp; + +#if DEVELOPMENT || DEBUG + /* + * Update the pid and proc name for importance base if any + */ + task_importance_update_owner_info(new_task); +#endif + ipc_importance_unlock(); + + return inherit; +} + /* * Routine: ipc_importance_check_circularity * Purpose: @@ -2524,7 +2625,7 @@ ipc_importance_send( } /* - * Routine: ipc_importance_inherit_from + * Routine: ipc_importance_inherit_from_kmsg * Purpose: * Create a "made" reference for an importance attribute representing * an inheritance between the sender of a message (if linked) and the @@ -2538,7 +2639,7 @@ ipc_importance_send( * Nothing locked on entry. May block. */ static ipc_importance_inherit_t -ipc_importance_inherit_from(ipc_kmsg_t kmsg) +ipc_importance_inherit_from_kmsg(ipc_kmsg_t kmsg) { ipc_importance_task_t task_imp = IIT_NULL; ipc_importance_elem_t from_elem = kmsg->ikm_importance; @@ -2793,6 +2894,181 @@ ipc_importance_inherit_from(ipc_kmsg_t kmsg) return inherit; } +/* + * Routine: ipc_importance_inherit_from_task + * Purpose: + * Create a reference for an importance attribute representing + * an inheritance between the to_task and from_task. The iii + * created will be marked as III_FLAGS_FOR_OTHERS.
+ * + * It will not dedup any iii which are not marked as III_FLAGS_FOR_OTHERS. + * + * If the task is inactive, there isn't any need to return a new reference. + * Conditions: + * Nothing locked on entry. May block. + * It should not be called from voucher subsystem. + */ +static ipc_importance_inherit_t +ipc_importance_inherit_from_task( + task_t from_task, + task_t to_task) +{ + ipc_importance_task_t to_task_imp = IIT_NULL; + ipc_importance_task_t from_task_imp = IIT_NULL; + ipc_importance_elem_t from_elem = IIE_NULL; + + ipc_importance_inherit_t inherit = III_NULL; + ipc_importance_inherit_t alloc = III_NULL; + boolean_t donating; + uint32_t depth = 1; + + to_task_imp = ipc_importance_for_task(to_task, FALSE); + from_task_imp = ipc_importance_for_task(from_task, FALSE); + from_elem = (ipc_importance_elem_t)from_task_imp; + + ipc_importance_lock(); + + if (IIT_NULL == to_task_imp || IIT_NULL == from_task_imp) { + goto out_locked; + } + + /* + * No need to set up an inherit linkage if the to_task or from_task + * isn't a receiver of one type or the other. 
+ */ + if (!ipc_importance_task_is_any_receiver_type(to_task_imp) || + !ipc_importance_task_is_any_receiver_type(from_task_imp)) { + goto out_locked; + } + + /* Do not allow to create a linkage to self */ + if (to_task_imp == from_task_imp) { + goto out_locked; + } + + incr_ref_counter(to_task_imp->iit_elem.iie_task_refs_added_inherit_from); + incr_ref_counter(from_elem->iie_kmsg_refs_added); + + /* + * Now that we have the from_elem figured out, + * check to see if we already have an inherit for this pairing + */ + while (III_NULL == inherit) { + inherit = ipc_importance_inherit_find(from_elem, to_task_imp, depth); + + /* Do we have to allocate a new inherit */ + if (III_NULL == inherit) { + if (III_NULL != alloc) { + break; + } + + /* allocate space */ + ipc_importance_unlock(); + alloc = (ipc_importance_inherit_t) + zalloc(ipc_importance_inherit_zone); + ipc_importance_lock(); + } + } + + /* snapshot the donating status while we have importance locked */ + donating = ipc_importance_task_is_donor(from_task_imp); + + if (III_NULL != inherit) { + /* We found one, piggyback on that */ + assert(0 < III_REFS(inherit)); + assert(0 < IIE_REFS(inherit->iii_from_elem)); + + /* Take a reference for inherit */ + assert(III_REFS_MAX > III_REFS(inherit)); + ipc_importance_inherit_reference_internal(inherit); + + /* Reflect the inherit's change of status into the task boosts */ + if (0 == III_EXTERN(inherit)) { + assert(!inherit->iii_donating); + inherit->iii_donating = donating; + if (donating) { + to_task_imp->iit_externcnt += inherit->iii_externcnt; + to_task_imp->iit_externdrop += inherit->iii_externdrop; + } + } else { + assert(donating == inherit->iii_donating); + } + + /* add in a external reference for this use of the inherit */ + inherit->iii_externcnt++; + } else { + /* initialize the previously allocated space */ + inherit = alloc; + inherit->iii_bits = IIE_TYPE_INHERIT | 1; + inherit->iii_made = 0; + inherit->iii_externcnt = 1; + inherit->iii_externdrop = 0; + 
inherit->iii_depth = depth; + inherit->iii_to_task = to_task_imp; + inherit->iii_from_elem = IIE_NULL; + queue_init(&inherit->iii_kmsgs); + + if (donating) { + inherit->iii_donating = TRUE; + } else { + inherit->iii_donating = FALSE; + } + + /* + * Chain our new inherit on the element it inherits from. + * The new inherit takes our reference on from_elem. + */ + ipc_importance_inherit_link(inherit, from_elem); + +#if IIE_REF_DEBUG + ipc_importance_counter_init(&inherit->iii_elem); + from_elem->iie_kmsg_refs_inherited++; + to_task_imp->iit_elem.iie_task_refs_inherited++; +#endif + } + +out_locked: + + /* If found inherit and donating, reflect that in the task externcnt */ + if (III_NULL != inherit && donating) { + to_task_imp->iit_externcnt++; + /* take the internal assertion */ + ipc_importance_task_hold_internal_assertion_locked(to_task_imp, 1); + /* may have dropped and retaken importance lock */ + } + + /* If we didn't create a new inherit, we have some resources to release */ + if (III_NULL == inherit || inherit != alloc) { + if (IIE_NULL != from_elem) { + if (III_NULL != inherit) { + incr_ref_counter(from_elem->iie_kmsg_refs_coalesced); + } else { + incr_ref_counter(from_elem->iie_kmsg_refs_dropped); + } + ipc_importance_release_locked(from_elem); + /* importance unlocked */ + } else { + ipc_importance_unlock(); + } + + if (IIT_NULL != to_task_imp) { + if (III_NULL != inherit) { + incr_ref_counter(to_task_imp->iit_elem.iie_task_refs_coalesced); + } + ipc_importance_task_release(to_task_imp); + } + + if (III_NULL != alloc) { + zfree(ipc_importance_inherit_zone, alloc); + } + } else { + /* from_elem and to_task_imp references transferred to new inherit */ + ipc_importance_unlock(); + } + + return inherit; +} + /* * Routine: ipc_importance_receive * Purpose: @@ -2845,7 +3121,7 @@ ipc_importance_receive( * transferring any boosts from the kmsg linkage through the * port directly to the new inheritance object.
*/ - inherit = ipc_importance_inherit_from(kmsg); + inherit = ipc_importance_inherit_from_kmsg(kmsg); handle = (mach_voucher_attr_value_handle_t)inherit; assert(IIE_NULL == kmsg->ikm_importance); @@ -3115,9 +3391,9 @@ ipc_importance_release_value( /* clear made */ elem->iie_made = 0; - /* - * If there are pending external boosts represented by this attribute, - * drop them from the apropriate task + /* + * If there are pending external boosts represented by this attribute, + * drop them from the apropriate task */ if (IIE_TYPE_INHERIT == IIE_TYPE(elem)) { ipc_importance_inherit_t inherit = (ipc_importance_inherit_t)elem; @@ -3145,7 +3421,7 @@ ipc_importance_release_value( inherit->iii_externcnt = 0; inherit->iii_externdrop = 0; } - } + } /* drop the made reference on elem */ ipc_importance_release_locked(elem); diff --git a/osfmk/ipc/ipc_importance.h b/osfmk/ipc/ipc_importance.h index 2a2ac2f45..6f3bc5744 100644 --- a/osfmk/ipc/ipc_importance.h +++ b/osfmk/ipc/ipc_importance.h @@ -194,6 +194,7 @@ extern void ipc_importance_reset(ipc_importance_task_t task_imp, boolean_t donor extern ipc_importance_task_t ipc_importance_for_task(task_t task, boolean_t made); extern void ipc_importance_disconnect_task(task_t task); +extern ipc_importance_inherit_t ipc_importance_exec_switch_task(task_t old_task, task_t new_task); extern boolean_t ipc_importance_task_is_donor(ipc_importance_task_t task_imp); extern boolean_t ipc_importance_task_is_never_donor(ipc_importance_task_t task_imp); diff --git a/osfmk/ipc/ipc_kmsg.c b/osfmk/ipc/ipc_kmsg.c index 479bf2fe2..92363485c 100644 --- a/osfmk/ipc/ipc_kmsg.c +++ b/osfmk/ipc/ipc_kmsg.c @@ -89,7 +89,6 @@ #include #include #include -#include #include #include #include diff --git a/osfmk/ipc/ipc_mqueue.h b/osfmk/ipc/ipc_mqueue.h index b3fbbb2e6..2a83023e8 100644 --- a/osfmk/ipc/ipc_mqueue.h +++ b/osfmk/ipc/ipc_mqueue.h @@ -73,7 +73,6 @@ #include #include #include -#include #include #include diff --git a/osfmk/ipc/ipc_pset.c 
b/osfmk/ipc/ipc_pset.c index f6772f2c7..4dc635a06 100644 --- a/osfmk/ipc/ipc_pset.c +++ b/osfmk/ipc/ipc_pset.c @@ -74,7 +74,6 @@ #include #include -#include #include @@ -321,8 +320,6 @@ void ipc_pset_destroy( ipc_pset_t pset) { - spl_t s; - assert(ips_active(pset)); pset->ips_object.io_bits &= ~IO_BITS_ACTIVE; @@ -332,16 +329,14 @@ ipc_pset_destroy( * AND remove this message queue from any containing sets */ ipc_mqueue_remove_all(&pset->ips_messages); - + /* * Set all waiters on the portset running to * discover the change. */ - s = splsched(); imq_lock(&pset->ips_messages); ipc_mqueue_changed(&pset->ips_messages); imq_unlock(&pset->ips_messages); - splx(s); ipc_mqueue_deinit(&pset->ips_messages); @@ -378,7 +373,6 @@ filt_machportattach( ipc_space_t space = current_space(); ipc_kmsg_t first; - spl_t s; int error; int result = 0; kern_return_t kr; @@ -395,10 +389,9 @@ filt_machportattach( __IGNORE_WCASTALIGN(pset = (ipc_pset_t)entry->ie_object); mqueue = &pset->ips_messages; - s = splsched(); imq_lock(mqueue); - /* + /* * Bind the portset wait queue directly to knote/kqueue. * This allows us to just use wait_queue foo to effect a wakeup, * rather than having to call knote() from the Mach code on each @@ -412,8 +405,7 @@ filt_machportattach( KNOTE_ATTACH(&mqueue->imq_klist, kn); } imq_unlock(mqueue); - splx(s); - + is_read_unlock(space); /* @@ -428,7 +420,7 @@ filt_machportattach( __IGNORE_WCASTALIGN(port = (ipc_port_t)entry->ie_object); mqueue = &port->ip_messages; ip_reference(port); - + /* * attach knote to port and determine result * If the filter requested direct message receipt, @@ -436,7 +428,6 @@ filt_machportattach( * reflect the requested and override qos of the * first message in the queue. 
*/ - s = splsched(); imq_lock(mqueue); kn->kn_ptr.p_mqueue = mqueue; KNOTE_ATTACH(&mqueue->imq_klist, kn); @@ -446,7 +437,6 @@ filt_machportattach( result = 1; } imq_unlock(mqueue); - splx(s); is_read_unlock(space); error = 0; @@ -482,14 +472,11 @@ filt_machportdetach( { ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; ipc_object_t object = mqueue_to_object(mqueue); - spl_t s; - s = splsched(); imq_lock(mqueue); KNOTE_DETACH(&mqueue->imq_klist, kn); kn->kn_ptr.p_mqueue = IMQ_NULL; imq_unlock(mqueue); - splx(s); if (io_otype(object) == IOT_PORT_SET) { /* @@ -558,9 +545,7 @@ filt_machporttouch( ipc_mqueue_t mqueue = kn->kn_ptr.p_mqueue; ipc_kmsg_t first; int result = 0; - spl_t s; - s = splsched(); imq_lock(mqueue); /* copy in new settings and save off new input fflags */ @@ -588,7 +573,6 @@ filt_machporttouch( MACH_MSG_PRIORITY_UNSPECIFIED); } imq_unlock(mqueue); - splx(s); return result; } diff --git a/osfmk/ipc/mach_port.c b/osfmk/ipc/mach_port.c index d15991ebf..9bff072b1 100644 --- a/osfmk/ipc/mach_port.c +++ b/osfmk/ipc/mach_port.c @@ -1713,9 +1713,6 @@ void mach_port_get_status_helper( ipc_port_t port, mach_port_status_t *statusp) { - spl_t s; - - s = splsched(); imq_lock(&port->ip_messages); /* don't leak set IDs, just indicate that the port is in one or not */ statusp->mps_pset = !!(port->ip_in_pset); @@ -1723,8 +1720,7 @@ void mach_port_get_status_helper( statusp->mps_qlimit = port->ip_messages.imq_qlimit; statusp->mps_msgcount = port->ip_messages.imq_msgcount; imq_unlock(&port->ip_messages); - splx(s); - + statusp->mps_mscount = port->ip_mscount; statusp->mps_sorights = port->ip_sorights; statusp->mps_srights = port->ip_srights > 0; diff --git a/osfmk/kern/coalition.c b/osfmk/kern/coalition.c index 7b9885612..02661f548 100644 --- a/osfmk/kern/coalition.c +++ b/osfmk/kern/coalition.c @@ -389,14 +389,17 @@ i_coal_resource_remove_task(coalition_t coal, task_t task) cr->time_nonempty += last_time_nonempty; } - ledger_rollup(cr->ledger, task->ledger); - cr->bytesread 
+= task->task_io_stats->disk_reads.size; - cr->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size; - cr->gpu_time += task_gpu_utilisation(task); - cr->logical_immediate_writes += task->task_immediate_writes; - cr->logical_deferred_writes += task->task_deferred_writes; - cr->logical_invalidated_writes += task->task_invalidated_writes; - cr->logical_metadata_writes += task->task_metadata_writes; + /* Do not roll up for exec'd task or exec copy task */ + if (!task_is_exec_copy(task) && !task_did_exec(task)) { + ledger_rollup(cr->ledger, task->ledger); + cr->bytesread += task->task_io_stats->disk_reads.size; + cr->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size; + cr->gpu_time += task_gpu_utilisation(task); + cr->logical_immediate_writes += task->task_immediate_writes; + cr->logical_deferred_writes += task->task_deferred_writes; + cr->logical_invalidated_writes += task->task_invalidated_writes; + cr->logical_metadata_writes += task->task_metadata_writes; + } /* remove the task from the coalition's list */ remqueue(&task->task_coalition[COALITION_TYPE_RESOURCE]); @@ -503,6 +506,14 @@ coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_us */ task_t task; qe_foreach_element(task, &coal->r.tasks, task_coalition[COALITION_TYPE_RESOURCE]) { + /* + * Rolling up stats for exec copy task or exec'd task will lead to double accounting.
+ * Cannot take task lock after taking coalition lock + */ + if (task_is_exec_copy(task) || task_did_exec(task)) { + continue; + } + ledger_rollup(sum_ledger, task->ledger); bytesread += task->task_io_stats->disk_reads.size; byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size; diff --git a/osfmk/kern/ledger.c b/osfmk/kern/ledger.c index 983e51b9c..a7e9f4173 100644 --- a/osfmk/kern/ledger.c +++ b/osfmk/kern/ledger.c @@ -824,17 +824,32 @@ kern_return_t ledger_rollup(ledger_t to_ledger, ledger_t from_ledger) { int i; - struct ledger_entry *from_le, *to_le; assert(to_ledger->l_template == from_ledger->l_template); for (i = 0; i < to_ledger->l_size; i++) { - if (ENTRY_VALID(from_ledger, i) && ENTRY_VALID(to_ledger, i)) { - from_le = &from_ledger->l_entries[i]; - to_le = &to_ledger->l_entries[i]; - OSAddAtomic64(from_le->le_credit, &to_le->le_credit); - OSAddAtomic64(from_le->le_debit, &to_le->le_debit); - } + ledger_rollup_entry(to_ledger, from_ledger, i); + } + + return (KERN_SUCCESS); +} + +/* Add one ledger entry value to another. + * They must have been created from the same template. + * Since the credit and debit values are added one + * at a time, another thread might read a bogus value.
+ */ +kern_return_t +ledger_rollup_entry(ledger_t to_ledger, ledger_t from_ledger, int entry) +{ + struct ledger_entry *from_le, *to_le; + + assert(to_ledger->l_template == from_ledger->l_template); + if (ENTRY_VALID(from_ledger, entry) && ENTRY_VALID(to_ledger, entry)) { + from_le = &from_ledger->l_entries[entry]; + to_le = &to_ledger->l_entries[entry]; + OSAddAtomic64(from_le->le_credit, &to_le->le_credit); + OSAddAtomic64(from_le->le_debit, &to_le->le_debit); } return (KERN_SUCCESS); diff --git a/osfmk/kern/ledger.h b/osfmk/kern/ledger.h index 689c1d277..5d1c6818f 100644 --- a/osfmk/kern/ledger.h +++ b/osfmk/kern/ledger.h @@ -140,6 +140,7 @@ extern kern_return_t ledger_reset_callback_state(ledger_t ledger, int entry); extern kern_return_t ledger_disable_panic_on_negative(ledger_t ledger, int entry); extern kern_return_t ledger_rollup(ledger_t to_ledger, ledger_t from_ledger); +extern kern_return_t ledger_rollup_entry(ledger_t to_ledger, ledger_t from_ledger, int entry); extern void ledger_ast(thread_t thread); diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index cc8159895..4be2588be 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -220,6 +220,7 @@ extern kern_return_t iokit_task_terminate(task_t task); extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *); extern void bsd_copythreadname(void *dst_uth, void *src_uth); +extern kern_return_t thread_resume(thread_t thread); // Warn tasks when they hit 80% of their memory limit.
#define PHYS_FOOTPRINT_WARNING_LEVEL 80 @@ -411,6 +412,88 @@ task_bank_init(__unused task_t task) { } +void +task_set_did_exec_flag(task_t task) +{ + task->t_procflags |= TPF_DID_EXEC; +} + +void +task_clear_exec_copy_flag(task_t task) +{ + task->t_procflags &= ~TPF_EXEC_COPY; +} + +/* + * This wait event is t_procflags instead of t_flags because t_flags is volatile + * + * TODO: store the flags in the same place as the event + * rdar://problem/28501994 + */ +event_t +task_get_return_wait_event(task_t task) +{ + return (event_t)&task->t_procflags; +} + +void +task_clear_return_wait(task_t task) +{ + task_lock(task); + + task->t_flags &= ~TF_LRETURNWAIT; + + if (task->t_flags & TF_LRETURNWAITER) { + thread_wakeup(task_get_return_wait_event(task)); + task->t_flags &= ~TF_LRETURNWAITER; + } + + task_unlock(task); +} + +void +task_wait_to_return(void) +{ + task_t task; + + task = current_task(); + task_lock(task); + + if (task->t_flags & TF_LRETURNWAIT) { + do { + task->t_flags |= TF_LRETURNWAITER; + assert_wait(task_get_return_wait_event(task), THREAD_UNINT); + task_unlock(task); + + thread_block(THREAD_CONTINUE_NULL); + + task_lock(task); + } while (task->t_flags & TF_LRETURNWAIT); + } + + task_unlock(task); + + thread_bootstrap_return(); +} + +boolean_t +task_is_exec_copy(task_t task) +{ + return task_is_exec_copy_internal(task); +} + +boolean_t +task_did_exec(task_t task) +{ + return task_did_exec_internal(task); +} + +boolean_t +task_is_active(task_t task) +{ + return task->active; +} + #if TASK_REFERENCE_LEAK_DEBUG #include @@ -602,9 +685,9 @@ task_init(void) * Create the kernel task as the first task. 
*/ #ifdef __LP64__ - if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) #else - if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS) #endif panic("task_init\n"); @@ -857,6 +940,7 @@ task_create_internal( boolean_t inherit_memory, boolean_t is_64bit, uint32_t t_flags, + uint32_t t_procflags, task_t *child_task) /* OUT */ { task_t new_task; @@ -909,6 +993,7 @@ task_create_internal( new_task->user_data = NULL; new_task->priv_flags = 0; new_task->t_flags = t_flags; + new_task->t_procflags = t_procflags; new_task->importance = 0; new_task->corpse_info_kernel = NULL; new_task->exec_token = 0; @@ -1020,24 +1105,27 @@ task_create_internal( #if IMPORTANCE_INHERITANCE ipc_importance_task_t new_task_imp = IIT_NULL; + boolean_t inherit_receive = TRUE; if (task_is_marked_importance_donor(parent_task)) { new_task_imp = ipc_importance_for_task(new_task, FALSE); assert(IIT_NULL != new_task_imp); ipc_importance_task_mark_donor(new_task_imp, TRUE); } - /* Embedded doesn't want this to inherit */ - if (task_is_marked_importance_receiver(parent_task)) { - if (IIT_NULL == new_task_imp) - new_task_imp = ipc_importance_for_task(new_task, FALSE); - assert(IIT_NULL != new_task_imp); - ipc_importance_task_mark_receiver(new_task_imp, TRUE); - } - if (task_is_marked_importance_denap_receiver(parent_task)) { - if (IIT_NULL == new_task_imp) - new_task_imp = ipc_importance_for_task(new_task, FALSE); - assert(IIT_NULL != new_task_imp); - ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE); + + if (inherit_receive) { + if (task_is_marked_importance_receiver(parent_task)) { + if (IIT_NULL == new_task_imp) + new_task_imp = ipc_importance_for_task(new_task, FALSE); + assert(IIT_NULL != 
new_task_imp); + ipc_importance_task_mark_receiver(new_task_imp, TRUE); + } + if (task_is_marked_importance_denap_receiver(parent_task)) { + if (IIT_NULL == new_task_imp) + new_task_imp = ipc_importance_for_task(new_task, FALSE); + assert(IIT_NULL != new_task_imp); + ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE); + } } if (IIT_NULL != new_task_imp) { @@ -1086,35 +1174,7 @@ task_create_internal( /* Copy resource acc. info from Parent for Corpe Forked task. */ if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) { - new_task->total_user_time = parent_task->total_user_time; - new_task->total_system_time = parent_task->total_system_time; - ledger_rollup(new_task->ledger, parent_task->ledger); - new_task->faults = parent_task->faults; - new_task->pageins = parent_task->pageins; - new_task->cow_faults = parent_task->cow_faults; - new_task->messages_sent = parent_task->messages_sent; - new_task->messages_received = parent_task->messages_received; - new_task->syscalls_mach = parent_task->syscalls_mach; - new_task->syscalls_unix = parent_task->syscalls_unix; - new_task->c_switch = parent_task->c_switch; - new_task->p_switch = parent_task->p_switch; - new_task->ps_switch = parent_task->ps_switch; - new_task->extmod_statistics = parent_task->extmod_statistics; - new_task->low_mem_notified_warn = parent_task->low_mem_notified_warn; - new_task->low_mem_notified_critical = parent_task->low_mem_notified_critical; - new_task->purged_memory_warn = parent_task->purged_memory_warn; - new_task->purged_memory_critical = parent_task->purged_memory_critical; - new_task->low_mem_privileged_listener = parent_task->low_mem_privileged_listener; - *new_task->task_io_stats = *parent_task->task_io_stats; - new_task->cpu_time_qos_stats = parent_task->cpu_time_qos_stats; - new_task->task_timer_wakeups_bin_1 = parent_task->task_timer_wakeups_bin_1; - new_task->task_timer_wakeups_bin_2 = parent_task->task_timer_wakeups_bin_2; - new_task->task_gpu_ns = parent_task->task_gpu_ns; - 
new_task->task_immediate_writes = parent_task->task_immediate_writes; - new_task->task_deferred_writes = parent_task->task_deferred_writes; - new_task->task_invalidated_writes = parent_task->task_invalidated_writes; - new_task->task_metadata_writes = parent_task->task_metadata_writes; - new_task->task_energy = parent_task->task_energy; + task_rollup_accounting_info(new_task, parent_task); } else { /* Initialize to zero for standard fork/spawn case */ new_task->total_user_time = 0; @@ -1205,6 +1265,63 @@ task_create_internal( return(KERN_SUCCESS); } +/* + * task_rollup_accounting_info + * + * Roll up accounting stats. Used to rollup stats + * for exec copy task and corpse fork. + */ +void +task_rollup_accounting_info(task_t to_task, task_t from_task) +{ + assert(from_task != to_task); + + to_task->total_user_time = from_task->total_user_time; + to_task->total_system_time = from_task->total_system_time; + to_task->faults = from_task->faults; + to_task->pageins = from_task->pageins; + to_task->cow_faults = from_task->cow_faults; + to_task->messages_sent = from_task->messages_sent; + to_task->messages_received = from_task->messages_received; + to_task->syscalls_mach = from_task->syscalls_mach; + to_task->syscalls_unix = from_task->syscalls_unix; + to_task->c_switch = from_task->c_switch; + to_task->p_switch = from_task->p_switch; + to_task->ps_switch = from_task->ps_switch; + to_task->extmod_statistics = from_task->extmod_statistics; + to_task->low_mem_notified_warn = from_task->low_mem_notified_warn; + to_task->low_mem_notified_critical = from_task->low_mem_notified_critical; + to_task->purged_memory_warn = from_task->purged_memory_warn; + to_task->purged_memory_critical = from_task->purged_memory_critical; + to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener; + *to_task->task_io_stats = *from_task->task_io_stats; + to_task->cpu_time_qos_stats = from_task->cpu_time_qos_stats; + to_task->task_timer_wakeups_bin_1 = 
from_task->task_timer_wakeups_bin_1; + to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2; + to_task->task_gpu_ns = from_task->task_gpu_ns; + to_task->task_immediate_writes = from_task->task_immediate_writes; + to_task->task_deferred_writes = from_task->task_deferred_writes; + to_task->task_invalidated_writes = from_task->task_invalidated_writes; + to_task->task_metadata_writes = from_task->task_metadata_writes; + to_task->task_energy = from_task->task_energy; + + /* Skip ledger roll up for memory accounting entries */ + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time); + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups); + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups); +#if CONFIG_SCHED_SFI + for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) { + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]); + } +#endif +#if CONFIG_BANK + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me); + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others); +#endif + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes); + ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes); +} + int task_dropped_imp_count = 0; /* @@ -1974,7 +2091,7 @@ task_terminate_internal( } #ifdef MACH_BSD - if (task->bsd_info != NULL) { + if (task->bsd_info != NULL && !task_is_exec_copy(task)) { pid = proc_pid(task->bsd_info); } #endif /* MACH_BSD */ @@ -2045,7 +2162,7 @@ task_terminate_internal( * and we have to report it. 
*/ char procname[17]; - if (task->bsd_info) { + if (task->bsd_info && !task_is_exec_copy(task)) { pid = proc_pid(task->bsd_info); proc_name_kdp(task, procname, sizeof (procname)); } else { @@ -2185,6 +2302,9 @@ task_start_halt_locked(task_t task, boolean_t should_mark_corpse) * Complete task halt by waiting for threads to terminate, then clean * up task resources (VM, port namespace, etc...) and then let the * current thread go in the (practically empty) task context. + * + * Note: task->halting flag is not cleared in order to avoid creation + * of new thread in old exec'ed task. */ void task_complete_halt(task_t task) @@ -2239,8 +2359,6 @@ task_complete_halt(task_t task) * at worst someone is racing a SUID exec. */ iokit_task_terminate(task); - - task->halting = FALSE; } /* diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h index 3449f26c7..a3c5edb6b 100644 --- a/osfmk/kern/task.h +++ b/osfmk/kern/task.h @@ -249,6 +249,9 @@ struct task { #define TF_CORPSE 0x00000020 /* task is a corpse */ #define TF_PENDING_CORPSE 0x00000040 /* task corpse has not been reported yet */ #define TF_CORPSE_FORK 0x00000080 /* task is a forked corpse */ +#define TF_LRETURNWAIT 0x00000100 /* task is waiting for fork/posix_spawn/exec to complete */ +#define TF_LRETURNWAITER 0x00000200 /* task is waiting for TF_LRETURNWAIT to get cleared */ + #define task_has_64BitAddr(task) \ (((task)->t_flags & TF_64B_ADDR) != 0) @@ -277,6 +280,17 @@ struct task { #define task_is_a_corpse_fork(task) \ (((task)->t_flags & TF_CORPSE_FORK) != 0) + uint32_t t_procflags; /* general-purpose task flags protected by proc_lock (PL) */ +#define TPF_NONE 0 +#define TPF_DID_EXEC 0x00000001 /* task has been execed to a new task */ +#define TPF_EXEC_COPY 0x00000002 /* task is the new copy of an exec */ + +#define task_did_exec_internal(task) \ + (((task)->t_procflags & TPF_DID_EXEC) != 0) + +#define task_is_exec_copy_internal(task) \ + (((task)->t_procflags & TPF_EXEC_COPY) != 0) + mach_vm_address_t 
all_image_info_addr; /* dyld __all_image_info */ mach_vm_size_t all_image_info_size; /* section location and size */ @@ -443,6 +457,12 @@ extern task_t current_task(void); extern void task_reference(task_t task); #define TF_NONE 0 +#define TF_LRETURNWAIT 0x00000100 /* task is waiting for fork/posix_spawn/exec to complete */ +#define TF_LRETURNWAITER 0x00000200 /* task is waiting for TF_LRETURNWAIT to get cleared */ + +#define TPF_NONE 0 +#define TPF_EXEC_COPY 0x00000002 /* task is the new copy of an exec */ + __END_DECLS @@ -528,6 +548,7 @@ extern kern_return_t task_create_internal( boolean_t inherit_memory, boolean_t is_64bit, uint32_t flags, + uint32_t procflags, task_t *child_task); /* OUT */ @@ -667,7 +688,16 @@ thread_t task_findtid(task_t task, uint64_t tid); extern kern_return_t task_wakeups_monitor_ctl(task_t task, uint32_t *rate_hz, int32_t *flags); extern kern_return_t task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags); +extern void task_rollup_accounting_info(task_t new_task, task_t parent_task); extern kern_return_t task_io_monitor_ctl(task_t task, uint32_t *flags); +extern void task_set_did_exec_flag(task_t task); +extern void task_clear_exec_copy_flag(task_t task); +extern boolean_t task_is_exec_copy(task_t); +extern boolean_t task_did_exec(task_t task); +extern boolean_t task_is_active(task_t task); +extern void task_clear_return_wait(task_t task); +extern void task_wait_to_return(void); +extern event_t task_get_return_wait_event(task_t task); extern void task_atm_reset(task_t task); extern void task_bank_reset(task_t task); diff --git a/osfmk/kern/task_policy.c b/osfmk/kern/task_policy.c index 6d9f28919..dcd6fc472 100644 --- a/osfmk/kern/task_policy.c +++ b/osfmk/kern/task_policy.c @@ -664,7 +664,8 @@ task_policy_create(task_t task, task_t parent_task) task->requested_policy.trp_terminated = parent_task->requested_policy.trp_terminated; task->requested_policy.trp_qos_clamp = parent_task->requested_policy.trp_qos_clamp; - if 
(task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) { + if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && !task_is_exec_copy(task)) { + /* Do not update the apptype for exec copy task */ if (parent_task->requested_policy.trp_boosted) { task->requested_policy.trp_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE; task_importance_mark_donor(task, TRUE); diff --git a/osfmk/kern/telemetry.c b/osfmk/kern/telemetry.c index bb58493d4..2e292d1dd 100644 --- a/osfmk/kern/telemetry.c +++ b/osfmk/kern/telemetry.c @@ -66,6 +66,8 @@ extern uint64_t proc_uniqueid(void *p); extern uint64_t proc_was_throttled(void *p); extern uint64_t proc_did_throttle(void *p); extern int proc_selfpid(void); +extern boolean_t task_did_exec(task_t task); +extern boolean_t task_is_exec_copy(task_t task); struct micro_snapshot_buffer { vm_offset_t buffer; @@ -366,7 +368,7 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags, struct return; task = thread->task; - if ((task == TASK_NULL) || (task == kernel_task)) + if ((task == TASK_NULL) || (task == kernel_task) || task_did_exec(task) || task_is_exec_copy(task)) return; /* diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 5a703f62f..bf0749afc 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -420,6 +420,12 @@ thread_init(void) init_thread_ledgers(); } +boolean_t +thread_is_active(thread_t thread) +{ + return (thread->active); +} + void thread_corpse_continue(void) { @@ -504,7 +510,7 @@ thread_terminate_self(void) uthread_cleanup(task, thread->uthread, task->bsd_info); threadcnt = hw_atomic_sub(&task->active_thread_count, 1); - if (task->bsd_info) { + if (task->bsd_info && !task_is_exec_copy(task)) { /* trace out pid before we sign off */ long dbg_arg1 = 0; @@ -518,7 +524,7 @@ thread_terminate_self(void) * If we are the last thread to terminate and the task is * associated with a BSD process, perform BSD process exit. 
*/ - if (threadcnt == 0 && task->bsd_info != NULL) { + if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) { mach_exception_data_type_t subcode = 0; { /* since we're the last thread in this process, trace out the command name too */ @@ -1227,9 +1233,24 @@ thread_create_internal( kdbg_trace_data(parent_task->bsd_info, &dbg_arg2); + /* + * Starting with 26604425, exec'ing creates a new task/thread. + * + * NEWTHREAD in the current process has two possible meanings: + * + * 1) Create a new thread for this process. + * 2) Create a new thread for the future process this will become in an exec. + * + * To disambiguate these, arg3 will be set to TRUE for case #2. + * + * The value we need to find (TPF_EXEC_COPY) is stable in the case of a + * task exec'ing. The read of t_procflags does not take the proc_lock. + */ + dbg_arg3 = (task_is_exec_copy(parent_task)) ? TRUE : 0; + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, TRACE_DATA_NEWTHREAD | DBG_FUNC_NONE, - (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0); + (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, dbg_arg3, 0, 0); kdbg_trace_string(parent_task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); @@ -1308,6 +1329,56 @@ thread_create_with_continuation( return thread_create_internal2(task, new_thread, FALSE, continuation); } +/* + * Create a thread that is already started, but is waiting on an event + */ +static kern_return_t +thread_create_waiting_internal( + task_t task, + thread_continue_t continuation, + event_t event, + int options, + thread_t *new_thread) +{ + kern_return_t result; + thread_t thread; + + if (task == TASK_NULL || task == kernel_task) + return (KERN_INVALID_ARGUMENT); + + result = thread_create_internal(task, -1, continuation, options, &thread); + if (result != KERN_SUCCESS) + return (result); + + /* note no user_stop_count or thread_hold here */ + + if (task->suspend_count > 0) + thread_hold(thread); + + thread_mtx_lock(thread); + 
thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE); + thread_mtx_unlock(thread); + + task_unlock(task); + lck_mtx_unlock(&tasks_threads_lock); + + *new_thread = thread; + + return (KERN_SUCCESS); +} + +kern_return_t +thread_create_waiting( + task_t task, + thread_continue_t continuation, + event_t event, + thread_t *new_thread) +{ + return thread_create_waiting_internal(task, continuation, event, + TH_OPTION_NONE, new_thread); +} + + static kern_return_t thread_create_running_internal2( task_t task, @@ -1422,34 +1493,14 @@ thread_create_workq( kern_return_t thread_create_workq_waiting( task_t task, - thread_continue_t thread_return, + thread_continue_t continuation, event_t event, thread_t *new_thread) { - thread_t thread; - kern_return_t result; - - if (task == TASK_NULL || task == kernel_task) - return KERN_INVALID_ARGUMENT; - - result = thread_create_internal(task, -1, thread_return, TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread); - - if (result != KERN_SUCCESS) - return result; - - if (task->suspend_count > 0) - thread_hold(thread); - - thread_mtx_lock(thread); - thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE); - thread_mtx_unlock(thread); - - task_unlock(task); - lck_mtx_unlock(&tasks_threads_lock); - - *new_thread = thread; - return result; + return thread_create_waiting_internal(task, continuation, event, + TH_OPTION_NOCRED | TH_OPTION_NOSUSP, + new_thread); } /* diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index c27489677..368238042 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -621,6 +621,8 @@ extern void thread_release( extern void thread_corpse_continue(void); +extern boolean_t thread_is_active(thread_t thread); + /* Locking for scheduler state, always acquired with interrupts disabled (splsched()) */ #if __SMP__ #define thread_lock_init(th) simple_lock_init(&(th)->sched_lock, 0) @@ -846,6 +848,11 @@ extern kern_return_t thread_create_with_continuation( thread_t *new_thread, thread_continue_t 
continuation); +extern kern_return_t thread_create_waiting(task_t task, + thread_continue_t continuation, + event_t event, + thread_t *new_thread); + extern kern_return_t thread_create_workq( task_t task, thread_continue_t thread_return, diff --git a/osfmk/kperf/action.c b/osfmk/kperf/action.c index db437d771..34098fa93 100644 --- a/osfmk/kperf/action.c +++ b/osfmk/kperf/action.c @@ -336,6 +336,11 @@ kperf_thread_ast_handler(thread_t thread) task_t task = get_threadtask(thread); + if (task_did_exec(task) || task_is_exec_copy(task)) { + BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_END, SAMPLE_CONTINUE); + return; + } + /* make a context, take a sample */ struct kperf_context ctx; ctx.cur_thread = thread; diff --git a/osfmk/mach/vm_statistics.h b/osfmk/mach/vm_statistics.h index eac764903..0c4623d15 100644 --- a/osfmk/mach/vm_statistics.h +++ b/osfmk/mach/vm_statistics.h @@ -494,6 +494,11 @@ typedef struct pmap_statistics *pmap_statistics_t; /* DHMM data */ #define VM_MEMORY_DHMM 84 +#if !(defined(RC_HIDE_XNU_J79) || defined(RC_HIDE_XNU_J80)) +/* memory needed for DFR related actions */ +#define VM_MEMORY_DFR 85 +#endif // !(defined(RC_HIDE_XNU_J79) || defined(RC_HIDE_XNU_J80)) + /* memory allocated by SceneKit.framework */ #define VM_MEMORY_SCENEKIT 86 diff --git a/osfmk/vm/vm_compressor.c b/osfmk/vm/vm_compressor.c index bf333dca4..206046a2f 100644 --- a/osfmk/vm/vm_compressor.c +++ b/osfmk/vm/vm_compressor.c @@ -3519,9 +3519,9 @@ vm_compressor_get(ppnum_t pn, int *slot, int flags) *dptr++ = data; } #endif - c_segment_sv_hash_drop_ref(slot_ptr->s_cindx); - if ( !(flags & C_KEEP)) { + c_segment_sv_hash_drop_ref(slot_ptr->s_cindx); + OSAddAtomic(-1, &c_segment_pages_compressed); *slot = 0; } diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index 1a8aa3558..49f7e2527 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -2726,7 +2726,18 @@ consider_inactive: bg_m_object = VM_PAGE_OBJECT(m); - if (force_anonymous == FALSE || bg_m_object->internal) { + if 
(!VM_PAGE_PAGEABLE(m)) { + /* + * This page is on the background queue + * but not on a pageable queue. This is + * likely a transient state and whoever + * took it out of its pageable queue + * will likely put it back on a pageable + * queue soon but we can't deal with it + * at this point, so let's ignore this + * page. + */ + } else if (force_anonymous == FALSE || bg_m_object->internal) { ignore_reference = TRUE; if (bg_m_object->internal) diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index fd55a91d1..b378ebaaa 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -2854,6 +2854,7 @@ vm_page_grab_secluded(void) if (mem->dirty || mem->precious) { /* can't grab a dirty page; re-activate */ // printf("SECLUDED: dirty page %p\n", mem); + PAGE_WAKEUP_DONE(mem); vm_page_secluded.grab_failure_dirty++; vm_object_unlock(object); goto reactivate_secluded_page; @@ -2861,6 +2862,9 @@ vm_page_grab_secluded(void) if (mem->reference) { /* it's been used but we do need to grab a page... */ } + /* page could still be on vm_page_queue_background... */ + vm_page_free_prepare_queues(mem); + vm_page_unlock_queues(); /* finish what vm_page_free() would have done... 
*/ @@ -7542,12 +7546,17 @@ vm_page_queues_remove(vm_page_t mem, boolean_t __unused remove_from_backgroundq) { assert(mem->pageq.next == 0 && mem->pageq.prev == 0); #if CONFIG_BACKGROUND_QUEUE - if (mem->vm_page_on_backgroundq == FALSE) { - assert(mem->vm_page_backgroundq.next == 0 && - mem->vm_page_backgroundq.prev == 0 && - mem->vm_page_on_backgroundq == FALSE); + if (remove_from_backgroundq == TRUE) { + vm_page_remove_from_backgroundq(mem); } -#endif + if (mem->vm_page_on_backgroundq) { + assert(mem->vm_page_backgroundq.next != 0); + assert(mem->vm_page_backgroundq.prev != 0); + } else { + assert(mem->vm_page_backgroundq.next == 0); + assert(mem->vm_page_backgroundq.prev == 0); + } +#endif /* CONFIG_BACKGROUND_QUEUE */ return; } if (mem->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c index 10a9fb24d..f1e24b31e 100644 --- a/pexpert/i386/pe_init.c +++ b/pexpert/i386/pe_init.c @@ -335,6 +335,11 @@ PE_reboot_on_panic(void) return FALSE; } +void +PE_sync_panic_buffers(void) +{ +} + /* rdar://problem/21244753 */ uint32_t PE_i_can_has_debugger(uint32_t *debug_flags) diff --git a/pexpert/pexpert/pexpert.h b/pexpert/pexpert/pexpert.h index 295bf6b33..e9e6c463d 100644 --- a/pexpert/pexpert/pexpert.h +++ b/pexpert/pexpert/pexpert.h @@ -348,6 +348,7 @@ extern boolean_t PE_imgsrc_mount_supported(void); #if KERNEL_PRIVATE boolean_t PE_reboot_on_panic(void); +void PE_sync_panic_buffers(void); #endif __END_DECLS diff --git a/security/mac_framework.h b/security/mac_framework.h index c71d12228..e64a43dd4 100644 --- a/security/mac_framework.h +++ b/security/mac_framework.h @@ -483,10 +483,8 @@ int mac_vnode_check_exchangedata(vfs_context_t ctx, struct vnode *v1, int mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp, struct image_params *imgp); int mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp); -int mac_vnode_check_signature(struct vnode *vp, - struct cs_blob *cs_blob, struct image_params *imgp, - 
unsigned int *cs_flags, - int flags); +int mac_vnode_check_getattr(vfs_context_t ctx, struct ucred *file_cred, + struct vnode *vp, struct vnode_attr *va); int mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp, struct attrlist *alist); int mac_vnode_check_getextattr(vfs_context_t ctx, struct vnode *vp, @@ -530,6 +528,10 @@ int mac_vnode_check_setowner(vfs_context_t ctx, struct vnode *vp, uid_t uid, gid_t gid); int mac_vnode_check_setutimes(vfs_context_t ctx, struct vnode *vp, struct timespec atime, struct timespec mtime); +int mac_vnode_check_signature(struct vnode *vp, + struct cs_blob *cs_blob, struct image_params *imgp, + unsigned int *cs_flags, + int flags); int mac_vnode_check_stat(vfs_context_t ctx, kauth_cred_t file_cred, struct vnode *vp); int mac_vnode_check_truncate(vfs_context_t ctx, diff --git a/security/mac_policy.h b/security/mac_policy.h index ba8355e21..2d5f1b348 100644 --- a/security/mac_policy.h +++ b/security/mac_policy.h @@ -1737,6 +1737,9 @@ typedef int mpo_mount_check_fsctl_t( @return Return 0 if access is granted, otherwise an appropriate value for errno should be returned. + + @note Policies may change the contents of vfa to alter the list of + file system attributes returned. 
*/ typedef int mpo_mount_check_getattr_t( @@ -4660,25 +4663,34 @@ typedef int mpo_vnode_check_fsgetpath_t( struct label *label ); /** - @brief Access control check after determining the code directory hash - @param vp vnode vnode to combine into proc - @param label label associated with the vnode - @param cs_blob the code signature to check - @param cs_flags update code signing flags if needed - @param flags operational flag to mpo_vnode_check_signature - @param fatal_failure_desc description of fatal failure - @param fatal_failure_desc_len failure description len, failure is fatal if non-0 + @brief Access control check for retrieving file attributes + @param active_cred Subject credential + @param file_cred Credential associated with the struct fileproc + @param vp Object vnode + @param vlabel Policy label for vp + @param va Vnode attributes to retrieve + + Determine whether the subject identified by the credential can + get information about the passed vnode. The active_cred holds + the credentials of the subject performing the operation, and + file_cred holds the credentials of the subject that originally + opened the file. This check happens during stat(), lstat(), + fstat(), and getattrlist() syscalls. See <sys/vnode.h> for + definitions of the attributes. @return Return 0 if access is granted, otherwise an appropriate value for errno should be returned. - */ -typedef int mpo_vnode_check_signature_t( + + @note Policies may change the contents of va to alter the list of + file attributes returned.
+*/ +typedef int mpo_vnode_check_getattr_t( + kauth_cred_t active_cred, + kauth_cred_t file_cred, /* NULLOK */ struct vnode *vp, - struct label *label, - struct cs_blob *cs_blob, - unsigned int *cs_flags, - int flags, - char **fatal_failure_desc, size_t *fatal_failure_desc_len); + struct label *vlabel, + struct vnode_attr *va +); /** @brief Access control check for retrieving file attributes @param cred Subject credential @@ -5245,6 +5257,27 @@ typedef int mpo_vnode_check_setutimes_t( struct timespec atime, struct timespec mtime ); +/** + @brief Access control check after determining the code directory hash + @param vp vnode vnode to combine into proc + @param label label associated with the vnode + @param cs_blob the code signature to check + @param cs_flags update code signing flags if needed + @param flags operational flag to mpo_vnode_check_signature + @param fatal_failure_desc description of fatal failure + @param fatal_failure_desc_len failure description len, failure is fatal if non-0 + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. + */ +typedef int mpo_vnode_check_signature_t( + struct vnode *vp, + struct label *label, + struct cs_blob *cs_blob, + unsigned int *cs_flags, + int flags, + char **fatal_failure_desc, size_t *fatal_failure_desc_len +); /** @brief Access control check for stat @param active_cred Subject credential @@ -6138,7 +6171,7 @@ typedef void mpo_reserved_hook_t(void); * Please note that this should be kept in sync with the check assumptions * policy in bsd/kern/policy_check.c (policy_ops struct). 
*/ -#define MAC_POLICY_OPS_VERSION 45 /* inc when new reserved slots are taken */ +#define MAC_POLICY_OPS_VERSION 46 /* inc when new reserved slots are taken */ struct mac_policy_ops { mpo_audit_check_postselect_t *mpo_audit_check_postselect; mpo_audit_check_preselect_t *mpo_audit_check_preselect; @@ -6285,17 +6318,17 @@ struct mac_policy_ops { mpo_proc_check_set_host_exception_port_t *mpo_proc_check_set_host_exception_port; mpo_exc_action_check_exception_send_t *mpo_exc_action_check_exception_send; mpo_exc_action_label_associate_t *mpo_exc_action_label_associate; - mpo_exc_action_label_copy_t *mpo_exc_action_label_copy; - mpo_exc_action_label_destroy_t *mpo_exc_action_label_destroy; - mpo_exc_action_label_init_t *mpo_exc_action_label_init; - mpo_exc_action_label_update_t *mpo_exc_action_label_update; - - mpo_reserved_hook_t *mpo_reserved17; - mpo_reserved_hook_t *mpo_reserved18; - mpo_reserved_hook_t *mpo_reserved19; - mpo_reserved_hook_t *mpo_reserved20; - mpo_reserved_hook_t *mpo_reserved21; - mpo_reserved_hook_t *mpo_reserved22; + mpo_exc_action_label_copy_t *mpo_exc_action_label_copy; + mpo_exc_action_label_destroy_t *mpo_exc_action_label_destroy; + mpo_exc_action_label_init_t *mpo_exc_action_label_init; + mpo_exc_action_label_update_t *mpo_exc_action_label_update; + + mpo_reserved_hook_t *mpo_reserved1; + mpo_reserved_hook_t *mpo_reserved2; + mpo_reserved_hook_t *mpo_reserved3; + mpo_reserved_hook_t *mpo_reserved4; + mpo_reserved_hook_t *mpo_reserved5; + mpo_reserved_hook_t *mpo_reserved6; mpo_posixsem_check_create_t *mpo_posixsem_check_create; mpo_posixsem_check_open_t *mpo_posixsem_check_open; @@ -6373,7 +6406,7 @@ struct mac_policy_ops { mpo_system_check_settime_t *mpo_system_check_settime; mpo_system_check_swapoff_t *mpo_system_check_swapoff; mpo_system_check_swapon_t *mpo_system_check_swapon; - mpo_reserved_hook_t *mpo_reserved31; + mpo_reserved_hook_t *mpo_reserved7; mpo_sysvmsg_label_associate_t *mpo_sysvmsg_label_associate; mpo_sysvmsg_label_destroy_t 
*mpo_sysvmsg_label_destroy; @@ -6406,9 +6439,9 @@ struct mac_policy_ops { mpo_sysvshm_label_init_t *mpo_sysvshm_label_init; mpo_sysvshm_label_recycle_t *mpo_sysvshm_label_recycle; - mpo_reserved_hook_t *mpo_reserved23; - mpo_reserved_hook_t *mpo_reserved24; - mpo_reserved_hook_t *mpo_reserved25; + mpo_reserved_hook_t *mpo_reserved8; + mpo_reserved_hook_t *mpo_reserved9; + mpo_vnode_check_getattr_t *mpo_vnode_check_getattr; mpo_mount_check_snapshot_create_t *mpo_mount_check_snapshot_create; mpo_mount_check_snapshot_delete_t *mpo_mount_check_snapshot_delete; mpo_vnode_check_clone_t *mpo_vnode_check_clone; diff --git a/security/mac_vfs.c b/security/mac_vfs.c index 7d7d6ea9a..1f88f57a3 100644 --- a/security/mac_vfs.c +++ b/security/mac_vfs.c @@ -995,6 +995,26 @@ mac_vnode_check_getacl(vfs_context_t ctx, struct vnode *vp, acl_type_t type) } #endif +int +mac_vnode_check_getattr(vfs_context_t ctx, struct ucred *file_cred, + struct vnode *vp, struct vnode_attr *va) +{ + kauth_cred_t cred; + int error; + +#if SECURITY_MAC_CHECK_ENFORCE + /* 21167099 - only check if we allow write */ + if (!mac_vnode_enforce) + return 0; +#endif + if (!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE)) + return 0; + + cred = vfs_context_ucred(ctx); + MAC_CHECK(vnode_check_getattr, cred, file_cred, vp, vp->v_label, va); + return (error); +} + int mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp, struct attrlist *alist) diff --git a/tools/tests/darwintests/suspended_spawn_26184412.c b/tools/tests/darwintests/suspended_spawn_26184412.c new file mode 100644 index 000000000..977e96dc8 --- /dev/null +++ b/tools/tests/darwintests/suspended_spawn_26184412.c @@ -0,0 +1,101 @@ + + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Test to validate that suspended-spawn DTRTs when a SIGKILL is received
+ * + * Also test that suspended-spawn correctly looks like a SIGSTOP while it's suspended. + * + * posix_spawn non-exec with POSIX_SPAWN_START_SUSPENDED, then killing instead of SIGCONT-ing causes unkillable hung processes + */ + +static void +spawn_and_signal(int signal) +{ + /* do not buffer output to stdout */ + setvbuf(stdout, NULL, _IONBF, 0); + + int ret; + posix_spawnattr_t attr; + + ret = posix_spawnattr_init(&attr); + T_QUIET; + T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_init"); + + ret = posix_spawnattr_setflags(&attr, POSIX_SPAWN_START_SUSPENDED); + T_QUIET; + T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_setflags"); + + char * const prog = "/usr/bin/true"; + char * const argv_child[] = { prog, NULL }; + pid_t child_pid; + extern char **environ; + + ret = posix_spawn(&child_pid, prog, NULL, &attr, argv_child, environ); + T_ASSERT_POSIX_SUCCESS(ret, "posix_spawn"); + + printf("parent: spawned child with pid %d\n", child_pid); + + ret = posix_spawnattr_destroy(&attr); + T_QUIET; + T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_destroy"); + + int status = 0; + int waitpid_result = waitpid(child_pid, &status, WUNTRACED|WNOHANG); + T_ASSERT_POSIX_SUCCESS(waitpid_result, "waitpid"); + + T_ASSERT_EQ(waitpid_result, child_pid, "waitpid should return child we spawned"); + + T_ASSERT_EQ(WIFEXITED(status), 0, "before SIGCONT: must not have exited"); + T_ASSERT_EQ(WIFSTOPPED(status), 1, "before SIGCONT: must be stopped"); + + printf("parent: continuing child process\n"); + + ret = kill(child_pid, signal); + T_ASSERT_POSIX_SUCCESS(ret, "kill(signal)"); + + printf("parent: waiting for child process\n"); + + status = 0; + waitpid_result = waitpid(child_pid, &status, 0); + T_ASSERT_POSIX_SUCCESS(waitpid_result, "waitpid"); + + T_ASSERT_EQ(waitpid_result, child_pid, "waitpid should return child we spawned"); + + if (signal == SIGKILL) { + T_ASSERT_EQ(WIFSIGNALED(status), 1, "child should have exited due to signal"); + T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "child should 
have exited due to SIGKILL"); + } else { + T_ASSERT_EQ(WIFEXITED(status), 1, "child should have exited normally"); + T_ASSERT_EQ(WEXITSTATUS(status), EX_OK, "child should have exited with success"); + } + + printf("wait returned with pid %d, status %d\n", ret, status); +} + +T_DECL(suspended_spawn_continue, "Tests spawning a suspended process and continuing it", T_META_TIMEOUT(2)) +{ + spawn_and_signal(SIGCONT); +} + +T_DECL(suspended_spawn_kill, "Tests spawning a suspended process and killing it", T_META_TIMEOUT(2)) +{ + spawn_and_signal(SIGKILL); +} +