From 94ff46dc2849db4d43eaaf144872decc522aafb4 Mon Sep 17 00:00:00 2001 From: Apple Date: Fri, 10 Apr 2020 19:49:29 +0000 Subject: [PATCH] xnu-6153.41.3.tar.gz --- bsd/dev/dtrace/fbt_blacklist.c | 2 + bsd/dev/i386/sysctl.c | 6 + bsd/kern/bsd_init.c | 10 + bsd/kern/kdebug.c | 2 - bsd/kern/kern_control.c | 89 ++- bsd/kern/kern_event.c | 85 +- bsd/kern/kern_exec.c | 78 +- bsd/kern/kern_fork.c | 9 +- bsd/kern/kern_lockf.c | 85 +- bsd/kern/kern_memorystatus.c | 90 ++- bsd/kern/kern_synch.c | 1 - bsd/kern/trace_codes | 3 + bsd/kern/uipc_socket.c | 3 +- bsd/kern/uipc_syscalls.c | 16 + bsd/miscfs/specfs/spec_vnops.c | 18 +- bsd/net/if_ipsec.c | 13 +- bsd/net/if_ports_used.c | 17 +- bsd/net/if_ports_used.h | 15 +- bsd/net/kpi_interface.c | 16 +- bsd/net/kpi_interface.h | 4 + bsd/net/ndrv.c | 906 ++++++++++++---------- bsd/net/necp.c | 19 +- bsd/net/necp_client.c | 17 + bsd/net/net_kev.h | 4 + bsd/net/pf_if.c | 4 +- bsd/netinet/in.h | 3 + bsd/netinet/in_pcb.h | 5 - bsd/netinet/in_pcblist.c | 59 +- bsd/netinet/in_tclass.c | 26 + bsd/netinet/ip_output.c | 2 +- bsd/netinet/mptcp.c | 4 + bsd/netinet/mptcp_subr.c | 61 +- bsd/netinet/mptcp_usrreq.c | 3 + bsd/netinet/mptcp_var.h | 3 + bsd/netinet/raw_ip.c | 8 +- bsd/netinet/tcp_timer.c | 22 +- bsd/netinet/tcp_timer.h | 3 +- bsd/netinet/tcp_var.h | 1 + bsd/netinet/udp_usrreq.c | 84 +- bsd/netinet6/ah_input.c | 8 +- bsd/netinet6/esp_chachapoly.c | 2 +- bsd/netinet6/esp_core.c | 10 +- bsd/netinet6/esp_input.c | 6 +- bsd/netinet6/esp_rijndael.c | 10 +- bsd/netinet6/ipsec.c | 76 +- bsd/netinet6/ipsec.h | 23 +- bsd/netinet6/udp6_usrreq.c | 66 +- bsd/netkey/key.c | 158 +++- bsd/netkey/key.h | 1 + bsd/nfs/nfs_subs.c | 68 +- bsd/nfs/nfs_syscalls.c | 43 +- bsd/nfs/nfs_vnops.c | 2 +- bsd/pthread/pthread_workqueue.c | 6 + bsd/pthread/workqueue_internal.h | 12 +- bsd/security/audit/audit.c | 73 +- bsd/security/audit/audit.h | 23 +- bsd/sys/event.h | 6 +- bsd/sys/eventvar.h | 2 +- bsd/sys/kdebug.h | 11 +- bsd/sys/kern_memorystatus.h | 1 + 
bsd/sys/lockf.h | 1 + bsd/sys/socketvar.h | 1 + bsd/sys/user.h | 22 +- bsd/sys/vnode.h | 1 + bsd/tests/ctrr_test_sysctl.c | 30 - bsd/vfs/kpi_vfs.c | 50 ++ bsd/vfs/vfs_lookup.c | 164 +++- bsd/vfs/vfs_subr.c | 31 +- bsd/vfs/vfs_syscalls.c | 81 +- iokit/IOKit/pwr_mgt/IOPM.h | 6 +- iokit/IOKit/pwr_mgt/IOPMPrivate.h | 8 + iokit/Kernel/IOInterruptController.cpp | 4 +- iokit/Kernel/IOKitDebug.cpp | 4 + iokit/Kernel/IOPMrootDomain.cpp | 30 +- iokit/Kernel/IOService.cpp | 8 +- iokit/Kernel/IOUserServer.cpp | 1 + libkern/os/reason_private.h | 1 + libsyscall/wrappers/persona.c | 14 +- osfmk/arm/cswitch.s | 19 +- osfmk/arm/genassym.c | 5 +- osfmk/arm/machine_routines_asm.s | 2 +- osfmk/arm/pcb.c | 2 +- osfmk/arm/pmap.c | 154 +--- osfmk/arm/pmap.h | 5 +- osfmk/arm/status.c | 10 +- osfmk/arm/thread.h | 45 +- osfmk/arm64/cswitch.s | 3 +- osfmk/arm64/genassym.c | 1 - osfmk/arm64/locore.s | 3 +- osfmk/arm64/loose_ends.c | 17 +- osfmk/arm64/machine_routines_asm.s | 1 - osfmk/arm64/pcb.c | 1 - osfmk/arm64/platform_tests.c | 94 --- osfmk/arm64/proc_reg.h | 3 + osfmk/arm64/status.c | 1 - osfmk/corpses/corpse.c | 8 + osfmk/i386/cpu_data.h | 97 ++- osfmk/i386/cpu_topology.c | 120 ++- osfmk/i386/cpuid.c | 20 +- osfmk/i386/cpuid.h | 2 +- osfmk/i386/fpu.c | 38 +- osfmk/i386/fpu.h | 2 +- osfmk/i386/i386_init.c | 3 - osfmk/i386/proc_reg.h | 1 + osfmk/i386/trap.c | 168 ++-- osfmk/i386/trap_native.c | 8 + osfmk/i386/ucode.c | 2 - osfmk/ipc/ipc_mqueue.c | 22 +- osfmk/ipc/ipc_mqueue.h | 10 +- osfmk/ipc/ipc_notify.c | 2 +- osfmk/ipc/ipc_port.c | 159 ++-- osfmk/ipc/ipc_port.h | 27 +- osfmk/ipc/ipc_pset.c | 6 +- osfmk/ipc/mach_msg.c | 10 +- osfmk/ipc/mach_port.c | 14 +- osfmk/kern/backtrace.c | 4 +- osfmk/kern/debug.h | 2 + osfmk/kern/host.c | 4 + osfmk/kern/ipc_tt.c | 15 +- osfmk/kern/kalloc.c | 11 +- osfmk/kern/kpc_thread.c | 10 +- osfmk/kern/ledger.c | 13 +- osfmk/kern/ledger.h | 3 +- osfmk/kern/mach_node.c | 4 +- osfmk/kern/sched_prim.c | 7 + osfmk/kern/sched_prim.h | 2 + osfmk/kern/startup.c | 
5 +- osfmk/kern/task.c | 27 +- osfmk/kern/task.h | 2 + osfmk/kern/thread.c | 2 +- osfmk/kern/thread.h | 76 +- osfmk/kern/timer.h | 8 +- osfmk/kern/turnstile.c | 12 +- osfmk/kern/waitq.c | 112 ++- osfmk/kern/waitq.h | 31 +- osfmk/kperf/action.c | 186 +++-- osfmk/kperf/action.h | 8 +- osfmk/kperf/ast.h | 15 +- osfmk/kperf/callstack.c | 13 +- osfmk/kperf/callstack.h | 3 +- osfmk/kperf/kperf.c | 13 - osfmk/kperf/kperf.h | 5 +- osfmk/kperf/kperf_kpc.c | 3 +- osfmk/kperf/pet.c | 5 +- osfmk/kperf/sample.h | 13 +- osfmk/kperf/thread_samplers.c | 6 +- osfmk/kperf/thread_samplers.h | 2 +- osfmk/mach/arm/thread_status.h | 25 + osfmk/mach/shared_region.h | 8 +- osfmk/mach/sync_policy.h | 7 +- osfmk/man/index.html | 448 ----------- osfmk/vm/vm_compressor.c | 5 - osfmk/vm/vm_fault.c | 5 +- osfmk/vm/vm_map.c | 3 + osfmk/vm/vm_object.h | 8 +- osfmk/vm/vm_pageout.h | 3 + osfmk/x86_64/kpc_x86.c | 48 +- tests/Makefile | 23 +- tests/no32exec_35914211.c | 83 +- tests/no32exec_35914211_helper.c | 17 +- tests/no32exec_35914211_helper_binprefs.c | 34 - tools/lldbmacros/core/cvalue.py | 5 +- tools/lldbmacros/ipc.py | 2 +- tools/lldbmacros/memory.py | 12 +- tools/lldbmacros/misc.py | 4 +- tools/lldbmacros/process.py | 4 +- tools/lldbmacros/waitq.py | 2 +- tools/lldbmacros/xnu.py | 126 ++- tools/tests/zero-to-n/zero-to-n.c | 158 ++++ tools/trace/ktruss.lua | 28 + 170 files changed, 3420 insertions(+), 2205 deletions(-) delete mode 100644 bsd/tests/ctrr_test_sysctl.c delete mode 100644 osfmk/man/index.html delete mode 100644 tests/no32exec_35914211_helper_binprefs.c create mode 100755 tools/trace/ktruss.lua diff --git a/bsd/dev/dtrace/fbt_blacklist.c b/bsd/dev/dtrace/fbt_blacklist.c index f8f34ae33..8b7d371cf 100644 --- a/bsd/dev/dtrace/fbt_blacklist.c +++ b/bsd/dev/dtrace/fbt_blacklist.c @@ -202,6 +202,7 @@ const char * fbt_blacklist[] = CLOSURE(prf) CLOSURE(proc_is64bit) CLOSURE(proc_selfname) + CRITICAL(rbtrace_bt) CRITICAL(register_cpu_setup_func) CRITICAL(ret64_iret) 
CRITICAL(ret_to_user) @@ -227,6 +228,7 @@ const char * fbt_blacklist[] = ARM_ONLY(timer_state_event) CRITICAL(tmrCvt) CRITICAL(trap_from_kernel) + CRITICAL(traptrace_) CRITICAL(tsc_) CRITICAL(uart_putc) CRITICAL(unlock_debugger) diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c index 2300e0b7f..567c5817e 100644 --- a/bsd/dev/i386/sysctl.c +++ b/bsd/dev/i386/sysctl.c @@ -1051,4 +1051,10 @@ SYSCTL_PROC(_machdep_misc, OID_AUTO, spin_forever, 0, 0, spin_in_the_kernel, "I", "Spin forever"); + +extern int traptrace_enabled; +SYSCTL_INT(_machdep_misc, OID_AUTO, traptrace_enabled, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &traptrace_enabled, 0, "Enabled/disable trap trace"); + #endif /* DEVELOPMENT || DEBUG */ diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index 3e2052fb0..bc5b709e0 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -168,6 +168,7 @@ #include /* for restricted_in_port_init() */ #include /* for assert() */ #include /* for init_system_override() */ +#include /* for lf_init() */ #include @@ -315,6 +316,8 @@ __private_extern__ int bootarg_vnode_cache_defeat = 0; __private_extern__ int bootarg_no_vnode_jetsam = 0; #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */ +__private_extern__ int bootarg_no_vnode_drain = 0; + /* * Prevent kernel-based ASLR from being used, for testing. */ @@ -760,6 +763,10 @@ bsd_init(void) bsd_init_kprintf("calling vfsinit\n"); vfsinit(); + /* Initialize file locks. 
*/ + bsd_init_kprintf("calling lf_init\n"); + lf_init(); + #if CONFIG_PROC_UUID_POLICY /* Initial proc_uuid_policy subsystem */ bsd_init_kprintf("calling proc_uuid_policy_init()\n"); @@ -1331,6 +1338,9 @@ parse_bsd_args(void) } #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */ + if (PE_parse_boot_argn("-no_vnode_drain", namep, sizeof(namep))) { + bootarg_no_vnode_drain = 1; + } #if CONFIG_EMBEDDED /* diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c index 0110e7114..eb78ca89a 100644 --- a/bsd/kern/kdebug.c +++ b/bsd/kern/kdebug.c @@ -84,8 +84,6 @@ /* * IOP(s) * - * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html - * * IOP(s) are auxiliary cores that want to participate in kdebug event logging. * They are registered dynamically. Each is assigned a cpu_id at registration. * diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index dede2e6e8..e41d1f103 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -93,6 +93,7 @@ struct ctl_cb { void *userdata; struct sockaddr_ctl sac; u_int32_t usecount; + u_int32_t kcb_usecount; }; #ifndef ROUNDUP64 @@ -351,6 +352,27 @@ ctl_sofreelastref(struct socket *so) return 0; } +/* + * Use this function to serialize calls into the kctl subsystem + */ +static void +ctl_kcb_increment_use_count(struct ctl_cb *kcb, lck_mtx_t *mutex_held) +{ + LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED); + while (kcb->kcb_usecount > 0) { + msleep(&kcb->kcb_usecount, mutex_held, PSOCK | PCATCH, "kcb_usecount", NULL); + } + kcb->kcb_usecount++; +} + +static void +clt_kcb_decrement_use_count(struct ctl_cb *kcb) +{ + assert(kcb->kcb_usecount != 0); + kcb->kcb_usecount--; + wakeup_one((caddr_t)&kcb->kcb_usecount); +} + static int ctl_detach(struct socket *so) { @@ -360,6 +382,9 @@ ctl_detach(struct socket *so) return 0; } + lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK); + ctl_kcb_increment_use_count(kcb, mtx_held); + if (kcb->kctl != NULL && kcb->kctl->bind != NULL && 
kcb->userdata != NULL && !(so->so_state & SS_ISCONNECTED)) { // The unit was bound, but not connected @@ -374,6 +399,7 @@ ctl_detach(struct socket *so) soisdisconnected(so); so->so_flags |= SOF_PCBCLEARING; + clt_kcb_decrement_use_count(kcb); return 0; } @@ -522,9 +548,12 @@ ctl_bind(struct socket *so, struct sockaddr *nam, struct proc *p) panic("ctl_bind so_pcb null\n"); } + lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK); + ctl_kcb_increment_use_count(kcb, mtx_held); + error = ctl_setup_kctl(so, nam, p); if (error) { - return error; + goto out; } if (kcb->kctl == NULL) { @@ -532,13 +561,16 @@ ctl_bind(struct socket *so, struct sockaddr *nam, struct proc *p) } if (kcb->kctl->bind == NULL) { - return EINVAL; + error = EINVAL; + goto out; } socket_unlock(so, 0); error = (*kcb->kctl->bind)(kcb->kctl->kctlref, &kcb->sac, &kcb->userdata); socket_lock(so, 0); +out: + clt_kcb_decrement_use_count(kcb); return error; } @@ -552,9 +584,12 @@ ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) panic("ctl_connect so_pcb null\n"); } + lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK); + ctl_kcb_increment_use_count(kcb, mtx_held); + error = ctl_setup_kctl(so, nam, p); if (error) { - return error; + goto out; } if (kcb->kctl == NULL) { @@ -596,6 +631,8 @@ end: kctlstat.kcs_conn_fail++; lck_mtx_unlock(ctl_mtx); } +out: + clt_kcb_decrement_use_count(kcb); return error; } @@ -605,6 +642,8 @@ ctl_disconnect(struct socket *so) struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; if ((kcb = (struct ctl_cb *)so->so_pcb)) { + lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK); + ctl_kcb_increment_use_count(kcb, mtx_held); struct kctl *kctl = kcb->kctl; if (kctl && kctl->disconnect) { @@ -628,6 +667,7 @@ ctl_disconnect(struct socket *so) kctlstat.kcs_gencnt++; lck_mtx_unlock(ctl_mtx); socket_lock(so, 0); + clt_kcb_decrement_use_count(kcb); } return 0; } @@ -694,11 +734,20 @@ ctl_sbrcv_trim(struct socket *so) static int ctl_usr_rcvd(struct socket *so, 
int flags) { + int error = 0; struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; struct kctl *kctl; + if (kcb == NULL) { + return ENOTCONN; + } + + lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK); + ctl_kcb_increment_use_count(kcb, mtx_held); + if ((kctl = kcb->kctl) == NULL) { - return EINVAL; + error = EINVAL; + goto out; } if (kctl->rcvd) { @@ -709,7 +758,9 @@ ctl_usr_rcvd(struct socket *so, int flags) ctl_sbrcv_trim(so); - return 0; +out: + clt_kcb_decrement_use_count(kcb); + return error; } static int @@ -730,6 +781,9 @@ ctl_send(struct socket *so, int flags, struct mbuf *m, error = ENOTCONN; } + lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK); + ctl_kcb_increment_use_count(kcb, mtx_held); + if (error == 0 && (kctl = kcb->kctl) == NULL) { error = EINVAL; } @@ -749,6 +803,8 @@ ctl_send(struct socket *so, int flags, struct mbuf *m, if (error != 0) { OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_fail); } + clt_kcb_decrement_use_count(kcb); + return error; } @@ -769,6 +825,9 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m, error = ENOTCONN; } + lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK); + ctl_kcb_increment_use_count(kcb, mtx_held); + if (error == 0 && (kctl = kcb->kctl) == NULL) { error = EINVAL; } @@ -808,6 +867,8 @@ ctl_send_list(struct socket *so, int flags, struct mbuf *m, if (error != 0) { OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_list_fail); } + clt_kcb_decrement_use_count(kcb); + return error; } @@ -1234,16 +1295,21 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) return EINVAL; } + lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK); + ctl_kcb_increment_use_count(kcb, mtx_held); + switch (sopt->sopt_dir) { case SOPT_SET: if (kctl->setopt == NULL) { - return ENOTSUP; + error = ENOTSUP; + goto out; } if (sopt->sopt_valsize != 0) { MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO); if (data == NULL) { - return ENOMEM; + error = ENOMEM; + goto out; } error = 
sooptcopyin(sopt, data, sopt->sopt_valsize, sopt->sopt_valsize); @@ -1263,14 +1329,16 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) case SOPT_GET: if (kctl->getopt == NULL) { - return ENOTSUP; + error = ENOTSUP; + goto out; } if (sopt->sopt_valsize && sopt->sopt_val) { MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO); if (data == NULL) { - return ENOMEM; + error = ENOMEM; + goto out; } /* * 4108337 - copy user data in case the @@ -1306,6 +1374,9 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt) } break; } + +out: + clt_kcb_decrement_use_count(kcb); return error; } diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index 725f96d1e..8f4d2207b 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -1123,52 +1123,52 @@ filt_procevent(struct knote *kn, long hint) /* * The kernel has a wrapper in place that returns the same data - * as is collected here, in kn_hook64. Any changes to how + * as is collected here, in kn_hook32. Any changes to how * NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected * should also be reflected in the proc_pidnoteexit() wrapper. 
*/ if (event == NOTE_EXIT) { - kn->kn_hook64 = 0; + kn->kn_hook32 = 0; if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) { kn->kn_fflags |= NOTE_EXITSTATUS; - kn->kn_hook64 |= (hint & NOTE_PDATAMASK); + kn->kn_hook32 |= (hint & NOTE_PDATAMASK); } if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) { kn->kn_fflags |= NOTE_EXIT_DETAIL; if ((kn->kn_proc->p_lflag & P_LTERM_DECRYPTFAIL) != 0) { - kn->kn_hook64 |= NOTE_EXIT_DECRYPTFAIL; + kn->kn_hook32 |= NOTE_EXIT_DECRYPTFAIL; } if ((kn->kn_proc->p_lflag & P_LTERM_JETSAM) != 0) { - kn->kn_hook64 |= NOTE_EXIT_MEMORY; + kn->kn_hook32 |= NOTE_EXIT_MEMORY; switch (kn->kn_proc->p_lflag & P_JETSAM_MASK) { case P_JETSAM_VMPAGESHORTAGE: - kn->kn_hook64 |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE; + kn->kn_hook32 |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE; break; case P_JETSAM_VMTHRASHING: - kn->kn_hook64 |= NOTE_EXIT_MEMORY_VMTHRASHING; + kn->kn_hook32 |= NOTE_EXIT_MEMORY_VMTHRASHING; break; case P_JETSAM_FCTHRASHING: - kn->kn_hook64 |= NOTE_EXIT_MEMORY_FCTHRASHING; + kn->kn_hook32 |= NOTE_EXIT_MEMORY_FCTHRASHING; break; case P_JETSAM_VNODE: - kn->kn_hook64 |= NOTE_EXIT_MEMORY_VNODE; + kn->kn_hook32 |= NOTE_EXIT_MEMORY_VNODE; break; case P_JETSAM_HIWAT: - kn->kn_hook64 |= NOTE_EXIT_MEMORY_HIWAT; + kn->kn_hook32 |= NOTE_EXIT_MEMORY_HIWAT; break; case P_JETSAM_PID: - kn->kn_hook64 |= NOTE_EXIT_MEMORY_PID; + kn->kn_hook32 |= NOTE_EXIT_MEMORY_PID; break; case P_JETSAM_IDLEEXIT: - kn->kn_hook64 |= NOTE_EXIT_MEMORY_IDLE; + kn->kn_hook32 |= NOTE_EXIT_MEMORY_IDLE; break; } } if ((kn->kn_proc->p_csflags & CS_KILLED) != 0) { - kn->kn_hook64 |= NOTE_EXIT_CSERROR; + kn->kn_hook32 |= NOTE_EXIT_CSERROR; } } } @@ -1208,8 +1208,8 @@ filt_procprocess(struct knote *kn, struct kevent_qos_s *kev) proc_klist_lock(); if (kn->kn_fflags) { - knote_fill_kevent(kn, kev, kn->kn_hook64); - kn->kn_hook64 = 0; + knote_fill_kevent(kn, kev, kn->kn_hook32); + kn->kn_hook32 = 0; res = 1; } proc_klist_unlock(); @@ -3700,14 +3700,14 @@ kevent_register(struct kqueue *kq, struct kevent_qos_s 
*kev, } if (kq->kq_state & KQ_WORKLOOP) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER), ((struct kqworkloop *)kq)->kqwl_dynamicid, kev->udata, kev->flags, kev->filter); } else if (kq->kq_state & KQ_WORKQ) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER), 0, kev->udata, kev->flags, kev->filter); } else { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_REGISTER), VM_KERNEL_UNSLIDE_OR_PERM(kq), kev->udata, kev->flags, kev->filter); } @@ -3995,16 +3995,16 @@ knote_process(struct knote *kn, kevent_ctx_t kectx, assert(!(kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING))); if (kq->kq_state & KQ_WORKLOOP) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS), ((struct kqworkloop *)kq)->kqwl_dynamicid, kn->kn_udata, kn->kn_status | (kn->kn_id << 32), kn->kn_filtid); } else if (kq->kq_state & KQ_WORKQ) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS), 0, kn->kn_udata, kn->kn_status | (kn->kn_id << 32), kn->kn_filtid); } else { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS), VM_KERNEL_UNSLIDE_OR_PERM(kq), kn->kn_udata, kn->kn_status | (kn->kn_id << 32), kn->kn_filtid); } @@ -4125,7 +4125,7 @@ knote_process(struct knote *kn, kevent_ctx_t kectx, } if (kev.flags & EV_VANISHED) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_VANISHED), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KNOTE_VANISHED), kev.ident, kn->kn_udata, kn->kn_status | (kn->kn_id << 32), kn->kn_filtid); } @@ -4225,13 +4225,13 @@ kqworkq_begin_processing(struct kqworkq *kqwq, workq_threadreq_t kqr, { int rc = 0; - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START, 0, kqr->tr_kq_qos_index); rc = kqworkq_acknowledge_events(kqwq, 
kqr, kevent_flags, KQWQAE_BEGIN_PROCESSING); - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END, thread_tid(kqr_thread(kqr)), kqr->tr_kq_wakeup); return rc; @@ -4274,7 +4274,7 @@ kqworkloop_begin_processing(struct kqworkloop *kqwl, unsigned int kevent_flags) kqlock_held(kq); - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START, kqwl->kqwl_dynamicid, 0, 0); /* nobody else should still be processing */ @@ -4352,7 +4352,7 @@ kqworkloop_begin_processing(struct kqworkloop *kqwl, unsigned int kevent_flags) } done: - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END, kqwl->kqwl_dynamicid, 0, 0); return rc; @@ -4375,13 +4375,13 @@ kqfile_begin_processing(struct kqfile *kq) kqlock_held(kq); assert((kq->kqf_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0); - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START, VM_KERNEL_UNSLIDE_OR_PERM(kq), 0); /* wait to become the exclusive processing thread */ for (;;) { if (kq->kqf_state & KQ_DRAIN) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, VM_KERNEL_UNSLIDE_OR_PERM(kq), 2); return EBADF; } @@ -4410,7 +4410,7 @@ kqfile_begin_processing(struct kqfile *kq) /* anything left to process? 
*/ if (TAILQ_EMPTY(&kq->kqf_queue)) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, VM_KERNEL_UNSLIDE_OR_PERM(kq), 1); return -1; } @@ -4418,7 +4418,7 @@ kqfile_begin_processing(struct kqfile *kq) /* convert to processing mode */ kq->kqf_state |= KQ_PROCESSING; - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END, VM_KERNEL_UNSLIDE_OR_PERM(kq)); return 0; @@ -4476,7 +4476,7 @@ kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags) kqlock_held(kq); - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START, kqwl->kqwl_dynamicid, 0, 0); if (flags & KQ_PROCESSING) { @@ -4533,7 +4533,7 @@ kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags) kqworkloop_unbind_delayed_override_drop(thread); } - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END, kqwl->kqwl_dynamicid, 0, 0); return rc; @@ -4557,7 +4557,7 @@ kqfile_end_processing(struct kqfile *kq) assert((kq->kqf_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0); - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END), VM_KERNEL_UNSLIDE_OR_PERM(kq), 0); /* @@ -4663,6 +4663,7 @@ kqueue_workloop_ctl_internal(proc_t p, uintptr_t cmd, uint64_t __unused options, trp.trp_value = kqwl->kqwl_params; if (trp.trp_flags && !(trp.trp_flags & TRP_RELEASED)) { trp.trp_flags |= TRP_RELEASED; + kqwl->kqwl_params = trp.trp_value; kqworkloop_release_live(kqwl); } else { error = EINVAL; @@ -5045,14 +5046,14 @@ kqueue_threadreq_initiate(struct kqueue *kq, workq_threadreq_t kqr, __assert_only struct kqworkloop *kqwl = (struct kqworkloop *)kq; assert(kqwl->kqwl_owner == THREAD_NULL); - 
KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST), kqwl->kqwl_dynamicid, 0, qos, kqr->tr_kq_wakeup); ts = kqwl->kqwl_turnstile; /* Add a thread request reference on the kqueue. */ kqworkloop_retain(kqwl); } else { assert(kq->kq_state & KQ_WORKQ); - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST), -1, 0, qos, kqr->tr_kq_wakeup); } @@ -5192,7 +5193,7 @@ kqueue_threadreq_bind(struct proc *p, workq_threadreq_t kqr, thread_t thread, turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD); } - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND), kqu.kqwl->kqwl_dynamicid, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_BIND), kqu.kqwl->kqwl_dynamicid, thread_tid(thread), kqr->tr_kq_qos_index, (kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup); @@ -5203,7 +5204,7 @@ kqueue_threadreq_bind(struct proc *p, workq_threadreq_t kqr, thread_t thread, } else { assert(kqr->tr_kq_override_index == 0); - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND), -1, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_BIND), -1, thread_tid(thread), kqr->tr_kq_qos_index, (kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup); } @@ -5432,7 +5433,7 @@ recompute: if (kqwl_owner) { #if 0 /* JMM - need new trace hooks for owner overrides */ - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid, thread_tid(kqwl_owner), kqr->tr_kq_qos_index, (kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup); #endif @@ -5503,7 +5504,7 @@ recompute: } if (qos_changed) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid, thread_tid(servicer), kqr->tr_kq_qos_index, (kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup); } @@ -5696,7 +5697,7 @@ kqworkloop_unbind_locked(struct kqworkloop *kqwl, thread_t thread, struct uthread *ut = get_bsdthread_info(thread); 
workq_threadreq_t kqr = &kqwl->kqwl_request; - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND), kqwl->kqwl_dynamicid, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND), kqwl->kqwl_dynamicid, thread_tid(thread), 0, 0); kqlock_held(kqwl); @@ -5789,7 +5790,7 @@ kqworkq_unbind_locked(struct kqworkq *kqwq, struct uthread *ut = get_bsdthread_info(thread); kq_index_t old_override = kqr->tr_kq_override_index; - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND), -1, + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND), -1, thread_tid(kqr_thread(kqr)), kqr->tr_kq_qos_index, 0); kqlock_held(kqwq); @@ -6625,7 +6626,7 @@ static inline void knote_mark_active(struct knote *kn) { if ((kn->kn_status & KN_ACTIVE) == 0) { - KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE), + KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE), kn->kn_udata, kn->kn_status | (kn->kn_id << 32), kn->kn_filtid); } diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index 03bcf7896..eb333d4b1 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2019 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -146,6 +146,10 @@ #include #endif +#if CONFIG_AUDIT +#include +#endif + #if CONFIG_ARCADE #include #endif @@ -1627,6 +1631,7 @@ encapsulated_binary: */ if (imgp->ip_scriptvp) { vnode_put(imgp->ip_scriptvp); + imgp->ip_scriptvp = NULLVP; } if (vnode_getwithref(imgp->ip_vp) == 0) { imgp->ip_scriptvp = imgp->ip_vp; @@ -2013,6 +2018,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) proc_t p = vfs_context_proc(imgp->ip_vfs_context); _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa; int ival[2]; /* dummy retval for system calls) */ +#if CONFIG_AUDIT + struct uthread *uthread = get_bsdthread_info(current_thread()); +#endif for (action = 0; action < px_sfap->psfa_act_count; action++) { _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action]; @@ -2049,6 +2057,8 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) mode = ((mode & ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT; VATTR_SET(vap, va_mode, mode & ACCESSPERMS); + AUDIT_SUBCALL_ENTER(OPEN, p, uthread); + NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE, CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path), imgp->ip_vfs_context); @@ -2062,6 +2072,8 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) FREE(bufp, M_TEMP); + AUDIT_SUBCALL_EXIT(uthread, error); + /* * If there's an error, or we get the right fd by * accident, then drop out here. This is easier than @@ -2087,7 +2099,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) * can ignore that, since if we didn't get the * fd we wanted, the error will stop us. 
*/ + AUDIT_SUBCALL_ENTER(DUP2, p, uthread); error = dup2(p, &dup2a, ival); + AUDIT_SUBCALL_EXIT(uthread, error); if (error) { break; } @@ -2097,7 +2111,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) */ ca.fd = origfd; + AUDIT_SUBCALL_ENTER(CLOSE, p, uthread); error = close_nocancel(p, &ca, ival); + AUDIT_SUBCALL_EXIT(uthread, error); } break; @@ -2113,7 +2129,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) * can ignore that, since if we didn't get the * fd we wanted, the error will stop us. */ + AUDIT_SUBCALL_ENTER(DUP2, p, uthread); error = dup2(p, &dup2a, ival); + AUDIT_SUBCALL_EXIT(uthread, error); } break; @@ -2149,12 +2167,16 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) dup2a.from = ca.fd = ival[0]; dup2a.to = psfa->psfaa_dup2args.psfad_newfiledes; + AUDIT_SUBCALL_ENTER(DUP2, p, uthread); error = dup2(p, &dup2a, ival); + AUDIT_SUBCALL_EXIT(uthread, error); if (error) { break; } + AUDIT_SUBCALL_ENTER(CLOSE, p, uthread); error = close_nocancel(p, &ca, ival); + AUDIT_SUBCALL_EXIT(uthread, error); } break; @@ -2163,7 +2185,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) ca.fd = psfa->psfaa_filedes; + AUDIT_SUBCALL_ENTER(CLOSE, p, uthread); error = close_nocancel(p, &ca, ival); + AUDIT_SUBCALL_EXIT(uthread, error); } break; @@ -2203,11 +2227,13 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) */ struct nameidata nd; + AUDIT_SUBCALL_ENTER(CHDIR, p, uthread); NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE, CAST_USER_ADDR_T(psfa->psfaa_chdirargs.psfac_path), imgp->ip_vfs_context); error = chdir_internal(p, imgp->ip_vfs_context, &nd, 0); + AUDIT_SUBCALL_EXIT(uthread, error); } break; @@ -2216,7 +2242,9 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) fchdira.fd = psfa->psfaa_filedes; + AUDIT_SUBCALL_ENTER(FCHDIR, p, uthread); error = fchdir(p, &fchdira, ival); + AUDIT_SUBCALL_EXIT(uthread, error); } 
break; @@ -2562,6 +2590,20 @@ proc_legacy_footprint_entitled(proc_t p, task_t task, const char *caller) break; } } + +static inline void +proc_ios13extended_footprint_entitled(proc_t p, task_t task, const char *caller) +{ +#pragma unused(p, caller) + boolean_t ios13extended_footprint_entitled; + + /* the entitlement grants a footprint limit increase */ + ios13extended_footprint_entitled = IOTaskHasEntitlement(task, + "com.apple.developer.memory.ios13extended_footprint"); + if (ios13extended_footprint_entitled) { + task_set_ios13extended_footprint_limit(task); + } +} #endif /* __arm64__ */ /* @@ -3133,15 +3175,41 @@ do_fork1: * The POSIX_SPAWN_CLOEXEC_DEFAULT flag * is handled in exec_handle_file_actions(). */ - if ((error = exec_handle_file_actions(imgp, - imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0) { +#if CONFIG_AUDIT + /* + * The file actions auditing can overwrite the upath of + * AUE_POSIX_SPAWN audit record. Save the audit record. + */ + struct kaudit_record *save_uu_ar = uthread->uu_ar; + uthread->uu_ar = NULL; +#endif + error = exec_handle_file_actions(imgp, + imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0); +#if CONFIG_AUDIT + /* Restore the AUE_POSIX_SPAWN audit record. */ + uthread->uu_ar = save_uu_ar; +#endif + if (error != 0) { goto bad; } } /* Has spawn port actions? */ if (imgp->ip_px_spa != NULL) { - if ((error = exec_handle_port_actions(imgp, &port_actions)) != 0) { +#if CONFIG_AUDIT + /* + * Do the same for the port actions as we did for the file + * actions. Save the AUE_POSIX_SPAWN audit record. + */ + struct kaudit_record *save_uu_ar = uthread->uu_ar; + uthread->uu_ar = NULL; +#endif + error = exec_handle_port_actions(imgp, &port_actions); +#if CONFIG_AUDIT + /* Restore the AUE_POSIX_SPAWN audit record. 
*/ + uthread->uu_ar = save_uu_ar; +#endif + if (error != 0) { goto bad; } } @@ -3536,6 +3604,7 @@ bad: #if __arm64__ proc_legacy_footprint_entitled(p, new_task, __FUNCTION__); + proc_ios13extended_footprint_entitled(p, new_task, __FUNCTION__); #endif /* __arm64__ */ } @@ -4207,6 +4276,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) #if __arm64__ proc_legacy_footprint_entitled(p, new_task, __FUNCTION__); + proc_ios13extended_footprint_entitled(p, new_task, __FUNCTION__); #endif /* __arm64__ */ /* Sever any extant thread affinity */ diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 4b0f0e9a4..c25c85ad2 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -1321,12 +1321,13 @@ retry: if (child_proc->p_textvp != NULLVP) { /* bump references to the text vnode */ /* Need to hold iocount across the ref call */ - if (vnode_getwithref(child_proc->p_textvp) == 0) { + if ((error = vnode_getwithref(child_proc->p_textvp)) == 0) { error = vnode_ref(child_proc->p_textvp); vnode_put(child_proc->p_textvp); - if (error != 0) { - child_proc->p_textvp = NULLVP; - } + } + + if (error != 0) { + child_proc->p_textvp = NULLVP; } } diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c index 21edbc5d9..d67a8f84b 100644 --- a/bsd/kern/kern_lockf.c +++ b/bsd/kern/kern_lockf.c @@ -153,6 +153,16 @@ static void lf_boost_blocking_proc(struct lockf *, struct lockf *); static void lf_adjust_assertion(struct lockf *block); #endif /* IMPORTANCE_INHERITANCE */ +static lck_mtx_t lf_dead_lock; +static lck_grp_t *lf_dead_lock_grp; + +void +lf_init(void) +{ + lf_dead_lock_grp = lck_grp_alloc_init("lf_dead_lock", LCK_GRP_ATTR_NULL); + lck_mtx_init(&lf_dead_lock, lf_dead_lock_grp, LCK_ATTR_NULL); +} + /* * lf_advlock * @@ -498,7 +508,7 @@ lf_setlock(struct lockf *lock, struct timespec *timeout) struct lockf *block; struct lockf **head = lock->lf_head; struct lockf **prev, *overlap, *ltmp; - static char lockstr[] = "lockf"; + static const char lockstr[] 
= "lockf"; int priority, needtolink, error; struct vnode *vp = lock->lf_vnode; overlap_t ovcase; @@ -550,22 +560,43 @@ scan: */ if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) { - struct proc *wproc; - struct uthread *ut; + lck_mtx_lock(&lf_dead_lock); - /* The block is waiting on something */ - wproc = block->lf_owner; + /* The blocked process is waiting on something */ + struct proc *wproc = block->lf_owner; proc_lock(wproc); + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(wproc)); + + struct uthread *ut; TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) { /* - * If the thread is asleep (uu_wchan != 0) - * in this code (uu_wmesg == lockstr) - * check to see if the lock is blocked behind + * If the thread is (a) asleep (uu_wchan != 0) + * and (b) in this code (uu_wmesg == lockstr) + * then check to see if the lock is blocked behind * someone blocked behind us. + * + * Note: (i) vp->v_lock is held, preventing other + * threads from mutating the blocking list for our vnode. + * and (ii) the proc_lock is held i.e the thread list + * is stable. + * + * HOWEVER some thread in wproc might be sleeping on a lockf + * structure for a different vnode, and be woken at any + * time. Thus the waitblock list could mutate while + * it's being inspected by this thread, and what + * ut->uu_wchan was just pointing at could even be freed. + * + * Nevertheless this is safe here because of lf_dead_lock; if + * any thread blocked with uu_wmesg == lockstr wakes (see below) + * it will try to acquire lf_dead_lock which is already held + * here. Holding that lock prevents the lockf structure being + * pointed at by ut->uu_wchan from going away. Thus the vnode + * involved can be found and locked, and the corresponding + * blocking chain can then be examined safely. 
*/ - if ((ut->uu_wchan != NULL) && (ut->uu_wmesg == lockstr)) { - struct lockf *waitblock = (struct lockf *)ut->uu_wchan; + const struct lockf *waitblock = (const void *)ut->uu_wchan; + if ((waitblock != NULL) && (ut->uu_wmesg == lockstr)) { LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode); vnode_t othervp = NULL; @@ -585,11 +616,13 @@ scan: * v_lock) retry the scan. */ proc_unlock(wproc); + lck_mtx_unlock(&lf_dead_lock); static struct timespec ts = { .tv_sec = 0, - .tv_nsec = 10 * NSEC_PER_MSEC, + .tv_nsec = 2 * NSEC_PER_MSEC, }; - (void) msleep(lock, &vp->v_lock, priority, lockstr, &ts); + static const char pausestr[] = "lockf:pause"; + (void) msleep(lock, &vp->v_lock, priority, pausestr, &ts); LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p contention for vp %p => restart\n", lock, othervp); goto scan; } @@ -604,15 +637,15 @@ scan: * we successfully acquired the * proc_lock). */ - waitblock = waitblock->lf_next; - if (waitblock == NULL) { + const struct lockf *nextblock = waitblock->lf_next; + if (nextblock == NULL) { if (othervp) { lck_mtx_unlock(&othervp->v_lock); } - LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with no lf_next\n", lock); + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with waitblock %p and no lf_next; othervp %p\n", lock, waitblock, othervp); continue; } - LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode); + LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, nextblock, nextblock->lf_vnode); /* * Make sure it's an advisory range @@ -620,7 +653,7 @@ scan: * if we mix lock types, it's our own * fault. */ - if ((waitblock->lf_flags & F_POSIX) == 0) { + if ((nextblock->lf_flags & F_POSIX) == 0) { if (othervp) { lck_mtx_unlock(&othervp->v_lock); } @@ -633,7 +666,7 @@ scan: * getting the requested lock, then we * would deadlock, so error out. 
*/ - struct proc *bproc = waitblock->lf_owner; + struct proc *bproc = nextblock->lf_owner; const boolean_t deadlocked = bproc == lock->lf_owner; if (othervp) { @@ -643,6 +676,7 @@ scan: if (deadlocked) { LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is me, so EDEADLK\n", lock); proc_unlock(wproc); + lck_mtx_unlock(&lf_dead_lock); FREE(lock, M_LOCKF); return EDEADLK; } @@ -650,6 +684,7 @@ scan: LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p bottom of thread loop\n", lock); } proc_unlock(wproc); + lck_mtx_unlock(&lf_dead_lock); } /* @@ -709,7 +744,19 @@ scan: #endif /* LOCKF_DEBUGGING */ DTRACE_FSINFO(advlock__wait, vnode_t, vp); - error = msleep(lock, &vp->v_lock, priority, lockstr, timeout); + if (lock->lf_flags & F_POSIX) { + error = msleep(lock, &vp->v_lock, priority, lockstr, timeout); + /* + * Ensure that 'lock' doesn't get mutated or freed if a + * wakeup occurs while hunting for deadlocks (and holding + * lf_dead_lock - see above) + */ + lck_mtx_lock(&lf_dead_lock); + lck_mtx_unlock(&lf_dead_lock); + } else { + static const char lockstr_np[] = "lockf:np"; + error = msleep(lock, &vp->v_lock, priority, lockstr_np, timeout); + } if (error == 0 && (lock->lf_flags & F_ABORT) != 0) { error = EBADF; diff --git a/bsd/kern/kern_memorystatus.c b/bsd/kern/kern_memorystatus.c index 15512dd41..afc9271dc 100644 --- a/bsd/kern/kern_memorystatus.c +++ b/bsd/kern/kern_memorystatus.c @@ -631,6 +631,83 @@ memorystatus_act_on_legacy_footprint_entitlement(proc_t p, boolean_t footprint_i proc_list_unlock(); } +void +memorystatus_act_on_ios13extended_footprint_entitlement(proc_t p) +{ + int memlimit_mb_active = 0, memlimit_mb_inactive = 0; + boolean_t memlimit_active_is_fatal = FALSE, memlimit_inactive_is_fatal = 0, use_active_limit = FALSE; + + if (max_mem < 1500ULL * 1024 * 1024 || + max_mem > 2ULL * 1024 * 1024 * 1024) { + /* ios13extended_footprint is only for 2GB devices */ + return; + } + + proc_list_lock(); + + if (p->p_memstat_memlimit_active > 0) { + memlimit_mb_active = 
p->p_memstat_memlimit_active; + } else if (p->p_memstat_memlimit_active == -1) { + memlimit_mb_active = max_task_footprint_mb; + } else { + /* + * Nothing to do for '0' which is + * a special value only used internally + * to test 'no limits'. + */ + proc_list_unlock(); + return; + } + + if (p->p_memstat_memlimit_inactive > 0) { + memlimit_mb_inactive = p->p_memstat_memlimit_inactive; + } else if (p->p_memstat_memlimit_inactive == -1) { + memlimit_mb_inactive = max_task_footprint_mb; + } else { + /* + * Nothing to do for '0' which is + * a special value only used internally + * to test 'no limits'. + */ + proc_list_unlock(); + return; + } + + /* limit to "almost 2GB" */ + int ios13extended_footprint_mb = 1800; + if (memlimit_mb_active > ios13extended_footprint_mb) { + /* do not lower the current limit */ + proc_list_unlock(); + return; + } + memlimit_mb_active = ios13extended_footprint_mb; + memlimit_mb_inactive = ios13extended_footprint_mb; + + memlimit_active_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL); + memlimit_inactive_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL); + + SET_ACTIVE_LIMITS_LOCKED(p, memlimit_mb_active, memlimit_active_is_fatal); + SET_INACTIVE_LIMITS_LOCKED(p, memlimit_mb_inactive, memlimit_inactive_is_fatal); + + if (proc_jetsam_state_is_active_locked(p) == TRUE) { + use_active_limit = TRUE; + CACHE_ACTIVE_LIMITS_LOCKED(p, memlimit_active_is_fatal); + } else { + CACHE_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive_is_fatal); + } + + + if (memorystatus_highwater_enabled) { + task_set_phys_footprint_limit_internal(p->task, + (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, + NULL, /*return old value */ + use_active_limit, /*active limit?*/ + (use_active_limit ? 
memlimit_active_is_fatal : memlimit_inactive_is_fatal)); + } + + proc_list_unlock(); +} + #endif /* CONFIG_MEMORYSTATUS */ #endif /* __arm64__ */ @@ -4482,11 +4559,12 @@ set_vm_map_fork_pidwatch(task_t task, uint64_t x) * then the vm_map_fork is allowed. * * And if a process's memory footprint calculates less - * than or equal to half of the system-wide task limit, + * than or equal to quarter of the system-wide task limit, * then the vm_map_fork is allowed. This calculation * is based on the assumption that a process can * munch memory up to the system-wide task limit. */ +extern boolean_t corpse_threshold_system_limit; boolean_t memorystatus_allowed_vm_map_fork(task_t task) { @@ -4505,10 +4583,16 @@ memorystatus_allowed_vm_map_fork(task_t task) footprint_in_bytes = get_task_phys_footprint(task); /* - * Maximum is 1/4 of the system-wide task limit. + * Maximum is 1/4 of the system-wide task limit by default. */ max_allowed_bytes = ((uint64_t)max_task_footprint_mb * 1024 * 1024) >> 2; +#if DEBUG || DEVELOPMENT + if (corpse_threshold_system_limit) { + max_allowed_bytes = (uint64_t)max_task_footprint_mb * (1UL << 20); + } +#endif /* DEBUG || DEVELOPMENT */ + if (footprint_in_bytes > max_allowed_bytes) { printf("memorystatus disallowed vm_map_fork %lld %lld\n", footprint_in_bytes, max_allowed_bytes); set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED); @@ -6256,7 +6340,7 @@ memorystatus_update_levels_locked(boolean_t critical_only) } #if VM_PRESSURE_EVENTS - memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta; + memorystatus_available_pages_pressure = pressure_threshold_percentage * (atop_64(max_mem) / 100); #endif } diff --git a/bsd/kern/kern_synch.c b/bsd/kern/kern_synch.c index d8afd780c..019952e73 100644 --- a/bsd/kern/kern_synch.c +++ b/bsd/kern/kern_synch.c @@ -258,7 +258,6 @@ block: if ((thread_continue_t)continuation != THREAD_CONTINUE_NULL) { ut->uu_continuation = continuation; 
ut->uu_pri = pri; - ut->uu_timo = abstime? 1: 0; ut->uu_mtx = mtx; (void) thread_block(_sleep_continue); /* NOTREACHED */ diff --git a/bsd/kern/trace_codes b/bsd/kern/trace_codes index 2da5b2b91..fa3e01f84 100644 --- a/bsd/kern/trace_codes +++ b/bsd/kern/trace_codes @@ -260,6 +260,9 @@ 0x132000c RealFaultAddressPurgeable 0x1320010 RealFaultAddressExternal 0x1320014 RealFaultAddressSharedCache +0x1320018 vm_fast_fault +0x132001c vm_slow_fault +0x1320020 vm_map_lookup_object 0x1400000 MACH_SCHED 0x1400004 MACH_STKATTACH 0x1400008 MACH_STKHANDOFF diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index dc2bd511c..aa07cc477 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -826,12 +826,11 @@ socreate_internal(int dom, struct socket **aso, int type, int proto, switch (dom) { /* - * Don't mark Unix domain, system or multipath sockets as + * Don't mark Unix domain or system * eligible for defunct by default. */ case PF_LOCAL: case PF_SYSTEM: - case PF_MULTIPATH: so->so_flags |= SOF_NODEFUNCT; break; default: diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index b903e4a18..ca5e7dd6e 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -1390,6 +1390,11 @@ sendto_nocancel(struct proc *p, KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); + if (uap->flags & MSG_SKIPCFIL) { + error = EPERM; + goto done; + } + auio = uio_create(1, 0, (IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32), UIO_WRITE); @@ -1459,6 +1464,12 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); + + if (uap->flags & MSG_SKIPCFIL) { + error = EPERM; + goto done; + } + if (IS_64BIT_PROCESS(p)) { msghdrp = (caddr_t)&msg64; size_of_msghdr = sizeof(msg64); @@ -1572,6 +1583,11 @@ sendmsg_x(struct proc *p, struct sendmsg_x_args *uap, user_ssize_t *retval) KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0); + if (uap->flags & MSG_SKIPCFIL) { + error = EPERM; + goto out; + } + error = file_socket(uap->s, &so); if (error) { goto out; diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index 042363b41..300894634 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -2915,7 +2915,7 @@ spec_knote_select_and_link(struct knote *kn) /* * This function may be called many times to link or re-link the * underlying vnode to the kqueue. If we've already linked the two, - * we will have a valid kn_hook64 which ties us to the underlying + * we will have a valid kn_hook_waitqid which ties us to the underlying * device's waitq via a the waitq's prepost table object. However, * devices can abort any select action by calling selthreadclear(). * This is OK because the table object will be invalidated by the @@ -2985,13 +2985,15 @@ spec_knote_select_and_link(struct knote *kn) * the table object's ID to us. It will also set the * waitq_prepost_id field within the waitq structure. * - * We can just overwrite kn_hook64 because it's simply a + * We can just overwrite kn_hook_waitqid because it's simply a * table ID used to grab a reference when needed. * * We have a reference on the vnode, so we know that the * device won't go away while we get this ID. + * + * Note: on 32bit this field is 32bit only. 
*/ - kn->kn_hook64 = waitq_get_prepost_id(wq); + kn->kn_hook_waitqid = (typeof(kn->kn_hook_waitqid))waitq_get_prepost_id(wq); } else if (selres == 0) { /* * The device indicated that there's no data to read, but didn't call @@ -3069,7 +3071,7 @@ filt_specattach(struct knote *kn, __unused struct kevent_qos_s *kev) } kn->kn_filtid = EVFILTID_SPEC; - kn->kn_hook64 = 0; + kn->kn_hook_waitqid = 0; knote_markstayactive(kn); return spec_knote_select_and_link(kn); @@ -3084,7 +3086,7 @@ filt_specdetach(struct knote *kn) * This is potentially tricky: the device's selinfo waitq that was * tricked into being part of this knote's waitq set may not be a part * of any other set, and the device itself may have revoked the memory - * in which the waitq was held. We use the knote's kn_hook64 field + * in which the waitq was held. We use the knote's kn_hook_waitqid field * to keep the ID of the waitq's prepost table object. This * object keeps a pointer back to the waitq, and gives us a safe way * to decouple the dereferencing of driver allocated memory: if the @@ -3092,9 +3094,9 @@ filt_specdetach(struct knote *kn) * object will be invalidated. The waitq details are handled in the * waitq API invoked here. 
*/ - if (kn->kn_hook64) { - waitq_unlink_by_prepost_id(kn->kn_hook64, &(knote_get_kq(kn)->kq_wqs)); - kn->kn_hook64 = 0; + if (kn->kn_hook_waitqid) { + waitq_unlink_by_prepost_id(kn->kn_hook_waitqid, &(knote_get_kq(kn)->kq_wqs)); + kn->kn_hook_waitqid = 0; } } diff --git a/bsd/net/if_ipsec.c b/bsd/net/if_ipsec.c index e967cad2c..407f3243e 100644 --- a/bsd/net/if_ipsec.c +++ b/bsd/net/if_ipsec.c @@ -2554,6 +2554,9 @@ ipsec_ctl_connect(kern_ctl_ref kctlref, } struct ipsec_pcb *pcb = *unitinfo; + if (pcb == NULL) { + return EINVAL; + } lck_mtx_lock(&ipsec_lock); @@ -2995,8 +2998,11 @@ ipsec_ctl_setopt(__unused kern_ctl_ref kctlref, void *data, size_t len) { - struct ipsec_pcb *pcb = unitinfo; errno_t result = 0; + struct ipsec_pcb *pcb = unitinfo; + if (pcb == NULL) { + return EINVAL; + } /* check for privileges for privileged options */ switch (opt) { @@ -3364,8 +3370,11 @@ ipsec_ctl_getopt(__unused kern_ctl_ref kctlref, void *data, size_t *len) { - struct ipsec_pcb *pcb = unitinfo; errno_t result = 0; + struct ipsec_pcb *pcb = unitinfo; + if (pcb == NULL) { + return EINVAL; + } switch (opt) { case IPSEC_OPT_FLAGS: { diff --git a/bsd/net/if_ports_used.c b/bsd/net/if_ports_used.c index 378d834fc..29b05770f 100644 --- a/bsd/net/if_ports_used.c +++ b/bsd/net/if_ports_used.c @@ -47,6 +47,8 @@ #include #include +#include +#include #include @@ -662,7 +664,12 @@ if_ports_used_add_inpcb(const uint32_t ifindex, const struct inpcb *inp) npi.npi_timestamp.tv_usec = wakeuiid_last_check.tv_usec; if (SOCK_PROTO(so) == IPPROTO_TCP) { + struct tcpcb *tp = intotcpcb(inp); + npi.npi_flags |= NPIF_TCP; + if (tp != NULL && tp->t_state == TCPS_LISTEN) { + npi.npi_flags |= NPIF_LISTEN; + } } else if (SOCK_PROTO(so) == IPPROTO_UDP) { npi.npi_flags |= NPIF_UDP; } else { @@ -675,7 +682,15 @@ if_ports_used_add_inpcb(const uint32_t ifindex, const struct inpcb *inp) npi.npi_local_port = inp->inp_lport; npi.npi_foreign_port = inp->inp_fport; - if (inp->inp_vflag & INP_IPV4) { + /* + * Take in 
account IPv4 addresses mapped on IPv6 + */ + if ((inp->inp_vflag & INP_IPV6) != 0 && (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && + (inp->inp_vflag & (INP_IPV6 | INP_IPV4)) == (INP_IPV6 | INP_IPV4)) { + npi.npi_flags |= NPIF_IPV6 | NPIF_IPV4; + memcpy(&npi.npi_local_addr_in6, + &inp->in6p_laddr, sizeof(struct in6_addr)); + } else if (inp->inp_vflag & INP_IPV4) { npi.npi_flags |= NPIF_IPV4; npi.npi_local_addr_in = inp->inp_laddr; npi.npi_foreign_addr_in = inp->inp_faddr; diff --git a/bsd/net/if_ports_used.h b/bsd/net/if_ports_used.h index 5fcbc480a..ce782a21c 100644 --- a/bsd/net/if_ports_used.h +++ b/bsd/net/if_ports_used.h @@ -68,13 +68,14 @@ union in_addr_4_6 { struct in6_addr _in_a_6; }; -#define NPIF_IPV4 0x00000001 -#define NPIF_IPV6 0x00000002 -#define NPIF_TCP 0x00000004 -#define NPIF_UDP 0x00000008 -#define NPIF_DELEGATED 0x00000010 -#define NPIF_SOCKET 0x00000020 -#define NPIF_CHANNEL 0x00000040 +#define NPIF_IPV4 0x0001 +#define NPIF_IPV6 0x0002 +#define NPIF_TCP 0x0004 +#define NPIF_UDP 0x0008 +#define NPIF_DELEGATED 0x0010 +#define NPIF_SOCKET 0x0020 +#define NPIF_CHANNEL 0x0040 +#define NPIF_LISTEN 0x0080 struct net_port_info { uint16_t npi_if_index; diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c index 4e849c3e5..2d5c6454b 100644 --- a/bsd/net/kpi_interface.c +++ b/bsd/net/kpi_interface.c @@ -2826,7 +2826,6 @@ ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol, u_int32_t flags, u_int8_t *bitfield) { u_int32_t ifindex; - u_int32_t inp_flags = 0; if (bitfield == NULL) { return EINVAL; @@ -2847,26 +2846,15 @@ ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol, if_ports_used_update_wakeuuid(ifp); - inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) ? - INPCB_GET_PORTS_USED_WILDCARDOK : 0); - inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) ? - INPCB_GET_PORTS_USED_NOWAKEUPOK : 0); - inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) ? 
- INPCB_GET_PORTS_USED_RECVANYIFONLY : 0); - inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) ? - INPCB_GET_PORTS_USED_EXTBGIDLEONLY : 0); - inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) ? - INPCB_GET_PORTS_USED_ACTIVEONLY : 0); - ifindex = (ifp != NULL) ? ifp->if_index : 0; if (!(flags & IFNET_GET_LOCAL_PORTS_TCPONLY)) { - udp_get_ports_used(ifindex, protocol, inp_flags, + udp_get_ports_used(ifindex, protocol, flags, bitfield); } if (!(flags & IFNET_GET_LOCAL_PORTS_UDPONLY)) { - tcp_get_ports_used(ifindex, protocol, inp_flags, + tcp_get_ports_used(ifindex, protocol, flags, bitfield); } diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h index f131c5746..da7f7579d 100644 --- a/bsd/net/kpi_interface.h +++ b/bsd/net/kpi_interface.h @@ -3193,6 +3193,7 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield); #define IFNET_GET_LOCAL_PORTS_RECVANYIFONLY 0x10 #define IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY 0x20 #define IFNET_GET_LOCAL_PORTS_ACTIVEONLY 0x40 +#define IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK 0x80 /* * @function ifnet_get_local_ports_extended * @discussion Returns a bitfield indicating which local ports of the @@ -3230,6 +3231,9 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield); * IFNET_GET_LOCAL_PORTS_ACTIVEONLY: When bit is set, the * port is in the list only if the socket is not in a final TCP * state or the connection is not idle in a final TCP state + * IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK: When bit is set, the + * port is in the list for all the TCP states except CLOSED + * and TIME_WAIT * @param bitfield A pointer to 8192 bytes. * @result Returns 0 on success. 
*/ diff --git a/bsd/net/ndrv.c b/bsd/net/ndrv.c index 7e13e2638..aeec3bb46 100644 --- a/bsd/net/ndrv.c +++ b/bsd/net/ndrv.c @@ -80,7 +80,12 @@ static unsigned int ndrv_multi_max_count = NDRV_DMUX_MAX_DESCR; SYSCTL_UINT(_net, OID_AUTO, ndrv_multi_max_count, CTLFLAG_RW | CTLFLAG_LOCKED, - &ndrv_multi_max_count, 0, "Number of allowed multicast addresses per NRDV socket"); + &ndrv_multi_max_count, 0, "Number of allowed multicast addresses per NRDV socket"); + +/* + * The locking strategy relies on the PF_NRDRV domain mutex that protects both the + * PCB list "ndrvl" and the sockets themselves + */ static int ndrv_do_detach(struct ndrv_cb *); static int ndrv_do_disconnect(struct ndrv_cb *); @@ -97,12 +102,12 @@ static void ndrv_dominit(struct domain *); u_int32_t ndrv_sendspace = NDRVSNDQ; u_int32_t ndrv_recvspace = NDRVRCVQ; -TAILQ_HEAD(, ndrv_cb) ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl); +TAILQ_HEAD(, ndrv_cb) ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl); static struct domain *ndrvdomain = NULL; extern struct domain ndrvdomain_s; -#define NDRV_PROTODEMUX_COUNT 10 +#define NDRV_PROTODEMUX_COUNT 10 /* * Verify these values match. @@ -127,9 +132,9 @@ extern struct domain ndrvdomain_s; static int ndrv_output(struct mbuf *m, struct socket *so) { - struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); struct ifnet *ifp = np->nd_if; - int result = 0; + int result = 0; #if NDRV_DEBUG printf("NDRV output: %x, %x, %x\n", m, so, np); @@ -138,67 +143,77 @@ ndrv_output(struct mbuf *m, struct socket *so) /* * No header is a format error */ - if ((m->m_flags&M_PKTHDR) == 0) - return(EINVAL); + if ((m->m_flags & M_PKTHDR) == 0) { + return EINVAL; + } /* Unlock before calling ifnet_output */ socket_unlock(so, 0); /* - * Call DLIL if we can. DLIL is much safer than calling the - * ifp directly. - */ + * Call DLIL if we can. DLIL is much safer than calling the + * ifp directly. 
+ */ result = ifnet_output_raw(ifp, np->nd_proto_family, m); socket_lock(so, 0); - return (result); + return result; } /* Our input routine called from DLIL */ static errno_t ndrv_input( - ifnet_t ifp, - protocol_family_t proto_family, - mbuf_t m, - char *frame_header) + ifnet_t ifp, + protocol_family_t proto_family, + mbuf_t m, + char *frame_header) { struct socket *so; struct sockaddr_dl ndrvsrc; struct ndrv_cb *np; int error = 0; - ndrvsrc.sdl_len = sizeof (struct sockaddr_dl); - ndrvsrc.sdl_family = AF_NDRV; - ndrvsrc.sdl_index = 0; + ndrvsrc.sdl_len = sizeof(struct sockaddr_dl); + ndrvsrc.sdl_family = AF_NDRV; + ndrvsrc.sdl_index = 0; - /* move packet from if queue to socket */ + /* move packet from if queue to socket */ /* Should be media-independent */ - ndrvsrc.sdl_type = IFT_ETHER; - ndrvsrc.sdl_nlen = 0; - ndrvsrc.sdl_alen = 6; - ndrvsrc.sdl_slen = 0; - bcopy(frame_header, &ndrvsrc.sdl_data, 6); + ndrvsrc.sdl_type = IFT_ETHER; + ndrvsrc.sdl_nlen = 0; + ndrvsrc.sdl_alen = 6; + ndrvsrc.sdl_slen = 0; + bcopy(frame_header, &ndrvsrc.sdl_data, 6); + + /* prepend the frame header */ + m = m_prepend(m, ifnet_hdrlen(ifp), M_NOWAIT); + if (m == NULL) { + return EJUSTRETURN; + } + bcopy(frame_header, m->m_data, ifnet_hdrlen(ifp)); + + /* + * We need to take the domain mutex before the list RW lock + */ + LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(ndrvdomain->dom_mtx); np = ndrv_find_inbound(ifp, proto_family); - if (np == NULL) - { - return(ENOENT); + if (np == NULL) { + lck_mtx_unlock(ndrvdomain->dom_mtx); + return ENOENT; } + so = np->nd_socket; - /* prepend the frame header */ - m = m_prepend(m, ifnet_hdrlen(ifp), M_NOWAIT); - if (m == NULL) - return EJUSTRETURN; - bcopy(frame_header, m->m_data, ifnet_hdrlen(ifp)); - LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(ndrvdomain->dom_mtx); if (sbappendaddr(&(so->so_rcv), (struct sockaddr *)&ndrvsrc, - m, (struct mbuf *)0, &error) != 0) { + m, NULL, 
&error) != 0) { sorwakeup(so); } + lck_mtx_unlock(ndrvdomain->dom_mtx); + return 0; /* radar 4030377 - always return 0 */ } @@ -208,24 +223,27 @@ ndrv_input( static int ndrv_attach(struct socket *so, int proto, __unused struct proc *p) { - int error; + int error; struct ndrv_cb *np = sotondrvcb(so); - if ((so->so_state & SS_PRIV) == 0) - return(EPERM); + if ((so->so_state & SS_PRIV) == 0) { + return EPERM; + } #if NDRV_DEBUG printf("NDRV attach: %x, %x, %x\n", so, proto, np); #endif - if ((error = soreserve(so, ndrv_sendspace, ndrv_recvspace))) - return(error); + if ((error = soreserve(so, ndrv_sendspace, ndrv_recvspace))) { + return error; + } MALLOC(np, struct ndrv_cb *, sizeof(*np), M_PCB, M_WAITOK); - if (np == NULL) - return (ENOMEM); - so->so_pcb = (caddr_t)np; - bzero(np, sizeof(*np)); + if (np == NULL) { + return ENOMEM; + } + so->so_pcb = (caddr_t)np; + bzero(np, sizeof(*np)); #if NDRV_DEBUG printf("NDRV attach: %x, %x, %x\n", so, proto, np); #endif @@ -234,12 +252,22 @@ ndrv_attach(struct socket *so, int proto, __unused struct proc *p) np->nd_socket = so; np->nd_proto.sp_family = SOCK_DOM(so); np->nd_proto.sp_protocol = proto; - np->nd_if = NULL; - np->nd_proto_family = 0; - np->nd_family = 0; - np->nd_unit = 0; - TAILQ_INSERT_TAIL(&ndrvl, np, nd_next); - return(0); + np->nd_if = NULL; + np->nd_proto_family = 0; + np->nd_family = 0; + np->nd_unit = 0; + + /* + * Use the domain mutex to protect the list + */ + LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(ndrvdomain->dom_mtx); + + TAILQ_INSERT_TAIL(&ndrvl, np, nd_next); + + lck_mtx_unlock(ndrvdomain->dom_mtx); + + return 0; } /* @@ -252,8 +280,9 @@ ndrv_detach(struct socket *so) { struct ndrv_cb *np = sotondrvcb(so); - if (np == 0) + if (np == 0) { return EINVAL; + } return ndrv_do_detach(np); } @@ -272,17 +301,20 @@ ndrv_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) { struct ndrv_cb *np = sotondrvcb(so); - if (np == 0) + if (np == 0) { return 
EINVAL; + } - if (np->nd_faddr) + if (np->nd_faddr) { return EISCONN; + } /* Allocate memory to store the remote address */ MALLOC(np->nd_faddr, struct sockaddr_ndrv*, - nam->sa_len, M_IFADDR, M_WAITOK); - if (np->nd_faddr == NULL) + nam->sa_len, M_IFADDR, M_WAITOK); + if (np->nd_faddr == NULL) { return ENOMEM; + } bcopy((caddr_t) nam, (caddr_t) np->nd_faddr, nam->sa_len); soisconnected(so); @@ -291,12 +323,12 @@ ndrv_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) static void ndrv_event(struct ifnet *ifp, __unused protocol_family_t protocol, - const struct kev_msg *event) + const struct kev_msg *event) { if (event->vendor_code == KEV_VENDOR_APPLE && - event->kev_class == KEV_NETWORK_CLASS && - event->kev_subclass == KEV_DL_SUBCLASS && - event->event_code == KEV_DL_IF_DETACHING) { + event->kev_class == KEV_NETWORK_CLASS && + event->kev_subclass == KEV_DL_SUBCLASS && + event->event_code == KEV_DL_IF_DETACHING) { LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(ndrvdomain->dom_mtx); ndrv_handle_ifp_detach(ifnet_family(ifp), ifnet_unit(ifp)); @@ -314,30 +346,34 @@ static int name_cmp(struct ifnet *, char *); static int ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) { - struct sockaddr_ndrv *sa = (struct sockaddr_ndrv *) nam; + struct sockaddr_ndrv *sa = (struct sockaddr_ndrv *) nam; char *dname; struct ndrv_cb *np; struct ifnet *ifp; - int result; + int result; - if TAILQ_EMPTY(&ifnet_head) - return(EADDRNOTAVAIL); /* Quick sanity check */ + if (TAILQ_EMPTY(&ifnet_head)) { + return EADDRNOTAVAIL; /* Quick sanity check */ + } np = sotondrvcb(so); - if (np == 0) + if (np == 0) { return EINVAL; + } - if (np->nd_laddr) - return EINVAL; /* XXX */ - + if (np->nd_laddr) { + return EINVAL; /* XXX */ + } /* I think we just latch onto a copy here; the caller frees */ np->nd_laddr = _MALLOC(sizeof(struct sockaddr_ndrv), M_IFADDR, M_WAITOK); - if (np->nd_laddr == NULL) - return(ENOMEM); + if 
(np->nd_laddr == NULL) { + return ENOMEM; + } bcopy((caddr_t) sa, (caddr_t) np->nd_laddr, sizeof(struct sockaddr_ndrv)); dname = (char *) sa->snd_name; np->nd_laddr->snd_len = sizeof(struct sockaddr_ndrv); - if (*dname == '\0') - return(EINVAL); + if (*dname == '\0') { + return EINVAL; + } #if NDRV_DEBUG printf("NDRV bind: %x, %x, %s\n", so, np, dname); #endif @@ -347,19 +383,20 @@ ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) */ ifnet_head_lock_shared(); TAILQ_FOREACH(ifp, &ifnet_head, if_link) { - if (name_cmp(ifp, dname) == 0) + if (name_cmp(ifp, dname) == 0) { break; + } } ifnet_head_done(); - if (ifp == NULL) - return(EADDRNOTAVAIL); + if (ifp == NULL) { + return EADDRNOTAVAIL; + } // PPP doesn't support PF_NDRV. - if (ifnet_family(ifp) != APPLE_IF_FAM_PPP) - { + if (ifnet_family(ifp) != APPLE_IF_FAM_PPP) { /* NDRV on this interface */ - struct ifnet_attach_proto_param ndrv_proto; + struct ifnet_attach_proto_param ndrv_proto; result = 0; bzero(&ndrv_proto, sizeof(ndrv_proto)); ndrv_proto.event = ndrv_event; @@ -372,16 +409,15 @@ ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) return result; } np->nd_proto_family = PF_NDRV; - } - else { + } else { np->nd_proto_family = 0; } np->nd_if = ifp; - np->nd_family = ifnet_family(ifp); - np->nd_unit = ifnet_unit(ifp); + np->nd_family = ifnet_family(ifp); + np->nd_unit = ifnet_unit(ifp); - return(0); + return 0; } static int @@ -389,11 +425,13 @@ ndrv_disconnect(struct socket *so) { struct ndrv_cb *np = sotondrvcb(so); - if (np == 0) + if (np == 0) { return EINVAL; + } - if (np->nd_faddr == 0) + if (np->nd_faddr == 0) { return ENOTCONN; + } ndrv_do_disconnect(np); return 0; @@ -417,13 +455,14 @@ ndrv_shutdown(struct socket *so) */ static int ndrv_send(struct socket *so, __unused int flags, struct mbuf *m, - __unused struct sockaddr *addr, struct mbuf *control, - __unused struct proc *p) + __unused struct sockaddr *addr, struct mbuf *control, + __unused struct proc 
*p) { int error; - if (control) + if (control) { return EOPNOTSUPP; + } error = ndrv_output(m, so); m = NULL; @@ -436,8 +475,9 @@ ndrv_abort(struct socket *so) { struct ndrv_cb *np = sotondrvcb(so); - if (np == 0) + if (np == 0) { return EINVAL; + } ndrv_do_disconnect(np); return 0; @@ -449,18 +489,21 @@ ndrv_sockaddr(struct socket *so, struct sockaddr **nam) struct ndrv_cb *np = sotondrvcb(so); int len; - if (np == 0) + if (np == 0) { return EINVAL; + } - if (np->nd_laddr == 0) + if (np->nd_laddr == 0) { return EINVAL; + } len = np->nd_laddr->snd_len; MALLOC(*nam, struct sockaddr *, len, M_SONAME, M_WAITOK); - if (*nam == NULL) + if (*nam == NULL) { return ENOMEM; + } bcopy((caddr_t)np->nd_laddr, *nam, - (unsigned)len); + (unsigned)len); return 0; } @@ -471,18 +514,21 @@ ndrv_peeraddr(struct socket *so, struct sockaddr **nam) struct ndrv_cb *np = sotondrvcb(so); int len; - if (np == 0) + if (np == 0) { return EINVAL; + } - if (np->nd_faddr == 0) + if (np->nd_faddr == 0) { return ENOTCONN; + } len = np->nd_faddr->snd_len; MALLOC(*nam, struct sockaddr *, len, M_SONAME, M_WAITOK); - if (*nam == NULL) + if (*nam == NULL) { return ENOMEM; + } bcopy((caddr_t)np->nd_faddr, *nam, - (unsigned)len); + (unsigned)len); return 0; } @@ -492,58 +538,59 @@ ndrv_peeraddr(struct socket *so, struct sockaddr **nam) static int ndrv_ctloutput(struct socket *so, struct sockopt *sopt) { - struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); int error = 0; - switch(sopt->sopt_name) - { - case NDRV_DELDMXSPEC: /* Delete current spec */ - /* Verify no parameter was passed */ - if (sopt->sopt_val != 0 || sopt->sopt_valsize != 0) { - /* - * We don't support deleting a specific demux, it's - * all or nothing. 
- */ - return EINVAL; - } - error = ndrv_delspec(np); - break; - case NDRV_SETDMXSPEC: /* Set protocol spec */ - error = ndrv_setspec(np, sopt); - break; - case NDRV_ADDMULTICAST: - error = ndrv_do_add_multicast(np, sopt); - break; - case NDRV_DELMULTICAST: - error = ndrv_do_remove_multicast(np, sopt); - break; - default: - error = ENOTSUP; - } + switch (sopt->sopt_name) { + case NDRV_DELDMXSPEC: /* Delete current spec */ + /* Verify no parameter was passed */ + if (sopt->sopt_val != 0 || sopt->sopt_valsize != 0) { + /* + * We don't support deleting a specific demux, it's + * all or nothing. + */ + return EINVAL; + } + error = ndrv_delspec(np); + break; + case NDRV_SETDMXSPEC: /* Set protocol spec */ + error = ndrv_setspec(np, sopt); + break; + case NDRV_ADDMULTICAST: + error = ndrv_do_add_multicast(np, sopt); + break; + case NDRV_DELMULTICAST: + error = ndrv_do_remove_multicast(np, sopt); + break; + default: + error = ENOTSUP; + } #ifdef NDRV_DEBUG log(LOG_WARNING, "NDRV CTLOUT: %x returns %d\n", sopt->sopt_name, error); #endif - return(error); + return error; } static int ndrv_do_detach(struct ndrv_cb *np) { - struct ndrv_cb* cur_np = NULL; - struct socket *so = np->nd_socket; - int error = 0; - struct ifnet * ifp; + struct ndrv_cb* cur_np = NULL; + struct socket *so = np->nd_socket; + int error = 0; + struct ifnet * ifp; #if NDRV_DEBUG printf("NDRV detach: %x, %x\n", so, np); #endif - ndrv_remove_all_multicast(np); + ndrv_remove_all_multicast(np); + + /* Remove from the linked list of control blocks */ + LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED); + TAILQ_REMOVE(&ndrvl, np, nd_next); - ifp = np->nd_if; - /* Remove from the linked list of control blocks */ - TAILQ_REMOVE(&ndrvl, np, nd_next); - if (ifp != NULL) { + ifp = np->nd_if; + if (ifp != NULL) { u_int32_t proto_family = np->nd_proto_family; if (proto_family != PF_NDRV && proto_family != 0) { @@ -553,9 +600,10 @@ ndrv_do_detach(struct ndrv_cb *np) } /* Check if this is the last socket 
attached to this interface */ + LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED); TAILQ_FOREACH(cur_np, &ndrvl, nd_next) { if (cur_np->nd_family == np->nd_family && - cur_np->nd_unit == np->nd_unit) { + cur_np->nd_unit == np->nd_unit) { break; } } @@ -567,7 +615,7 @@ ndrv_do_detach(struct ndrv_cb *np) socket_lock(so, 0); } } - if (np->nd_laddr != NULL) { + if (np->nd_laddr != NULL) { FREE(np->nd_laddr, M_IFADDR); np->nd_laddr = NULL; } @@ -585,9 +633,8 @@ ndrv_do_disconnect(struct ndrv_cb *np) #if NDRV_DEBUG printf("NDRV disconnect: %x\n", np); #endif - if (np->nd_faddr) - { - FREE(np->nd_faddr, M_IFADDR); + if (np->nd_faddr) { + FREE(np->nd_faddr, M_IFADDR); np->nd_faddr = 0; } /* @@ -595,34 +642,39 @@ ndrv_do_disconnect(struct ndrv_cb *np) * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. */ - if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) + if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) { ndrv_do_detach(np); + } soisdisconnected(so); - return(0); + return 0; } /* Hackery - return a string version of a decimal number */ static void sprint_d(u_int n, char *buf, int buflen) -{ char dbuf[IFNAMSIZ]; - char *cp = dbuf+IFNAMSIZ-1; +{ + char dbuf[IFNAMSIZ]; + char *cp = dbuf + IFNAMSIZ - 1; - *cp = 0; - do { buflen--; + *cp = 0; + do { + buflen--; cp--; - *cp = "0123456789"[n % 10]; - n /= 10; - } while (n != 0 && buflen > 0); - strlcpy(buf, cp, IFNAMSIZ-buflen); - return; + *cp = "0123456789"[n % 10]; + n /= 10; + } while (n != 0 && buflen > 0); + strlcpy(buf, cp, IFNAMSIZ - buflen); + return; } /* * Try to compare a device name (q) with one of the funky ifnet * device names (ifp). 
*/ -static int name_cmp(struct ifnet *ifp, char *q) -{ char *r; +static int +name_cmp(struct ifnet *ifp, char *q) +{ + char *r; int len; char buf[IFNAMSIZ]; @@ -630,11 +682,11 @@ static int name_cmp(struct ifnet *ifp, char *q) len = strlen(ifnet_name(ifp)); strlcpy(r, ifnet_name(ifp), IFNAMSIZ); r += len; - sprint_d(ifnet_unit(ifp), r, IFNAMSIZ-(r-buf)); + sprint_d(ifnet_unit(ifp), r, IFNAMSIZ - (r - buf)); #if NDRV_DEBUG printf("Comparing %s, %s\n", buf, q); #endif - return(strncmp(buf, q, IFNAMSIZ)); + return strncmp(buf, q, IFNAMSIZ); } #if 0 @@ -645,15 +697,16 @@ static int name_cmp(struct ifnet *ifp, char *q) void ndrv_flushq(struct ifqueue *q) { - struct mbuf *m; - for (;;) - { + struct mbuf *m; + for (;;) { IF_DEQUEUE(q, m); - if (m == NULL) + if (m == NULL) { break; + } IF_DROP(q); - if (m) + if (m) { m_freem(m); + } } } #endif @@ -661,45 +714,50 @@ ndrv_flushq(struct ifqueue *q) int ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) { - struct ifnet_attach_proto_param proto_param; - struct ndrv_protocol_desc ndrvSpec; - struct ndrv_demux_desc* ndrvDemux = NULL; - int error = 0; - struct socket * so = np->nd_socket; - user_addr_t user_addr; + struct ifnet_attach_proto_param proto_param; + struct ndrv_protocol_desc ndrvSpec; + struct ndrv_demux_desc* ndrvDemux = NULL; + int error = 0; + struct socket * so = np->nd_socket; + user_addr_t user_addr; /* Sanity checking */ - if (np->nd_proto_family != PF_NDRV) + if (np->nd_proto_family != PF_NDRV) { return EBUSY; - if (np->nd_if == NULL) + } + if (np->nd_if == NULL) { return EINVAL; + } /* Copy the ndrvSpec */ if (proc_is64bit(sopt->sopt_p)) { - struct ndrv_protocol_desc64 ndrvSpec64; + struct ndrv_protocol_desc64 ndrvSpec64; - if (sopt->sopt_valsize != sizeof(ndrvSpec64)) + if (sopt->sopt_valsize != sizeof(ndrvSpec64)) { return EINVAL; + } error = sooptcopyin(sopt, &ndrvSpec64, sizeof(ndrvSpec64), sizeof(ndrvSpec64)); - if (error != 0) + if (error != 0) { return error; + } ndrvSpec.version = 
ndrvSpec64.version; ndrvSpec.protocol_family = ndrvSpec64.protocol_family; ndrvSpec.demux_count = ndrvSpec64.demux_count; user_addr = ndrvSpec64.demux_list; - } - else { - struct ndrv_protocol_desc32 ndrvSpec32; + } else { + struct ndrv_protocol_desc32 ndrvSpec32; - if (sopt->sopt_valsize != sizeof(ndrvSpec32)) + if (sopt->sopt_valsize != sizeof(ndrvSpec32)) { return EINVAL; + } error = sooptcopyin(sopt, &ndrvSpec32, sizeof(ndrvSpec32), sizeof(ndrvSpec32)); - if (error != 0) + if (error != 0) { return error; + } ndrvSpec.version = ndrvSpec32.version; ndrvSpec.protocol_family = ndrvSpec32.protocol_family; @@ -709,72 +767,74 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) } /* Verify the parameter */ - if (ndrvSpec.version > NDRV_PROTOCOL_DESC_VERS) + if (ndrvSpec.version > NDRV_PROTOCOL_DESC_VERS) { return ENOTSUP; // version is too new! - else if (ndrvSpec.version < 1) + } else if (ndrvSpec.version < 1) { return EINVAL; // version is not valid - else if (ndrvSpec.demux_count > NDRV_PROTODEMUX_COUNT || ndrvSpec.demux_count == 0) + } else if (ndrvSpec.demux_count > NDRV_PROTODEMUX_COUNT || ndrvSpec.demux_count == 0) { return EINVAL; // demux_count is not valid - + } bzero(&proto_param, sizeof(proto_param)); proto_param.demux_count = ndrvSpec.demux_count; /* Allocate storage for demux array */ MALLOC(ndrvDemux, struct ndrv_demux_desc*, proto_param.demux_count * - sizeof(struct ndrv_demux_desc), M_TEMP, M_WAITOK); - if (ndrvDemux == NULL) + sizeof(struct ndrv_demux_desc), M_TEMP, M_WAITOK); + if (ndrvDemux == NULL) { return ENOMEM; + } /* Allocate enough ifnet_demux_descs */ MALLOC(proto_param.demux_array, struct ifnet_demux_desc*, - sizeof(*proto_param.demux_array) * ndrvSpec.demux_count, - M_TEMP, M_WAITOK); - if (proto_param.demux_array == NULL) + sizeof(*proto_param.demux_array) * ndrvSpec.demux_count, + M_TEMP, M_WAITOK); + if (proto_param.demux_array == NULL) { error = ENOMEM; + } - if (error == 0) - { + if (error == 0) { /* Copy the ndrv demux array 
from userland */ error = copyin(user_addr, ndrvDemux, - ndrvSpec.demux_count * sizeof(struct ndrv_demux_desc)); + ndrvSpec.demux_count * sizeof(struct ndrv_demux_desc)); ndrvSpec.demux_list = ndrvDemux; } - if (error == 0) - { + if (error == 0) { /* At this point, we've at least got enough bytes to start looking around */ - u_int32_t demuxOn = 0; + u_int32_t demuxOn = 0; proto_param.demux_count = ndrvSpec.demux_count; proto_param.input = ndrv_input; proto_param.event = ndrv_event; - for (demuxOn = 0; demuxOn < ndrvSpec.demux_count; demuxOn++) - { + for (demuxOn = 0; demuxOn < ndrvSpec.demux_count; demuxOn++) { /* Convert an ndrv_demux_desc to a ifnet_demux_desc */ error = ndrv_to_ifnet_demux(&ndrvSpec.demux_list[demuxOn], - &proto_param.demux_array[demuxOn]); - if (error) + &proto_param.demux_array[demuxOn]); + if (error) { break; + } } } - if (error == 0) - { + if (error == 0) { /* We've got all our ducks lined up...lets attach! */ socket_unlock(so, 0); error = ifnet_attach_protocol(np->nd_if, ndrvSpec.protocol_family, - &proto_param); + &proto_param); socket_lock(so, 0); - if (error == 0) + if (error == 0) { np->nd_proto_family = ndrvSpec.protocol_family; + } } /* Free any memory we've allocated */ - if (proto_param.demux_array) + if (proto_param.demux_array) { FREE(proto_param.demux_array, M_TEMP); - if (ndrvDemux) + } + if (ndrvDemux) { FREE(ndrvDemux, M_TEMP); + } return error; } @@ -783,38 +843,37 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) int ndrv_to_ifnet_demux(struct ndrv_demux_desc* ndrv, struct ifnet_demux_desc* ifdemux) { - bzero(ifdemux, sizeof(*ifdemux)); + bzero(ifdemux, sizeof(*ifdemux)); - if (ndrv->type < DLIL_DESC_ETYPE2) - { - /* using old "type", not supported */ - return ENOTSUP; - } + if (ndrv->type < DLIL_DESC_ETYPE2) { + /* using old "type", not supported */ + return ENOTSUP; + } - if (ndrv->length > 28) - { - return EINVAL; - } + if (ndrv->length > 28) { + return EINVAL; + } - ifdemux->type = ndrv->type; - ifdemux->data = 
ndrv->data.other; - ifdemux->datalen = ndrv->length; + ifdemux->type = ndrv->type; + ifdemux->data = ndrv->data.other; + ifdemux->datalen = ndrv->length; - return 0; + return 0; } int ndrv_delspec(struct ndrv_cb *np) { - int result = 0; + int result = 0; - if (np->nd_proto_family == PF_NDRV || - np->nd_proto_family == 0) - return EINVAL; + if (np->nd_proto_family == PF_NDRV || + np->nd_proto_family == 0) { + return EINVAL; + } - /* Detach the protocol */ - result = ifnet_detach_protocol(np->nd_if, np->nd_proto_family); - np->nd_proto_family = PF_NDRV; + /* Detach the protocol */ + result = ifnet_detach_protocol(np->nd_if, np->nd_proto_family); + np->nd_proto_family = PF_NDRV; return result; } @@ -822,16 +881,20 @@ ndrv_delspec(struct ndrv_cb *np) struct ndrv_cb * ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol) { - struct ndrv_cb* np; + struct ndrv_cb* np; - if (protocol == PF_NDRV) return NULL; + LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED); - TAILQ_FOREACH(np, &ndrvl, nd_next) { - if (np->nd_proto_family == protocol && - np->nd_if == ifp) { - return np; - } - } + if (protocol == PF_NDRV) { + return NULL; + } + + TAILQ_FOREACH(np, &ndrvl, nd_next) { + if (np->nd_proto_family == protocol && + np->nd_if == ifp) { + return np; + } + } return NULL; } @@ -839,239 +902,233 @@ ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol) static void ndrv_handle_ifp_detach(u_int32_t family, short unit) { - struct ndrv_cb* np; - struct ifnet *ifp = NULL; - struct socket *so; - - /* Find all sockets using this interface. */ - TAILQ_FOREACH(np, &ndrvl, nd_next) { - if (np->nd_family == family && - np->nd_unit == unit) - { - /* This cb is using the detaching interface, but not for long. 
*/ - /* Let the protocol go */ - ifp = np->nd_if; - if (np->nd_proto_family != 0) - ndrv_delspec(np); - - /* Delete the multicasts first */ - ndrv_remove_all_multicast(np); - - /* Disavow all knowledge of the ifp */ - np->nd_if = NULL; - np->nd_unit = 0; - np->nd_family = 0; - - so = np->nd_socket; - /* Make sure sending returns an error */ - LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED); - socantsendmore(so); - socantrcvmore(so); - } - } - - /* Unregister our protocol */ - if (ifp) { - ifnet_detach_protocol(ifp, PF_NDRV); - } + struct ndrv_cb* np; + struct ifnet *ifp = NULL; + struct socket *so; + + /* Find all sockets using this interface. */ + TAILQ_FOREACH(np, &ndrvl, nd_next) { + if (np->nd_family == family && + np->nd_unit == unit) { + /* This cb is using the detaching interface, but not for long. */ + /* Let the protocol go */ + ifp = np->nd_if; + if (np->nd_proto_family != 0) { + ndrv_delspec(np); + } + + /* Delete the multicasts first */ + ndrv_remove_all_multicast(np); + + /* Disavow all knowledge of the ifp */ + np->nd_if = NULL; + np->nd_unit = 0; + np->nd_family = 0; + + so = np->nd_socket; + /* Make sure sending returns an error */ + LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED); + socantsendmore(so); + socantrcvmore(so); + } + } + + /* Unregister our protocol */ + if (ifp) { + ifnet_detach_protocol(ifp, PF_NDRV); + } } static int ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt) { - struct ndrv_multiaddr* ndrv_multi; - int result; - - if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 || - sopt->sopt_level != SOL_NDRVPROTO || sopt->sopt_valsize > SOCK_MAXADDRLEN) - return EINVAL; - if (np->nd_if == NULL) - return ENXIO; - if (!(np->nd_dlist_cnt < ndrv_multi_max_count)) + struct ndrv_multiaddr* ndrv_multi; + int result; + + if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 || + sopt->sopt_level != SOL_NDRVPROTO || sopt->sopt_valsize > SOCK_MAXADDRLEN) { + return EINVAL; + } + if (np->nd_if == NULL) { + return 
ENXIO; + } + if (!(np->nd_dlist_cnt < ndrv_multi_max_count)) { return EPERM; + } + + // Allocate storage + MALLOC(ndrv_multi, struct ndrv_multiaddr*, sizeof(struct ndrv_multiaddr) - + sizeof(struct sockaddr) + sopt->sopt_valsize, M_IFADDR, M_WAITOK); + if (ndrv_multi == NULL) { + return ENOMEM; + } + + // Copy in the address + result = copyin(sopt->sopt_val, &ndrv_multi->addr, sopt->sopt_valsize); + + // Validate the sockaddr + if (result == 0 && sopt->sopt_valsize != ndrv_multi->addr.sa_len) { + result = EINVAL; + } + + if (result == 0 && ndrv_have_multicast(np, &ndrv_multi->addr)) { + result = EEXIST; + } - // Allocate storage - MALLOC(ndrv_multi, struct ndrv_multiaddr*, sizeof(struct ndrv_multiaddr) - - sizeof(struct sockaddr) + sopt->sopt_valsize, M_IFADDR, M_WAITOK); - if (ndrv_multi == NULL) - return ENOMEM; - - // Copy in the address - result = copyin(sopt->sopt_val, &ndrv_multi->addr, sopt->sopt_valsize); - - // Validate the sockaddr - if (result == 0 && sopt->sopt_valsize != ndrv_multi->addr.sa_len) - result = EINVAL; - - if (result == 0 && ndrv_have_multicast(np, &ndrv_multi->addr)) - result = EEXIST; - - if (result == 0) - { - // Try adding the multicast - result = ifnet_add_multicast(np->nd_if, &ndrv_multi->addr, - &ndrv_multi->ifma); - } - - if (result == 0) - { - // Add to our linked list - ndrv_multi->next = np->nd_multiaddrs; - np->nd_multiaddrs = ndrv_multi; + if (result == 0) { + // Try adding the multicast + result = ifnet_add_multicast(np->nd_if, &ndrv_multi->addr, + &ndrv_multi->ifma); + } + + if (result == 0) { + // Add to our linked list + ndrv_multi->next = np->nd_multiaddrs; + np->nd_multiaddrs = ndrv_multi; np->nd_dlist_cnt++; - } - else - { - // Free up the memory, something went wrong - FREE(ndrv_multi, M_IFADDR); - } - - return result; + } else { + // Free up the memory, something went wrong + FREE(ndrv_multi, M_IFADDR); + } + + return result; } static int ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt) { - struct 
sockaddr* multi_addr; - struct ndrv_multiaddr* ndrv_entry = NULL; - int result; - - if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 || - sopt->sopt_level != SOL_NDRVPROTO) - return EINVAL; - if (np->nd_if == NULL || np->nd_dlist_cnt == 0) - return ENXIO; - - // Allocate storage - MALLOC(multi_addr, struct sockaddr*, sopt->sopt_valsize, - M_TEMP, M_WAITOK); - if (multi_addr == NULL) - return ENOMEM; - - // Copy in the address - result = copyin(sopt->sopt_val, multi_addr, sopt->sopt_valsize); - - // Validate the sockaddr - if (result == 0 && sopt->sopt_valsize != multi_addr->sa_len) - result = EINVAL; - - if (result == 0) - { - /* Find the old entry */ - ndrv_entry = ndrv_have_multicast(np, multi_addr); - - if (ndrv_entry == NULL) - result = ENOENT; - } - - if (result == 0) - { - // Try deleting the multicast - result = ifnet_remove_multicast(ndrv_entry->ifma); - } - - if (result == 0) - { - // Remove from our linked list - struct ndrv_multiaddr* cur = np->nd_multiaddrs; - - ifmaddr_release(ndrv_entry->ifma); - - if (cur == ndrv_entry) - { - np->nd_multiaddrs = cur->next; - } - else - { - for (cur = cur->next; cur != NULL; cur = cur->next) - { - if (cur->next == ndrv_entry) - { - cur->next = cur->next->next; - break; - } - } - } + struct sockaddr* multi_addr; + struct ndrv_multiaddr* ndrv_entry = NULL; + int result; + + if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 || + sopt->sopt_level != SOL_NDRVPROTO) { + return EINVAL; + } + if (np->nd_if == NULL || np->nd_dlist_cnt == 0) { + return ENXIO; + } + + // Allocate storage + MALLOC(multi_addr, struct sockaddr*, sopt->sopt_valsize, + M_TEMP, M_WAITOK); + if (multi_addr == NULL) { + return ENOMEM; + } + + // Copy in the address + result = copyin(sopt->sopt_val, multi_addr, sopt->sopt_valsize); + + // Validate the sockaddr + if (result == 0 && sopt->sopt_valsize != multi_addr->sa_len) { + result = EINVAL; + } + + if (result == 0) { + /* Find the old entry */ + ndrv_entry = ndrv_have_multicast(np, multi_addr); + + if 
(ndrv_entry == NULL) { + result = ENOENT; + } + } + + if (result == 0) { + // Try deleting the multicast + result = ifnet_remove_multicast(ndrv_entry->ifma); + } + + if (result == 0) { + // Remove from our linked list + struct ndrv_multiaddr* cur = np->nd_multiaddrs; + + ifmaddr_release(ndrv_entry->ifma); + + if (cur == ndrv_entry) { + np->nd_multiaddrs = cur->next; + } else { + for (cur = cur->next; cur != NULL; cur = cur->next) { + if (cur->next == ndrv_entry) { + cur->next = cur->next->next; + break; + } + } + } np->nd_dlist_cnt--; - // Free the memory - FREE(ndrv_entry, M_IFADDR); - } - FREE(multi_addr, M_TEMP); + // Free the memory + FREE(ndrv_entry, M_IFADDR); + } + FREE(multi_addr, M_TEMP); - return result; + return result; } static struct ndrv_multiaddr* ndrv_have_multicast(struct ndrv_cb *np, struct sockaddr* inAddr) { - struct ndrv_multiaddr* cur; - for (cur = np->nd_multiaddrs; cur != NULL; cur = cur->next) - { - - if ((inAddr->sa_len == cur->addr.sa_len) && - (bcmp(&cur->addr, inAddr, inAddr->sa_len) == 0)) - { - // Found a match - return cur; - } - } - - return NULL; + struct ndrv_multiaddr* cur; + for (cur = np->nd_multiaddrs; cur != NULL; cur = cur->next) { + if ((inAddr->sa_len == cur->addr.sa_len) && + (bcmp(&cur->addr, inAddr, inAddr->sa_len) == 0)) { + // Found a match + return cur; + } + } + + return NULL; } static void ndrv_remove_all_multicast(struct ndrv_cb* np) { - struct ndrv_multiaddr* cur; - - if (np->nd_if != NULL) - { - while (np->nd_multiaddrs != NULL) - { - cur = np->nd_multiaddrs; - np->nd_multiaddrs = cur->next; - - ifnet_remove_multicast(cur->ifma); - ifmaddr_release(cur->ifma); - FREE(cur, M_IFADDR); - } - } + struct ndrv_multiaddr* cur; + + if (np->nd_if != NULL) { + while (np->nd_multiaddrs != NULL) { + cur = np->nd_multiaddrs; + np->nd_multiaddrs = cur->next; + + ifnet_remove_multicast(cur->ifma); + ifmaddr_release(cur->ifma); + FREE(cur, M_IFADDR); + } + } } static struct pr_usrreqs ndrv_usrreqs = { - .pru_abort = ndrv_abort, 
- .pru_attach = ndrv_attach, - .pru_bind = ndrv_bind, - .pru_connect = ndrv_connect, - .pru_detach = ndrv_detach, - .pru_disconnect = ndrv_disconnect, - .pru_peeraddr = ndrv_peeraddr, - .pru_send = ndrv_send, - .pru_shutdown = ndrv_shutdown, - .pru_sockaddr = ndrv_sockaddr, - .pru_sosend = sosend, - .pru_soreceive = soreceive, + .pru_abort = ndrv_abort, + .pru_attach = ndrv_attach, + .pru_bind = ndrv_bind, + .pru_connect = ndrv_connect, + .pru_detach = ndrv_detach, + .pru_disconnect = ndrv_disconnect, + .pru_peeraddr = ndrv_peeraddr, + .pru_send = ndrv_send, + .pru_shutdown = ndrv_shutdown, + .pru_sockaddr = ndrv_sockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; static struct protosw ndrvsw[] = { -{ - .pr_type = SOCK_RAW, - .pr_protocol = NDRVPROTO_NDRV, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_output = ndrv_output, - .pr_ctloutput = ndrv_ctloutput, - .pr_usrreqs = &ndrv_usrreqs, -} + { + .pr_type = SOCK_RAW, + .pr_protocol = NDRVPROTO_NDRV, + .pr_flags = PR_ATOMIC | PR_ADDR, + .pr_output = ndrv_output, + .pr_ctloutput = ndrv_ctloutput, + .pr_usrreqs = &ndrv_usrreqs, + } }; -static int ndrv_proto_count = (sizeof (ndrvsw) / sizeof (struct protosw)); +static int ndrv_proto_count = (sizeof(ndrvsw) / sizeof(struct protosw)); struct domain ndrvdomain_s = { - .dom_family = PF_NDRV, - .dom_name = "NetDriver", - .dom_init = ndrv_dominit, + .dom_family = PF_NDRV, + .dom_name = "NetDriver", + .dom_init = ndrv_dominit, }; static void @@ -1085,6 +1142,7 @@ ndrv_dominit(struct domain *dp) ndrvdomain = dp; - for (i = 0, pr = &ndrvsw[0]; i < ndrv_proto_count; i++, pr++) + for (i = 0, pr = &ndrvsw[0]; i < ndrv_proto_count; i++, pr++) { net_add_proto(pr, dp, 1); + } } diff --git a/bsd/net/necp.c b/bsd/net/necp.c index 18c84be93..3e251d7b1 100644 --- a/bsd/net/necp.c +++ b/bsd/net/necp.c @@ -406,7 +406,7 @@ static bool necp_is_addr_in_subnet(struct sockaddr *addr, struct sockaddr *subne static int necp_addr_compare(struct sockaddr *sa1, struct sockaddr *sa2, int 
check_port); static bool necp_buffer_compare_with_bit_prefix(u_int8_t *p1, u_int8_t *p2, u_int32_t bits); static bool necp_addr_is_empty(struct sockaddr *addr); -static bool necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet); +static bool necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet, u_int32_t bound_interface_index); static bool necp_is_intcoproc(struct inpcb *inp, struct mbuf *packet); struct necp_uuid_id_mapping { @@ -7063,6 +7063,15 @@ necp_application_find_policy_match_internal(proc_t proc, offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length; } + // Check for loopback exception + if (necp_pass_loopback > 0 && necp_is_loopback(&local_addr.sa, &remote_addr.sa, NULL, NULL, bound_interface_index)) { + returned_result->policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH; + returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_PASS; + returned_result->routed_interface_index = lo_ifp->if_index; + *flags |= (NECP_CLIENT_RESULT_FLAG_IS_LOCAL | NECP_CLIENT_RESULT_FLAG_IS_DIRECT); + return 0; + } + // Lock lck_rw_lock_shared(&necp_kernel_policy_lock); @@ -8253,7 +8262,7 @@ necp_socket_is_connected(struct inpcb *inp) static inline bool necp_socket_bypass(struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, struct inpcb *inp) { - if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) { + if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL, IFSCOPE_NONE)) { return true; } else if (necp_is_intcoproc(inp, NULL)) { return true; @@ -8728,7 +8737,7 @@ necp_ip_output_find_policy_match_locked(necp_kernel_policy_id socket_policy_id, static inline bool necp_output_bypass(struct mbuf *packet) { - if (necp_pass_loopback > 0 && necp_is_loopback(NULL, NULL, NULL, packet)) { + if (necp_pass_loopback > 0 && necp_is_loopback(NULL, NULL, 
NULL, packet, IFSCOPE_NONE)) { return true; } if (necp_pass_keepalives > 0 && necp_get_is_keepalive_from_packet(packet)) { @@ -10297,7 +10306,7 @@ necp_addr_is_loopback(struct sockaddr *address) } static bool -necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet) +necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet, u_int32_t bound_interface_index) { // Note: This function only checks for the loopback addresses. // In the future, we may want to expand to also allow any traffic @@ -10327,6 +10336,8 @@ necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, stru return TRUE; } } + } else if (bound_interface_index != IFSCOPE_NONE && lo_ifp->if_index == bound_interface_index) { + return TRUE; } if (packet != NULL) { diff --git a/bsd/net/necp_client.c b/bsd/net/necp_client.c index ec1fd72f0..0e3dd1f47 100644 --- a/bsd/net/necp_client.c +++ b/bsd/net/necp_client.c @@ -1484,6 +1484,23 @@ necp_client_flow_is_viable(proc_t proc, struct necp_client *client, &flow->local_addr, &flow->remote_addr, NULL, NULL, NULL, ignore_address, true); + // Check for blocking agents + for (int i = 0; i < NECP_MAX_NETAGENTS; i++) { + if (uuid_is_null(result.netagents[i])) { + // Passed end of valid agents + break; + } + + u_int32_t flags = netagent_get_flags(result.netagents[i]); + if ((flags & NETAGENT_FLAG_REGISTERED) && + !(flags & NETAGENT_FLAG_VOLUNTARY) && + !(flags & NETAGENT_FLAG_ACTIVE) && + !(flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY)) { + // A required agent is not active, cause the flow to be marked non-viable + return false; + } + } + return error == 0 && result.routed_interface_index != IFSCOPE_NONE && result.routing_result != NECP_KERNEL_POLICY_RESULT_DROP; diff --git a/bsd/net/net_kev.h b/bsd/net/net_kev.h index 085db9c3f..a32a2ab04 100644 --- a/bsd/net/net_kev.h +++ b/bsd/net/net_kev.h @@ -142,6 +142,10 @@ /* KEV_MPTCP_SUBCLASS event 
codes */ #define KEV_MPTCP_CELLUSE 1 +#define KEV_IPSEC_SUBCLASS 13 /* IPsec event subclass */ +#define KEV_IPSEC_WAKE_PACKET 1 /* IPsec wake packet available, the + * first packet processed after a wake event */ + #endif /* PRIVATE */ #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ #endif /* _NET_NETKEV_H_ */ diff --git a/bsd/net/pf_if.c b/bsd/net/pf_if.c index 4afbfed57..4c03c1c35 100644 --- a/bsd/net/pf_if.c +++ b/bsd/net/pf_if.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2016 Apple Inc. All rights reserved. + * Copyright (c) 2007-2019 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -555,7 +555,7 @@ pfi_address_add(struct sockaddr *sa, int af, int net) "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX); return; } - memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer)); + memcpy(p, pfi_buffer, pfi_buffer_max * sizeof(*pfi_buffer)); /* no need to zero buffer */ _FREE(pfi_buffer, PFI_MTYPE); pfi_buffer = p; diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h index 66674c208..d5b76845e 100644 --- a/bsd/netinet/in.h +++ b/bsd/netinet/in.h @@ -918,6 +918,9 @@ extern struct in_ifaddr * inifa_ifpclatv4(struct ifnet *); #define satosin(sa) SIN(sa) #define sintosa(sin) ((struct sockaddr *)(void *)(sin)) #define SINIFSCOPE(s) ((struct sockaddr_inifscope *)(void *)(s)) + +#define IPTOS_UNSPEC (-1) /* TOS byte not set */ +#define IPTOS_MASK 0xFF /* TOS byte mask */ #endif /* BSD_KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE diff --git a/bsd/netinet/in_pcb.h b/bsd/netinet/in_pcb.h index a5ec42ab2..2ba76a786 100644 --- a/bsd/netinet/in_pcb.h +++ b/bsd/netinet/in_pcb.h @@ -796,11 +796,6 @@ extern void inpcb_to_xinpcb64(struct inpcb *, struct xinpcb64 *); #endif extern int get_pcblist_n(short, struct sysctl_req *, struct inpcbinfo *); -#define INPCB_GET_PORTS_USED_WILDCARDOK 0x01 -#define INPCB_GET_PORTS_USED_NOWAKEUPOK 0x02 -#define INPCB_GET_PORTS_USED_RECVANYIFONLY 0x04 -#define INPCB_GET_PORTS_USED_EXTBGIDLEONLY 0x08 -#define 
INPCB_GET_PORTS_USED_ACTIVEONLY 0x10 extern void inpcb_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *, struct inpcbinfo *); diff --git a/bsd/netinet/in_pcblist.c b/bsd/netinet/in_pcblist.c index dcd59d9c3..69302e731 100644 --- a/bsd/netinet/in_pcblist.c +++ b/bsd/netinet/in_pcblist.c @@ -438,29 +438,37 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags, bool iswildcard, wildcardok, nowakeok; bool recvanyifonly, extbgidleok; bool activeonly; + bool anytcpstateok; - wildcardok = ((flags & INPCB_GET_PORTS_USED_WILDCARDOK) != 0); - nowakeok = ((flags & INPCB_GET_PORTS_USED_NOWAKEUPOK) != 0); - recvanyifonly = ((flags & INPCB_GET_PORTS_USED_RECVANYIFONLY) != 0); - extbgidleok = ((flags & INPCB_GET_PORTS_USED_EXTBGIDLEONLY) != 0); - activeonly = ((flags & INPCB_GET_PORTS_USED_ACTIVEONLY) != 0); + wildcardok = ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) != 0); + nowakeok = ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) != 0); + recvanyifonly = ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) != 0); + extbgidleok = ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) != 0); + activeonly = ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) != 0); + anytcpstateok = ((flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) != 0); lck_rw_lock_shared(pcbinfo->ipi_lock); gencnt = pcbinfo->ipi_gencnt; for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp; inp = LIST_NEXT(inp, inp_list)) { - uint16_t port; - if (inp->inp_gencnt > gencnt || inp->inp_state == INPCB_STATE_DEAD || inp->inp_wantcnt == WNT_STOPUSING) { continue; } - if ((so = inp->inp_socket) == NULL || - (so->so_state & SS_DEFUNCT) || - (so->so_state & SS_ISDISCONNECTED)) { + if ((so = inp->inp_socket) == NULL || inp->inp_lport == 0) { + continue; + } + + /* + * ANYTCPSTATEOK means incoming packets cannot be filtered + * reception so cast a wide net of possibilities + */ + if (!anytcpstateok && + ((so->so_state & SS_DEFUNCT) || + (so->so_state & SS_ISDISCONNECTED))) { continue; } @@ -551,6 +559,15 @@ 
inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags, switch (tp->t_state) { case TCPS_CLOSED: + if (anytcpstateok && inp->inp_fport != 0) { + /* + * A foreign port means we had a 4 tuple at + * least a connection attempt so packets + * may be received for the 4 tuple after the + * connection is gone + */ + break; + } continue; /* NOT REACHED */ case TCPS_LISTEN: @@ -570,26 +587,28 @@ inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t flags, case TCPS_FIN_WAIT_2: /* * In the closing states, the connection - * is not idle when there is outgoing + * is active when there is outgoing * data having to be acknowledged */ - if (activeonly && so->so_snd.sb_cc == 0) { + if (!anytcpstateok && + (activeonly && so->so_snd.sb_cc == 0)) { continue; } break; case TCPS_TIME_WAIT: + if (anytcpstateok) { + /* + * Packets may still be received for the 4 tuple + * after the connection is gone + */ + break; + } continue; /* NOT REACHED */ } } - /* - * Final safeguard to exclude unspecified local port - */ - port = ntohs(inp->inp_lport); - if (port == 0) { - continue; - } - bitstr_set(bitfield, port); + + bitstr_set(bitfield, ntohs(inp->inp_lport)); if_ports_used_add_inpcb(ifindex, inp); } diff --git a/bsd/netinet/in_tclass.c b/bsd/netinet/in_tclass.c index 6c939c5aa..2971f9191 100644 --- a/bsd/netinet/in_tclass.c +++ b/bsd/netinet/in_tclass.c @@ -1068,6 +1068,32 @@ so_tc_from_control(struct mbuf *control, int *out_netsvctype) return sotc; } +__private_extern__ int +so_tos_from_control(struct mbuf *control) +{ + struct cmsghdr *cm; + int tos = IPTOS_UNSPEC; + + for (cm = M_FIRST_CMSGHDR(control); + is_cmsg_valid(control, cm); + cm = M_NXT_CMSGHDR(control, cm)) { + if (cm->cmsg_len != CMSG_LEN(sizeof(int))) { + continue; + } + + if ((cm->cmsg_level == IPPROTO_IP && + cm->cmsg_type == IP_TOS) || + (cm->cmsg_level == IPPROTO_IPV6 && + cm->cmsg_type == IPV6_TCLASS)) { + tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK; + /* The first valid option wins */ + break; + 
} + } + + return tos; +} + __private_extern__ void so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off) { diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index 16782affc..38a45abfb 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -3074,7 +3074,7 @@ ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m) ovbcopy((caddr_t)(&cp[IPOPT_OFFSET + 1] + sizeof(struct in_addr)), (caddr_t)&cp[IPOPT_OFFSET + 1], - (unsigned)cnt + sizeof(struct in_addr)); + (unsigned)cnt - (IPOPT_MINOFF - 1)); break; } } diff --git a/bsd/netinet/mptcp.c b/bsd/netinet/mptcp.c index a2883309e..25a51db92 100644 --- a/bsd/netinet/mptcp.c +++ b/bsd/netinet/mptcp.c @@ -1465,6 +1465,10 @@ mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index, goto out; } + if (IFNET_IS_COMPANION_LINK(ifp)) { + goto out; + } + if (IFNET_IS_EXPENSIVE(ifp) && (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) { goto out; diff --git a/bsd/netinet/mptcp_subr.c b/bsd/netinet/mptcp_subr.c index f7980b76c..da19de209 100644 --- a/bsd/netinet/mptcp_subr.c +++ b/bsd/netinet/mptcp_subr.c @@ -126,7 +126,6 @@ static void mptcp_subflow_abort(struct mptsub *, int); static void mptcp_send_dfin(struct socket *so); static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts); -static void mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val); static int mptcp_freeq(struct mptcb *mp_tp); /* @@ -215,7 +214,6 @@ static uint32_t mptcp_kern_skt_unit; static symptoms_advisory_t mptcp_advisory; uint32_t mptcp_cellicon_refcount = 0; -#define MPTCP_CELLICON_TOGGLE_RATE (5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */ /* * XXX The order of the event handlers below is really @@ -852,9 +850,6 @@ mptcp_trigger_cell_bringup(struct mptses *mpte) static boolean_t mptcp_subflow_disconnecting(struct mptsub *mpts) { - /* Split out in if-statements for readability. Compile should - * optimize that. 
- */ if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) { return true; } @@ -2699,11 +2694,15 @@ mptcp_subflow_abort(struct mptsub *mpts, int error) void mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts) { - struct socket *so; + struct socket *so, *mp_so; struct mptcb *mp_tp; int send_dfin = 0; - socket_lock_assert_owned(mptetoso(mpte)); + so = mpts->mpts_socket; + mp_tp = mpte->mpte_mptcb; + mp_so = mptetoso(mpte); + + socket_lock_assert_owned(mp_so); if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) { return; @@ -2713,8 +2712,6 @@ mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts) mpts->mpts_flags |= MPTSF_DISCONNECTING; - so = mpts->mpts_socket; - mp_tp = mpte->mpte_mptcb; if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) { send_dfin = 1; } @@ -2728,10 +2725,29 @@ mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts) if (send_dfin) { mptcp_send_dfin(so); } - (void) soshutdownlock(so, SHUT_RD); - (void) soshutdownlock(so, SHUT_WR); - (void) sodisconnectlocked(so); + + if (mp_so->so_flags & SOF_DEFUNCT) { + errno_t ret; + + ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE); + if (ret == 0) { + ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL); + + if (ret != 0) { + os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n", + __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret); + } + } else { + os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n", + __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret); + } + } else { + (void) soshutdownlock(so, SHUT_RD); + (void) soshutdownlock(so, SHUT_WR); + (void) sodisconnectlocked(so); + } } + /* * Generate a disconnect event for this subflow socket, in case * the lower layer doesn't do it; this is needed because the @@ -6525,6 +6541,7 @@ mptcp_post_event(u_int32_t event_code, int value) static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts) { + struct tcpcb *tp = 
sototcpcb(mpts->mpts_socket); int error; /* First-party apps (Siri) don't flip the cellicon */ @@ -6537,9 +6554,17 @@ mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts) return; } + /* Fallen back connections are not triggering the cellicon */ + if (mpte->mpte_mptcb->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { + return; + } + /* Remember the last time we set the cellicon. Needed for debouncing */ mpte->mpte_last_cellicon_set = tcp_now; + tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE); + tcp_sched_timers(tp); + if (mpts->mpts_flags & MPTSF_CELLICON_SET && mpte->mpte_cellicon_increments != 0) { if (mptcp_cellicon_refcount == 0) { @@ -6612,8 +6637,8 @@ __mptcp_unset_cellicon(long val) return true; } -static void -mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val) +void +mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val) { /* First-party apps (Siri) don't flip the cellicon */ if (mpte->mpte_flags & MPTE_FIRSTPARTY) { @@ -6640,7 +6665,13 @@ mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val) mpts->mpts_flags &= ~MPTSF_CELLICON_SET; } - mpte->mpte_cellicon_increments--; + if (mpte->mpte_cellicon_increments < val) { + os_log_error(mptcp_log_handle, "%s - %lx: Increments is %u but want to dec by %u.\n", + __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments, val); + val = mpte->mpte_cellicon_increments; + } + + mpte->mpte_cellicon_increments -= val; if (__mptcp_unset_cellicon(val) == false) { return; diff --git a/bsd/netinet/mptcp_usrreq.c b/bsd/netinet/mptcp_usrreq.c index a47b8a512..0012e4497 100644 --- a/bsd/netinet/mptcp_usrreq.c +++ b/bsd/netinet/mptcp_usrreq.c @@ -1414,6 +1414,7 @@ mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt) case SO_NOADDRERR: /* MP */ case SO_LABEL: /* MP */ case SO_PEERLABEL: /* MP */ + case SO_DEFUNCTIT: /* MP */ case SO_DEFUNCTOK: /* MP */ case SO_ISDEFUNCT: /* MP */ case SO_TRAFFIC_CLASS_DBG: 
/* MP */ @@ -2185,6 +2186,8 @@ mptcp_sopt2str(int level, int optname) return "SO_TRAFFIC_CLASS_DBG"; case SO_PRIVILEGED_TRAFFIC_CLASS: return "SO_PRIVILEGED_TRAFFIC_CLASS"; + case SO_DEFUNCTIT: + return "SO_DEFUNCTIT"; case SO_DEFUNCTOK: return "SO_DEFUNCTOK"; case SO_ISDEFUNCT: diff --git a/bsd/netinet/mptcp_var.h b/bsd/netinet/mptcp_var.h index 4c9037db7..f13bfb950 100644 --- a/bsd/netinet/mptcp_var.h +++ b/bsd/netinet/mptcp_var.h @@ -554,6 +554,8 @@ extern uint32_t mptcp_dbg_area; /* Multipath TCP debugging area */ extern int mptcp_developer_mode; /* Allow aggregation mode */ extern uint32_t mptcp_cellicon_refcount; +#define MPTCP_CELLICON_TOGGLE_RATE (5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */ + extern int tcp_jack_rxmt; /* Join ACK retransmission value in msecs */ __BEGIN_DECLS @@ -642,6 +644,7 @@ extern struct sockaddr *mptcp_get_session_dst(struct mptses *mpte, boolean_t has_v6, boolean_t has_v4); extern void mptcp_set_restrictions(struct socket *mp_so); extern void mptcp_clear_cellicon(void); +extern void mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val); extern void mptcp_reset_rexmit_state(struct tcpcb *tp); extern void mptcp_reset_keepalive(struct tcpcb *tp); extern int mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c index 66b0102bb..dc552d9e3 100644 --- a/bsd/netinet/raw_ip.c +++ b/bsd/netinet/raw_ip.c @@ -372,6 +372,7 @@ rip_output( int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; struct ip_out_args ipoa; struct ip_moptions *imo; + int tos = IPTOS_UNSPEC; int error = 0; bzero(&ipoa, sizeof(ipoa)); @@ -383,6 +384,7 @@ rip_output( if (control != NULL) { + tos = so_tos_from_control(control); sotc = so_tc_from_control(control, &netsvctype); m_freem(control); @@ -444,7 +446,11 @@ rip_output( return ENOBUFS; } ip = mtod(m, struct ip *); - ip->ip_tos = inp->inp_ip_tos; + if (tos != IPTOS_UNSPEC) { + ip->ip_tos = (uint8_t)(tos & 
IPTOS_MASK); + } else { + ip->ip_tos = inp->inp_ip_tos; + } ip->ip_off = 0; ip->ip_p = inp->inp_ip_p; ip->ip_len = m->m_pkthdr.len; diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c index 784c0e879..ee69afd6f 100644 --- a/bsd/netinet/tcp_timer.c +++ b/bsd/netinet/tcp_timer.c @@ -311,7 +311,6 @@ static void tcp_remove_timer(struct tcpcb *tp); static void tcp_sched_timerlist(uint32_t offset); static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode, u_int16_t probe_if_index); -static void tcp_sched_timers(struct tcpcb *tp); static inline void tcp_set_lotimer_index(struct tcpcb *); __private_extern__ void tcp_remove_from_time_wait(struct inpcb *inp); static inline void tcp_update_mss_core(struct tcpcb *tp, struct ifnet *ifp); @@ -1550,6 +1549,27 @@ fc_output: (void) tcp_output(tp); } break; + case TCPT_CELLICON: + { + struct mptses *mpte = tptomptp(tp)->mpt_mpte; + + tp->t_timer[TCPT_CELLICON] = 0; + + if (mpte->mpte_cellicon_increments == 0) { + /* Cell-icon not set by this connection */ + break; + } + + if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) { + mptcp_unset_cellicon(mpte, NULL, 1); + } + + if (mpte->mpte_cellicon_increments) { + tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE); + } + + break; + } #endif /* MPTCP */ case TCPT_PTO: diff --git a/bsd/netinet/tcp_timer.h b/bsd/netinet/tcp_timer.h index e09f01de9..8b8435722 100644 --- a/bsd/netinet/tcp_timer.h +++ b/bsd/netinet/tcp_timer.h @@ -134,7 +134,8 @@ #define TCPT_2MSL 6 /* 2*msl quiet time timer */ #if MPTCP #define TCPT_JACK_RXMT 7 /* retransmit timer for join ack */ -#define TCPT_MAX 7 +#define TCPT_CELLICON 8 /* Timer to check for cell-activity */ +#define TCPT_MAX 8 #else /* MPTCP */ #define TCPT_MAX 6 #endif /* !MPTCP */ diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h index 5358d21a0..95e1903ee 100644 --- a/bsd/netinet/tcp_var.h +++ b/bsd/netinet/tcp_var.h @@ -1574,6 +1574,7 @@ void tcp_gc(struct 
inpcbinfo *); void tcp_itimer(struct inpcbinfo *ipi); void tcp_check_timer_state(struct tcpcb *tp); void tcp_run_timerlist(void *arg1, void *arg2); +void tcp_sched_timers(struct tcpcb *tp); struct tcptemp *tcp_maketemplate(struct tcpcb *); void tcp_fillheaders(struct tcpcb *, void *, void *); diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c index 247e01802..ed16674e5 100644 --- a/bsd/netinet/udp_usrreq.c +++ b/bsd/netinet/udp_usrreq.c @@ -107,6 +107,7 @@ #if IPSEC #include #include +#include extern int ipsec_bypass; extern int esp_udp_encap_port; #endif /* IPSEC */ @@ -615,41 +616,56 @@ udp_input(struct mbuf *m, int iphlen) if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 && (uh->uh_dport == ntohs((u_short)esp_udp_encap_port) || uh->uh_sport == ntohs((u_short)esp_udp_encap_port))) { - int payload_len = len - sizeof(struct udphdr) > 4 ? 4 : - len - sizeof(struct udphdr); + /* + * Check if ESP or keepalive: + * 1. If the destination port of the incoming packet is 4500. + * 2. If the source port of the incoming packet is 4500, + * then check the SADB to match IP address and port. + */ + bool check_esp = true; + if (uh->uh_dport != ntohs((u_short)esp_udp_encap_port)) { + check_esp = key_checksa_present(AF_INET, (caddr_t)&ip->ip_dst, + (caddr_t)&ip->ip_src, uh->uh_dport, + uh->uh_sport); + } + + if (check_esp) { + int payload_len = len - sizeof(struct udphdr) > 4 ? 4 : + len - sizeof(struct udphdr); + + if (m->m_len < iphlen + sizeof(struct udphdr) + payload_len) { + if ((m = m_pullup(m, iphlen + sizeof(struct udphdr) + + payload_len)) == NULL) { + udpstat.udps_hdrops++; + KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, + 0, 0, 0, 0, 0); + return; + } + /* + * Expect 32-bit aligned data pointer on strict-align + * platforms. 
+ */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - if (m->m_len < iphlen + sizeof(struct udphdr) + payload_len) { - if ((m = m_pullup(m, iphlen + sizeof(struct udphdr) + - payload_len)) == NULL) { - udpstat.udps_hdrops++; + ip = mtod(m, struct ip *); + uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen); + } + /* Check for NAT keepalive packet */ + if (payload_len == 1 && *(u_int8_t *) + ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) { + m_freem(m); + KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, + 0, 0, 0, 0, 0); + return; + } else if (payload_len == 4 && *(u_int32_t *)(void *) + ((caddr_t)uh + sizeof(struct udphdr)) != 0) { + /* UDP encapsulated IPsec packet to pass through NAT */ KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0); + /* preserve the udp header */ + esp4_input(m, iphlen + sizeof(struct udphdr)); return; } - /* - * Expect 32-bit aligned data pointer on strict-align - * platforms. - */ - MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - - ip = mtod(m, struct ip *); - uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen); - } - /* Check for NAT keepalive packet */ - if (payload_len == 1 && *(u_int8_t *) - ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) { - m_freem(m); - KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, - 0, 0, 0, 0, 0); - return; - } else if (payload_len == 4 && *(u_int32_t *)(void *) - ((caddr_t)uh + sizeof(struct udphdr)) != 0) { - /* UDP encapsulated IPsec packet to pass through NAT */ - KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, - 0, 0, 0, 0, 0); - /* preserve the udp header */ - esp4_input(m, iphlen + sizeof(struct udphdr)); - return; } } #endif /* IPSEC */ @@ -1478,6 +1494,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, int netsvctype = _NET_SERVICE_TYPE_UNSPEC; struct ifnet *origoutifp = NULL; int flowadv = 0; + int tos = IPTOS_UNSPEC; /* Enable flow advisory only when connected */ flowadv = (so->so_state & SS_ISCONNECTED) ? 
1 : 0; @@ -1516,6 +1533,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, #endif if (control != NULL) { + tos = so_tos_from_control(control); sotc = so_tc_from_control(control, &netsvctype); VERIFY(outif == NULL); error = udp_check_pktinfo(control, &outif, &pi_laddr); @@ -1799,7 +1817,11 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, } ((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len; ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */ - ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ + if (tos != IPTOS_UNSPEC) { + ((struct ip *)ui)->ip_tos = (uint8_t)(tos & IPTOS_MASK); + } else { + ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */ + } udpstat.udps_opackets++; KERNEL_DEBUG(DBG_LAYER_OUT_END, ui->ui_dport, ui->ui_sport, diff --git a/bsd/netinet6/ah_input.c b/bsd/netinet6/ah_input.c index 104f5a9c6..67a664ec6 100644 --- a/bsd/netinet6/ah_input.c +++ b/bsd/netinet6/ah_input.c @@ -232,7 +232,7 @@ ah4_input(struct mbuf *m, int off) */ if (siz1 < siz) { ipseclog((LOG_NOTICE, "sum length too short in IPv4 AH input " - "(%lu, should be at least %lu): %s\n", + "(%u, should be at least %u): %s\n", (u_int32_t)siz1, (u_int32_t)siz, ipsec4_logpacketstr(ip, spi))); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); @@ -240,7 +240,7 @@ ah4_input(struct mbuf *m, int off) } if ((ah->ah_len << 2) - sizoff != siz1) { ipseclog((LOG_NOTICE, "sum length mismatch in IPv4 AH input " - "(%d should be %lu): %s\n", + "(%d should be %u): %s\n", (ah->ah_len << 2) - sizoff, (u_int32_t)siz1, ipsec4_logpacketstr(ip, spi))); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); @@ -708,7 +708,7 @@ ah6_input(struct mbuf **mp, int *offp, int proto) */ if (siz1 < siz) { ipseclog((LOG_NOTICE, "sum length too short in IPv6 AH input " - "(%lu, should be at least %lu): %s\n", + "(%u, should be at least %u): %s\n", (u_int32_t)siz1, (u_int32_t)siz, ipsec6_logpacketstr(ip6, spi))); IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); @@ -716,7 +716,7 @@ 
ah6_input(struct mbuf **mp, int *offp, int proto) } if ((ah->ah_len << 2) - sizoff != siz1) { ipseclog((LOG_NOTICE, "sum length mismatch in IPv6 AH input " - "(%d should be %lu): %s\n", + "(%d should be %u): %s\n", (ah->ah_len << 2) - sizoff, (u_int32_t)siz1, ipsec6_logpacketstr(ip6, spi))); IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); diff --git a/bsd/netinet6/esp_chachapoly.c b/bsd/netinet6/esp_chachapoly.c index a176a64f4..85450de0a 100644 --- a/bsd/netinet6/esp_chachapoly.c +++ b/bsd/netinet6/esp_chachapoly.c @@ -442,7 +442,7 @@ esp_chachapoly_decrypt(struct mbuf *m, // head of mbuf chain // check if total packet length is enough to contain ESP + IV if (m->m_pkthdr.len < bodyoff) { - esp_packet_log_err("ChaChaPoly Packet too short %d < %zu, SPI 0x%08x", + esp_packet_log_err("ChaChaPoly Packet too short %d < %u, SPI 0x%08x", m->m_pkthdr.len, bodyoff, ntohl(sav->spi)); m_freem(m); return EINVAL; diff --git a/bsd/netinet6/esp_core.c b/bsd/netinet6/esp_core.c index 17bd8e242..28ce42881 100644 --- a/bsd/netinet6/esp_core.c +++ b/bsd/netinet6/esp_core.c @@ -630,7 +630,7 @@ esp_gcm_mature(struct secasvar *sav) break; default: ipseclog((LOG_ERR, - "esp_gcm_mature %s: invalid algo %d.\n", sav->alg_enc)); + "esp_gcm_mature %s: invalid algo %d.\n", algo->name, sav->alg_enc)); return 1; } @@ -777,7 +777,7 @@ esp_cbc_decrypt(struct mbuf *m, size_t off, struct secasvar *sav, } if (m->m_pkthdr.len < bodyoff) { - ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n", + ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%u\n", algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; @@ -1020,14 +1020,14 @@ esp_cbc_encrypt( } if (m->m_pkthdr.len < bodyoff) { - ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n", + ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%u\n", algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; } if ((m->m_pkthdr.len - bodyoff) % blocklen) { ipseclog((LOG_ERR, "esp_cbc_encrypt %s: " - "payload 
length must be multiple of %lu\n", + "payload length must be multiple of %u\n", algo->name, (u_int32_t)algo->padbound)); m_freem(m); return EINVAL; @@ -1228,7 +1228,7 @@ esp_auth( siz = (((*algo->sumsiz)(sav) + 3) & ~(4 - 1)); if (sizeof(sumbuf) < siz) { ipseclog((LOG_DEBUG, - "esp_auth: AH_MAXSUMSIZE is too small: siz=%lu\n", + "esp_auth: AH_MAXSUMSIZE is too small: siz=%u\n", (u_int32_t)siz)); KERNEL_DEBUG(DBG_FNC_ESPAUTH | DBG_FUNC_END, 4, 0, 0, 0, 0); return EINVAL; diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c index f53236153..8e99b3eb7 100644 --- a/bsd/netinet6/esp_input.c +++ b/bsd/netinet6/esp_input.c @@ -378,7 +378,7 @@ esp4_input_extended(struct mbuf *m, int off, ifnet_t interface) } if (AH_MAXSUMSIZE < siz) { ipseclog((LOG_DEBUG, - "internal error: AH_MAXSUMSIZE must be larger than %lu\n", + "internal error: AH_MAXSUMSIZE must be larger than %u\n", (u_int32_t)siz)); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); goto bad; @@ -811,7 +811,7 @@ noreplaycheck: int mlen; if ((mlen = m_length2(m, NULL)) < hlen) { ipseclog((LOG_DEBUG, - "IPv4 ESP input: decrypted packet too short %d < %d\n", + "IPv4 ESP input: decrypted packet too short %d < %zu\n", mlen, hlen)); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); ifnet_release(ipsec_if); @@ -1055,7 +1055,7 @@ esp6_input_extended(struct mbuf **mp, int *offp, int proto, ifnet_t interface) } if (AH_MAXSUMSIZE < siz) { ipseclog((LOG_DEBUG, - "internal error: AH_MAXSUMSIZE must be larger than %lu\n", + "internal error: AH_MAXSUMSIZE must be larger than %u\n", (u_int32_t)siz)); IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); goto bad; diff --git a/bsd/netinet6/esp_rijndael.c b/bsd/netinet6/esp_rijndael.c index fbb36070b..9a768a004 100644 --- a/bsd/netinet6/esp_rijndael.c +++ b/bsd/netinet6/esp_rijndael.c @@ -182,7 +182,7 @@ esp_cbc_decrypt_aes( } if (m->m_pkthdr.len < bodyoff) { - ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n", + ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%u\n", algo->name, 
m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; @@ -399,14 +399,14 @@ esp_cbc_encrypt_aes( ivp = (u_int8_t *) sav->iv; if (m->m_pkthdr.len < bodyoff) { - ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n", + ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%u\n", algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; } if ((m->m_pkthdr.len - bodyoff) % AES_BLOCKLEN) { ipseclog((LOG_ERR, "esp_cbc_encrypt %s: " - "payload length must be multiple of %lu\n", + "payload length must be multiple of %d\n", algo->name, AES_BLOCKLEN)); m_freem(m); return EINVAL; @@ -705,7 +705,7 @@ esp_gcm_encrypt_aes( bzero(nonce, ESP_GCM_SALT_LEN + ivlen); if (m->m_pkthdr.len < bodyoff) { - ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__, + ipseclog((LOG_ERR, "%s: bad len %d/%u\n", __FUNCTION__, m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; @@ -906,7 +906,7 @@ esp_gcm_decrypt_aes( } if (m->m_pkthdr.len < bodyoff) { - ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__, + ipseclog((LOG_ERR, "%s: bad len %d/%u\n", __FUNCTION__, m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; diff --git a/bsd/netinet6/ipsec.c b/bsd/netinet6/ipsec.c index 671a6a64f..c5610feed 100644 --- a/bsd/netinet6/ipsec.c +++ b/bsd/netinet6/ipsec.c @@ -79,11 +79,14 @@ #include #include #include +#include + #include #include #include #include +#include #include #include @@ -130,6 +133,8 @@ #include +#include + #if IPSEC_DEBUG int ipsec_debug = 1; #else @@ -5162,6 +5167,15 @@ sysctl_ipsec_wake_packet SYSCTL_HANDLER_ARGS } int result = sysctl_io_opaque(req, &ipsec_wake_pkt, sizeof(ipsec_wake_pkt), NULL); + + ipseclog((LOG_NOTICE, "%s: uuid %s spi %u seq %u len %u result %d", + __func__, + ipsec_wake_pkt.wake_uuid, + ipsec_wake_pkt.wake_pkt_spi, + ipsec_wake_pkt.wake_pkt_seq, + ipsec_wake_pkt.wake_pkt_len, + result)); + return result; } @@ -5191,12 +5205,63 @@ ipsec_save_wake_packet(struct mbuf *wake_mbuf, u_int32_t spi, 
u_int32_t seq) ipsec_wake_pkt.wake_pkt_spi = spi; ipsec_wake_pkt.wake_pkt_seq = seq; + ipseclog((LOG_NOTICE, "%s: uuid %s spi %u seq %u len %u", + __func__, + ipsec_wake_pkt.wake_uuid, + ipsec_wake_pkt.wake_pkt_spi, + ipsec_wake_pkt.wake_pkt_seq, + ipsec_wake_pkt.wake_pkt_len)); + + struct kev_msg ev_msg = { 0 }; /* kernel event announcing that a wake packet was saved */ + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_IPSEC_SUBCLASS; + ev_msg.event_code = KEV_IPSEC_WAKE_PACKET; /* event code; must not overwrite kev_subclass set on the previous line */ + int result = kev_post_msg(&ev_msg); + if (result != 0) { + os_log_error(OS_LOG_DEFAULT, "%s: kev_post_msg() failed with error %d for wake uuid %s", + __func__, result, ipsec_wake_pkt.wake_uuid); + } + ipsec_save_wake_pkt = false; done: lck_mtx_unlock(sadb_mutex); return; } +static void +ipsec_get_local_ports(void) +{ + errno_t error; + ifnet_t *ifp_list; + uint32_t count, i; + static uint8_t port_bitmap[bitstr_size(IP_PORTRANGE_SIZE)]; + + error = ifnet_list_get_all(IFNET_FAMILY_IPSEC, &ifp_list, &count); + if (error != 0) { + os_log_error(OS_LOG_DEFAULT, "%s: ifnet_list_get_all() failed %d", + __func__, error); + return; + } + for (i = 0; i < count; i++) { + ifnet_t ifp = ifp_list[i]; + + /* + * Get all the TCP and UDP ports for IPv4 and IPv6 + */ + error = ifnet_get_local_ports_extended(ifp, PF_UNSPEC, + IFNET_GET_LOCAL_PORTS_WILDCARDOK | + IFNET_GET_LOCAL_PORTS_NOWAKEUPOK | + IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK, + port_bitmap); + if (error != 0) { + os_log_error(OS_LOG_DEFAULT, "%s: ifnet_get_local_ports_extended(%s) failed %d", + __func__, if_name(ifp), error); + } + } + ifnet_list_free(ifp_list); +} + static IOReturn ipsec_sleep_wake_handler(void *target, void *refCon, UInt32 messageType, void *provider, void *messageArgument, vm_size_t argSize) @@ -5204,16 +5269,17 @@ ipsec_sleep_wake_handler(void *target, void *refCon, UInt32 messageType, #pragma unused(target, refCon, provider, messageArgument, argSize) switch (messageType) { case kIOMessageSystemWillSleep: +
ipsec_get_local_ports(); memset(&ipsec_wake_pkt, 0, sizeof(ipsec_wake_pkt)); IOPMCopySleepWakeUUIDKey(ipsec_wake_pkt.wake_uuid, sizeof(ipsec_wake_pkt.wake_uuid)); - ipseclog((LOG_INFO, - "ipsec: system will sleep")); + ipseclog((LOG_NOTICE, + "ipsec: system will sleep, uuid: %s", ipsec_wake_pkt.wake_uuid)); break; - case kIOMessageSystemHasPoweredOn: + case kIOMessageSystemWillPowerOn: ipsec_save_wake_pkt = true; - ipseclog((LOG_INFO, - "ipsec: system has powered on")); + ipseclog((LOG_NOTICE, + "ipsec: system will powered on, uuid: %s", ipsec_wake_pkt.wake_uuid)); break; default: break; diff --git a/bsd/netinet6/ipsec.h b/bsd/netinet6/ipsec.h index eb094bfce..0ce45fb5f 100644 --- a/bsd/netinet6/ipsec.h +++ b/bsd/netinet6/ipsec.h @@ -45,6 +45,8 @@ #include #include +#include + /* lock for IPsec stats */ extern lck_grp_t *sadb_stat_mutex_grp; extern lck_grp_attr_t *sadb_stat_mutex_grp_attr; @@ -337,7 +339,26 @@ extern int ip4_esp_randpad; extern bool ipsec_save_wake_pkt; -#define ipseclog(x) do { if (ipsec_debug) log x; } while (0) +#define _ipsec_log(level, fmt, ...) 
do { \ + os_log_type_t type; \ + switch (level) { \ + default: \ + type = OS_LOG_TYPE_DEFAULT; \ + break; \ + case LOG_INFO: \ + type = OS_LOG_TYPE_INFO; \ + break; \ + case LOG_DEBUG: \ + type = OS_LOG_TYPE_DEBUG; \ + break; \ + case LOG_ERR: \ + type = OS_LOG_TYPE_ERROR; \ + break; \ + } \ + os_log_with_type(OS_LOG_DEFAULT, type, fmt, ##__VA_ARGS__); \ +} while (0) + +#define ipseclog(x) do { if (ipsec_debug != 0) _ipsec_log x; } while (0) extern struct secpolicy *ipsec4_getpolicybysock(struct mbuf *, u_int, struct socket *, int *); diff --git a/bsd/netinet6/udp6_usrreq.c b/bsd/netinet6/udp6_usrreq.c index 9b4c3a16e..2917f5c7e 100644 --- a/bsd/netinet6/udp6_usrreq.c +++ b/bsd/netinet6/udp6_usrreq.c @@ -133,6 +133,7 @@ #include #include #include +#include extern int ipsec_bypass; extern int esp_udp_encap_port; #endif /* IPSEC */ @@ -492,34 +493,49 @@ udp6_input(struct mbuf **mp, int *offp, int proto) if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 && (uh->uh_dport == ntohs((u_short)esp_udp_encap_port) || uh->uh_sport == ntohs((u_short)esp_udp_encap_port))) { - int payload_len = ulen - sizeof(struct udphdr) > 4 ? 4 : - ulen - sizeof(struct udphdr); + /* + * Check if ESP or keepalive: + * 1. If the destination port of the incoming packet is 4500. + * 2. If the source port of the incoming packet is 4500, + * then check the SADB to match IP address and port. + */ + bool check_esp = true; + if (uh->uh_dport != ntohs((u_short)esp_udp_encap_port)) { + check_esp = key_checksa_present(AF_INET6, (caddr_t)&ip6->ip6_dst, + (caddr_t)&ip6->ip6_src, uh->uh_dport, + uh->uh_sport); + } + + if (check_esp) { + int payload_len = ulen - sizeof(struct udphdr) > 4 ? 4 : + ulen - sizeof(struct udphdr); + + if (m->m_len < off + sizeof(struct udphdr) + payload_len) { + if ((m = m_pullup(m, off + sizeof(struct udphdr) + + payload_len)) == NULL) { + udpstat.udps_hdrops++; + goto bad; + } + /* + * Expect 32-bit aligned data pointer on strict-align + * platforms. 
+ */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - if (m->m_len < off + sizeof(struct udphdr) + payload_len) { - if ((m = m_pullup(m, off + sizeof(struct udphdr) + - payload_len)) == NULL) { - udpstat.udps_hdrops++; + ip6 = mtod(m, struct ip6_hdr *); + uh = (struct udphdr *)(void *)((caddr_t)ip6 + off); + } + /* Check for NAT keepalive packet */ + if (payload_len == 1 && *(u_int8_t*) + ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) { goto bad; + } else if (payload_len == 4 && *(u_int32_t*)(void *) + ((caddr_t)uh + sizeof(struct udphdr)) != 0) { + /* UDP encapsulated IPsec packet to pass through NAT */ + /* preserve the udp header */ + *offp = off + sizeof(struct udphdr); + return esp6_input(mp, offp, IPPROTO_UDP); } - /* - * Expect 32-bit aligned data pointer on strict-align - * platforms. - */ - MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - - ip6 = mtod(m, struct ip6_hdr *); - uh = (struct udphdr *)(void *)((caddr_t)ip6 + off); - } - /* Check for NAT keepalive packet */ - if (payload_len == 1 && *(u_int8_t*) - ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) { - goto bad; - } else if (payload_len == 4 && *(u_int32_t*)(void *) - ((caddr_t)uh + sizeof(struct udphdr)) != 0) { - /* UDP encapsulated IPsec packet to pass through NAT */ - /* preserve the udp header */ - *offp = off + sizeof(struct udphdr); - return esp6_input(mp, offp, IPPROTO_UDP); } } #endif /* IPSEC */ diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index e1230e472..b7c473e6a 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -1471,6 +1471,157 @@ found: return match; } +/* + * This function checks whether a UDP packet with a random local port + * and a remote port of 4500 matches an SA in the kernel. If does match, + * send the packet to the ESP engine. If not, send the packet to the UDP protocol. 
+ */ +bool +key_checksa_present(u_int family, + caddr_t local_addr, + caddr_t remote_addr, + u_int16_t local_port, + u_int16_t remote_port) +{ + LCK_MTX_ASSERT(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + /* sanity check */ + if (local_addr == NULL || remote_addr == NULL) { + panic("key_checksa_present: NULL pointer is passed.\n"); + } + + /* + * searching SAD. + * XXX: to be checked internal IP header somewhere. Also when + * IPsec tunnel packet is received. But ESP tunnel mode is + * encrypted so we can't check internal IP header. + */ + /* + * search a valid state list for inbound packet. + * the search order is not important. + */ + struct secashead *sah = NULL; + bool found_sa = false; + + lck_mtx_lock(sadb_mutex); + LIST_FOREACH(sah, &sahtree, chain) { + if (sah->state == SADB_SASTATE_DEAD) { + continue; + } + + if (sah->dir != IPSEC_DIR_OUTBOUND) { + continue; + } + + if (family != sah->saidx.src.ss_family) { + continue; + } + + struct sockaddr_in src_in = {}; + struct sockaddr_in6 src_in6 = {}; + + /* check src address: the packet's local address must equal the SA head's src */ + switch (family) { + case AF_INET: + src_in.sin_family = AF_INET; + src_in.sin_len = sizeof(src_in); + memcpy(&src_in.sin_addr, local_addr, sizeof(src_in.sin_addr)); + if (key_sockaddrcmp((struct sockaddr*)&src_in, + (struct sockaddr *)&sah->saidx.src, 0) != 0) { + continue; + } + break; + case AF_INET6: + src_in6.sin6_family = AF_INET6; + src_in6.sin6_len = sizeof(src_in6); + memcpy(&src_in6.sin6_addr, local_addr, sizeof(src_in6.sin6_addr)); + if (IN6_IS_SCOPE_LINKLOCAL(&src_in6.sin6_addr)) { + /* kame fake scopeid */ + src_in6.sin6_scope_id = + ntohs(src_in6.sin6_addr.s6_addr16[1]); + src_in6.sin6_addr.s6_addr16[1] = 0; + } + if (key_sockaddrcmp((struct sockaddr*)&src_in6, + (struct sockaddr *)&sah->saidx.src, 0) != 0) { + continue; + } + break; + default: + ipseclog((LOG_DEBUG, "key_checksa_present: " + "unknown address family=%d.\n", + family)); + continue; + } + + struct sockaddr_in dest_in = {}; + struct sockaddr_in6 dest_in6 = {}; + + /*
check dst address */ + switch (family) { + case AF_INET: + dest_in.sin_family = AF_INET; + dest_in.sin_len = sizeof(dest_in); + memcpy(&dest_in.sin_addr, remote_addr, sizeof(dest_in.sin_addr)); + if (key_sockaddrcmp((struct sockaddr*)&dest_in, + (struct sockaddr *)&sah->saidx.dst, 0) != 0) { + continue; + } + + break; + case AF_INET6: + dest_in6.sin6_family = AF_INET6; + dest_in6.sin6_len = sizeof(dest_in6); + memcpy(&dest_in6.sin6_addr, remote_addr, sizeof(dest_in6.sin6_addr)); + if (IN6_IS_SCOPE_LINKLOCAL(&dest_in6.sin6_addr)) { + /* kame fake scopeid */ + dest_in6.sin6_scope_id = + ntohs(dest_in6.sin6_addr.s6_addr16[1]); + dest_in6.sin6_addr.s6_addr16[1] = 0; + } + if (key_sockaddrcmp((struct sockaddr*)&dest_in6, + (struct sockaddr *)&sah->saidx.dst, 0) != 0) { + continue; + } + + break; + default: + ipseclog((LOG_DEBUG, "key_checksa_present: " + "unknown address family=%d.\n", family)); + continue; + } + + struct secasvar *nextsav = NULL; + for (u_int stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { + u_int state = saorder_state_alive[stateidx]; + for (struct secasvar *sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { + nextsav = LIST_NEXT(sav, chain); + /* sanity check */ + if (sav->state != state) { + ipseclog((LOG_DEBUG, "key_checksa_present: " + "invalid sav->state " + "(state: %d SA: %d)\n", + state, sav->state)); + continue; + } + + if (sav->remote_ike_port != ntohs(remote_port)) { + continue; + } + + if (sav->natt_encapsulated_src_port != local_port) { + continue; + } + found_sa = true; + break; /* exits only the inner sav loop; the enclosing loops keep scanning */ + } + } + } + + /* search finished; found_sa holds the result */ + lck_mtx_unlock(sadb_mutex); + return found_sa; +} + u_int16_t key_natt_get_translated_port( struct secasvar *outsav) @@ -1999,7 +2150,8 @@ key_msg2sp( paddr = (struct sockaddr *)(xisr + 1); uint8_t src_len = paddr->sa_len; - if (xisr->sadb_x_ipsecrequest_len < src_len) { + /* +sizeof(uint8_t) for dst_len below */ + if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr) + src_len +
sizeof(uint8_t)) { ipseclog((LOG_DEBUG, "key_msg2sp: invalid request " "invalid source address length.\n")); key_freesp(newsp, KEY_SADB_UNLOCKED); @@ -2023,7 +2175,7 @@ key_msg2sp( paddr = (struct sockaddr *)((caddr_t)paddr + paddr->sa_len); uint8_t dst_len = paddr->sa_len; - if (xisr->sadb_x_ipsecrequest_len < (src_len + dst_len)) { + if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr) + src_len + dst_len) { ipseclog((LOG_DEBUG, "key_msg2sp: invalid request " "invalid dest address length.\n")); key_freesp(newsp, KEY_SADB_UNLOCKED); @@ -4086,8 +4238,8 @@ key_delsav( /* remove from SA header */ if (__LIST_CHAINED(sav)) { LIST_REMOVE(sav, chain); + ipsec_sav_count--; } - ipsec_sav_count--; if (sav->spihash.le_prev || sav->spihash.le_next) { LIST_REMOVE(sav, spihash); diff --git a/bsd/netkey/key.h b/bsd/netkey/key.h index 4d3ee9421..418f9792d 100644 --- a/bsd/netkey/key.h +++ b/bsd/netkey/key.h @@ -67,6 +67,7 @@ extern struct secasvar *key_allocsa(u_int, caddr_t, caddr_t, struct secasvar * key_allocsa_extended(u_int family, caddr_t src, caddr_t dst, u_int proto, u_int32_t spi, ifnet_t interface); +extern bool key_checksa_present(u_int family, caddr_t src, caddr_t dst, u_int16_t src_port, u_int16_t dst_port); extern u_int16_t key_natt_get_translated_port(struct secasvar *); extern void key_freesp(struct secpolicy *, int); extern void key_freesav(struct secasvar *, int); diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index 6a6878fc5..51d151ecf 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -3597,6 +3597,65 @@ nfsrv_fhtoexport(struct nfs_filehandle *nfhp) return nx; } +struct nfsrv_getvfs_by_mntonname_callback_args { + const char *path; /* IN */ + mount_t mp; /* OUT */ +}; + +static int +nfsrv_getvfs_by_mntonname_callback(mount_t mp, void *v) +{ + struct nfsrv_getvfs_by_mntonname_callback_args * const args = v; + char real_mntonname[MAXPATHLEN]; + int pathbuflen = MAXPATHLEN; + vnode_t rvp; + int error; + + error = VFS_ROOT(mp, &rvp, 
vfs_context_current()); + if (error) { + goto out; + } + error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen, + VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK); + vnode_put(rvp); + if (error) { + goto out; + } + if (strcmp(args->path, real_mntonname) == 0) { + error = vfs_busy(mp, LK_NOWAIT); + if (error == 0) { + args->mp = mp; + } + return VFS_RETURNED_DONE; + } +out: + return VFS_RETURNED; +} + +static mount_t +nfsrv_getvfs_by_mntonname(char *path) +{ + struct nfsrv_getvfs_by_mntonname_callback_args args = { + .path = path, + .mp = NULL, + }; + mount_t mp; + int error; + + mp = vfs_getvfs_by_mntonname(path); + if (mp) { + error = vfs_busy(mp, LK_NOWAIT); + mount_iterdrop(mp); + if (error) { + mp = NULL; + } + } else if (vfs_iterate(0, nfsrv_getvfs_by_mntonname_callback, + &args) == 0) { + mp = args.mp; + } + return mp; +} + /* * nfsrv_fhtovp() - convert FH to vnode and export info */ @@ -3690,14 +3749,7 @@ nfsrv_fhtovp( } /* find mount structure */ - mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path); - if (mp) { - error = vfs_busy(mp, LK_NOWAIT); - mount_iterdrop(mp); - if (error) { - mp = NULL; - } - } + mp = nfsrv_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path); if (!mp) { /* * We have an export, but no mount? 
diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index 78d83c951..fe4bb37cf 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -522,6 +522,23 @@ worktodo: * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c */ +static struct nfs_exportfs * +nfsrv_find_exportfs(const char *ptr) +{ + struct nfs_exportfs *nxfs; + + LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { + if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) { + break; + } + } + if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) { + nxfs = NULL; + } + + return nxfs; +} + /* * Get file handle system call */ @@ -532,7 +549,7 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval) struct nfs_filehandle nfh; int error, fhlen, fidlen; struct nameidata nd; - char path[MAXPATHLEN], *ptr; + char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr; size_t pathlen; struct nfs_exportfs *nxfs; struct nfs_export *nx; @@ -575,12 +592,28 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval) // find exportfs that matches f_mntonname lck_rw_lock_shared(&nfsrv_export_rwlock); ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; - LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { - if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) { - break; + if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) { + /* + * The f_mntonname might be a firmlink path. Resolve + * it into a physical path and try again. 
+ */ + int pathbuflen = MAXPATHLEN; + vnode_t rvp; + + error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current()); + if (error) { + goto out; + } + error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen, + VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK); + vnode_put(rvp); + if (error) { + goto out; } + ptr = real_mntonname; + nxfs = nfsrv_find_exportfs(ptr); } - if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { + if (nxfs == NULL) { error = EINVAL; goto out; } diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index b460a0411..abb24e2fb 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -5975,7 +5975,7 @@ nfs_dir_buf_search( if ((cnp->cn_namelen == dp->d_namlen) && !strcmp(cnp->cn_nameptr, dp->d_name)) { fhlen = dp->d_name[dp->d_namlen + 1]; nvattrp = NFS_DIR_BUF_NVATTR(bp, i); - if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhp->fh_len == 0) || + if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhlen == 0) || (nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) { /* entry is not valid */ error = ENOENT; diff --git a/bsd/pthread/pthread_workqueue.c b/bsd/pthread/pthread_workqueue.c index c979f80eb..0e83375cd 100644 --- a/bsd/pthread/pthread_workqueue.c +++ b/bsd/pthread/pthread_workqueue.c @@ -1856,6 +1856,7 @@ fixedpri: } } + done: if (qos_rv && voucher_rv) { /* Both failed, give that a unique error. 
*/ @@ -3239,6 +3240,8 @@ workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); + thread_unfreeze_base_pri(uth->uu_thread); +#if 0 // to turn this back on if (__improbable(thread_unfreeze_base_pri(uth->uu_thread) && !is_creator)) { if (req_ts) { workq_perform_turnstile_operation_locked(wq, ^{ @@ -3251,6 +3254,7 @@ workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 3, 0, 0, 0); goto park_thawed; } +#endif /* * We passed all checks, dequeue the request, bind to it, and set it up @@ -3321,7 +3325,9 @@ workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, park: thread_unfreeze_base_pri(uth->uu_thread); +#if 0 // park_thawed: +#endif workq_park_and_unlock(p, wq, uth, setup_flags); } diff --git a/bsd/pthread/workqueue_internal.h b/bsd/pthread/workqueue_internal.h index f7ed3080c..082441370 100644 --- a/bsd/pthread/workqueue_internal.h +++ b/bsd/pthread/workqueue_internal.h @@ -295,12 +295,14 @@ void workq_kern_threadreq_unlock(struct proc *p); void workq_kern_threadreq_redrive(struct proc *p, workq_kern_threadreq_flags_t flags); +// This enum matches _pthread_set_flags in libpthread's qos_private.h enum workq_set_self_flags { - WORKQ_SET_SELF_QOS_FLAG = 0x1, - WORKQ_SET_SELF_VOUCHER_FLAG = 0x2, - WORKQ_SET_SELF_FIXEDPRIORITY_FLAG = 0x4, - WORKQ_SET_SELF_TIMESHARE_FLAG = 0x8, - WORKQ_SET_SELF_WQ_KEVENT_UNBIND = 0x10, + WORKQ_SET_SELF_QOS_FLAG = 0x01, + WORKQ_SET_SELF_VOUCHER_FLAG = 0x02, + WORKQ_SET_SELF_FIXEDPRIORITY_FLAG = 0x04, + WORKQ_SET_SELF_TIMESHARE_FLAG = 0x08, + WORKQ_SET_SELF_WQ_KEVENT_UNBIND = 0x10, + WORKQ_SET_SELF_ALTERNATE_AMX = 0x20, }; void workq_proc_suspended(struct proc *p); diff --git a/bsd/security/audit/audit.c b/bsd/security/audit/audit.c index d6c156a1b..0a5f138ae 100644 --- a/bsd/security/audit/audit.c +++ b/bsd/security/audit/audit.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 
1999-2009 Apple Inc. + * Copyright (c) 1999-2019 Apple Inc. * Copyright (c) 2006-2007 Robert N. M. Watson * All rights reserved. * @@ -754,6 +754,77 @@ out: uthread->uu_ar = NULL; } +/* + * For system calls such as posix_spawn(2) the sub operations (i.e., file actions + * and port actions) need to be audited as their own events. Like with system + * calls we need to determine if the sub operation needs to be audited by + * examining preselection masks. + */ +void +audit_subcall_enter(au_event_t event, proc_t proc, struct uthread *uthread) +{ + struct au_mask *aumask; + au_class_t class; + au_id_t auid; + kauth_cred_t cred; + + /* + * Check which audit mask to use; either the kernel non-attributable + * event mask or the process audit mask. + */ + cred = kauth_cred_proc_ref(proc); + auid = cred->cr_audit.as_aia_p->ai_auid; + if (auid == AU_DEFAUDITID) { + aumask = &audit_nae_mask; + } else { + aumask = &cred->cr_audit.as_mask; + } + + /* + * Allocate an audit record, if preselection allows it, and store in + * the thread for later use. + */ + class = au_event_class(event); + + if (au_preselect(event, class, aumask, AU_PRS_BOTH)) { + /* + * If we're out of space and need to suspend unprivileged + * processes, do that here rather than trying to allocate + * another audit record. + * + * Note: we might wish to be able to continue here in the + * future, if the system recovers. That should be possible + * by means of checking the condition in a loop around + * cv_wait(). It might be desirable to reevaluate whether an + * audit record is still required for this event by + * re-calling au_preselect(). 
+ */ + if (audit_in_failure && + suser(cred, &proc->p_acflag) != 0) { + cv_wait(&audit_fail_cv, &audit_mtx); + panic("audit_failing_stop: thread continued"); + } + if (uthread->uu_ar == NULL) { + uthread->uu_ar = audit_new(event, proc, uthread); + } + } else if (audit_pipe_preselect(auid, event, class, AU_PRS_BOTH, 0)) { + if (uthread->uu_ar == NULL) { + uthread->uu_ar = audit_new(event, proc, uthread); + } + } + + kauth_cred_unref(&cred); +} + +void +audit_subcall_exit(int error, struct uthread *uthread) +{ + /* A subcall doesn't have a return value so always zero. */ + audit_commit(uthread->uu_ar, error, 0 /* retval */); + + uthread->uu_ar = NULL; +} + /* * Calls to set up and tear down audit structures used during Mach system * calls. diff --git a/bsd/security/audit/audit.h b/bsd/security/audit/audit.h index 13a1b8c24..903099074 100644 --- a/bsd/security/audit/audit.h +++ b/bsd/security/audit/audit.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2004-2016 Apple Inc. + * Copyright (c) 2004-2019 Apple Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -172,6 +172,11 @@ void audit_syscall_exit(int error, struct proc *proc, void audit_mach_syscall_enter(unsigned short audit_event); void audit_mach_syscall_exit(int retval, struct uthread *uthread); +void audit_subcall_enter(au_event_t event, + struct proc *proc, struct uthread *uthread); +void audit_subcall_exit(int error, + struct uthread *uthread); + extern struct auditinfo_addr *audit_default_aia_p; /* @@ -349,6 +354,16 @@ extern au_event_t sys_au_event[]; audit_syscall_exit(code, error, proc, uthread); \ } while (0) +#define AUDIT_SUBCALL_ENTER(event, proc, uthread) do { \ + if (AUDIT_ENABLED()) \ + audit_subcall_enter(AUE_ ## event, proc, uthread); \ +} while (0) + +#define AUDIT_SUBCALL_EXIT(uthread, error) do { \ + if (AUDIT_AUDITING(uthread->uu_ar)) \ + audit_subcall_exit(error, uthread); \ +} while (0) + /* * Wrap the audit_mach_syscall_enter() and audit_mach_syscall_exit() * functions in a manner similar to other system call enter/exit functions. @@ -390,6 +405,12 @@ extern au_event_t sys_au_event[]; #define AUDIT_SYSCALL_EXIT(code, proc, uthread, error) do { \ } while (0) +#define AUDIT_SUBCALL_ENTER(event, proc, uthread) do { \ +} while (0) + +#define AUDIT_SUBCALL_EXIT(uthread, error) do { \ +} while (0) + #define AUDIT_MACH_SYSCALL_ENTER(args...) 
do { \ } while (0) diff --git a/bsd/sys/event.h b/bsd/sys/event.h index 5966311eb..6303c4915 100644 --- a/bsd/sys/event.h +++ b/bsd/sys/event.h @@ -728,7 +728,11 @@ struct knote { union { void *kn_hook; uint32_t kn_hook32; - uint64_t kn_hook64; +#if __LP64__ + uint64_t kn_hook_waitqid; +#else + uint32_t kn_hook_waitqid; +#endif }; /* per filter pointer to the resource being watched */ diff --git a/bsd/sys/eventvar.h b/bsd/sys/eventvar.h index 04d31067e..62387f2b6 100644 --- a/bsd/sys/eventvar.h +++ b/bsd/sys/eventvar.h @@ -281,9 +281,9 @@ struct kqworkloop { #define KQWL_STAYACTIVE_FIRED_BIT (1 << 0) uint8_t kqwl_wakeup_indexes; /* QoS/override levels that woke */ kq_index_t kqwl_stayactive_qos; /* max QoS of statyactive knotes */ + struct turnstile *kqwl_turnstile; /* turnstile for sync IPC/waiters */ kqueue_id_t kqwl_dynamicid; /* dynamic identity */ uint64_t kqwl_params; /* additional parameters */ - struct turnstile *kqwl_turnstile; /* turnstile for sync IPC/waiters */ LIST_ENTRY(kqworkloop) kqwl_hashlink; /* linkage for search list */ #if CONFIG_WORKLOOP_DEBUG #define KQWL_HISTORY_COUNT 32 diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index 03c7af88f..c30f0ba3d 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -645,11 +645,12 @@ __BEGIN_DECLS #define DBG_MT_TMPCPU 0xff /* The Kernel Debug Sub Classes for DBG_MISC */ -#define DBG_EVENT 0x10 -#define DBG_MISC_INSTRUMENTS 0x11 -#define DBG_MISC_INSTRUMENTSBT 0x12 -#define DBG_MISC_LAYOUT 0x1a -#define DBG_BUFFER 0x20 +#define DBG_MISC_COREBRIGHTNESS 0x01 +#define DBG_EVENT 0x10 +#define DBG_MISC_INSTRUMENTS 0x11 +#define DBG_MISC_INSTRUMENTSBT 0x12 +#define DBG_MISC_LAYOUT 0x1a +#define DBG_BUFFER 0x20 /* The Kernel Debug Sub Classes for DBG_DYLD */ #define DBG_DYLD_UUID (5) diff --git a/bsd/sys/kern_memorystatus.h b/bsd/sys/kern_memorystatus.h index 8ea2bce39..2ba2a8452 100644 --- a/bsd/sys/kern_memorystatus.h +++ b/bsd/sys/kern_memorystatus.h @@ -562,6 +562,7 @@ void memorystatus_proc_flags_unsafe(void 
* v, boolean_t *is_dirty, boolean_t *is #if __arm64__ void memorystatus_act_on_legacy_footprint_entitlement(proc_t p, boolean_t footprint_increase); +void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t p); #endif /* __arm64__ */ #endif /* CONFIG_MEMORYSTATUS */ diff --git a/bsd/sys/lockf.h b/bsd/sys/lockf.h index ab81a003e..574ef7a70 100644 --- a/bsd/sys/lockf.h +++ b/bsd/sys/lockf.h @@ -107,6 +107,7 @@ struct lockf { __BEGIN_DECLS #ifdef KERNEL_PRIVATE +void lf_init(void); int lf_advlock(struct vnop_advlock_args *); int lf_assert(struct vnop_advlock_args *, void **); void lf_commit(void *, int); diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h index f7e1e82ff..2e30057f1 100644 --- a/bsd/sys/socketvar.h +++ b/bsd/sys/socketvar.h @@ -960,6 +960,7 @@ extern void set_packet_service_class(struct mbuf *, struct socket *, mbuf_svc_class_t, u_int32_t); extern void so_tc_update_stats(struct mbuf *, struct socket *, mbuf_svc_class_t); +extern int so_tos_from_control(struct mbuf *); extern int so_tc_from_control(struct mbuf *, int *); extern mbuf_svc_class_t so_tc2msc(int); extern int so_svc2tc(mbuf_svc_class_t); diff --git a/bsd/sys/user.h b/bsd/sys/user.h index 42734a4de..8d4001be4 100644 --- a/bsd/sys/user.h +++ b/bsd/sys/user.h @@ -123,7 +123,7 @@ struct uthread { * relies on single copy atomicity and cannot be changed to a bitfield. */ bool uu_workq_pthread_kill_allowed; - unsigned int syscall_code; /* current syscall code */ + uint16_t syscall_code; /* current syscall code */ /* thread exception handling */ int uu_exception; @@ -191,12 +191,6 @@ struct uthread { uint nbytes; /* number of bytes in ibits and obits */ } uu_select; /* saved state for select() */ - /* internal support for continuation framework */ - int (*uu_continuation)(int); - int uu_pri; - int uu_timo; - caddr_t uu_wchan; /* sleeping thread wait channel */ - const char *uu_wmesg; /* ... 
wait message */ struct proc *uu_proc; thread_t uu_thread; void * uu_userstate; @@ -223,12 +217,19 @@ struct uthread { lck_spin_t uu_rethrottle_lock; /* locks was_rethrottled and is_throttled */ TAILQ_ENTRY(uthread) uu_throttlelist; /* List of uthreads currently throttled */ void * uu_throttle_info; /* pointer to throttled I/Os info */ - int uu_on_throttlelist; - int uu_lowpri_window; + int8_t uu_on_throttlelist; + bool uu_lowpri_window; /* These boolean fields are protected by different locks */ bool uu_was_rethrottled; bool uu_is_throttled; bool uu_throttle_bc; + bool uu_defer_reclaims; + + /* internal support for continuation framework */ + uint16_t uu_pri; /* pri | PCATCH | PVFS, ... */ + caddr_t uu_wchan; /* sleeping thread wait channel */ + int (*uu_continuation)(int); + const char *uu_wmesg; /* ... wait message */ u_int32_t uu_network_marks; /* network control flow marks */ @@ -236,7 +237,6 @@ struct uthread { vnode_t uu_vreclaims; vnode_t uu_cdir; /* per thread CWD */ int uu_dupfd; /* fd in fdesc_open/dupfdopen */ - int uu_defer_reclaims; /* * Bound kqueue request. 
This field is only cleared by the current thread, @@ -244,7 +244,7 @@ struct uthread { */ struct workq_threadreq_s *uu_kqr_bound; TAILQ_ENTRY(uthread) uu_workq_entry; - mach_vm_offset_t uu_workq_stackaddr; + vm_offset_t uu_workq_stackaddr; mach_port_name_t uu_workq_thport; struct uu_workq_policy { uint16_t qos_req : 4; /* requested QoS */ diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h index 5ec22ac93..e5263caf4 100644 --- a/bsd/sys/vnode.h +++ b/bsd/sys/vnode.h @@ -2369,6 +2369,7 @@ int vfs_context_issuser(vfs_context_t); int vfs_context_iskernel(vfs_context_t); vfs_context_t vfs_context_kernel(void); /* get from 1st kernel thread */ vnode_t vfs_context_cwd(vfs_context_t); +vnode_t vfs_context_get_cwd(vfs_context_t); /* get cwd with iocount */ int vnode_isnoflush(vnode_t); void vnode_setnoflush(vnode_t); void vnode_clearnoflush(vnode_t); diff --git a/bsd/tests/ctrr_test_sysctl.c b/bsd/tests/ctrr_test_sysctl.c deleted file mode 100644 index bea84e1ab..000000000 --- a/bsd/tests/ctrr_test_sysctl.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include - diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c index 79a40a817..693bf9de7 100644 --- a/bsd/vfs/kpi_vfs.c +++ b/bsd/vfs/kpi_vfs.c @@ -1330,6 +1330,56 @@ vfs_context_cwd(vfs_context_t ctx) return cwd; } +/* + * vfs_context_get_cwd + * + * Description: Returns a vnode for the current working directory for the + * supplied context. The returned vnode has an iocount on it + * which must be released with a vnode_put(). + * + * Parameters: vfs_context_t The context to use + * + * Returns: vnode_t The current working directory + * for this context + * + * Notes: The function first attempts to obtain the current directory + * from the thread, and if it is not present there, falls back + * to obtaining it from the process instead. If it can't be + * obtained from either place, we return NULLVP. + */ +vnode_t +vfs_context_get_cwd(vfs_context_t ctx) +{ + vnode_t cwd = NULLVP; + + if (ctx != NULL && ctx->vc_thread != NULL) { + uthread_t uth = get_bsdthread_info(ctx->vc_thread); + proc_t proc; + + /* + * Get the cwd from the thread; if there isn't one, get it + * from the process, instead. 
+ */ + cwd = uth->uu_cdir; + + if (cwd) { + if ((vnode_get(cwd) != 0)) { + cwd = NULLVP; + } + } else if ((proc = (proc_t)get_bsdthreadtask_info(ctx->vc_thread)) != NULL && + proc->p_fd != NULL) { + proc_fdlock(proc); + cwd = proc->p_fd->fd_cdir; + if (cwd && (vnode_get(cwd) != 0)) { + cwd = NULLVP; + } + proc_fdunlock(proc); + } + } + + return cwd; +} + /* * vfs_context_create * diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index aaaf2fbb1..5fe5b737a 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -113,7 +113,7 @@ static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, v #endif static int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx); -static int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx); +static int handle_symlink_for_namei(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx); static int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx); static void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation); static int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, @@ -167,6 +167,8 @@ namei(struct nameidata *ndp) { struct filedesc *fdp; /* pointer to file descriptor state */ struct vnode *dp; /* the directory we are searching */ + struct vnode *rootdir_with_usecount = NULLVP; + struct vnode *startdir_with_usecount = NULLVP; struct vnode *usedvp = ndp->ni_dvp; /* store pointer to vp in case we must loop due to * heavy vnode pressure */ u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */ @@ -348,16 +350,80 @@ retry_copy: /* * determine the starting point for the translation. 
+ * + * We may need to upto 2 usecounts on vnodes before starting the translation + * We need to have a usecount on the root directory for the process + * for the entire duration of the lookup. This is because symlink + * translation can restart translation at / if a symlink is encountered. + * + * For the duration of this lookup at rootdir for this lookup is the one + * we fetch now under the proc_fdlock even the if the proc rootdir changes + * once we let go of the proc_fdlock. + * + * In the future we may consider holding off a chroot till we complete + * in progress lookups. + * + * If the starting directory is not the process rootdir then we need + * a usecount on the starting directory as well for the duration of the + * lookup. + * + * Getting an addtional usecount involves first getting an iocount under + * the lock that ensures that a usecount is on the directory. Once we + * get an iocount we can release the lock and we will be free to get a + * usecount without the vnode getting recycled. Once we get the usecount + * we can release the icoount which we used to get our usecount. */ + proc_fdlock(p); + if ((ndp->ni_rootdir = fdp->fd_rdir) == NULLVP) { if (!(fdp->fd_flags & FD_CHROOT)) { ndp->ni_rootdir = rootvnode; + } else { + proc_fdunlock(p); + /* This should be a panic */ + printf("proc is chrooted but does not have a root directory set\n"); + error = ENOENT; + goto error_out; } } + + /* + * We have the proc_fdlock here so we still have a usecount + * on ndp->ni_rootdir. + * + * However we need to get our own usecount on it in order to + * ensure that the vnode isn't recycled to something else. + * + * Note : It's fine if the vnode is force reclaimed but with + * a usecount it won't be reused until we release the reference. + * + * In order to get that usecount however, we need to first + * get non blocking iocount since we'll be doing this under + * the proc_fdlock. 
+ */ + if (vnode_get(ndp->ni_rootdir) != 0) { + proc_fdunlock(p); + error = ENOENT; + goto error_out; + } + + proc_fdunlock(p); + + /* Now we can safely get our own ref on ni_rootdir */ + error = vnode_ref_ext(ndp->ni_rootdir, O_EVTONLY, 0); + vnode_put(ndp->ni_rootdir); + if (error) { + ndp->ni_rootdir = NULLVP; + goto error_out; + } + + rootdir_with_usecount = ndp->ni_rootdir; + cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_usedvp = NULLVP; + bool dp_needs_put = false; if (*(cnp->cn_nameptr) == '/') { while (*(cnp->cn_nameptr) == '/') { cnp->cn_nameptr++; @@ -368,13 +434,40 @@ retry_copy: dp = ndp->ni_dvp; ndp->ni_usedvp = dp; } else { - dp = vfs_context_cwd(ctx); + dp = vfs_context_get_cwd(ctx); + if (dp) { + dp_needs_put = true; + } } if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) { + if (dp_needs_put) { + vnode_put(dp); + dp_needs_put = false; + } + dp = NULLVP; error = ENOENT; goto error_out; } + + if (dp != rootdir_with_usecount) { + error = vnode_ref_ext(dp, O_EVTONLY, 0); + if (error) { + if (dp_needs_put) { + vnode_put(dp); + dp_needs_put = false; + } + dp = NULLVP; + goto error_out; + } + startdir_with_usecount = dp; + } + + if (dp_needs_put) { + vnode_put(dp); + dp_needs_put = false; + } + ndp->ni_dvp = NULLVP; ndp->ni_vp = NULLVP; @@ -395,6 +488,7 @@ retry_copy: #endif ndp->ni_startdir = dp; + dp = NULLVP; if ((error = lookup(ndp))) { goto error_out; @@ -404,15 +498,46 @@ retry_copy: * Check for symbolic link */ if ((cnp->cn_flags & ISSYMLINK) == 0) { + if (startdir_with_usecount) { + vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0); + startdir_with_usecount = NULLVP; + } + if (rootdir_with_usecount) { + vnode_rele_ext(rootdir_with_usecount, O_EVTONLY, 0); + rootdir_with_usecount = NULLVP; + } return 0; } continue_symlink: - /* Gives us a new path to process, and a starting dir */ - error = lookup_handle_symlink(ndp, &dp, ctx); + /* + * Gives us a new path to process, and a starting dir (with an iocount). 
+ * The iocount is needed to take a usecount on the vnode returned + * (if it is not a vnode we already have a usecount on). + */ + error = handle_symlink_for_namei(ndp, &dp, ctx); if (error != 0) { break; } + + if (dp == ndp->ni_rootdir && startdir_with_usecount) { + vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0); + startdir_with_usecount = NULLVP; + } else if (dp != startdir_with_usecount) { + if (startdir_with_usecount) { + vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0); + startdir_with_usecount = NULLVP; + } + error = vnode_ref_ext(dp, O_EVTONLY, 0); + if (error) { + vnode_put(dp); + dp = NULLVP; + goto error_out; + } + startdir_with_usecount = dp; + } + /* iocount not required on dp anymore */ + vnode_put(dp); } /* * only come here if we fail to handle a SYMLINK... @@ -436,6 +561,15 @@ error_out: ndp->ni_vp = NULLVP; ndp->ni_dvp = NULLVP; + if (startdir_with_usecount) { + vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0); + startdir_with_usecount = NULLVP; + } + if (rootdir_with_usecount) { + vnode_rele_ext(rootdir_with_usecount, O_EVTONLY, 0); + rootdir_with_usecount = NULLVP; + } + #if CONFIG_VOLFS /* * Deal with volfs fallout. @@ -1530,10 +1664,10 @@ out: /* * Takes ni_vp and ni_dvp non-NULL. Returns with *new_dp set to the location - * at which to start a lookup with a resolved path, and all other iocounts dropped. + * at which to start a lookup with a resolved path and with an iocount. */ static int -lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx) +handle_symlink_for_namei(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx) { int error; char *cp; /* pointer into pathname argument */ @@ -1624,17 +1758,18 @@ lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx) /* * starting point for 'relative' * symbolic link path + * + * If the starting point is not the root we have to return an iocounted + * dp to namei so we don't release the icoount here. 
*/ dp = ndp->ni_dvp; + ndp->ni_dvp = NULLVP; /* * get rid of references returned via 'lookup' */ vnode_put(ndp->ni_vp); - vnode_put(ndp->ni_dvp); /* ALWAYS have a dvp for a symlink */ - ndp->ni_vp = NULLVP; - ndp->ni_dvp = NULLVP; /* * Check if symbolic link restarts us at the root @@ -1644,9 +1779,20 @@ lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx) cnp->cn_nameptr++; ndp->ni_pathlen--; } + vnode_put(dp); if ((dp = ndp->ni_rootdir) == NULLVP) { return ENOENT; } + if (vnode_get(dp) != 0) { + return ENOENT; + } + } + + if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) { + if (dp) { + vnode_put(dp); + } + return ENOENT; } *new_dp = dp; diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 8a3cdcc47..f8304f9ad 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -234,6 +234,8 @@ static void record_vp(vnode_t vp, int count); extern int bootarg_no_vnode_jetsam; /* from bsd_init.c default value is 0 */ #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */ +extern int bootarg_no_vnode_drain; /* from bsd_init.c default value is 0 */ + boolean_t root_is_CF_drive = FALSE; #if CONFIG_TRIGGERS @@ -250,6 +252,7 @@ TAILQ_HEAD(ragelst, vnode) vnode_rage_list; /* vnode rapid age list */ struct timeval rage_tv; int rage_limit = 0; int ragevnodes = 0; +static int vfs_unmountall_started = 0; #define RAGE_LIMIT_MIN 100 #define RAGE_TIME_LIMIT 5 @@ -3311,6 +3314,8 @@ vfs_unmountall(void) int mounts, sec = 1; struct unmount_info ui; + vfs_unmountall_started = 1; + retry: ui.u_errs = ui.u_busy = 0; vfs_iterate(VFS_ITERATE_CB_DROPREF | VFS_ITERATE_TAIL_FIRST, unmount_callback, &ui); @@ -3454,6 +3459,7 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, ctx) == 0) { if (isvirtual) { mp->mnt_kern_flag |= MNTK_VIRTUALDEV; + mp->mnt_flag |= MNT_REMOVABLE; } } if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, ctx) == 0) { @@ -3640,10 +3646,7 @@ vfs_init_io_attributes(vnode_t devvp, 
mount_t mp) if (VNOP_IOCTL(devvp, DKIOCGETLOCATION, (caddr_t)&location, 0, ctx) == 0) { if (location & DK_LOCATION_EXTERNAL) { mp->mnt_ioflags |= MNT_IOFLAGS_PERIPHERAL_DRIVE; - /* This must be called after MNTK_VIRTUALDEV has been determined via DKIOCISVIRTUAL */ - if ((MNTK_VIRTUALDEV & mp->mnt_kern_flag)) { - mp->mnt_flag |= MNT_REMOVABLE; - } + mp->mnt_flag |= MNT_REMOVABLE; } } @@ -4972,7 +4975,25 @@ vnode_drain(vnode_t vp) vp->v_owner = current_thread(); while (vp->v_iocount > 1) { - msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", NULL); + if (bootarg_no_vnode_drain) { + struct timespec ts = {.tv_sec = 10, .tv_nsec = 0}; + int error; + + if (vfs_unmountall_started) { + ts.tv_sec = 1; + } + + error = msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain_with_timeout", &ts); + + /* Try to deal with leaked iocounts under bootarg and shutting down */ + if (vp->v_iocount > 1 && error == EWOULDBLOCK && + ts.tv_sec == 1 && vp->v_numoutput == 0) { + vp->v_iocount = 1; + break; + } + } else { + msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", NULL); + } } vp->v_lflag &= ~VL_DRAIN; diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index 838ad8c12..727d2dafc 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -1966,11 +1966,10 @@ checkdirs_callback(proc_t p, void * arg) vnode_t olddp = cdrp->olddp; vnode_t newdp = cdrp->newdp; struct filedesc *fdp; - vnode_t tvp; - vnode_t fdp_cvp; - vnode_t fdp_rvp; - int cdir_changed = 0; - int rdir_changed = 0; + vnode_t new_cvp = newdp; + vnode_t new_rvp = newdp; + vnode_t old_cvp = NULL; + vnode_t old_rvp = NULL; /* * XXX Also needs to iterate each thread in the process to see if it @@ -1978,36 +1977,68 @@ checkdirs_callback(proc_t p, void * arg) * XXX update that as well. */ + /* + * First, with the proc_fdlock held, check to see if we will need + * to do any work. If not, we will get out fast. 
+ */ proc_fdlock(p); fdp = p->p_fd; - if (fdp == (struct filedesc *)0) { + if (fdp == NULL || + (fdp->fd_cdir != olddp && fdp->fd_rdir != olddp)) { proc_fdunlock(p); return PROC_RETURNED; } - fdp_cvp = fdp->fd_cdir; - fdp_rvp = fdp->fd_rdir; proc_fdunlock(p); - if (fdp_cvp == olddp) { - vnode_ref(newdp); - tvp = fdp->fd_cdir; - fdp_cvp = newdp; - cdir_changed = 1; - vnode_rele(tvp); + /* + * Ok, we will have to do some work. Always take two refs + * because we might need that many. We'll dispose of whatever + * we ended up not using. + */ + if (vnode_ref(newdp) != 0) { + return PROC_RETURNED; } - if (fdp_rvp == olddp) { - vnode_ref(newdp); - tvp = fdp->fd_rdir; - fdp_rvp = newdp; - rdir_changed = 1; - vnode_rele(tvp); + if (vnode_ref(newdp) != 0) { + vnode_rele(newdp); + return PROC_RETURNED; } - if (cdir_changed || rdir_changed) { - proc_fdlock(p); - fdp->fd_cdir = fdp_cvp; - fdp->fd_rdir = fdp_rvp; - proc_fdunlock(p); + + /* + * Now do the work. Note: we dropped the proc_fdlock, so we + * have to do all of the checks again. + */ + proc_fdlock(p); + fdp = p->p_fd; + if (fdp != NULL) { + if (fdp->fd_cdir == olddp) { + old_cvp = olddp; + fdp->fd_cdir = newdp; + new_cvp = NULL; + } + if (fdp->fd_rdir == olddp) { + old_rvp = olddp; + fdp->fd_rdir = newdp; + new_rvp = NULL; + } } + proc_fdunlock(p); + + /* + * Dispose of any references that are no longer needed. + */ + if (old_cvp != NULL) { + vnode_rele(old_cvp); + } + if (old_rvp != NULL) { + vnode_rele(old_rvp); + } + if (new_cvp != NULL) { + vnode_rele(new_cvp); + } + if (new_rvp != NULL) { + vnode_rele(new_rvp); + } + return PROC_RETURNED; } diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index a7b0417b2..76d6a4ef3 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -523,7 +523,9 @@ enum { kIOPMOverTemp = (1 << 9),// system dangerously hot kIOPMClamshellOpened = (1 << 10),// clamshell was opened kIOPMDWOverTemp = (1 << 11),// DarkWake thermal limits exceeded. 
- kIOPMPowerButtonUp = (1 << 12) // Power button up + kIOPMPowerButtonUp = (1 << 12),// Power button up + kIOPMProModeEngaged = (1 << 13),// Fans entered 'ProMode' + kIOPMProModeDisengaged = (1 << 14) // Fans exited 'ProMode' }; @@ -788,6 +790,8 @@ enum { #define kIOPMSettingTimeZoneOffsetKey "TimeZoneOffsetSeconds" #define kIOPMSettingMobileMotionModuleKey "MobileMotionModule" #define kIOPMSettingGraphicsSwitchKey "GPUSwitch" +#define kIOPMSettingProModeControl "ProModeControl" +#define kIOPMSettingProModeDefer "ProModeDefer" // Setting controlling drivers can register to receive scheduled wake data // Either in "CF seconds" type, or structured calendar data in a formatted diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h index 1a549c0c4..ee04a5072 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPrivate.h +++ b/iokit/IOKit/pwr_mgt/IOPMPrivate.h @@ -106,6 +106,9 @@ enum { #define kIOPMMessageLaunchBootSpinDump \ iokit_family_msg(sub_iokit_powermanagement, 0x440) +#define kIOPMMessageProModeStateChange \ + iokit_family_msg(sub_iokit_powermanagement, 0x450) + /* @enum SystemSleepReasons * @abstract The potential causes for system sleep as logged in the system event record. 
*/ @@ -677,6 +680,11 @@ enum { #define kIOPMWakeEventReasonKey "Reason" #define kIOPMWakeEventDetailsKey "Details" +/* kIOPMFeatureProModeKey + * Feature published if ProMode is supported + */ +#define kIOPMFeatureProModeKey "ProMode" + /***************************************************************************** * * Wake event flags reported to IOPMrootDomain::claimSystemWakeEvent() diff --git a/iokit/Kernel/IOInterruptController.cpp b/iokit/Kernel/IOInterruptController.cpp index 18441e5ce..664890442 100644 --- a/iokit/Kernel/IOInterruptController.cpp +++ b/iokit/Kernel/IOInterruptController.cpp @@ -317,7 +317,9 @@ IOInterruptController::enableInterrupt(IOService *nub, int source) } if (vector->interruptDisabledHard) { vector->interruptDisabledHard = 0; - +#if !defined(__i386__) && !defined(__x86_64__) + OSMemoryBarrier(); +#endif enableVector(vectorNumber, vector); } } diff --git a/iokit/Kernel/IOKitDebug.cpp b/iokit/Kernel/IOKitDebug.cpp index 69e82fbec..9cf95e501 100644 --- a/iokit/Kernel/IOKitDebug.cpp +++ b/iokit/Kernel/IOKitDebug.cpp @@ -874,6 +874,10 @@ IOTrackingLeakScan(void * refcon) for (ptrIdx = 0; ptrIdx < (page_size / sizeof(uintptr_t)); ptrIdx++) { ptr = ((uintptr_t *)vphysaddr)[ptrIdx]; +#if defined(HAS_APPLE_PAC) + // strip possible ptrauth signature from candidate data pointer + ptr = (uintptr_t)ptrauth_strip((void*)ptr, ptrauth_key_process_independent_data); +#endif /* defined(HAS_APPLE_PAC) */ for (lim = count, baseIdx = 0; lim; lim >>= 1) { inst = instances[baseIdx + (lim >> 1)]; diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 0920486da..624d7a812 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -1349,7 +1349,8 @@ static const OSSymbol * gIOPMUserIsActiveKey; // //****************************************************************************** -#define kRootDomainSettingsCount 17 +#define kRootDomainSettingsCount 19 +#define kRootDomainNoPublishSettingsCount 3 bool 
IOPMrootDomain::start( IOService * nub ) @@ -1400,7 +1401,16 @@ IOPMrootDomain::start( IOService * nub ) OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey), OSSymbol::withCString(kIOPMSettingGraphicsSwitchKey), OSSymbol::withCString(kIOPMStateConsoleShutdown), - gIOPMSettingSilentRunningKey + OSSymbol::withCString(kIOPMSettingProModeControl), + OSSymbol::withCString(kIOPMSettingProModeDefer), + gIOPMSettingSilentRunningKey, + }; + + const OSSymbol *noPublishSettingsArr[kRootDomainNoPublishSettingsCount] = + { + OSSymbol::withCString(kIOPMSettingProModeControl), + OSSymbol::withCString(kIOPMSettingProModeDefer), + gIOPMSettingSilentRunningKey, }; PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags)); @@ -1518,7 +1528,9 @@ IOPMrootDomain::start( IOService * nub ) // List of PM settings that should not automatically publish itself // as a feature when registered by a listener. noPublishPMSettings = OSArray::withObjects( - (const OSObject **) &gIOPMSettingSilentRunningKey, 1, 0); + (const OSObject **)noPublishSettingsArr, + kRootDomainNoPublishSettingsCount, + 0); fPMSettingsDict = OSDictionary::withCapacity(5); preventIdleSleepList = OSSet::withCapacity(8); @@ -7883,6 +7895,18 @@ IOPMrootDomain::handlePowerNotification( UInt32 msg ) evaluatePolicy( kStimulusDarkWakeEvaluate ); } } + + if (msg & kIOPMProModeEngaged) { + int newState = 1; + DLOG("ProModeEngaged\n"); + messageClient(kIOPMMessageProModeStateChange, systemCapabilityNotifier, &newState, sizeof(newState)); + } + + if (msg & kIOPMProModeDisengaged) { + int newState = 0; + DLOG("ProModeDisengaged\n"); + messageClient(kIOPMMessageProModeStateChange, systemCapabilityNotifier, &newState, sizeof(newState)); + } } //****************************************************************************** diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp index 7e5abfb41..bd20598ef 100644 --- a/iokit/Kernel/IOService.cpp +++ b/iokit/Kernel/IOService.cpp @@ -3023,8 +3023,12 @@ 
IOService::terminateWorker( IOOptionBits options ) } if (doPhase2) { if (kIOServiceNeedWillTerminate & victim->__state[1]) { - _workLoopAction((IOWorkLoop::Action) &actionWillStop, - victim, (void *)(uintptr_t) options, NULL ); + if (NULL == victim->reserved->uvars) { + _workLoopAction((IOWorkLoop::Action) &actionWillStop, + victim, (void *)(uintptr_t) options); + } else { + actionWillStop(victim, options, NULL, NULL, NULL); + } } OSArray * notifiers; diff --git a/iokit/Kernel/IOUserServer.cpp b/iokit/Kernel/IOUserServer.cpp index 4ad8eb5bd..a436a8cb2 100644 --- a/iokit/Kernel/IOUserServer.cpp +++ b/iokit/Kernel/IOUserServer.cpp @@ -453,6 +453,7 @@ IMPL(IOBufferMemoryDescriptor, Create) return kIOReturnBadArgument; } options &= kIOMemoryDirectionOutIn; + options |= kIOMemoryKernelUserShared; bmd = IOBufferMemoryDescriptor::inTaskWithOptions( kernel_task, options, capacity, alignment); diff --git a/libkern/os/reason_private.h b/libkern/os/reason_private.h index a4b9b1c3f..46aaceb9e 100644 --- a/libkern/os/reason_private.h +++ b/libkern/os/reason_private.h @@ -38,6 +38,7 @@ OS_ENUM(os_reason_libsystem_code, uint64_t, OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK = 1, OS_REASON_LIBSYSTEM_CODE_FAULT = 2, /* generated by os_log_fault */ OS_REASON_LIBSYSTEM_CODE_SECINIT_INITIALIZER = 3, + OS_REASON_LIBSYSTEM_CODE_PTHREAD_CORRUPTION = 4, ); #ifndef KERNEL diff --git a/libsyscall/wrappers/persona.c b/libsyscall/wrappers/persona.c index 513543952..b4e12fba6 100644 --- a/libsyscall/wrappers/persona.c +++ b/libsyscall/wrappers/persona.c @@ -55,18 +55,8 @@ kpersona_dealloc(uid_t id) int kpersona_get(uid_t *id) { - /* persona is a process-static identifier: cache it in a global */ - static uid_t p_id = PERSONA_ID_NONE; - if (p_id == PERSONA_ID_NONE) { - int ret = 0; - size_t idlen = 1; - ret = __persona(PERSONA_OP_GET, 0, NULL, &p_id, &idlen, NULL); - if (ret != 0) { - return ret; - } - } - *id = p_id; - return 0; + size_t idlen = 1; + return __persona(PERSONA_OP_GET, 0, 
NULL, id, &idlen, NULL); } int diff --git a/osfmk/arm/cswitch.s b/osfmk/arm/cswitch.s index 7851e0ed3..6f4d332fc 100644 --- a/osfmk/arm/cswitch.s +++ b/osfmk/arm/cswitch.s @@ -87,7 +87,7 @@ LEXT(machine_load_context) and r2, r2, #3 // Extract cpu number orr r1, r1, r2 // mcr p15, 0, r1, c13, c0, 3 // Write TPIDRURO - ldr r1, [r0, TH_CTH_DATA] + mov r1, #0 mcr p15, 0, r1, c13, c0, 2 // Write TPIDRURW mov r7, #0 // Clear frame pointer ldr r3, [r0, TH_KSTACKPTR] // Get kernel stack top @@ -146,7 +146,8 @@ LEXT(Switch_context) bne switch_threads // No need to save GPR/NEON state if we are #if __ARM_VFP__ mov r1, r2 // r2 will be clobbered by the save, so preserve it - add r3, r0, ACT_KVFP // Get the kernel VFP save area for the old thread... + ldr r3, [r0, TH_KSTACKPTR] // Get old kernel stack top + add r3, r3, SS_KVFP // Get the kernel VFP save area for the old thread... save_vfp_registers // ...and save our VFP state to it mov r2, r1 // Restore r2 (the new thread pointer) #endif /* __ARM_VFP__ */ @@ -161,13 +162,14 @@ switch_threads: and r5, r5, #3 // Extract cpu number orr r6, r6, r5 mcr p15, 0, r6, c13, c0, 3 // Write TPIDRURO - ldr r6, [r2, TH_CTH_DATA] + mov r6, #0 mcr p15, 0, r6, c13, c0, 2 // Write TPIDRURW load_reg: add r3, r3, SS_R4 ldmia r3!, {r4-r14} // Restore new thread status #if __ARM_VFP__ - add r3, r2, ACT_KVFP // Get the kernel VFP save area for the new thread... + ldr r3, [r2, TH_KSTACKPTR] // get kernel stack top + add r3, r3, SS_KVFP // Get the kernel VFP save area for the new thread... load_vfp_registers // ...and load the saved state #endif /* __ARM_VFP__ */ bx lr // Return @@ -183,7 +185,8 @@ load_reg: LEXT(Shutdown_context) mrc p15, 0, r9, c13, c0, 4 // Read TPIDRPRW #if __ARM_VFP__ - add r3, r9, ACT_KVFP // Get the kernel VFP save area for the current thread... + ldr r3, [r9, TH_KSTACKPTR] // get kernel stack top + add r3, r3, SS_KVFP // Get the kernel VFP save area for the current thread... 
save_vfp_registers // ...and save our VFP state to it #endif ldr r3, [r9, TH_KSTACKPTR] // Get kernel stack top @@ -207,7 +210,8 @@ LEXT(Idle_context) mrc p15, 0, r9, c13, c0, 4 // Read TPIDRPRW #if __ARM_VFP__ - add r3, r9, ACT_KVFP // Get the kernel VFP save area for the current thread... + ldr r3, [r9, TH_KSTACKPTR] // get kernel stack top + add r3, r3, SS_KVFP // Get the kernel VFP save area for the current thread... save_vfp_registers // ...and save our VFP state to it #endif ldr r3, [r9, TH_KSTACKPTR] // Get kernel stack top @@ -233,7 +237,8 @@ LEXT(Idle_load_context) add r3, r3, SS_R4 ldmia r3!, {r4-r14} // Restore new thread status #if __ARM_VFP__ - add r3, r9, ACT_KVFP // Get the kernel VFP save area for the current thread... + ldr r3, [r9, TH_KSTACKPTR] // get kernel stack top + add r3, r3, SS_KVFP // Get the kernel VFP save area for the current thread... load_vfp_registers // ...and load the saved state #endif bx lr // Return diff --git a/osfmk/arm/genassym.c b/osfmk/arm/genassym.c index 5ebbf990b..585d71303 100644 --- a/osfmk/arm/genassym.c +++ b/osfmk/arm/genassym.c @@ -131,11 +131,9 @@ main( DECLARE("ACT_TASK", offsetof(struct thread, task)); DECLARE("ACT_PCBDATA", offsetof(struct thread, machine.PcbData)); #if __ARM_VFP__ - DECLARE("ACT_UVFP", offsetof(struct thread, machine.uVFPdata)); - DECLARE("ACT_KVFP", offsetof(struct thread, machine.kVFPdata)); + DECLARE("ACT_UVFP", offsetof(struct thread, machine.PcbData.VFPdata)); #endif DECLARE("TH_CTH_SELF", offsetof(struct thread, machine.cthread_self)); - DECLARE("TH_CTH_DATA", offsetof(struct thread, machine.cthread_data)); DECLARE("ACT_PCBDATA_PC", offsetof(struct thread, machine.PcbData.pc)); DECLARE("ACT_PCBDATA_R0", offsetof(struct thread, machine.PcbData.r[0])); DECLARE("ACT_PREEMPT_CNT", offsetof(struct thread, machine.preemption_count)); @@ -176,6 +174,7 @@ main( DECLARE("SS_EXC", offsetof(struct arm_saved_state, exception)); #if __ARM_VFP__ + DECLARE("SS_KVFP", offsetof(struct arm_saved_state, 
VFPdata)); DECLARE("VSS_SIZE", sizeof(struct arm_vfpsaved_state)); DECLARE("VSS_FPSCR", offsetof(struct arm_vfpsaved_state, fpscr)); DECLARE("VSS_FPEXC", offsetof(struct arm_vfpsaved_state, fpexc)); diff --git a/osfmk/arm/machine_routines_asm.s b/osfmk/arm/machine_routines_asm.s index 7b7f41411..9d1896393 100644 --- a/osfmk/arm/machine_routines_asm.s +++ b/osfmk/arm/machine_routines_asm.s @@ -41,7 +41,7 @@ LEXT(machine_set_current_thread) and r2, r2, #3 // Extract cpu number orr r1, r1, r2 // mcr p15, 0, r1, c13, c0, 3 // Write TPIDRURO - ldr r1, [r0, TH_CTH_DATA] + mov r1, #0 mcr p15, 0, r1, c13, c0, 2 // Write TPIDRURW bx lr diff --git a/osfmk/arm/pcb.c b/osfmk/arm/pcb.c index c03e518b6..b78a1be8e 100644 --- a/osfmk/arm/pcb.c +++ b/osfmk/arm/pcb.c @@ -143,7 +143,6 @@ machine_thread_create( } thread->machine.preemption_count = 0; thread->machine.cthread_self = 0; - thread->machine.cthread_data = 0; #if __ARM_USER_PROTECT__ { struct pmap *new_pmap = vm_map_pmap(task->map); @@ -252,6 +251,7 @@ machine_stack_attach( savestate->r[7] = 0x0UL; savestate->r[9] = (uint32_t) NULL; savestate->cpsr = PSR_SVC_MODE | PSR_INTMASK; + vfp_state_initialize(&savestate->VFPdata); machine_stack_attach_kprintf("thread = %x pc = %x, sp = %x\n", thread, savestate->lr, savestate->sp); } diff --git a/osfmk/arm/pmap.c b/osfmk/arm/pmap.c index 93921c0eb..92b3562d7 100644 --- a/osfmk/arm/pmap.c +++ b/osfmk/arm/pmap.c @@ -1395,11 +1395,15 @@ static void pmap_tte_deallocate( #ifdef __ARM64_PMAP_SUBPAGE_L1__ #if (__ARM_VMSA__ <= 7) #error This is not supported for old-style page tables -#endif +#endif /* (__ARM_VMSA__ <= 7) */ #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t)) -#else +#else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */ +#if (__ARM_VMSA__ <= 7) +#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2) +#else /* (__ARM_VMSA__ > 7) */ #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES) -#endif +#endif /* (__ARM_VMSA__ > 7) */ +#endif /* 
!defined(__ARM64_PMAP_SUBPAGE_L1__) */ const unsigned int arm_hardware_page_size = ARM_PGBYTES; const unsigned int arm_pt_desc_size = sizeof(pt_desc_t); @@ -3458,15 +3462,10 @@ pmap_bootstrap( #if (__ARM_VMSA__ == 7) kernel_pmap->tte_index_max = 4 * NTTES; #endif - kernel_pmap->prev_tte = (tt_entry_t *) NULL; kernel_pmap->hw_asid = 0; kernel_pmap->sw_asid = 0; PMAP_LOCK_INIT(kernel_pmap); -#if (__ARM_VMSA__ == 7) - simple_lock_init(&kernel_pmap->tt1_lock, 0); - kernel_pmap->cpu_ref = 0; -#endif memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats)); /* allocate space for and initialize the bookkeeping structures */ @@ -3887,13 +3886,14 @@ pmap_create_options_internal( p->ledger = ledger; PMAP_LOCK_INIT(p); -#if (__ARM_VMSA__ == 7) - simple_lock_init(&p->tt1_lock, 0); - p->cpu_ref = 0; -#endif memset((void *) &p->stats, 0, sizeof(p->stats)); p->tt_entry_free = (tt_entry_t *)0; + tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t); + +#if (__ARM_VMSA__ == 7) + p->tte_index_max = tte_index_max; +#endif p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0); if (!(p->tte)) { @@ -3903,13 +3903,6 @@ pmap_create_options_internal( p->ttep = ml_static_vtop((vm_offset_t)p->tte); PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep); -#if (__ARM_VMSA__ == 7) - tte_index_max = p->tte_index_max = NTTES; -#else - tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t)); -#endif - p->prev_tte = (tt_entry_t *) NULL; - /* nullify the translation table */ for (i = 0; i < tte_index_max; i++) { p->tte[i] = ARM_TTE_TYPE_FAULT; @@ -4088,15 +4081,6 @@ pmap_destroy_internal( queue_remove(&map_pmap_list, pmap, pmap_t, pmaps); pmap_simple_unlock(&pmaps_lock); -#if (__ARM_VMSA__ == 7) - if (pmap->cpu_ref != 0) { - panic("%s: cpu_ref=%u, " - "pmap=%p", - __FUNCTION__, pmap->cpu_ref, - pmap); - } -#endif /* (__ARM_VMSA__ == 7) */ - pmap_trim_self(pmap); /* @@ -4144,13 +4128,6 @@ pmap_destroy_internal( 
pmap->ttep = 0; } -#if (__ARM_VMSA__ == 7) - if (pmap->prev_tte) { - pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0); - pmap->prev_tte = (tt_entry_t *) NULL; - } -#endif /* (__ARM_VMSA__ == 7) */ - assert((tt_free_entry_t*)pmap->tt_entry_free == NULL); pmap_get_pt_ops(pmap)->flush_tlb_async(pmap); @@ -5210,10 +5187,7 @@ pmap_switch_internal( asid_index >>= 1; #endif -#if (__ARM_VMSA__ == 7) - assert(not_in_kdp); - pmap_simple_lock(&pmap->tt1_lock); -#else +#if (__ARM_VMSA__ > 7) pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap; #endif @@ -5257,10 +5231,6 @@ pmap_switch_internal( os_atomic_inc(&pmap_asid_flushes, relaxed); #endif } - -#if (__ARM_VMSA__ == 7) - pmap_simple_unlock(&pmap->tt1_lock); -#endif } void @@ -7046,65 +7016,20 @@ pmap_expand( tt_entry_t *tt_p; unsigned int i; - while (tte_index(pmap, pt_attr, v) >= pmap->tte_index_max) { - tte_p = pmap_tt1_allocate(pmap, 2 * ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0)); - if (tte_p == (tt_entry_t *)0) { - return KERN_RESOURCE_SHORTAGE; - } - - PMAP_LOCK(pmap); - if (pmap->tte_index_max > NTTES) { - pmap_tt1_deallocate(pmap, tte_p, 2 * ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK); - PMAP_UNLOCK(pmap); - break; - } - - pmap_simple_lock(&pmap->tt1_lock); - for (i = 0; i < pmap->tte_index_max; i++) { - tte_p[i] = pmap->tte[i]; - } - for (i = NTTES; i < 2 * NTTES; i++) { - tte_p[i] = ARM_TTE_TYPE_FAULT; - } - - FLUSH_PTE_RANGE(tte_p, tte_p + (2 * NTTES)); // DMB - - /* Order is important here, so that pmap_switch_user_ttb() sees things - * in the correct sequence. - * --update of pmap->tte[p] must happen prior to updating pmap->tte_index_max, - * separated by at least a DMB, so that context switch does not see a 1 GB - * L1 table with a 2GB size. 
- * --update of pmap->tte[p] must also happen prior to setting pmap->prev_tte, - * separated by at least a DMB, so that context switch does not see an L1 - * table to be freed without also seeing its replacement.*/ - - tt_entry_t *prev_tte = pmap->tte; - - pmap->tte = tte_p; - pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte); - - __builtin_arm_dmb(DMB_ISH); - - pmap->tte_index_max = 2 * NTTES; - pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed); - - for (i = 0; i < NTTES; i++) { - prev_tte[i] = ARM_TTE_TYPE_FAULT; - } - - /* We need a strong flush here because a TLB flush will be - * issued from pmap_switch_user_ttb() as soon as this pmap - * is no longer active on any CPU. We need to ensure all - * prior stores to the TTE region have retired before that. */ - FLUSH_PTE_RANGE_STRONG(prev_tte, prev_tte + NTTES); // DSB - pmap->prev_tte = prev_tte; +#if DEVELOPMENT || DEBUG + /* + * We no longer support root level expansion; panic in case something + * still attempts to trigger it. + */ + i = tte_index(pmap, pt_attr, v); - pmap_simple_unlock(&pmap->tt1_lock); - PMAP_UNLOCK(pmap); - if (current_pmap() == pmap) { - pmap_set_pmap(pmap, current_thread()); - } + if (i >= pmap->tte_index_max) { + panic("%s: index out of range, index=%u, max=%u, " + "pmap=%p, addr=%p, options=%u, level=%u", + __func__, i, pmap->tte_index_max, + pmap, (void *)v, options, level); } +#endif /* DEVELOPMENT || DEBUG */ if (level == 1) { return KERN_SUCCESS; @@ -7823,33 +7748,8 @@ pmap_switch_user_ttb_internal( cpu_data_ptr = pmap_get_cpu_data(); #if (__ARM_VMSA__ == 7) - - if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL) - && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) { - unsigned int c; - tt_entry_t *tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte; - - c = os_atomic_dec(&cpu_data_ptr->cpu_user_pmap->cpu_ref, acq_rel); - if ((c == 0) && (tt_entry != NULL)) { - /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it. 
- * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/ - - cpu_data_ptr->cpu_user_pmap->prev_tte = NULL; -#if !__ARM_USER_PROTECT__ - set_mmu_ttb(kernel_pmap->ttep); - set_context_id(kernel_pmap->hw_asid); -#endif - /* Now that we can guarantee the old 1-page L1 table is no longer active on any CPU, - * flush any cached intermediate translations that may point to it. Note that to be truly - * safe from prefetch-related issues, this table PA must have been cleared from TTBR0 prior - * to this call. __ARM_USER_PROTECT__ effectively guarantees that for all current configurations.*/ - flush_mmu_tlb_asid(cpu_data_ptr->cpu_user_pmap->hw_asid); - pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK); - } - } cpu_data_ptr->cpu_user_pmap = pmap; cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp; - os_atomic_inc(&pmap->cpu_ref, acq_rel); #if MACH_ASSERT && __ARM_USER_PROTECT__ { @@ -10429,7 +10329,7 @@ pmap_max_32bit_offset( if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) { max_offset_ret = arm_pmap_max_offset_default; } else if (option == ARM_PMAP_MAX_OFFSET_MIN) { - max_offset_ret = 0x66000000; + max_offset_ret = 0x80000000; } else if (option == ARM_PMAP_MAX_OFFSET_MAX) { max_offset_ret = VM_MAX_ADDRESS; } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) { @@ -10438,7 +10338,7 @@ pmap_max_32bit_offset( } else if (max_mem > 0x20000000) { max_offset_ret = 0x80000000; } else { - max_offset_ret = 0x66000000; + max_offset_ret = 0x80000000; } } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) { max_offset_ret = 0x80000000; diff --git a/osfmk/arm/pmap.h b/osfmk/arm/pmap.h index 3d45185eb..92d4a167a 100644 --- a/osfmk/arm/pmap.h +++ b/osfmk/arm/pmap.h @@ -276,8 +276,8 @@ extern pmap_paddr_t mmu_uvtop(vm_offset_t va); #define PMAP_GC_WAIT 2 #if DEVELOPMENT || DEBUG -#define pmap_cs_log(msg, args...) printf("PMAP_CS: " msg "\n", args) #define pmap_cs_log_h(msg, args...) 
{ if(pmap_cs_log_hacks) printf("PMAP_CS: " msg "\n", args); } +#define pmap_cs_log pmap_cs_log_h #define PMAP_CS_EXCEPTION_LIST_HACK 1 @@ -309,7 +309,6 @@ struct pmap { struct pmap_statistics stats; /* map statistics */ queue_chain_t pmaps; /* global list of pmaps */ tt_entry_t *tt_entry_free; /* free translation table entries */ - tt_entry_t *prev_tte; /* previous translation table */ struct pmap *nested_pmap; /* nested pmap */ vm_map_address_t nested_region_grand_addr; vm_map_address_t nested_region_subord_addr; @@ -319,8 +318,6 @@ struct pmap { unsigned int *nested_region_asid_bitmap; #if (__ARM_VMSA__ <= 7) - decl_simple_lock_data(, tt1_lock); /* lock on tt1 */ - unsigned int cpu_ref; /* number of cpus using pmap */ unsigned int tte_index_max; /* max tte index in translation table entries */ #endif diff --git a/osfmk/arm/status.c b/osfmk/arm/status.c index bdfcf5a6b..2f35514a3 100644 --- a/osfmk/arm/status.c +++ b/osfmk/arm/status.c @@ -511,8 +511,7 @@ machine_thread_state_initialize( savestate->cpsr = PSR_USERDFLT; #if __ARM_VFP__ - vfp_state_initialize(&thread->machine.uVFPdata); - vfp_state_initialize(&thread->machine.kVFPdata); + vfp_state_initialize(&thread->machine.PcbData.VFPdata); #endif thread->machine.DebugData = NULL; @@ -561,15 +560,14 @@ machine_thread_dup( #endif target->machine.cthread_self = self->machine.cthread_self; - target->machine.cthread_data = self->machine.cthread_data; self_saved_state = &self->machine.PcbData; target_saved_state = &target->machine.PcbData; bcopy(self_saved_state, target_saved_state, sizeof(struct arm_saved_state)); #if __ARM_VFP__ - self_vfp_state = &self->machine.uVFPdata; - target_vfp_state = &target->machine.uVFPdata; + self_vfp_state = &self->machine.PcbData.VFPdata; + target_vfp_state = &target->machine.PcbData.VFPdata; bcopy(self_vfp_state, target_vfp_state, sizeof(struct arm_vfpsaved_state)); #endif @@ -626,7 +624,7 @@ struct arm_vfpsaved_state * find_user_vfp( thread_t thread) { - return 
&thread->machine.uVFPdata; + return &thread->machine.PcbData.VFPdata; } #endif /* __ARM_VFP__ */ diff --git a/osfmk/arm/thread.h b/osfmk/arm/thread.h index f17ae451d..3782d0f26 100644 --- a/osfmk/arm/thread.h +++ b/osfmk/arm/thread.h @@ -69,22 +69,6 @@ #include #endif -#if __ARM_VFP__ - -#define VFPSAVE_ALIGN 16 -#define VFPSAVE_ATTRIB __attribute__((aligned (VFPSAVE_ALIGN))) -#define THREAD_ALIGN VFPSAVE_ALIGN - -/* - * vector floating point saved state - */ -struct arm_vfpsaved_state { - uint32_t r[64]; - uint32_t fpscr; - uint32_t fpexc; -}; -#endif - struct perfcontrol_state { uint64_t opaque[8] __attribute__((aligned(8))); }; @@ -103,26 +87,31 @@ typedef struct arm_saved_state machine_thread_kernel_state; #include struct machine_thread { +#if __ARM_USER_PROTECT__ + unsigned int uptw_ttc; + unsigned int uptw_ttb; + unsigned int kptw_ttb; + unsigned int asid; +#endif + #if __arm64__ arm_context_t * contextData; /* allocated user context */ arm_saved_state_t * upcb; /* pointer to user GPR state */ arm_neon_saved_state_t * uNeon; /* pointer to user VFP state */ #elif __arm__ struct arm_saved_state PcbData; -#if __ARM_VFP__ - struct arm_vfpsaved_state uVFPdata VFPSAVE_ATTRIB; - struct arm_vfpsaved_state kVFPdata VFPSAVE_ATTRIB; -#endif /* __ARM_VFP__ */ - #else #error Unknown arch #endif -#if __ARM_USER_PROTECT__ - unsigned int uptw_ttc; - unsigned int uptw_ttb; - unsigned int kptw_ttb; - unsigned int asid; +#if defined(__arm__) && defined(__ARM_VFP__) + // for packing reasons cthread_self and DebugData + // are inside the PcbData when __ARM_VFP__ is set +#define DebugData PcbData.VFPpadding_DebugData +#define cthread_self PcbData.VFPpadding_cthread_self +#else + arm_debug_state_t *DebugData; + vm_address_t cthread_self; /* for use of cthread package */ #endif vm_offset_t kstackptr; /* top of kernel stack */ @@ -139,10 +128,6 @@ struct machine_thread { uint8_t machine_thread_flags; #endif /* __ARM_SMP__ */ - arm_debug_state_t * DebugData; - mach_vm_address_t
cthread_self; /* for use of cthread package */ - mach_vm_address_t cthread_data; /* for use of cthread package */ - struct perfcontrol_state perfctrl_state; #if __arm64__ uint64_t energy_estimate_nj; diff --git a/osfmk/arm64/cswitch.s b/osfmk/arm64/cswitch.s index 06aeca99e..48e0879b1 100644 --- a/osfmk/arm64/cswitch.s +++ b/osfmk/arm64/cswitch.s @@ -151,8 +151,7 @@ and $2, $2, #(MACHDEP_CPUNUM_MASK) orr $2, $1, $2 // Save new cthread/cpu to TPIDRRO_EL0 msr TPIDRRO_EL0, $2 - ldr $1, [$0, TH_CTH_DATA] // Get new cthread data pointer - msr TPIDR_EL0, $1 // Save data pointer to TPIDRRW_EL0 + msr TPIDR_EL0, xzr /* ARM64_TODO Reserve x18 until we decide what to do with it */ mov x18, $1 // ... and trash reserved x18 .endmacro diff --git a/osfmk/arm64/genassym.c b/osfmk/arm64/genassym.c index 8dfdecdda..c47c6ab1a 100644 --- a/osfmk/arm64/genassym.c +++ b/osfmk/arm64/genassym.c @@ -122,7 +122,6 @@ main(int argc, /* These fields are being added on demand */ DECLARE("ACT_CONTEXT", offsetof(struct thread, machine.contextData)); DECLARE("TH_CTH_SELF", offsetof(struct thread, machine.cthread_self)); - DECLARE("TH_CTH_DATA", offsetof(struct thread, machine.cthread_data)); DECLARE("ACT_PREEMPT_CNT", offsetof(struct thread, machine.preemption_count)); DECLARE("ACT_CPUDATAP", offsetof(struct thread, machine.CpuDatap)); DECLARE("ACT_DEBUGDATA", offsetof(struct thread, machine.DebugData)); diff --git a/osfmk/arm64/locore.s b/osfmk/arm64/locore.s index 5edaf67f1..f9162a819 100644 --- a/osfmk/arm64/locore.s +++ b/osfmk/arm64/locore.s @@ -904,8 +904,7 @@ exception_return_unint_tpidr_x3: mov sp, x21 // Reload the pcb pointer /* ARM64_TODO Reserve x18 until we decide what to do with it */ - ldr x0, [x3, TH_CTH_DATA] // Load cthread data pointer - str x0, [sp, SS64_X18] // and use it to trash x18 + str xzr, [sp, SS64_X18] #if __ARM_KERNEL_PROTECT__ /* diff --git a/osfmk/arm64/loose_ends.c b/osfmk/arm64/loose_ends.c index 495cc7c03..8b7ea91f4 100644 --- a/osfmk/arm64/loose_ends.c +++ 
b/osfmk/arm64/loose_ends.c @@ -185,6 +185,8 @@ bzero_phys_nc(addr64_t src64, vm_size_t bytes) bzero_phys(src64, bytes); } +extern void *secure_memset(void *, int, size_t); + /* Zero bytes starting at a physical address */ void bzero_phys(addr64_t src, vm_size_t bytes) @@ -202,15 +204,14 @@ bzero_phys(addr64_t src, vm_size_t bytes) boolean_t use_copy_window = !pmap_valid_address(src); pn = (ppnum_t)(src >> PAGE_SHIFT); + wimg_bits = pmap_cache_attributes(pn); #if !defined(__ARM_COHERENT_IO__) && !__ARM_PTE_PHYSMAP__ count = PAGE_SIZE - offset; - wimg_bits = pmap_cache_attributes(pn); if ((wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) { use_copy_window = TRUE; } #else if (use_copy_window) { - wimg_bits = pmap_cache_attributes(pn); count = PAGE_SIZE - offset; } #endif @@ -229,7 +230,17 @@ bzero_phys(addr64_t src, vm_size_t bytes) count = bytes; } - bzero(buf, count); + switch (wimg_bits & VM_WIMG_MASK) { + case VM_WIMG_DEFAULT: + case VM_WIMG_WCOMB: + case VM_WIMG_INNERWBACK: + case VM_WIMG_WTHRU: + bzero(buf, count); + break; + default: + /* 'dc zva' performed by bzero is not safe for device memory */ + secure_memset((void*)buf, 0, count); + } if (use_copy_window) { pmap_unmap_cpu_windows_copy(index); diff --git a/osfmk/arm64/machine_routines_asm.s b/osfmk/arm64/machine_routines_asm.s index 64fd61152..5dc6cde73 100644 --- a/osfmk/arm64/machine_routines_asm.s +++ b/osfmk/arm64/machine_routines_asm.s @@ -195,7 +195,6 @@ LEXT(set_mmu_ttb) LEXT(set_aux_control) msr ACTLR_EL1, x0 // Synchronize system - dsb sy isb sy ret diff --git a/osfmk/arm64/pcb.c b/osfmk/arm64/pcb.c index 4303f45fe..3bf15f95a 100644 --- a/osfmk/arm64/pcb.c +++ b/osfmk/arm64/pcb.c @@ -158,7 +158,6 @@ machine_thread_create(thread_t thread, } thread->machine.preemption_count = 0; thread->machine.cthread_self = 0; - thread->machine.cthread_data = 0; #if defined(HAS_APPLE_PAC) thread->machine.rop_pid = task->rop_pid; thread->machine.disable_user_jop = task->disable_user_jop; diff --git 
a/osfmk/arm64/platform_tests.c b/osfmk/arm64/platform_tests.c index 9026e45f1..8523c57ab 100644 --- a/osfmk/arm64/platform_tests.c +++ b/osfmk/arm64/platform_tests.c @@ -270,42 +270,6 @@ lt_upgrade_downgrade_rw() lck_rw_done(&lt_rwlock); } -const int limit = 1000000; -static int lt_stress_local_counters[MAX_CPUS]; - -static void -lt_stress_hw_lock() -{ - int local_counter = 0; - - uint cpuid = current_processor()->cpu_id; - - kprintf("%s>cpu %d starting\n", __FUNCTION__, cpuid); - - hw_lock_lock(&lt_hw_lock, LCK_GRP_NULL); - lt_counter++; - local_counter++; - hw_lock_unlock(&lt_hw_lock); - - while (lt_counter < lt_target_done_threads) { - ; - } - - kprintf("%s>cpu %d started\n", __FUNCTION__, cpuid); - - while (lt_counter < limit) { - hw_lock_lock(&lt_hw_lock, LCK_GRP_NULL); - if (lt_counter < limit) { - lt_counter++; - local_counter++; - } - hw_lock_unlock(&lt_hw_lock); - } - - lt_stress_local_counters[cpuid] = local_counter; - - kprintf("%s>final counter %d cpu %d incremented the counter %d times\n", __FUNCTION__, lt_counter, cpuid, local_counter); -} static void lt_grab_hw_lock() @@ -595,29 +559,6 @@ lt_thread(void *arg, wait_result_t wres __unused) OSIncrementAtomic((volatile SInt32*) &lt_done_threads); } -static void -lt_bound_thread(void *arg, wait_result_t wres __unused) -{ - void (*func)(void) = (void (*)(void))arg; - - int cpuid = OSIncrementAtomic((volatile SInt32 *)&lt_cpu_bind_id); - - processor_t processor = processor_list; - while ((processor != NULL) && (processor->cpu_id != cpuid)) { - processor = processor->processor_list; - } - - if (processor != NULL) { - thread_bind(processor); - } - - thread_block(THREAD_CONTINUE_NULL); - - func(); - - OSIncrementAtomic((volatile SInt32*) &lt_done_threads); -} - static void lt_start_lock_thread(thread_continue_t func) { @@ -631,18 +572,6 @@ lt_start_lock_thread(thread_continue_t func) } -static void -lt_start_lock_thread_bound(thread_continue_t func) -{ - thread_t thread; - kern_return_t kr; - - kr =
kernel_thread_start(lt_bound_thread, func, &thread); - assert(kr == KERN_SUCCESS); - - thread_deallocate(thread); -} - static kern_return_t lt_test_locks() { @@ -833,29 +762,6 @@ lt_test_locks() lt_wait_for_lock_test_threads(); T_EXPECT_EQ_UINT(lt_counter, LOCK_TEST_ITERATIONS * lt_target_done_threads, NULL); - /* HW locks stress test */ - T_LOG("Running HW locks stress test with hw_lock_lock()"); - extern unsigned int real_ncpus; - lt_reset(); - lt_target_done_threads = real_ncpus; - for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) { - lt_start_lock_thread_bound(lt_stress_hw_lock); - } - lt_wait_for_lock_test_threads(); - bool starvation = false; - uint total_local_count = 0; - for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) { - starvation = starvation || (lt_stress_local_counters[processor->cpu_id] < 10); - total_local_count += lt_stress_local_counters[processor->cpu_id]; - } - if (total_local_count != lt_counter) { - T_FAIL("Lock failure\n"); - } else if (starvation) { - T_FAIL("Lock starvation found\n"); - } else { - T_PASS("HW locks stress test with hw_lock_lock()"); - } - /* HW locks: trylocks */ T_LOG("Running test with hw_lock_try()"); diff --git a/osfmk/arm64/proc_reg.h b/osfmk/arm64/proc_reg.h index 10c7aa567..66a551ffe 100644 --- a/osfmk/arm64/proc_reg.h +++ b/osfmk/arm64/proc_reg.h @@ -1632,6 +1632,9 @@ typedef enum { #define APCTL_EL1_KernKeyEn (1ULL << 2) #endif +#define ACTLR_EL1_DisHWP_OFFSET 3 +#define ACTLR_EL1_DisHWP_MASK (1ULL << ACTLR_EL1_DisHWP_OFFSET) +#define ACTLR_EL1_DisHWP ACTLR_EL1_DisHWP_MASK #if defined(HAS_APPLE_PAC) diff --git a/osfmk/arm64/status.c b/osfmk/arm64/status.c index 41d213e69..28f87b0a1 100644 --- a/osfmk/arm64/status.c +++ b/osfmk/arm64/status.c @@ -1336,7 +1336,6 @@ machine_thread_dup(thread_t self, struct arm_saved_state *target_saved_state; target->machine.cthread_self = self->machine.cthread_self; - 
target->machine.cthread_data = self->machine.cthread_data; self_saved_state = self->machine.upcb; target_saved_state = target->machine.upcb; diff --git a/osfmk/corpses/corpse.c b/osfmk/corpses/corpse.c index 040f331ce..a3d283be4 100644 --- a/osfmk/corpses/corpse.c +++ b/osfmk/corpses/corpse.c @@ -152,6 +152,9 @@ static _Atomic uint32_t inflight_corpses; unsigned long total_corpses_created = 0; boolean_t corpse_enabled_config = TRUE; +/* bootarg to generate corpse with size up to max_footprint_mb */ +boolean_t corpse_threshold_system_limit = FALSE; + /* bootarg to turn on corpse forking for EXC_RESOURCE */ int exc_via_corpse_forking = 1; @@ -189,6 +192,11 @@ corpses_init() if (PE_parse_boot_argn("corpse_for_fatal_memkill", &fatal_memkill, sizeof(fatal_memkill))) { corpse_for_fatal_memkill = fatal_memkill; } +#if DEBUG || DEVELOPMENT + if (PE_parse_boot_argn("-corpse_threshold_system_limit", &corpse_threshold_system_limit, sizeof(corpse_threshold_system_limit))) { + corpse_threshold_system_limit = TRUE; + } +#endif /* DEBUG || DEVELOPMENT */ } /* diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index a479eaea8..1f69f62f3 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -125,6 +125,7 @@ typedef struct { uint64_t plbt[MAX_TRACE_BTFRAMES]; } plrecord_t; +#if DEVELOPMENT || DEBUG typedef enum { IOTRACE_PHYS_READ = 1, IOTRACE_PHYS_WRITE, @@ -145,7 +146,17 @@ typedef struct { uint64_t backtrace[MAX_TRACE_BTFRAMES]; } iotrace_entry_t; -#if DEVELOPMENT || DEBUG +typedef struct { + int vector; /* Vector number of interrupt */ + thread_t curthread; /* Current thread at the time of the interrupt */ + uint64_t interrupted_pc; + int curpl; /* Current preemption level */ + int curil; /* Current interrupt level */ + uint64_t start_time_abs; + uint64_t duration; + uint64_t backtrace[MAX_TRACE_BTFRAMES]; +} traptrace_entry_t; + #define DEFAULT_IOTRACE_ENTRIES_PER_CPU (64) #define IOTRACE_MAX_ENTRIES_PER_CPU (256) extern volatile int mmiotrace_enabled; @@ 
-154,7 +165,14 @@ extern int iotrace_entries_per_cpu; extern int *iotrace_next; extern iotrace_entry_t **iotrace_ring; -extern void init_iotrace_bufs(int cpucnt, int entries_per_cpu); +#define TRAPTRACE_INVALID_INDEX (~0U) +#define DEFAULT_TRAPTRACE_ENTRIES_PER_CPU (16) +#define TRAPTRACE_MAX_ENTRIES_PER_CPU (256) +extern volatile int traptrace_enabled; +extern int traptrace_generators; +extern int traptrace_entries_per_cpu; +extern int *traptrace_next; +extern traptrace_entry_t **traptrace_ring; #endif /* DEVELOPMENT || DEBUG */ /* @@ -490,11 +508,12 @@ current_cpu_datap(void) */ #if DEVELOPMENT || DEBUG static inline void -rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata) +rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata, uint64_t frameptr, bool use_cursp) { extern uint32_t low_intstack[]; /* bottom */ extern uint32_t low_eintstack[]; /* top */ extern char mp_slave_stack[]; + int btidx = 0; uint64_t kstackb, kstackt; @@ -502,16 +521,21 @@ rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata) * element. This will also indicate if we were unable to * trace further up the stack for some reason */ - __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" - : "=m" (rets[0]) - : - : "rax"); - + if (use_cursp) { + __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" + : "=m" (rets[btidx++]) + : + : "rax"); + } thread_t cplthread = cdata->cpu_active_thread; if (cplthread) { uintptr_t csp; - __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):); + if (use_cursp == true) { + __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):); + } else { + csp = frameptr; + } /* Determine which stack we're on to populate stack bounds. * We don't need to trace across stack boundaries for this * routine. 
@@ -539,10 +563,10 @@ rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata) } if (__probable(kstackb && kstackt)) { - uint64_t *cfp = (uint64_t *) __builtin_frame_address(0); + uint64_t *cfp = (uint64_t *) frameptr; int rbbtf; - for (rbbtf = 1; rbbtf < maxframes; rbbtf++) { + for (rbbtf = btidx; rbbtf < maxframes; rbbtf++) { if (((uint64_t)cfp == 0) || (((uint64_t)cfp < kstackb) || ((uint64_t)cfp > kstackt))) { rets[rbbtf] = 0; continue; @@ -577,7 +601,7 @@ pltrace_internal(boolean_t enable) cdata->cpu_plri = cplrecord; - rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata); + rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true); } extern int plctrace_enabled; @@ -610,8 +634,55 @@ iotrace(iotrace_type_e type, uint64_t vaddr, uint64_t paddr, int size, uint64_t iotrace_next[cpu_num] = ((nextidx + 1) >= iotrace_entries_per_cpu) ? 0 : (nextidx + 1); rbtrace_bt(&cur_iotrace_ring[nextidx].backtrace[0], - MAX_TRACE_BTFRAMES - 1, cdata); + MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true); +} + +static inline uint32_t +traptrace_start(int vecnum, uint64_t ipc, uint64_t sabs, uint64_t frameptr) +{ + cpu_data_t *cdata; + int cpu_num, nextidx; + traptrace_entry_t *cur_traptrace_ring; + + if (__improbable(traptrace_enabled == 0 || traptrace_generators == 0)) { + return TRAPTRACE_INVALID_INDEX; + } + + assert(ml_get_interrupts_enabled() == FALSE); + cdata = current_cpu_datap(); + cpu_num = cdata->cpu_number; + nextidx = traptrace_next[cpu_num]; + /* prevent nested interrupts from clobbering this record */ + traptrace_next[cpu_num] = ((nextidx + 1) >= traptrace_entries_per_cpu) ? 
0 : (nextidx + 1); + + cur_traptrace_ring = traptrace_ring[cpu_num]; + + cur_traptrace_ring[nextidx].vector = vecnum; + cur_traptrace_ring[nextidx].curthread = current_thread(); + cur_traptrace_ring[nextidx].interrupted_pc = ipc; + cur_traptrace_ring[nextidx].curpl = cdata->cpu_preemption_level; + cur_traptrace_ring[nextidx].curil = cdata->cpu_interrupt_level; + cur_traptrace_ring[nextidx].start_time_abs = sabs; + cur_traptrace_ring[nextidx].duration = ~0ULL; + + rbtrace_bt(&cur_traptrace_ring[nextidx].backtrace[0], + MAX_TRACE_BTFRAMES - 1, cdata, frameptr, false); + + assert(nextidx <= 0xFFFF); + + return ((unsigned)cpu_num << 16) | nextidx; +} + +static inline void +traptrace_end(uint32_t index, uint64_t eabs) +{ + if (index != TRAPTRACE_INVALID_INDEX) { + traptrace_entry_t *ttentp = &traptrace_ring[index >> 16][index & 0xFFFF]; + + ttentp->duration = eabs - ttentp->start_time_abs; + } } + #endif /* DEVELOPMENT || DEBUG */ static inline void diff --git a/osfmk/i386/cpu_topology.c b/osfmk/i386/cpu_topology.c index 37ce39b2a..7396b1b5f 100644 --- a/osfmk/i386/cpu_topology.c +++ b/osfmk/i386/cpu_topology.c @@ -55,6 +55,7 @@ extern cpu_data_t cpshadows[]; #if DEVELOPMENT || DEBUG void iotrace_init(int ncpus); +void traptrace_init(int ncpus); #endif /* DEVELOPMENT || DEBUG */ @@ -151,6 +152,7 @@ cpu_topology_sort(int ncpus) #if DEVELOPMENT || DEBUG iotrace_init(ncpus); + traptrace_init(ncpus); #endif /* DEVELOPMENT || DEBUG */ /* @@ -316,65 +318,117 @@ int iotrace_entries_per_cpu = 0; int *iotrace_next; iotrace_entry_t **iotrace_ring; -void -init_iotrace_bufs(int cpucnt, int entries_per_cpu) +volatile int traptrace_enabled = 1; +int traptrace_generators = 0; +int traptrace_entries_per_cpu = 0; +int *traptrace_next; +traptrace_entry_t **traptrace_ring; + +static void +init_trace_bufs(int cpucnt, int entries_per_cpu, void ***ring, int entry_size, + int **next_array, int *allocated_entries_per_cpu, int *allocated_generator_count) { int i; - iotrace_next = 
kalloc_tag(cpucnt * sizeof(int), VM_KERN_MEMORY_DIAG); - if (__improbable(iotrace_next == NULL)) { - iotrace_generators = 0; + *next_array = kalloc_tag(cpucnt * sizeof(int), VM_KERN_MEMORY_DIAG); + if (__improbable(*next_array == NULL)) { + *allocated_generator_count = 0; return; } else { - bzero(iotrace_next, cpucnt * sizeof(int)); + bzero(*next_array, cpucnt * sizeof(int)); } - iotrace_ring = kalloc_tag(cpucnt * sizeof(iotrace_entry_t *), VM_KERN_MEMORY_DIAG); - if (__improbable(iotrace_ring == NULL)) { - kfree(iotrace_next, cpucnt * sizeof(int)); - iotrace_generators = 0; + *ring = kalloc_tag(cpucnt * sizeof(void *), VM_KERN_MEMORY_DIAG); + if (__improbable(*ring == NULL)) { + kfree(*next_array, cpucnt * sizeof(int)); + *next_array = NULL; + *allocated_generator_count = 0; return; } for (i = 0; i < cpucnt; i++) { - iotrace_ring[i] = kalloc_tag(entries_per_cpu * sizeof(iotrace_entry_t), VM_KERN_MEMORY_DIAG); - if (__improbable(iotrace_ring[i] == NULL)) { - kfree(iotrace_next, cpucnt * sizeof(int)); - iotrace_next = NULL; + (*ring)[i] = kalloc_tag(entries_per_cpu * entry_size, VM_KERN_MEMORY_DIAG); + if (__improbable((*ring)[i] == NULL)) { + kfree(*next_array, cpucnt * sizeof(int)); + *next_array = NULL; for (int j = 0; j < i; j++) { - kfree(iotrace_ring[j], entries_per_cpu * sizeof(iotrace_entry_t)); + kfree((*ring)[j], entries_per_cpu * entry_size); } - kfree(iotrace_ring, cpucnt * sizeof(iotrace_entry_t *)); - iotrace_ring = NULL; + kfree(*ring, cpucnt * sizeof(void *)); + *ring = NULL; return; } - bzero(iotrace_ring[i], entries_per_cpu * sizeof(iotrace_entry_t)); + bzero((*ring)[i], entries_per_cpu * entry_size); } - iotrace_entries_per_cpu = entries_per_cpu; - iotrace_generators = cpucnt; + *allocated_entries_per_cpu = entries_per_cpu; + *allocated_generator_count = cpucnt; } -void -iotrace_init(int ncpus) + +static void +init_iotrace_bufs(int cpucnt, int entries_per_cpu) { - int iot, epc; - int entries_per_cpu; + init_trace_bufs(cpucnt, entries_per_cpu, 
(void ***)&iotrace_ring, sizeof(iotrace_entry_t), + &iotrace_next, &iotrace_entries_per_cpu, &iotrace_generators); +} + +static void +init_traptrace_bufs(int cpucnt, int entries_per_cpu) +{ + init_trace_bufs(cpucnt, entries_per_cpu, (void ***)&traptrace_ring, sizeof(traptrace_entry_t), + &traptrace_next, &traptrace_entries_per_cpu, &traptrace_generators); +} - if (PE_parse_boot_argn("iotrace", &iot, sizeof(iot))) { - mmiotrace_enabled = iot; +static void +gentrace_configure_from_bootargs(const char *ena_prop, int *ena_valp, const char *epc_prop, + int *epcp, int max_epc, int def_epc, int override) +{ + if (kern_feature_override(override)) { + *ena_valp = 0; } - if (mmiotrace_enabled == 0) { + (void) PE_parse_boot_argn(ena_prop, ena_valp, sizeof(*ena_valp)); + + if (*ena_valp == 0) { return; } - if (PE_parse_boot_argn("iotrace_epc", &epc, sizeof(epc)) && - epc >= 1 && epc <= IOTRACE_MAX_ENTRIES_PER_CPU) { - entries_per_cpu = epc; - } else { - entries_per_cpu = DEFAULT_IOTRACE_ENTRIES_PER_CPU; + if (PE_parse_boot_argn(epc_prop, epcp, sizeof(*epcp)) && + (*epcp < 1 || *epcp > max_epc)) { + *epcp = def_epc; } +} + +void +iotrace_init(int ncpus) +{ + int entries_per_cpu = DEFAULT_IOTRACE_ENTRIES_PER_CPU; + int enable = mmiotrace_enabled; - init_iotrace_bufs(ncpus, entries_per_cpu); + gentrace_configure_from_bootargs("iotrace", &enable, "iotrace_epc", &entries_per_cpu, + IOTRACE_MAX_ENTRIES_PER_CPU, DEFAULT_IOTRACE_ENTRIES_PER_CPU, KF_IOTRACE_OVRD); + + mmiotrace_enabled = enable; + + if (mmiotrace_enabled) { + init_iotrace_bufs(ncpus, entries_per_cpu); + } } + +void +traptrace_init(int ncpus) +{ + int entries_per_cpu = DEFAULT_TRAPTRACE_ENTRIES_PER_CPU; + int enable = traptrace_enabled; + + gentrace_configure_from_bootargs("traptrace", &enable, "traptrace_epc", &entries_per_cpu, + TRAPTRACE_MAX_ENTRIES_PER_CPU, DEFAULT_TRAPTRACE_ENTRIES_PER_CPU, KF_TRAPTRACE_OVRD); + + traptrace_enabled = enable; + + if (traptrace_enabled) { + init_traptrace_bufs(ncpus, 
entries_per_cpu); + } +} + #endif /* DEVELOPMENT || DEBUG */ diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index 0fafb3aad..25a26de3b 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -213,6 +213,7 @@ static cpuid_cache_descriptor_t intel_cpuid_leaf2_descriptor_table[] = { sizeof(cpuid_cache_descriptor_t)) static void do_cwas(i386_cpu_info_t *cpuinfo, boolean_t on_slave); +static void cpuid_do_precpuid_was(void); static inline cpuid_cache_descriptor_t * cpuid_leaf2_find(uint8_t value) @@ -257,6 +258,7 @@ do_cwas(i386_cpu_info_t *cpuinfo, boolean_t on_slave) * enumerated, lest we #GP when forced to access it.) */ if (cpuid_wa_required(CPU_INTEL_TSXFA) == CWA_ON) { + /* This must be executed on all logical processors */ wrmsr64(MSR_IA32_TSX_FORCE_ABORT, rdmsr64(MSR_IA32_TSX_FORCE_ABORT) | MSR_IA32_TSXFA_RTM_FORCE_ABORT); } @@ -897,6 +899,9 @@ cpuid_set_info(void) i386_cpu_info_t *info_p = &cpuid_cpu_info; boolean_t enable_x86_64h = TRUE; + /* Perform pre-cpuid workarounds (since their effects impact values returned via cpuid) */ + cpuid_do_precpuid_was(); + cpuid_set_generic_info(info_p); /* verify we are running on a supported CPU */ @@ -1370,10 +1375,10 @@ cpuid_vmm_family(void) cwa_classifier_e cpuid_wa_required(cpu_wa_e wa) { + i386_cpu_info_t *info_p = &cpuid_cpu_info; static uint64_t bootarg_cpu_wa_enables = 0; static uint64_t bootarg_cpu_wa_disables = 0; static int bootargs_overrides_processed = 0; - i386_cpu_info_t *info_p = &cpuid_cpu_info; if (!bootargs_overrides_processed) { if (!PE_parse_boot_argn("cwae", &bootarg_cpu_wa_enables, sizeof(bootarg_cpu_wa_enables))) { @@ -1420,7 +1425,7 @@ cpuid_wa_required(cpu_wa_e wa) case CPU_INTEL_TSXFA: /* - * If this CPU supports RTM and supports FORCE_ABORT, return that + * Otherwise, if the CPU supports both TSX(HLE) and FORCE_ABORT, return that * the workaround should be enabled. 
*/ if ((info_p->cpuid_leaf7_extfeatures & CPUID_LEAF7_EXTFEATURE_TSXFA) != 0 && @@ -1435,3 +1440,14 @@ cpuid_wa_required(cpu_wa_e wa) return CWA_OFF; } + +static void +cpuid_do_precpuid_was(void) +{ + /* + * Note that care must be taken not to use any data from the cached cpuid data since it is + * likely uninitialized at this point. That includes calling functions that make use of + * that data as well. + */ + +} diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index c80308084..a3a6ad6ee 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -476,7 +476,7 @@ typedef struct { typedef enum { CPU_INTEL_SEGCHK = 1, - CPU_INTEL_TSXFA + CPU_INTEL_TSXFA = 2 } cpu_wa_e; typedef enum { diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 82ab4423f..b05c65b9a 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -1295,7 +1295,7 @@ fpextovrflt(void) intr = ml_set_interrupts_enabled(FALSE); if (get_interrupt_level()) { - panic("FPU segment overrun exception at interrupt context\n"); + panic("FPU segment overrun exception at interrupt context\n"); } if (current_task() == kernel_task) { panic("FPU segment overrun exception in kernel thread context\n"); @@ -1327,12 +1327,6 @@ fpextovrflt(void) if (ifps) { fp_state_free(ifps, xstate); } - - /* - * Raise exception. - */ - i386_exception(EXC_BAD_ACCESS, VM_PROT_READ | VM_PROT_EXECUTE, 0); - /*NOTREACHED*/ } extern void fpxlog(int, uint32_t, uint32_t, uint32_t); @@ -1369,16 +1363,6 @@ fpexterrflt(void) const uint32_t xcpt = ~mask & (ifps->fx_status & (FPS_IE | FPS_DE | FPS_ZE | FPS_OE | FPS_UE | FPS_PE)); fpxlog(EXC_I386_EXTERR, ifps->fx_status, ifps->fx_control, xcpt); - /* - * Raise FPU exception. - * Locking not needed on pcb->ifps, - * since thread is running. 
- */ - i386_exception(EXC_ARITHMETIC, - EXC_I386_EXTERR, - ifps->fx_status); - - /*NOTREACHED*/ } /* @@ -1473,11 +1457,6 @@ fpSSEexterrflt(void) const uint32_t xcpt = ~mask & (ifps->fx_MXCSR & (FPS_IE | FPS_DE | FPS_ZE | FPS_OE | FPS_UE | FPS_PE)); fpxlog(EXC_I386_SSEEXTERR, ifps->fx_MXCSR, ifps->fx_MXCSR, xcpt); - - i386_exception(EXC_ARITHMETIC, - EXC_I386_SSEEXTERR, - ifps->fx_MXCSR); - /*NOTREACHED*/ } @@ -1592,8 +1571,8 @@ fpu_thread_promote_avx512(thread_t thread) * return directly via thread_exception_return(). * Otherwise simply return. */ -#define MAX_X86_INSN_LENGTH (16) -void +#define MAX_X86_INSN_LENGTH (15) +int fpUDflt(user_addr_t rip) { uint8_t instruction_prefix; @@ -1605,7 +1584,7 @@ fpUDflt(user_addr_t rip) * rather than issue multiple copyins */ if (copyin(rip, (char *) &instruction_prefix, 1)) { - return; + return 1; } DBG("fpUDflt(0x%016llx) prefix: 0x%x\n", rip, instruction_prefix); @@ -1624,7 +1603,7 @@ fpUDflt(user_addr_t rip) /* Skip optional prefixes */ rip++; if ((rip - original_rip) > MAX_X86_INSN_LENGTH) { - return; + return 1; } break; case 0x62: /* EVEX */ @@ -1633,7 +1612,7 @@ fpUDflt(user_addr_t rip) is_AVX512_instruction = TRUE; break; default: - return; + return 1; } } while (!is_AVX512_instruction); @@ -1643,7 +1622,7 @@ fpUDflt(user_addr_t rip) * Fail if this machine doesn't support AVX512 */ if (fpu_capability != AVX512) { - return; + return 1; } assert(xgetbv(XCR0) == AVX_XMASK); @@ -1651,8 +1630,7 @@ fpUDflt(user_addr_t rip) DBG("fpUDflt() switching xstate to AVX512\n"); (void) fpu_thread_promote_avx512(current_thread()); - thread_exception_return(); - /* NOT REACHED */ + return 0; } #endif /* !defined(RC_HIDE_XNU_J137) */ diff --git a/osfmk/i386/fpu.h b/osfmk/i386/fpu.h index 7042cea10..542de23eb 100644 --- a/osfmk/i386/fpu.h +++ b/osfmk/i386/fpu.h @@ -143,7 +143,7 @@ extern void fpu_switch_addrmode( extern xstate_t fpu_default; extern xstate_t fpu_capability; extern xstate_t current_xstate(void); -extern void 
fpUDflt(user_addr_t rip); +extern int fpUDflt(user_addr_t rip); #ifdef MACH_KERNEL_PRIVATE extern uint32_t thread_fpsimd_hash(thread_t); extern void vzeroall(void); diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index f1b9f1e12..aef78bcbd 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -854,9 +854,6 @@ do_init_slave(boolean_t fast_restart) #endif /* update CPU microcode */ ucode_update_wake(); - - /* Do CPU workarounds after the microcode update */ - cpuid_do_was(); } else { init_param = FAST_SLAVE_INIT; } diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index cf5c384e4..022491a89 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -568,6 +568,7 @@ __END_DECLS #define MSR_IA32_BBL_CR_CTL 0x119 + #define MSR_IA32_SYSENTER_CS 0x174 #define MSR_IA32_SYSENTER_ESP 0x175 #define MSR_IA32_SYSENTER_EIP 0x176 diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index bfc24c4aa..4231b1f26 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -116,7 +116,6 @@ extern void kprint_state(x86_saved_state64_t *saved_state); /* * Forward declarations */ -static void user_page_fault_continue(kern_return_t kret); static void panic_trap(x86_saved_state64_t *saved_state, uint32_t pl, kern_return_t fault_result) __dead2; static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip); @@ -206,36 +205,6 @@ thread_syscall_return( /*NOTREACHED*/ } - -static inline void -user_page_fault_continue( - kern_return_t kr) -{ - thread_t thread = current_thread(); - user_addr_t vaddr; - - if (thread_is_64bit_addr(thread)) { - x86_saved_state64_t *uregs; - - uregs = USER_REGS64(thread); - - vaddr = (user_addr_t)uregs->cr2; - } else { - x86_saved_state32_t *uregs; - - uregs = USER_REGS32(thread); - - vaddr = uregs->cr2; - } - - - /* PAL debug hook */ - pal_dbg_page_fault( thread, vaddr, kr ); - - i386_exception(EXC_BAD_ACCESS, kr, vaddr); - /*NOTREACHED*/ -} - /* * Fault recovery in copyin/copyout routines. 
*/ @@ -374,6 +343,11 @@ interrupt(x86_saved_state_t *state) user_mode = TRUE; } +#if DEVELOPMENT || DEBUG + uint64_t frameptr = is_saved_state64(state) ? state64->rbp : saved_state32(state)->ebp; + uint32_t traptrace_index = traptrace_start(interrupt_num, rip, mach_absolute_time(), frameptr); +#endif + if (cpu_data_ptr[cnum]->lcpu.package->num_idle == topoParms.nLThreadsPerPackage) { cpu_data_ptr[cnum]->cpu_hwIntpexits[interrupt_num]++; } @@ -492,6 +466,12 @@ interrupt(x86_saved_state_t *state) interrupt_num); assert(ml_get_interrupts_enabled() == FALSE); + +#if DEVELOPMENT || DEBUG + if (traptrace_index != TRAPTRACE_INVALID_INDEX) { + traptrace_end(traptrace_index, mach_absolute_time()); + } +#endif } static inline void @@ -553,6 +533,10 @@ kernel_trap( is_user = (vaddr < VM_MAX_USER_PAGE_ADDRESS); +#if DEVELOPMENT || DEBUG + uint32_t traptrace_index = traptrace_start(type, kern_ip, mach_absolute_time(), saved_state->rbp); +#endif + #if CONFIG_DTRACE /* * Is there a DTrace hook? @@ -562,7 +546,7 @@ kernel_trap( /* * If it succeeds, we are done... */ - return; + goto common_return; } } #endif /* CONFIG_DTRACE */ @@ -578,7 +562,8 @@ kernel_trap( KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE, 0, 0, 0, VM_KERNEL_UNSLIDE(kern_ip), 0); - return; + + goto common_return; } user_addr_t kd_vaddr = is_user ? vaddr : VM_KERNEL_UNSLIDE(vaddr); @@ -675,19 +660,19 @@ kernel_trap( switch (type) { case T_NO_FPU: fpnoextflt(); - return; + goto common_return; case T_FPU_FAULT: fpextovrflt(); - return; + goto common_return; case T_FLOATING_POINT_ERROR: fpexterrflt(); - return; + goto common_return; case T_SSE_FLOAT_ERROR: fpSSEexterrflt(); - return; + goto common_return; case T_INVALID_OPCODE: fpUDflt(kern_ip); @@ -701,7 +686,7 @@ kernel_trap( * This isn't supposed to happen. 
*/ reset_dr7(); - return; + goto common_return; } goto debugger_entry; case T_INT3: @@ -745,7 +730,7 @@ kernel_trap( (void) ml_set_interrupts_enabled(intr); } #endif /* NCOPY_WINDOWS > 0 */ - return; + goto common_return; } /* * fall through @@ -762,7 +747,7 @@ FALL_THROUGH: for (rp = recover_table; rp < recover_table_end; rp++) { if (kern_ip == rp->fault_addr) { set_recovery_ip(saved_state, rp->recover_addr); - return; + goto common_return; } } @@ -772,7 +757,7 @@ FALL_THROUGH: if (thread != THREAD_NULL && thread->recover) { set_recovery_ip(saved_state, thread->recover); thread->recover = 0; - return; + goto common_return; } /* * Unanticipated page-fault errors in kernel @@ -787,7 +772,7 @@ FALL_THROUGH: */ if (type == 15) { kprintf("kernel_trap() ignoring spurious trap 15\n"); - return; + goto common_return; } debugger_entry: /* Ensure that the i386_kernel_state at the base of the @@ -798,7 +783,7 @@ debugger_entry: sync_iss_to_iks(state); #if MACH_KDP if (kdp_i386_trap(type, saved_state, result, (vm_offset_t)vaddr)) { - return; + goto common_return; } #endif } @@ -807,6 +792,14 @@ debugger_entry: /* * NO RETURN */ + +common_return: +#if DEVELOPMENT || DEBUG + if (traptrace_index != TRAPTRACE_INVALID_INDEX) { + traptrace_end(traptrace_index, mach_absolute_time()); + } +#endif + return; } static void @@ -907,7 +900,9 @@ user_trap( kern_return_t kret; user_addr_t rip; unsigned long dr6 = 0; /* 32 bit for i386, 64 bit for x86_64 */ - +#if DEVELOPMENT || DEBUG + uint32_t traptrace_index; +#endif assert((is_saved_state32(saved_state) && !thread_is_64bit_addr(thread)) || (is_saved_state64(saved_state) && thread_is_64bit_addr(thread))); @@ -923,6 +918,9 @@ user_trap( err = (int)regs->isf.err & 0xffff; vaddr = (user_addr_t)regs->cr2; rip = (user_addr_t)regs->isf.rip; +#if DEVELOPMENT || DEBUG + traptrace_index = traptrace_start(type, rip, mach_absolute_time(), regs->rbp); +#endif } else { x86_saved_state32_t *regs; @@ -935,8 +933,12 @@ user_trap( err = regs->err & 
0xffff; vaddr = (user_addr_t)regs->cr2; rip = (user_addr_t)regs->eip; +#if DEVELOPMENT || DEBUG + traptrace_index = traptrace_start(type, rip, mach_absolute_time(), regs->ebp); +#endif } + if ((type == T_DEBUG) && thread->machine.ids) { unsigned long clear = 0; /* Stash and clear this processor's DR6 value, in the event @@ -1023,20 +1025,25 @@ user_trap( break; case T_INVALID_OPCODE: -#if !defined(RC_HIDE_XNU_J137) - fpUDflt(rip); /* May return from exception directly */ -#endif - exc = EXC_BAD_INSTRUCTION; - code = EXC_I386_INVOP; + if (fpUDflt(rip) == 1) { + exc = EXC_BAD_INSTRUCTION; + code = EXC_I386_INVOP; + } break; case T_NO_FPU: fpnoextflt(); - return; + break; case T_FPU_FAULT: - fpextovrflt(); /* Propagates exception directly, doesn't return */ - return; + fpextovrflt(); + /* + * Raise exception. + */ + exc = EXC_BAD_ACCESS; + code = VM_PROT_READ | VM_PROT_EXECUTE; + subcode = 0; + break; case T_INVALID_TSS: /* invalid TSS == iret with NT flag set */ exc = EXC_BAD_INSTRUCTION; @@ -1114,30 +1121,37 @@ user_trap( } #endif if (__probable((kret == KERN_SUCCESS) || (kret == KERN_ABORTED))) { - thread_exception_return(); - /*NOTREACHED*/ - } - - /* - * For a user trap, vm_fault() should never return KERN_FAILURE. - * If it does, we're leaking preemption disables somewhere in the kernel. - */ - if (__improbable(kret == KERN_FAILURE)) { + break; + } else if (__improbable(kret == KERN_FAILURE)) { + /* + * For a user trap, vm_fault() should never return KERN_FAILURE. + * If it does, we're leaking preemption disables somewhere in the kernel. 
+ */ panic("vm_fault() KERN_FAILURE from user fault on thread %p", thread); } - user_page_fault_continue(kret); - } /* NOTREACHED */ + /* PAL debug hook (empty on x86) */ + pal_dbg_page_fault(thread, vaddr, kret); + exc = EXC_BAD_ACCESS; + code = kret; + subcode = vaddr; + } break; case T_SSE_FLOAT_ERROR: - fpSSEexterrflt(); /* Propagates exception directly, doesn't return */ - return; + fpSSEexterrflt(); + exc = EXC_ARITHMETIC; + code = EXC_I386_SSEEXTERR; + subcode = ((struct x86_fx_thread_state *)thread->machine.ifps)->fx_MXCSR; + break; case T_FLOATING_POINT_ERROR: - fpexterrflt(); /* Propagates exception directly, doesn't return */ - return; + fpexterrflt(); + exc = EXC_ARITHMETIC; + code = EXC_I386_EXTERR; + subcode = ((struct x86_fx_thread_state *)thread->machine.ifps)->fx_status; + break; case T_DTRACE_RET: #if CONFIG_DTRACE @@ -1156,11 +1170,21 @@ user_trap( default: panic("Unexpected user trap, type %d", type); } - /* Note: Codepaths that directly return from user_trap() have pending - * ASTs processed in locore - */ - i386_exception(exc, code, subcode); - /* NOTREACHED */ + +#if DEVELOPMENT || DEBUG + if (traptrace_index != TRAPTRACE_INVALID_INDEX) { + traptrace_end(traptrace_index, mach_absolute_time()); + } +#endif + + if (exc != 0) { + /* + * Note: Codepaths that directly return from user_trap() have pending + * ASTs processed in locore + */ + i386_exception(exc, code, subcode); + /* NOTREACHED */ + } } /* diff --git a/osfmk/i386/trap_native.c b/osfmk/i386/trap_native.c index b5613be39..da7e55e20 100644 --- a/osfmk/i386/trap_native.c +++ b/osfmk/i386/trap_native.c @@ -171,11 +171,19 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole void panic_double_fault64(x86_saved_state_t *sp) { +#if DEVELOPMENT || DEBUG + uint64_t frameptr = is_saved_state64(sp) ? 
saved_state64(sp)->rbp : saved_state32(sp)->ebp; + (void) traptrace_start(T_DOUBLE_FAULT, saved_state64(sp)->isf.rip, mach_absolute_time(), frameptr); +#endif (void)OSCompareAndSwap((UInt32) - 1, (UInt32) cpu_number(), (volatile UInt32 *)&panic_double_fault_cpu); panic_64(sp, PANIC_DOUBLE_FAULT, "Double fault", FALSE); } void panic_machine_check64(x86_saved_state_t *sp) { +#if DEVELOPMENT || DEBUG + uint64_t frameptr = is_saved_state64(sp) ? saved_state64(sp)->rbp : saved_state32(sp)->ebp; + (void) traptrace_start(T_MACHINE_CHECK, saved_state64(sp)->isf.rip, mach_absolute_time(), frameptr); +#endif panic_64(sp, PANIC_MACHINE_CHECK, "Machine Check", TRUE); } diff --git a/osfmk/i386/ucode.c b/osfmk/i386/ucode.c index a9e9a12f5..139250617 100644 --- a/osfmk/i386/ucode.c +++ b/osfmk/i386/ucode.c @@ -252,8 +252,6 @@ xcpu_update(void) cpu_apply_microcode(); /* Update the cpuid info */ ucode_cpuid_set_info(); - /* Now apply workarounds */ - cpuid_do_was(); mp_enable_preemption(); /* Get all other CPUs to perform the update */ diff --git a/osfmk/ipc/ipc_mqueue.c b/osfmk/ipc/ipc_mqueue.c index 4bec21084..360879748 100644 --- a/osfmk/ipc/ipc_mqueue.c +++ b/osfmk/ipc/ipc_mqueue.c @@ -120,15 +120,19 @@ static void ipc_mqueue_peek_on_thread( */ void ipc_mqueue_init( - ipc_mqueue_t mqueue, - boolean_t is_set) + ipc_mqueue_t mqueue, + ipc_mqueue_kind_t kind) { - if (is_set) { + switch (kind) { + case IPC_MQUEUE_KIND_SET: waitq_set_init(&mqueue->imq_set_queue, SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST, NULL, NULL); - } else { - waitq_init(&mqueue->imq_wait_queue, SYNC_POLICY_FIFO | SYNC_POLICY_PORT); + break; + case IPC_MQUEUE_KIND_NONE: /* cheat: we really should have "no" mqueue */ + case IPC_MQUEUE_KIND_PORT: + waitq_init(&mqueue->imq_wait_queue, + SYNC_POLICY_FIFO | SYNC_POLICY_TURNSTILE_PROXY); ipc_kmsg_queue_init(&mqueue->imq_messages); mqueue->imq_seqno = 0; mqueue->imq_msgcount = 0; @@ -138,6 +142,7 @@ ipc_mqueue_init( #if MACH_FLIPC mqueue->imq_fport = FPORT_NULL; #endif + 
break; } klist_init(&mqueue->imq_klist); } @@ -1147,7 +1152,7 @@ ipc_mqueue_receive_on_thread( imq_unlock(port_mq); return THREAD_NOT_WAITING; } - } else if (imq_is_queue(mqueue)) { + } else if (imq_is_queue(mqueue) || imq_is_turnstile_proxy(mqueue)) { ipc_kmsg_queue_t kmsgs; /* @@ -1199,8 +1204,7 @@ ipc_mqueue_receive_on_thread( } /* - * Threads waiting on a special reply port - * (not portset or regular ports) + * Threads waiting on a reply port (not portset) * will wait on its receive turnstile. * * Donate waiting thread's turnstile and @@ -1217,7 +1221,7 @@ ipc_mqueue_receive_on_thread( * will be converted to to turnstile waitq * in waitq_assert_wait instead of global waitqs. */ - if (imq_is_queue(mqueue) && ip_from_mq(mqueue)->ip_specialreply) { + if (imq_is_turnstile_proxy(mqueue)) { ipc_port_t port = ip_from_mq(mqueue); rcv_turnstile = turnstile_prepare((uintptr_t)port, port_rcv_turnstile_address(port), diff --git a/osfmk/ipc/ipc_mqueue.h b/osfmk/ipc/ipc_mqueue.h index 4e6fb3240..f982ba677 100644 --- a/osfmk/ipc/ipc_mqueue.h +++ b/osfmk/ipc/ipc_mqueue.h @@ -159,6 +159,8 @@ typedef struct ipc_mqueue { #define imq_set_queue data.pset.setq #define imq_is_set(mq) waitqs_is_set(&(mq)->imq_set_queue) #define imq_is_queue(mq) waitq_is_queue(&(mq)->imq_wait_queue) +#define imq_is_turnstile_proxy(mq) \ + waitq_is_turnstile_proxy(&(mq)->imq_wait_queue) #define imq_is_valid(mq) waitq_is_valid(&(mq)->imq_wait_queue) #define imq_unlock(mq) waitq_unlock(&(mq)->imq_wait_queue) @@ -199,10 +201,16 @@ extern int ipc_mqueue_full; * Exported interfaces */ +__enum_closed_decl(ipc_mqueue_kind_t, int, { + IPC_MQUEUE_KIND_NONE, /* this mqueue really isn't used */ + IPC_MQUEUE_KIND_PORT, /* this queue is a regular port queue */ + IPC_MQUEUE_KIND_SET, /* this queue is a portset queue */ +}); + /* Initialize a newly-allocated message queue */ extern void ipc_mqueue_init( ipc_mqueue_t mqueue, - boolean_t is_set); + ipc_mqueue_kind_t kind); /* de-initialize / cleanup an mqueue 
(specifically waitq resources) */ extern void ipc_mqueue_deinit( diff --git a/osfmk/ipc/ipc_notify.c b/osfmk/ipc/ipc_notify.c index f677c6e28..9744b2b62 100644 --- a/osfmk/ipc/ipc_notify.c +++ b/osfmk/ipc/ipc_notify.c @@ -158,7 +158,7 @@ void ipc_notify_send_once( ipc_port_t port) { - ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN, FALSE); + ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN); (void)mach_notify_send_once(port); /* send-once right consumed */ diff --git a/osfmk/ipc/ipc_port.c b/osfmk/ipc/ipc_port.c index cd8c04b81..b8cddf28a 100644 --- a/osfmk/ipc/ipc_port.c +++ b/osfmk/ipc/ipc_port.c @@ -638,6 +638,7 @@ void ipc_port_init( ipc_port_t port, ipc_space_t space, + ipc_port_init_flags_t flags, mach_port_name_t name) { /* port->ip_kobject doesn't have to be initialized */ @@ -648,6 +649,10 @@ ipc_port_init( port->ip_mscount = 0; port->ip_srights = 0; port->ip_sorights = 0; + if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) { + port->ip_srights = 1; + port->ip_mscount = 1; + } port->ip_nsrequest = IP_NULL; port->ip_pdrequest = IP_NULL; @@ -669,17 +674,19 @@ ipc_port_init( port->ip_immovable_send = 0; port->ip_impcount = 0; - port->ip_specialreply = 0; + port->ip_specialreply = (flags & IPC_PORT_INIT_SPECIAL_REPLY) != 0; port->ip_sync_link_state = PORT_SYNC_LINK_ANY; port->ip_sync_bootstrap_checkin = 0; - port->ip_watchport_elem = NULL; ipc_special_reply_port_bits_reset(port); port->ip_send_turnstile = TURNSTILE_NULL; - ipc_mqueue_init(&port->ip_messages, - FALSE /* !set */); + ipc_mqueue_kind_t kind = IPC_MQUEUE_KIND_NONE; + if (flags & IPC_PORT_INIT_MESSAGE_QUEUE) { + kind = IPC_MQUEUE_KIND_PORT; + } + ipc_mqueue_init(&port->ip_messages, kind); } /* @@ -699,7 +706,7 @@ ipc_port_init( kern_return_t ipc_port_alloc( ipc_space_t space, - bool make_send_right, + ipc_port_init_flags_t flags, mach_port_name_t *namep, ipc_port_t *portp) { @@ -714,7 +721,7 @@ ipc_port_alloc( 
ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX); #endif /* MACH_ASSERT */ - if (make_send_right) { + if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) { type |= MACH_PORT_TYPE_SEND; urefs = 1; } @@ -725,13 +732,7 @@ ipc_port_alloc( } /* port and space are locked */ - ipc_port_init(port, space, name); - - if (make_send_right) { - /* ipc_object_alloc() already made the entry reference */ - port->ip_srights++; - port->ip_mscount++; - } + ipc_port_init(port, space, flags, name); #if MACH_ASSERT ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX); @@ -763,19 +764,25 @@ ipc_port_alloc( kern_return_t ipc_port_alloc_name( ipc_space_t space, + ipc_port_init_flags_t flags, mach_port_name_t name, ipc_port_t *portp) { ipc_port_t port; kern_return_t kr; + mach_port_type_t type = MACH_PORT_TYPE_RECEIVE; + mach_port_urefs_t urefs = 0; #if MACH_ASSERT uintptr_t buf[IP_CALLSTACK_MAX]; ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX); #endif /* MACH_ASSERT */ - kr = ipc_object_alloc_name(space, IOT_PORT, - MACH_PORT_TYPE_RECEIVE, 0, + if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) { + type |= MACH_PORT_TYPE_SEND; + urefs = 1; + } + kr = ipc_object_alloc_name(space, IOT_PORT, type, urefs, name, (ipc_object_t *) &port); if (kr != KERN_SUCCESS) { return kr; @@ -783,7 +790,7 @@ ipc_port_alloc_name( /* port is locked */ - ipc_port_init(port, space, name); + ipc_port_init(port, space, flags, name); #if MACH_ASSERT ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX); @@ -985,7 +992,7 @@ ipc_port_destroy(ipc_port_t port) if (special_reply) { ipc_port_adjust_special_reply_port(port, - IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE, FALSE); + IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE); } if (watchport_elem) { @@ -1058,7 +1065,7 @@ ipc_port_destroy(ipc_port_t port) /* unlink the kmsg from special reply port */ if (special_reply) { ipc_port_adjust_special_reply_port(port, - IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE, FALSE); + IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE); } /* throw away no-senders request */ 
@@ -1310,6 +1317,36 @@ not_circular: #endif /* !IMPORTANCE_INHERITANCE */ } +/* + * Routine: ipc_port_watchport_elem + * Purpose: + * Get the port's watchport elem field + * + * Conditions: + * mqueue locked + */ +static struct task_watchport_elem * +ipc_port_watchport_elem(ipc_port_t port) +{ + return port->ip_messages.imq_wait_queue.waitq_tspriv; +} + +/* + * Routine: ipc_port_update_watchport_elem + * Purpose: + * Set the port's watchport elem field + * + * Conditions: + * mqueue locked + */ +static inline struct task_watchport_elem * +ipc_port_update_watchport_elem(ipc_port_t port, struct task_watchport_elem *we) +{ + struct task_watchport_elem *old_we = ipc_port_watchport_elem(port); + port->ip_messages.imq_wait_queue.waitq_tspriv = we; + return old_we; +} + /* * Update the recv turnstile inheritor for a port. * @@ -1414,7 +1451,7 @@ ipc_port_send_update_inheritor( port->ip_destination != NULL) { /* Case 2. */ inheritor = port_send_turnstile(port->ip_destination); - } else if (port->ip_watchport_elem != NULL) { + } else if (ipc_port_watchport_elem(port) != NULL) { /* Case 3. */ if (prioritize_launch) { assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY); @@ -1539,35 +1576,7 @@ ipc_port_send_turnstile_complete(ipc_port_t port) static struct turnstile * ipc_port_rcv_turnstile(ipc_port_t port) { - return turnstile_lookup_by_proprietor((uintptr_t)port, TURNSTILE_SYNC_IPC); -} - - -/* - * Routine: ipc_port_rcv_turnstile_waitq - * Purpose: - * Given the mqueue's waitq, find the port's - * rcv turnstile and return its waitq. - * - * Conditions: - * mqueue locked or thread waiting on turnstile is locked. 
- */ -struct waitq * -ipc_port_rcv_turnstile_waitq(struct waitq *waitq) -{ - struct waitq *safeq; - - ipc_mqueue_t mqueue = imq_from_waitq(waitq); - ipc_port_t port = ip_from_mq(mqueue); - struct turnstile *rcv_turnstile = ipc_port_rcv_turnstile(port); - - /* Check if the port has a rcv turnstile */ - if (rcv_turnstile != TURNSTILE_NULL) { - safeq = &rcv_turnstile->ts_waitq; - } else { - safeq = global_eventq(waitq); - } - return safeq; + return *port_rcv_turnstile_address(port); } @@ -1702,11 +1711,15 @@ ipc_port_adjust_special_reply_port_locked( turnstile_inheritor_t inheritor = TURNSTILE_INHERITOR_NULL; struct turnstile *ts = TURNSTILE_NULL; - assert(special_reply_port->ip_specialreply); - ip_lock_held(special_reply_port); // ip_sync_link_state is touched imq_lock(&special_reply_port->ip_messages); + if (!special_reply_port->ip_specialreply) { + // only mach_msg_receive_results_complete() calls this with any port + assert(get_turnstile); + goto not_special; + } + if (flags & IPC_PORT_ADJUST_SR_RECEIVED_MSG) { ipc_special_reply_port_msg_sent_reset(special_reply_port); } @@ -1721,6 +1734,7 @@ ipc_port_adjust_special_reply_port_locked( /* Check if the special reply port is marked non-special */ if (special_reply_port->ip_sync_link_state == PORT_SYNC_LINK_ANY) { +not_special: if (get_turnstile) { turnstile_complete((uintptr_t)special_reply_port, port_rcv_turnstile_address(special_reply_port), NULL, TURNSTILE_SYNC_IPC); @@ -1822,18 +1836,12 @@ ipc_port_adjust_special_reply_port_locked( */ void ipc_port_adjust_special_reply_port( - ipc_port_t special_reply_port, - uint8_t flags, - boolean_t get_turnstile) + ipc_port_t port, + uint8_t flags) { - if (special_reply_port->ip_specialreply) { - ip_lock(special_reply_port); - ipc_port_adjust_special_reply_port_locked(special_reply_port, NULL, - flags, get_turnstile); - /* special_reply_port unlocked */ - } - if (get_turnstile) { - assert(current_thread()->turnstile != TURNSTILE_NULL); + if (port->ip_specialreply) { + 
ip_lock(port); + ipc_port_adjust_special_reply_port_locked(port, NULL, flags, FALSE); } } @@ -1988,8 +1996,7 @@ ipc_port_add_watchport_elem_locked( ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL); } - *old_elem = port->ip_watchport_elem; - port->ip_watchport_elem = watchport_elem; + *old_elem = ipc_port_update_watchport_elem(port, watchport_elem); ipc_port_send_turnstile_recompute_push_locked(port); /* port and mqueue unlocked */ @@ -2015,7 +2022,7 @@ ipc_port_clear_watchport_elem_internal_conditional_locked( ip_lock_held(port); imq_held(&port->ip_messages); - if (port->ip_watchport_elem != watchport_elem) { + if (ipc_port_watchport_elem(port) != watchport_elem) { imq_unlock(&port->ip_messages); ip_unlock(port); return KERN_FAILURE; @@ -2047,13 +2054,13 @@ ipc_port_replace_watchport_elem_conditional_locked( ip_lock_held(port); imq_held(&port->ip_messages); - if (port->ip_watchport_elem != old_watchport_elem) { + if (ipc_port_watchport_elem(port) != old_watchport_elem) { imq_unlock(&port->ip_messages); ip_unlock(port); return KERN_FAILURE; } - port->ip_watchport_elem = new_watchport_elem; + ipc_port_update_watchport_elem(port, new_watchport_elem); ipc_port_send_turnstile_recompute_push_locked(port); /* port and mqueue unlocked */ return KERN_SUCCESS; @@ -2073,15 +2080,10 @@ struct task_watchport_elem * ipc_port_clear_watchport_elem_internal( ipc_port_t port) { - struct task_watchport_elem *watchport_elem; - ip_lock_held(port); imq_held(&port->ip_messages); - watchport_elem = port->ip_watchport_elem; - port->ip_watchport_elem = NULL; - - return watchport_elem; + return ipc_port_update_watchport_elem(port, NULL); } /* @@ -2129,7 +2131,7 @@ ipc_port_get_watchport_inheritor( ipc_port_t port) { imq_held(&port->ip_messages); - return port->ip_watchport_elem->twe_task->watchports->tw_thread; + return ipc_port_watchport_elem(port)->twe_task->watchports->tw_thread; } /* @@ -2638,7 +2640,7 @@ ipc_port_release_sonce( return; } - 
ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN, FALSE); + ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN); ip_lock(port); @@ -2697,7 +2699,8 @@ ipc_port_release_receive( ipc_port_t ipc_port_alloc_special( - ipc_space_t space) + ipc_space_t space, + ipc_port_init_flags_t flags) { ipc_port_t port; @@ -2716,7 +2719,7 @@ ipc_port_alloc_special( port->ip_references = 1; port->ip_object.io_bits = io_makebits(TRUE, IOT_PORT, 0); - ipc_port_init(port, space, 1); + ipc_port_init(port, space, flags, 1); #if MACH_ASSERT ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX); @@ -2777,7 +2780,9 @@ ipc_port_finalize( ipc_port_request_t requests = port->ip_requests; assert(port_send_turnstile(port) == TURNSTILE_NULL); - assert(ipc_port_rcv_turnstile(port) == TURNSTILE_NULL); + if (imq_is_turnstile_proxy(&port->ip_messages)) { + assert(ipc_port_rcv_turnstile(port) == TURNSTILE_NULL); + } if (ip_active(port)) { panic("Trying to free an active port. port %p", port); diff --git a/osfmk/ipc/ipc_port.h b/osfmk/ipc/ipc_port.h index 3fccd2460..413139c0e 100644 --- a/osfmk/ipc/ipc_port.h +++ b/osfmk/ipc/ipc_port.h @@ -108,6 +108,8 @@ * taken when the port was destroyed. */ +struct task_watchport_elem; + typedef unsigned int ipc_port_timestamp_t; struct ipc_port { @@ -141,8 +143,6 @@ struct ipc_port { struct turnstile *send_turnstile; } kdata2; - struct task_watchport_elem *ip_watchport_elem; - mach_vm_address_t ip_context; natural_t ip_sprequests:1, /* send-possible requests outstanding */ @@ -213,7 +213,7 @@ MACRO_END (IP_PREALLOC(port) ? 
&((port)->ip_premsg->ikm_turnstile) : &((port)->ip_send_turnstile)) #define port_rcv_turnstile_address(port) \ - (NULL) + &(port)->ip_messages.imq_wait_queue.waitq_ts /* @@ -472,22 +472,31 @@ extern boolean_t ipc_port_clear_receiver( ipc_port_t port, boolean_t should_destroy); +__options_decl(ipc_port_init_flags_t, uint32_t, { + IPC_PORT_INIT_NONE = 0x00000000, + IPC_PORT_INIT_MAKE_SEND_RIGHT = 0x00000001, + IPC_PORT_INIT_MESSAGE_QUEUE = 0x00000002, + IPC_PORT_INIT_SPECIAL_REPLY = 0x00000004, +}); + /* Initialize a newly-allocated port */ extern void ipc_port_init( ipc_port_t port, ipc_space_t space, + ipc_port_init_flags_t flags, mach_port_name_t name); /* Allocate a port */ extern kern_return_t ipc_port_alloc( ipc_space_t space, - bool make_send_right, + ipc_port_init_flags_t flags, mach_port_name_t *namep, ipc_port_t *portp); /* Allocate a port, with a specific name */ extern kern_return_t ipc_port_alloc_name( ipc_space_t space, + ipc_port_init_flags_t flags, mach_port_name_t name, ipc_port_t *portp); @@ -559,8 +568,7 @@ ipc_port_adjust_sync_link_state_locked( void ipc_port_adjust_special_reply_port( ipc_port_t special_reply_port, - uint8_t flags, - boolean_t get_turnstile); + uint8_t flags); void ipc_port_adjust_port_locked( @@ -686,7 +694,8 @@ extern void ipc_port_finalize( /* Allocate a port in a special space */ extern ipc_port_t ipc_port_alloc_special( - ipc_space_t space); + ipc_space_t space, + ipc_port_init_flags_t flags); /* Deallocate a port in a special space */ extern void ipc_port_dealloc_special( @@ -711,12 +720,12 @@ extern void ipc_port_send_update_inheritor(ipc_port_t port, turnstile_update_flags_t flags); #define ipc_port_alloc_kernel() \ - ipc_port_alloc_special(ipc_space_kernel) + ipc_port_alloc_special(ipc_space_kernel, IPC_PORT_INIT_NONE) #define ipc_port_dealloc_kernel(port) \ ipc_port_dealloc_special((port), ipc_space_kernel) #define ipc_port_alloc_reply() \ - ipc_port_alloc_special(ipc_space_reply) + 
ipc_port_alloc_special(ipc_space_reply, IPC_PORT_INIT_MESSAGE_QUEUE) #define ipc_port_dealloc_reply(port) \ ipc_port_dealloc_special((port), ipc_space_reply) diff --git a/osfmk/ipc/ipc_pset.c b/osfmk/ipc/ipc_pset.c index 523c49660..e73364b48 100644 --- a/osfmk/ipc/ipc_pset.c +++ b/osfmk/ipc/ipc_pset.c @@ -110,7 +110,7 @@ ipc_pset_alloc( } /* pset and space are locked */ - ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); + ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET); is_write_unlock(space); *namep = name; @@ -149,7 +149,7 @@ ipc_pset_alloc_name( } /* pset is locked */ - ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); + ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET); *psetp = pset; return KERN_SUCCESS; @@ -186,7 +186,7 @@ ipc_pset_alloc_special( pset->ips_references = 1; pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0); - ipc_mqueue_init(&pset->ips_messages, TRUE /* set */); + ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET); return pset; } diff --git a/osfmk/ipc/mach_msg.c b/osfmk/ipc/mach_msg.c index e4a901230..f367c8e00 100644 --- a/osfmk/ipc/mach_msg.c +++ b/osfmk/ipc/mach_msg.c @@ -667,7 +667,7 @@ mach_msg_receive_results_complete(ipc_object_t object) { thread_t self = current_thread(); ipc_port_t port = IPC_PORT_NULL; - boolean_t get_turnstile = self->turnstile ? 
FALSE : TRUE; + boolean_t get_turnstile = (self->turnstile == TURNSTILE_NULL); if (io_otype(object) == IOT_PORT) { port = ip_object_to_port(object); @@ -689,8 +689,12 @@ mach_msg_receive_results_complete(ipc_object_t object) flags |= IPC_PORT_ADJUST_SR_RECEIVED_MSG; } - ipc_port_adjust_special_reply_port(port, - flags, get_turnstile); + if (port->ip_specialreply || get_turnstile) { + ip_lock(port); + ipc_port_adjust_special_reply_port_locked(port, NULL, + flags, get_turnstile); + } + assert(self->turnstile != TURNSTILE_NULL); /* thread now has a turnstile */ } diff --git a/osfmk/ipc/mach_port.c b/osfmk/ipc/mach_port.c index 9f4d8b677..af47faa37 100644 --- a/osfmk/ipc/mach_port.c +++ b/osfmk/ipc/mach_port.c @@ -725,9 +725,11 @@ mach_port_allocate_internal( } if (qosp->name) { - kr = ipc_port_alloc_name(space, *namep, &port); + kr = ipc_port_alloc_name(space, IPC_PORT_INIT_MESSAGE_QUEUE, + *namep, &port); } else { - kr = ipc_port_alloc(space, FALSE, namep, &port); + kr = ipc_port_alloc(space, IPC_PORT_INIT_MESSAGE_QUEUE, + namep, &port); } if (kr == KERN_SUCCESS) { if (kmsg != IKM_NULL) { @@ -2499,14 +2501,18 @@ mach_port_construct( { kern_return_t kr; ipc_port_t port; + ipc_port_init_flags_t init_flags = IPC_PORT_INIT_MESSAGE_QUEUE; if (space == IS_NULL) { return KERN_INVALID_TASK; } + if (options->flags & MPO_INSERT_SEND_RIGHT) { + init_flags |= IPC_PORT_INIT_MAKE_SEND_RIGHT; + } + /* Allocate a new port in the IPC space */ - kr = ipc_port_alloc(space, (options->flags & MPO_INSERT_SEND_RIGHT), - name, &port); + kr = ipc_port_alloc(space, init_flags, name, &port); if (kr != KERN_SUCCESS) { return kr; } diff --git a/osfmk/kern/backtrace.c b/osfmk/kern/backtrace.c index 82daadce6..59667c828 100644 --- a/osfmk/kern/backtrace.c +++ b/osfmk/kern/backtrace.c @@ -329,7 +329,7 @@ backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int max_frames, assert(ml_get_interrupts_enabled() == TRUE); if (!ml_get_interrupts_enabled()) { - return EINVAL; + goto out; } union { 
@@ -349,7 +349,7 @@ backtrace_thread_user(void *thread, uintptr_t *bt, unsigned int max_frames, if (thread != current_thread()) { map = get_task_map_reference(get_threadtask(thread)); if (map == NULL) { - return EINVAL; + goto out; } old_map = vm_map_switch(map); } else { diff --git a/osfmk/kern/debug.h b/osfmk/kern/debug.h index 57effae1c..981eb3619 100644 --- a/osfmk/kern/debug.h +++ b/osfmk/kern/debug.h @@ -271,6 +271,8 @@ enum { #define KF_STACKSHOT_OVRD (0x10) #define KF_COMPRSV_OVRD (0x20) #define KF_INTERRUPT_MASKED_DEBUG_OVRD (0x40) +#define KF_TRAPTRACE_OVRD (0x80) +#define KF_IOTRACE_OVRD (0x100) boolean_t kern_feature_override(uint32_t fmask); diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c index e336fcc09..a4a617d32 100644 --- a/osfmk/kern/host.c +++ b/osfmk/kern/host.c @@ -1278,6 +1278,10 @@ kernel_set_special_port(host_priv_t host_priv, int id, ipc_port_t port) host_lock(host_priv); old_port = host_priv->special[id]; + if ((id == HOST_AMFID_PORT) && (task_pid(current_task()) != 1)) { + host_unlock(host_priv); + return KERN_NO_ACCESS; + } host_priv->special[id] = port; host_unlock(host_priv); diff --git a/osfmk/kern/ipc_tt.c b/osfmk/kern/ipc_tt.c index 7d0384cf2..8af9d9cb2 100644 --- a/osfmk/kern/ipc_tt.c +++ b/osfmk/kern/ipc_tt.c @@ -868,7 +868,8 @@ mach_reply_port( mach_port_name_t name; kern_return_t kr; - kr = ipc_port_alloc(current_task()->itk_space, FALSE, &name, &port); + kr = ipc_port_alloc(current_task()->itk_space, IPC_PORT_INIT_MESSAGE_QUEUE, + &name, &port); if (kr == KERN_SUCCESS) { ip_unlock(port); } else { @@ -897,6 +898,8 @@ thread_get_special_reply_port( mach_port_name_t name; kern_return_t kr; thread_t thread = current_thread(); + ipc_port_init_flags_t flags = IPC_PORT_INIT_MESSAGE_QUEUE | + IPC_PORT_INIT_MAKE_SEND_RIGHT | IPC_PORT_INIT_SPECIAL_REPLY; /* unbind the thread special reply port */ if (IP_VALID(thread->ith_special_reply_port)) { @@ -906,7 +909,7 @@ thread_get_special_reply_port( } } - kr = 
ipc_port_alloc(current_task()->itk_space, TRUE, &name, &port); + kr = ipc_port_alloc(current_task()->itk_space, flags, &name, &port); if (kr == KERN_SUCCESS) { ipc_port_bind_special_reply_port_locked(port); ip_unlock(port); @@ -932,11 +935,11 @@ ipc_port_bind_special_reply_port_locked( { thread_t thread = current_thread(); assert(thread->ith_special_reply_port == NULL); + assert(port->ip_specialreply); + assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY); ip_reference(port); thread->ith_special_reply_port = port; - port->ip_specialreply = 1; - port->ip_sync_link_state = PORT_SYNC_LINK_ANY; port->ip_messages.imq_srp_owner_thread = thread; ipc_special_reply_port_bits_reset(port); @@ -1386,6 +1389,8 @@ mach_ports_lookup( return KERN_SUCCESS; } +extern zone_t task_zone; + kern_return_t task_conversion_eval(task_t caller, task_t victim) { @@ -1409,6 +1414,8 @@ task_conversion_eval(task_t caller, task_t victim) return KERN_INVALID_SECURITY; } + zone_require(victim, task_zone); + #if CONFIG_EMBEDDED /* * On embedded platforms, only a platform binary can resolve the task port diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index ffe8d7658..26176bc36 100644 --- a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -192,8 +192,8 @@ KALLOC_ZINFO_SFREE(vm_size_t bytes) * 6144 N N N * 8192 Y N N * 12288 N X X - * 16384 N N N - * 32768 X N N + * 16384 N X N + * 32768 X X N * */ static const struct kalloc_zone_config { @@ -266,8 +266,15 @@ static const struct kalloc_zone_config { KZC_ENTRY(4096, false), KZC_ENTRY(6144, false), KZC_ENTRY(8192, false), + /* To limit internal fragmentation, only add the following zones if the + * page size is greater than 4K. + * Note that we use ARM_PGBYTES here (instead of one of the VM macros) + * since it's guaranteed to be a compile time constant. 
+ */ +#if ARM_PGBYTES > 4096 KZC_ENTRY(16384, false), KZC_ENTRY(32768, false), +#endif /* ARM_PGBYTES > 4096 */ #else #error missing or invalid zone size parameters for kalloc diff --git a/osfmk/kern/kpc_thread.c b/osfmk/kern/kpc_thread.c index aa8edd434..a2f1fe6a0 100644 --- a/osfmk/kern/kpc_thread.c +++ b/osfmk/kern/kpc_thread.c @@ -116,7 +116,7 @@ kpc_set_thread_counting(uint32_t classes) /* and schedule an AST for this thread... */ if (!current_thread()->kpc_buf) { - current_thread()->kperf_flags |= T_KPC_ALLOC; + current_thread()->kperf_ast |= T_KPC_ALLOC; act_set_kperf(current_thread()); } } @@ -150,7 +150,7 @@ kpc_update_thread_counters( thread_t thread ) /* schedule any necessary allocations */ if (!current_thread()->kpc_buf) { - current_thread()->kperf_flags |= T_KPC_ALLOC; + current_thread()->kperf_ast |= T_KPC_ALLOC; act_set_kperf(current_thread()); } @@ -234,12 +234,10 @@ kpc_thread_destroy(thread_t thread) kpc_counterbuf_free(buf); } -/* ast callback on a thread */ void -kpc_thread_ast_handler( thread_t thread ) +kpc_thread_ast_handler(thread_t thread) { - /* see if we want an alloc */ - if (thread->kperf_flags & T_KPC_ALLOC) { + if (thread->kperf_ast & T_KPC_ALLOC) { thread->kpc_buf = kpc_counterbuf_alloc(); } } diff --git a/osfmk/kern/ledger.c b/osfmk/kern/ledger.c index a0d925872..e905ee666 100644 --- a/osfmk/kern/ledger.c +++ b/osfmk/kern/ledger.c @@ -448,7 +448,7 @@ ledger_instantiate(ledger_template_t template, int entry_type) le->le_credit = 0; le->le_debit = 0; le->le_limit = LEDGER_LIMIT_INFINITY; - le->le_warn_level = LEDGER_LIMIT_INFINITY; + le->le_warn_percent = LEDGER_PERCENT_NONE; le->_le.le_refill.le_refill_period = 0; le->_le.le_refill.le_last_refill = 0; } @@ -521,7 +521,8 @@ warn_level_exceeded(struct ledger_entry *le) * use positive limits. 
*/ balance = le->le_credit - le->le_debit; - if ((le->le_warn_level != LEDGER_LIMIT_INFINITY) && (balance > le->le_warn_level)) { + if (le->le_warn_percent != LEDGER_PERCENT_NONE && + ((balance > (le->le_limit * le->le_warn_percent) >> 16))) { return 1; } return 0; @@ -987,9 +988,9 @@ ledger_set_limit(ledger_t ledger, int entry, ledger_amount_t limit, assert(warn_level_percentage <= 100); assert(limit > 0); /* no negative limit support for warnings */ assert(limit != LEDGER_LIMIT_INFINITY); /* warn % without limit makes no sense */ - le->le_warn_level = (le->le_limit * warn_level_percentage) / 100; + le->le_warn_percent = warn_level_percentage * (1u << 16) / 100; } else { - le->le_warn_level = LEDGER_LIMIT_INFINITY; + le->le_warn_percent = LEDGER_PERCENT_NONE; } return KERN_SUCCESS; @@ -1145,12 +1146,12 @@ ledger_disable_callback(ledger_t ledger, int entry) } /* - * le_warn_level is used to indicate *if* this ledger has a warning configured, + * le_warn_percent is used to indicate *if* this ledger has a warning configured, * in addition to what that warning level is set to. * This means a side-effect of ledger_disable_callback() is that the * warning level is forgotten. 
*/ - ledger->l_entries[entry].le_warn_level = LEDGER_LIMIT_INFINITY; + ledger->l_entries[entry].le_warn_percent = LEDGER_PERCENT_NONE; flag_clear(&ledger->l_entries[entry].le_flags, LEDGER_ACTION_CALLBACK); return KERN_SUCCESS; } diff --git a/osfmk/kern/ledger.h b/osfmk/kern/ledger.h index 9be77bb0c..e3a2ec2e6 100644 --- a/osfmk/kern/ledger.h +++ b/osfmk/kern/ledger.h @@ -72,8 +72,9 @@ struct ledger_template_info { */ struct ledger_entry { volatile uint32_t le_flags; +#define LEDGER_PERCENT_NONE UINT16_MAX + uint16_t le_warn_percent; ledger_amount_t le_limit; - ledger_amount_t le_warn_level; volatile ledger_amount_t le_credit __attribute__((aligned(8))); volatile ledger_amount_t le_debit __attribute__((aligned(8))); union { diff --git a/osfmk/kern/mach_node.c b/osfmk/kern/mach_node.c index c4e8347f1..d0d03cf62 100644 --- a/osfmk/kern/mach_node.c +++ b/osfmk/kern/mach_node.c @@ -302,14 +302,14 @@ mach_node_register(mach_node_t node) proxy_space->is_node_id = nid; /* Create the bootstrap proxy port for this remote node */ - bs_port = ipc_port_alloc_special(proxy_space); + bs_port = ipc_port_alloc_special(proxy_space, IPC_PORT_INIT_MESSAGE_QUEUE); if (bs_port == MACH_PORT_NULL) { kr = KERN_RESOURCE_SHORTAGE; goto out; } /* Create the control (ack) port for this remote node */ - ack_port = ipc_port_alloc_special(proxy_space); + ack_port = ipc_port_alloc_special(proxy_space, IPC_PORT_INIT_MESSAGE_QUEUE); if (ack_port == MACH_PORT_NULL) { kr = KERN_RESOURCE_SHORTAGE; goto out; diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index c59175da0..c312e0b4e 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -6267,3 +6267,10 @@ sysctl_task_get_no_smt(void) return '0'; } #endif /* DEVELOPMENT || DEBUG */ + + +__private_extern__ void +thread_bind_cluster_type(char cluster_type) +{ + (void)cluster_type; +} diff --git a/osfmk/kern/sched_prim.h b/osfmk/kern/sched_prim.h index 2f806bdd0..880e84960 100644 --- a/osfmk/kern/sched_prim.h +++ 
b/osfmk/kern/sched_prim.h @@ -454,6 +454,8 @@ __BEGIN_DECLS #ifdef XNU_KERNEL_PRIVATE +extern void thread_bind_cluster_type(char cluster_type); + /* Toggles a global override to turn off CPU Throttling */ extern void sys_override_cpu_throttle(boolean_t enable_override); diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index d01037df8..917ae6db5 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -214,7 +214,6 @@ extern int serverperfmode; unsigned int new_nkdbufs = 0; unsigned int wake_nkdbufs = 0; unsigned int write_trace_on_panic = 0; -static char trace_typefilter[64] = { 0 }; unsigned int trace_wrap = 0; boolean_t trace_serial = FALSE; boolean_t early_boot_complete = FALSE; @@ -269,7 +268,6 @@ kernel_bootstrap(void) PE_parse_boot_argn("trace", &new_nkdbufs, sizeof(new_nkdbufs)); PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof(wake_nkdbufs)); PE_parse_boot_argn("trace_panic", &write_trace_on_panic, sizeof(write_trace_on_panic)); - PE_parse_boot_arg_str("trace_typefilter", trace_typefilter, sizeof(trace_typefilter)); PE_parse_boot_argn("trace_wrap", &trace_wrap, sizeof(trace_wrap)); scale_setup(); @@ -556,6 +554,9 @@ kernel_bootstrap_thread(void) kernel_bootstrap_thread_log("ktrace_init"); ktrace_init(); + char trace_typefilter[256] = {}; + PE_parse_boot_arg_str("trace_typefilter", trace_typefilter, + sizeof(trace_typefilter)); kdebug_init(new_nkdbufs, trace_typefilter, trace_wrap); #ifdef MACH_BSD diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index 0374456e1..ebd6c2bb2 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -1640,6 +1640,7 @@ task_create_internal( #if __arm64__ new_task->task_legacy_footprint = FALSE; new_task->task_extra_footprint_limit = FALSE; + new_task->task_ios13extended_footprint_limit = FALSE; #endif /* __arm64__ */ new_task->task_region_footprint = FALSE; new_task->task_has_crossed_thread_limit = FALSE; @@ -7312,6 +7313,7 @@ task_set_exc_guard_behavior( #if __arm64__ extern int 
legacy_footprint_entitlement_mode; extern void memorystatus_act_on_legacy_footprint_entitlement(proc_t, boolean_t); +extern void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t); void task_set_legacy_footprint( @@ -7330,11 +7332,30 @@ task_set_extra_footprint_limit( return; } task_lock(task); - if (!task->task_extra_footprint_limit) { - memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE); - task->task_extra_footprint_limit = TRUE; + if (task->task_extra_footprint_limit) { + task_unlock(task); + return; + } + task->task_extra_footprint_limit = TRUE; + task_unlock(task); + memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE); +} + +void +task_set_ios13extended_footprint_limit( + task_t task) +{ + if (task->task_ios13extended_footprint_limit) { + return; + } + task_lock(task); + if (task->task_ios13extended_footprint_limit) { + task_unlock(task); + return; } + task->task_ios13extended_footprint_limit = TRUE; task_unlock(task); + memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info); } #endif /* __arm64__ */ diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h index 1db8fb09d..e47bb217c 100644 --- a/osfmk/kern/task.h +++ b/osfmk/kern/task.h @@ -427,6 +427,7 @@ struct task { #if __arm64__ unsigned int task_legacy_footprint:1; unsigned int task_extra_footprint_limit:1; + unsigned int task_ios13extended_footprint_limit:1; #endif /* __arm64__ */ unsigned int task_region_footprint:1; unsigned int task_has_crossed_thread_limit:1; @@ -1004,6 +1005,7 @@ extern boolean_t task_get_darkwake_mode(task_t); #if __arm64__ extern void task_set_legacy_footprint(task_t task); extern void task_set_extra_footprint_limit(task_t task); +extern void task_set_ios13extended_footprint_limit(task_t task); #endif /* __arm64__ */ #if CONFIG_MACF diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index f554222b1..e26f4dced 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -356,7 +356,7 @@ thread_bootstrap(void) 
#endif /* CONFIG_DTRACE */ #if KPERF - thread_template.kperf_flags = 0; + thread_template.kperf_ast = 0; thread_template.kperf_pet_gen = 0; thread_template.kperf_c_switch = 0; thread_template.kperf_pet_cnt = 0; diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index 833466376..7242faac7 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -178,9 +178,8 @@ struct thread { struct priority_queue_entry wait_prioq_links; /* priority ordered waitq links */ }; - processor_t runq; /* run queue assignment */ - event64_t wait_event; /* wait queue event */ + processor_t runq; /* run queue assignment */ struct waitq *waitq; /* wait queue this thread is enqueued on */ struct turnstile *turnstile; /* thread's turnstile, protected by primitives interlock */ void *inheritor; /* inheritor of the primitive the thread will block on */ @@ -204,7 +203,7 @@ struct thread { decl_simple_lock_data(, sched_lock); /* scheduling lock (thread_lock()) */ decl_simple_lock_data(, wake_lock); /* for thread stop / wait (wake_lock()) */ #endif - integer_t options; /* options set by thread itself */ + uint16_t options; /* options set by thread itself */ #define TH_OPT_INTMASK 0x0003 /* interrupt / abort level */ #define TH_OPT_VMPRIV 0x0004 /* may allocate reserved memory */ #define TH_OPT_SYSTEM_CRITICAL 0x0010 /* Thread must always be allowed to run - even under heavy load */ @@ -217,8 +216,8 @@ struct thread { #define TH_OPT_SEND_IMPORTANCE 0x0800 /* Thread will allow importance donation from kernel rpc */ #define TH_OPT_ZONE_GC 0x1000 /* zone_gc() called on this thread */ - boolean_t wake_active; /* wake event on stop */ - int at_safe_point; /* thread_abort_safely allowed */ + bool wake_active; /* wake event on stop */ + bool at_safe_point; /* thread_abort_safely allowed */ ast_t reason; /* why we blocked */ uint32_t quantum_remaining; wait_result_t wait_result; /* outcome of wait - @@ -349,9 +348,8 @@ struct thread { uint64_t safe_release; /* when to release fail-safe */ /* Call 
out from scheduler */ - void (*sched_call)( - int type, - thread_t thread); + void (*sched_call)(int type, thread_t thread); + #if defined(CONFIG_SCHED_PROTO) uint32_t runqueue_generation; /* last time runqueue was drained */ #endif @@ -388,18 +386,16 @@ struct thread { uint64_t wait_sfi_begin_time; /* start time for thread waiting in SFI */ #endif - /* Timed wait expiration */ - timer_call_data_t wait_timer; - integer_t wait_timer_active; - boolean_t wait_timer_is_set; - - /* * Processor/cache affinity * - affinity_threads links task threads with the same affinity set */ - affinity_set_t affinity_set; queue_chain_t affinity_threads; + affinity_set_t affinity_set; + +#if CONFIG_EMBEDDED + task_watch_t * taskwatch; /* task watch */ +#endif /* CONFIG_EMBEDDED */ /* Various bits of state to stash across a continuation, exclusive to the current thread block point */ union { @@ -407,7 +403,7 @@ struct thread { mach_msg_return_t state; /* receive state */ mach_port_seqno_t seqno; /* seqno of recvd message */ ipc_object_t object; /* object received on */ - mach_vm_address_t msg_addr; /* receive buffer pointer */ + vm_address_t msg_addr; /* receive buffer pointer */ mach_msg_size_t rsize; /* max size for recvd msg */ mach_msg_size_t msize; /* actual size for recvd msg */ mach_msg_option_t option; /* options for receive */ @@ -463,26 +459,28 @@ struct thread { struct ipc_kmsg_queue ith_messages; /* messages to reap */ mach_port_t ith_rpc_reply; /* reply port for kernel RPCs */ + /* Pending thread ast(s) */ + ast_t ast; + /* Ast/Halt data structures */ - vm_offset_t recover; /* page fault recover(copyin/out) */ + vm_offset_t recover; /* page fault recover(copyin/out) */ queue_chain_t threads; /* global list of all threads */ /* Activation */ - queue_chain_t task_threads; + queue_chain_t task_threads; /* Task membership */ struct task *task; vm_map_t map; #if DEVELOPMENT || DEBUG - boolean_t pmap_footprint_suspended; + bool pmap_footprint_suspended; #endif /* DEVELOPMENT || 
DEBUG */ - decl_lck_mtx_data(, mutex); - - - /* Pending thread ast(s) */ - ast_t ast; + /* Timed wait expiration */ + timer_call_data_t wait_timer; + uint16_t wait_timer_active; + bool wait_timer_is_set; /* Miscellaneous bits guarded by mutex */ uint32_t @@ -495,6 +493,8 @@ struct thread { corpse_dup:1, /* TRUE when thread is an inactive duplicate in a corpse */ :0; + decl_lck_mtx_data(, mutex); + /* Ports associated with this thread */ struct ipc_port *ith_self; /* not a right, doesn't hold ref */ struct ipc_port *ith_sself; /* a send right */ @@ -528,15 +528,21 @@ struct thread { #define T_KPERF_CALLSTACK_DEPTH_OFFSET (24) #define T_KPERF_SET_CALLSTACK_DEPTH(DEPTH) (((uint32_t)(DEPTH)) << T_KPERF_CALLSTACK_DEPTH_OFFSET) #define T_KPERF_GET_CALLSTACK_DEPTH(FLAGS) ((FLAGS) >> T_KPERF_CALLSTACK_DEPTH_OFFSET) +#define T_KPERF_ACTIONID_OFFSET (18) +#define T_KPERF_SET_ACTIONID(AID) (((uint32_t)(AID)) << T_KPERF_ACTIONID_OFFSET) +#define T_KPERF_GET_ACTIONID(FLAGS) ((FLAGS) >> T_KPERF_ACTIONID_OFFSET) #endif -#define T_KPERF_AST_CALLSTACK (1U << 0) /* dump a callstack on thread's next AST */ -#define T_KPERF_AST_DISPATCH (1U << 1) /* dump a name on thread's next AST */ -#define T_KPC_ALLOC (1U << 2) /* thread needs a kpc_buf allocated */ -/* only go up to T_KPERF_CALLSTACK_DEPTH_OFFSET - 1 */ +#define T_KPERF_AST_CALLSTACK 0x1 /* dump a callstack on thread's next AST */ +#define T_KPERF_AST_DISPATCH 0x2 /* dump a name on thread's next AST */ +#define T_KPC_ALLOC 0x4 /* thread needs a kpc_buf allocated */ + +#define T_KPERF_AST_ALL \ + (T_KPERF_AST_CALLSTACK | T_KPERF_AST_DISPATCH | T_KPC_ALLOC) +/* only go up to T_KPERF_ACTIONID_OFFSET - 1 */ #ifdef KPERF - uint32_t kperf_flags; + uint32_t kperf_ast; uint32_t kperf_pet_gen; /* last generation of PET that sampled this thread*/ uint32_t kperf_c_switch; /* last dispatch detection */ uint32_t kperf_pet_cnt; /* how many times a thread has been sampled by PET */ @@ -552,8 +558,6 @@ struct thread { void *hv_thread_target; 
#endif /* HYPERVISOR */ - uint64_t thread_id; /*system wide unique thread-id*/ - /* Statistics accumulated per-thread and aggregated per-task */ uint32_t syscalls_unix; uint32_t syscalls_mach; @@ -563,6 +567,8 @@ struct thread { uint64_t t_deduct_bank_ledger_time; /* cpu time to be deducted from bank ledger */ uint64_t t_deduct_bank_ledger_energy; /* energy to be deducted from bank ledger */ + uint64_t thread_id; /*system wide unique thread-id*/ + #if MONOTONIC struct mt_thread t_monotonic; #endif /* MONOTONIC */ @@ -584,16 +590,12 @@ struct thread { } *overrides; uint32_t kevent_overrides; - uint16_t user_promotion_basepri; - uint16_t kern_promotion_schedpri; + uint8_t user_promotion_basepri; + uint8_t kern_promotion_schedpri; _Atomic uint16_t kevent_ast_bits; io_stat_info_t thread_io_stats; /* per-thread I/O statistics */ -#if CONFIG_EMBEDDED - task_watch_t * taskwatch; /* task watch */ -#endif /* CONFIG_EMBEDDED */ - uint32_t thread_callout_interrupt_wakeups; uint32_t thread_callout_platform_idle_wakeups; uint32_t thread_timer_wakeups_bin_1; diff --git a/osfmk/kern/timer.h b/osfmk/kern/timer.h index 2ac35e414..eeb8eebdb 100644 --- a/osfmk/kern/timer.h +++ b/osfmk/kern/timer.h @@ -86,6 +86,12 @@ extern int precise_user_kernel_time; * Definitions for high resolution timers. 
*/ +#if __LP64__ +#define TIMER_ALIGNMENT +#else +#define TIMER_ALIGNMENT __attribute__((packed, aligned(4))) +#endif + struct timer { uint64_t tstamp; #if defined(__LP64__) @@ -96,7 +102,7 @@ struct timer { uint32_t high_bits; uint32_t high_bits_check; #endif /* !defined(__LP64__) */ -}; +} TIMER_ALIGNMENT; typedef struct timer timer_data_t, *timer_t; diff --git a/osfmk/kern/turnstile.c b/osfmk/kern/turnstile.c index 6375a3704..e2b189dcb 100644 --- a/osfmk/kern/turnstile.c +++ b/osfmk/kern/turnstile.c @@ -1896,9 +1896,8 @@ thread_get_waiting_turnstile(thread_t thread) return turnstile; } - /* Get the safeq if the waitq is a port queue */ - if (waitq_is_port_queue(waitq)) { - waitq = waitq_get_safeq(waitq); + if (waitq_is_turnstile_proxy(waitq)) { + return waitq->waitq_ts; } /* Check if the waitq is a turnstile queue */ @@ -1952,8 +1951,11 @@ thread_get_update_flags_for_turnstile_propagation_stoppage(thread_t thread) } /* Get the safeq if the waitq is a port queue */ - if (waitq_is_port_queue(waitq)) { - waitq = waitq_get_safeq(waitq); + if (waitq_is_turnstile_proxy(waitq)) { + if (waitq->waitq_ts) { + return TSU_NO_PRI_CHANGE_NEEDED; + } + return TSU_NO_TURNSTILE; } /* Check if the waitq is a turnstile queue */ diff --git a/osfmk/kern/waitq.c b/osfmk/kern/waitq.c index 2348ef572..e408f029b 100644 --- a/osfmk/kern/waitq.c +++ b/osfmk/kern/waitq.c @@ -1809,19 +1809,29 @@ waitq_irq_safe(struct waitq *waitq) return waitq->waitq_irq; } -struct waitq * -waitq_get_safeq(struct waitq *waitq) +static inline bool +waitq_empty(struct waitq *wq) { - struct waitq *safeq; + if (waitq_is_turnstile_queue(wq)) { + return priority_queue_empty(&wq->waitq_prio_queue); + } else if (waitq_is_turnstile_proxy(wq)) { + struct turnstile *ts = wq->waitq_ts; + return ts == TURNSTILE_NULL || + priority_queue_empty(&ts->ts_waitq.waitq_prio_queue); + } else { + return queue_empty(&wq->waitq_queue); + } +} +static struct waitq * +waitq_get_safeq(struct waitq *waitq) +{ /* Check if it's a port 
waitq */ - if (waitq_is_port_queue(waitq)) { - assert(!waitq_irq_safe(waitq)); - safeq = ipc_port_rcv_turnstile_waitq(waitq); - } else { - safeq = global_eventq(waitq); + if (waitq_is_turnstile_proxy(waitq)) { + struct turnstile *ts = waitq->waitq_ts; + return ts ? &ts->ts_waitq : NULL; } - return safeq; + return global_eventq(waitq); } static uint32_t @@ -2387,6 +2397,15 @@ do_waitq_select_n_locked(struct waitq_select_args *args) /* JMM - add flag to waitq to avoid global lookup if no waiters */ eventmask = _CAST_TO_EVENT_MASK(waitq); safeq = waitq_get_safeq(waitq); + if (safeq == NULL) { + /* + * in the WQT_TSPROXY case, if there's no turnstile, + * there's no queue and no waiters, so we can move straight + * to the waitq set recursion + */ + goto handle_waitq_set; + } + if (*nthreads == 0) { spl = splsched(); } @@ -2464,6 +2483,7 @@ do_waitq_select_n_locked(struct waitq_select_args *args) return; } +handle_waitq_set: /* * wait queues that are not in any sets * are the bottom of the recursion @@ -2678,13 +2698,22 @@ waitq_select_thread_locked(struct waitq *waitq, kern_return_t kr; spl_t s; - s = splsched(); - /* Find and lock the interrupts disabled queue the thread is actually on */ if (!waitq_irq_safe(waitq)) { safeq = waitq_get_safeq(waitq); + if (safeq == NULL) { + /* + * in the WQT_TSPROXY case, if there's no turnstile, + * there's no queue and no waiters, so we can move straight + * to the waitq set recursion + */ + goto handle_waitq_set; + } + + s = splsched(); waitq_lock(safeq); } else { + s = splsched(); safeq = waitq; } @@ -2709,6 +2738,7 @@ waitq_select_thread_locked(struct waitq *waitq, splx(s); +handle_waitq_set: if (!waitq->waitq_set_id) { return KERN_NOT_WAITING; } @@ -2819,6 +2849,10 @@ waitq_assert_wait64_locked(struct waitq *waitq, */ if (!waitq_irq_safe(waitq)) { safeq = waitq_get_safeq(waitq); + if (__improbable(safeq == NULL)) { + panic("Trying to assert_wait on a turnstile proxy " + "that hasn't been donated one (waitq: %p)", waitq); + } 
eventmask = _CAST_TO_EVENT_MASK(waitq); waitq_lock(safeq); } else { @@ -2922,6 +2956,10 @@ waitq_pull_thread_locked(struct waitq *waitq, thread_t thread) /* Find the interrupts disabled queue thread is waiting on */ if (!waitq_irq_safe(waitq)) { safeq = waitq_get_safeq(waitq); + if (__improbable(safeq == NULL)) { + panic("Trying to clear_wait on a turnstile proxy " + "that hasn't been donated one (waitq: %p)", waitq); + } } else { safeq = waitq; } @@ -3246,8 +3284,12 @@ waitq_init(struct waitq *waitq, int policy) waitq->waitq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0); waitq->waitq_irq = !!(policy & SYNC_POLICY_DISABLE_IRQ); waitq->waitq_prepost = 0; - waitq->waitq_type = WQT_QUEUE; - waitq->waitq_turnstile_or_port = !!(policy & SYNC_POLICY_TURNSTILE); + if (policy & SYNC_POLICY_TURNSTILE_PROXY) { + waitq->waitq_type = WQT_TSPROXY; + } else { + waitq->waitq_type = WQT_QUEUE; + } + waitq->waitq_turnstile = !!(policy & SYNC_POLICY_TURNSTILE); waitq->waitq_eventmask = 0; waitq->waitq_set_id = 0; @@ -3259,6 +3301,9 @@ waitq_init(struct waitq *waitq, int policy) priority_queue_init(&waitq->waitq_prio_queue, PRIORITY_QUEUE_BUILTIN_MAX_HEAP); assert(waitq->waitq_fifo == 0); + } else if (policy & SYNC_POLICY_TURNSTILE_PROXY) { + waitq->waitq_ts = TURNSTILE_NULL; + waitq->waitq_tspriv = NULL; } else { queue_init(&waitq->waitq_queue); } @@ -3343,7 +3388,12 @@ waitq_deinit(struct waitq *waitq) { spl_t s; - if (!waitq || !waitq_is_queue(waitq)) { + assert(waitq); + if (!waitq_is_valid(waitq)) { + return; + } + + if (!waitq_is_queue(waitq) && !waitq_is_turnstile_proxy(waitq)) { return; } @@ -3351,25 +3401,33 @@ waitq_deinit(struct waitq *waitq) s = splsched(); } waitq_lock(waitq); - if (!waitq_valid(waitq)) { - waitq_unlock(waitq); - if (waitq_irq_safe(waitq)) { - splx(s); + + if (waitq_valid(waitq)) { + waitq->waitq_isvalid = 0; + if (!waitq_irq_safe(waitq)) { + waitq_unlink_all_unlock(waitq); + /* waitq unlocked and set links deallocated */ + goto out; } - return; } - 
waitq->waitq_isvalid = 0; - - if (!waitq_irq_safe(waitq)) { - waitq_unlink_all_unlock(waitq); - /* waitq unlocked and set links deallocated */ - } else { - waitq_unlock(waitq); + waitq_unlock(waitq); + if (waitq_irq_safe(waitq)) { splx(s); } - assert(waitq_empty(waitq)); +out: +#if MACH_ASSERT + if (waitq_is_turnstile_queue(waitq)) { + assert(priority_queue_empty(&waitq->waitq_prio_queue)); + } else if (waitq_is_turnstile_proxy(waitq)) { + assert(waitq->waitq_ts == TURNSTILE_NULL); + } else { + assert(queue_empty(&waitq->waitq_queue)); + } +#else + (void)0; +#endif // MACH_ASSERT } void diff --git a/osfmk/kern/waitq.h b/osfmk/kern/waitq.h index 2d8975733..efbdcc883 100644 --- a/osfmk/kern/waitq.h +++ b/osfmk/kern/waitq.h @@ -101,6 +101,7 @@ typedef enum e_waitq_lock_state { enum waitq_type { WQT_INVALID = 0, + WQT_TSPROXY = 0x1, WQT_QUEUE = 0x2, WQT_SET = 0x3, }; @@ -141,7 +142,7 @@ struct waitq { waitq_prepost:1, /* waitq supports prepost? */ waitq_irq:1, /* waitq requires interrupts disabled */ waitq_isvalid:1, /* waitq structure is valid */ - waitq_turnstile_or_port:1, /* waitq is embedded in a turnstile (if irq safe), or port (if not irq safe) */ + waitq_turnstile:1, /* waitq is embedded in a turnstile */ waitq_eventmask:_EVENT_MASK_BITS; /* the wait queue set (set-of-sets) to which this queue belongs */ #if __arm64__ @@ -153,8 +154,12 @@ struct waitq { uint64_t waitq_set_id; uint64_t waitq_prepost_id; union { - queue_head_t waitq_queue; /* queue of elements */ - struct priority_queue waitq_prio_queue; /* priority ordered queue of elements */ + queue_head_t waitq_queue; /* queue of elements */ + struct priority_queue waitq_prio_queue; /* priority ordered queue of elements */ + struct { + struct turnstile *waitq_ts; /* turnstile for WQT_TSPROXY */ + void *waitq_tspriv; /* private field for clients use */ + }; }; }; @@ -184,11 +189,11 @@ extern void waitq_bootstrap(void); #define waitq_is_queue(wq) \ ((wq)->waitq_type == WQT_QUEUE) -#define 
waitq_is_turnstile_queue(wq) \ - (((wq)->waitq_irq) && (wq)->waitq_turnstile_or_port) +#define waitq_is_turnstile_proxy(wq) \ + ((wq)->waitq_type == WQT_TSPROXY) -#define waitq_is_port_queue(wq) \ - (!((wq)->waitq_irq) && (wq)->waitq_turnstile_or_port) +#define waitq_is_turnstile_queue(wq) \ + (((wq)->waitq_irq) && (wq)->waitq_turnstile) #define waitq_is_set(wq) \ ((wq)->waitq_type == WQT_SET && ((struct waitq_set *)(wq))->wqset_id != 0) @@ -209,16 +214,6 @@ extern void waitq_bootstrap(void); */ extern void waitq_invalidate_locked(struct waitq *wq); -static inline boolean_t -waitq_empty(struct waitq *wq) -{ - if (waitq_is_turnstile_queue(wq)) { - return priority_queue_empty(&(wq->waitq_prio_queue)); - } else { - return queue_empty(&(wq->waitq_queue)); - } -} - extern lck_grp_t waitq_lck_grp; #if __arm64__ @@ -466,8 +461,6 @@ extern int waitq_is_global(struct waitq *waitq); extern int waitq_irq_safe(struct waitq *waitq); -extern struct waitq * waitq_get_safeq(struct waitq *waitq); - #if CONFIG_WAITQ_STATS /* * waitq statistics diff --git a/osfmk/kperf/action.c b/osfmk/kperf/action.c index 90d8e341f..ae3951156 100644 --- a/osfmk/kperf/action.c +++ b/osfmk/kperf/action.c @@ -120,6 +120,80 @@ kperf_system_memory_log(void) (uintptr_t)VM_PAGE_COMPRESSOR_COUNT); } +static void +kperf_sample_user_internal(struct kperf_usample *sbuf, + struct kperf_context *context, unsigned int actionid, + unsigned int sample_what) +{ + if (sample_what & SAMPLER_USTACK) { + kperf_ucallstack_sample(&sbuf->ucallstack, context); + } + if (sample_what & SAMPLER_TH_DISPATCH) { + kperf_thread_dispatch_sample(&sbuf->th_dispatch, context); + } + if (sample_what & SAMPLER_TH_INFO) { + kperf_thread_info_sample(&sbuf->th_info, context); + } + + boolean_t intren = ml_set_interrupts_enabled(FALSE); + + /* + * No userdata or sample_flags for this one. 
+ */ + BUF_DATA(PERF_GEN_EVENT | DBG_FUNC_START, sample_what, actionid); + + if (sample_what & SAMPLER_USTACK) { + kperf_ucallstack_log(&sbuf->ucallstack); + } + if (sample_what & SAMPLER_TH_DISPATCH) { + kperf_thread_dispatch_log(&sbuf->th_dispatch); + } + if (sample_what & SAMPLER_TH_INFO) { + kperf_thread_info_log(&sbuf->th_info); + } + + BUF_DATA(PERF_GEN_EVENT | DBG_FUNC_END, sample_what); + + ml_set_interrupts_enabled(intren); +} + +void +kperf_sample_user(struct kperf_usample *sbuf, struct kperf_context *context, + unsigned int actionid, unsigned int sample_flags) +{ + if (actionid == 0 || actionid > actionc) { + return; + } + + unsigned int sample_what = actionv[actionid - 1].sample; + unsigned int ucallstack_depth = actionv[actionid - 1].ucallstack_depth; + + /* callstacks should be explicitly ignored */ + if (sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) { + sample_what &= ~(SAMPLER_KSTACK | SAMPLER_USTACK); + } + if (sample_flags & SAMPLE_FLAG_ONLY_SYSTEM) { + sample_what &= SAMPLER_SYS_MEM; + } + assert((sample_flags & (SAMPLE_FLAG_THREAD_ONLY | SAMPLE_FLAG_TASK_ONLY)) + != (SAMPLE_FLAG_THREAD_ONLY | SAMPLE_FLAG_TASK_ONLY)); + if (sample_flags & SAMPLE_FLAG_THREAD_ONLY) { + sample_what &= SAMPLER_THREAD_MASK; + } + if (sample_flags & SAMPLE_FLAG_TASK_ONLY) { + sample_what &= SAMPLER_TASK_MASK; + } + + if (sample_what == 0) { + return; + } + + sbuf->ucallstack.kpuc_nframes = ucallstack_depth ?: + MAX_UCALLSTACK_FRAMES; + + kperf_sample_user_internal(sbuf, context, actionid, sample_what); +} + static kern_return_t kperf_sample_internal(struct kperf_sample *sbuf, struct kperf_context *context, @@ -132,9 +206,6 @@ kperf_sample_internal(struct kperf_sample *sbuf, uint32_t userdata = actionid; bool task_only = false; - /* not much point continuing here, but what to do ? return - * Shutdown? cut a tracepoint and continue? 
- */ if (sample_what == 0) { return SAMPLE_CONTINUE; } @@ -170,14 +241,9 @@ kperf_sample_internal(struct kperf_sample *sbuf, sbuf->kcallstack.kpkc_nframes = MAX_KCALLSTACK_FRAMES; } - if (ucallstack_depth) { - sbuf->ucallstack.kpuc_nframes = ucallstack_depth; - } else { - sbuf->ucallstack.kpuc_nframes = MAX_UCALLSTACK_FRAMES; - } - + ucallstack_depth = ucallstack_depth ?: MAX_UCALLSTACK_FRAMES; sbuf->kcallstack.kpkc_flags = 0; - sbuf->ucallstack.kpuc_flags = 0; + sbuf->usample.ucallstack.kpuc_flags = 0; if (sample_what & SAMPLER_TH_INFO) { kperf_thread_info_sample(&sbuf->th_info, context); @@ -199,8 +265,8 @@ kperf_sample_internal(struct kperf_sample *sbuf, if (sample_what & SAMPLER_KSTACK) { if (sample_flags & SAMPLE_FLAG_CONTINUATION) { kperf_continuation_sample(&(sbuf->kcallstack), context); - /* outside of interrupt context, backtrace the current thread */ } else if (sample_flags & SAMPLE_FLAG_NON_INTERRUPT) { + /* outside of interrupt context, backtrace the current thread */ kperf_backtrace_sample(&(sbuf->kcallstack), context); } else { kperf_kcallstack_sample(&(sbuf->kcallstack), context); @@ -210,7 +276,6 @@ kperf_sample_internal(struct kperf_sample *sbuf, kperf_task_snapshot_sample(context->cur_task, &(sbuf->tk_snapshot)); } - /* sensitive ones */ if (!is_kernel) { if (sample_what & SAMPLER_MEMINFO) { kperf_meminfo_sample(context->cur_task, &(sbuf->meminfo)); @@ -218,19 +283,13 @@ kperf_sample_internal(struct kperf_sample *sbuf, if (sample_flags & SAMPLE_FLAG_PEND_USER) { if (sample_what & SAMPLER_USTACK) { - pended_ucallstack = kperf_ucallstack_pend(context, sbuf->ucallstack.kpuc_nframes); - } - - if (sample_what & SAMPLER_TH_DISPATCH) { - pended_th_dispatch = kperf_thread_dispatch_pend(context); - } - } else { - if (sample_what & SAMPLER_USTACK) { - kperf_ucallstack_sample(&(sbuf->ucallstack), context); + pended_ucallstack = kperf_ucallstack_pend(context, + ucallstack_depth, actionid); } if (sample_what & SAMPLER_TH_DISPATCH) { - 
kperf_thread_dispatch_sample(&(sbuf->th_dispatch), context); + pended_th_dispatch = + kperf_thread_dispatch_pend(context, actionid); } } } @@ -307,14 +366,6 @@ log_sample: if (pended_th_dispatch) { BUF_INFO(PERF_TI_DISPPEND); } - } else { - if (sample_what & SAMPLER_USTACK) { - kperf_ucallstack_log(&(sbuf->ucallstack)); - } - - if (sample_what & SAMPLER_TH_DISPATCH) { - kperf_thread_dispatch_log(&(sbuf->th_dispatch)); - } } } @@ -357,11 +408,11 @@ kperf_sample(struct kperf_sample *sbuf, /* the samplers to run */ unsigned int sample_what = actionv[actionid - 1].sample; + unsigned int ucallstack_depth = actionv[actionid - 1].ucallstack_depth; /* do the actual sample operation */ return kperf_sample_internal(sbuf, context, sample_what, - sample_flags, actionid, - actionv[actionid - 1].ucallstack_depth); + sample_flags, actionid, ucallstack_depth); } void @@ -412,11 +463,11 @@ __attribute__((noinline)) void kperf_thread_ast_handler(thread_t thread) { - BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_START, thread, kperf_get_thread_flags(thread)); + uint32_t ast = thread->kperf_ast; - /* ~2KB of the stack for the sample since this is called from AST */ - struct kperf_sample sbuf; - memset(&sbuf, 0, sizeof(struct kperf_sample)); + BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_START, thread, ast); + + struct kperf_usample sbuf = {}; task_t task = get_threadtask(thread); @@ -425,49 +476,46 @@ kperf_thread_ast_handler(thread_t thread) return; } - /* make a context, take a sample */ struct kperf_context ctx = { .cur_thread = thread, .cur_task = task, .cur_pid = task_pid(task), }; - /* decode the flags to determine what to sample */ unsigned int sample_what = 0; - uint32_t flags = kperf_get_thread_flags(thread); - - if (flags & T_KPERF_AST_DISPATCH) { + if (ast & T_KPERF_AST_DISPATCH) { sample_what |= SAMPLER_TH_DISPATCH; } - if (flags & T_KPERF_AST_CALLSTACK) { - sample_what |= SAMPLER_USTACK; - sample_what |= SAMPLER_TH_INFO; + if (ast & T_KPERF_AST_CALLSTACK) { + /* TH_INFO for backwards 
compatibility */ + sample_what |= SAMPLER_USTACK | SAMPLER_TH_INFO; } - uint32_t ucallstack_depth = T_KPERF_GET_CALLSTACK_DEPTH(flags); - - int r = kperf_sample_internal(&sbuf, &ctx, sample_what, 0, 0, ucallstack_depth); + sbuf.ucallstack.kpuc_nframes = + T_KPERF_GET_CALLSTACK_DEPTH(ast) ?: MAX_UCALLSTACK_FRAMES; + unsigned int actionid = T_KPERF_GET_ACTIONID(ast); + kperf_sample_user_internal(&sbuf, &ctx, actionid, sample_what); - BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_END, r); + BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_END); } -/* register AST bits */ int -kperf_ast_pend(thread_t thread, uint32_t set_flags) +kperf_ast_pend(thread_t thread, uint32_t set_flags, unsigned int set_actionid) { - /* can only pend on the current thread */ if (thread != current_thread()) { - panic("pending to non-current thread"); + panic("kperf: pending AST to non-current thread"); } - /* get our current bits */ - uint32_t flags = kperf_get_thread_flags(thread); + uint32_t ast = thread->kperf_ast; + unsigned int actionid = T_KPERF_GET_ACTIONID(ast); + uint32_t flags = ast & T_KPERF_AST_ALL; - /* see if it's already been done or pended */ - if (!(flags & set_flags)) { - /* set the bit on the thread */ - flags |= set_flags; - kperf_set_thread_flags(thread, flags); + if ((flags | set_flags) != flags || actionid != set_actionid) { + ast &= ~T_KPERF_SET_ACTIONID(actionid); + ast |= T_KPERF_SET_ACTIONID(set_actionid); + ast |= set_flags; + + thread->kperf_ast = ast; /* set the actual AST */ act_set_kperf(thread); @@ -480,14 +528,12 @@ kperf_ast_pend(thread_t thread, uint32_t set_flags) void kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth) { - uint32_t ast_flags = kperf_get_thread_flags(thread); - uint32_t existing_callstack_depth = - T_KPERF_GET_CALLSTACK_DEPTH(ast_flags); - - if (existing_callstack_depth < depth) { - ast_flags &= ~T_KPERF_SET_CALLSTACK_DEPTH(depth); - ast_flags |= T_KPERF_SET_CALLSTACK_DEPTH(depth); - kperf_set_thread_flags(thread, ast_flags); + uint32_t ast = 
thread->kperf_ast; + uint32_t existing_depth = T_KPERF_GET_CALLSTACK_DEPTH(ast); + if (existing_depth < depth) { + ast &= ~T_KPERF_SET_CALLSTACK_DEPTH(existing_depth); + ast |= T_KPERF_SET_CALLSTACK_DEPTH(depth); + thread->kperf_ast = ast; } } @@ -689,6 +735,9 @@ kperf_action_set_ucallstack_depth(unsigned action_id, uint32_t depth) if (depth > MAX_UCALLSTACK_FRAMES) { return EINVAL; } + if (depth < 2) { + return EINVAL; + } actionv[action_id - 1].ucallstack_depth = depth; @@ -705,6 +754,9 @@ kperf_action_set_kcallstack_depth(unsigned action_id, uint32_t depth) if (depth > MAX_KCALLSTACK_FRAMES) { return EINVAL; } + if (depth < 1) { + return EINVAL; + } actionv[action_id - 1].kcallstack_depth = depth; diff --git a/osfmk/kperf/action.h b/osfmk/kperf/action.h index 420720b86..f37d5dccd 100644 --- a/osfmk/kperf/action.h +++ b/osfmk/kperf/action.h @@ -33,8 +33,8 @@ #include #include -/* fwd decl */ struct kperf_sample; +struct kperf_usample; struct kperf_context; /* bits for defining what to do on an action */ @@ -86,6 +86,12 @@ kern_return_t kperf_sample(struct kperf_sample *sbuf, unsigned actionid, unsigned sample_flags); +/* + * Sample user space. + */ +void kperf_sample_user(struct kperf_usample *sbuf, struct kperf_context *ctx, + unsigned int actionid, unsigned int sample_flags); + /* Whether the action provided samples non-system values. */ bool kperf_action_has_non_system(unsigned actionid); bool kperf_action_has_thread(unsigned int actionid); diff --git a/osfmk/kperf/ast.h b/osfmk/kperf/ast.h index d43ce88b4..f5b19d508 100644 --- a/osfmk/kperf/ast.h +++ b/osfmk/kperf/ast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2011-2019 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,6 +26,13 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* pend ast bits on a thread */ -extern int kperf_ast_pend(thread_t thread, uint32_t flags); -extern void kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth); +/* + * Ensure that kperf is informed the next time this thread goes back to user + * space, to handle an action. + */ +int kperf_ast_pend(thread_t thread, uint32_t flags, unsigned int actionid); + +/* + * Set the depth for the user callstack sample. + */ +void kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth); diff --git a/osfmk/kperf/callstack.c b/osfmk/kperf/callstack.c index d6f0fb9a9..4a38dd7c5 100644 --- a/osfmk/kperf/callstack.c +++ b/osfmk/kperf/callstack.c @@ -254,7 +254,7 @@ kperf_backtrace_sample(struct kp_kcallstack *cs, struct kperf_context *context) cs->kpkc_nframes += 1; } if (trunc) { - cs->kpkc_nframes |= CALLSTACK_TRUNCATED; + cs->kpkc_flags |= CALLSTACK_TRUNCATED; } BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_END, cs->kpkc_nframes); @@ -437,12 +437,15 @@ kperf_ucallstack_log(struct kp_ucallstack *cs) } int -kperf_ucallstack_pend(struct kperf_context * context, uint32_t depth) +kperf_ucallstack_pend(struct kperf_context * context, uint32_t depth, + unsigned int actionid) { - int did_pend = kperf_ast_pend(context->cur_thread, T_KPERF_AST_CALLSTACK); + if (depth < 2) { + panic("HUH"); + } kperf_ast_set_callstack_depth(context->cur_thread, depth); - - return did_pend; + return kperf_ast_pend(context->cur_thread, T_KPERF_AST_CALLSTACK, + actionid); } static kern_return_t diff --git a/osfmk/kperf/callstack.h b/osfmk/kperf/callstack.h index a144a8b95..e4a0cd5e0 100644 --- a/osfmk/kperf/callstack.h +++ b/osfmk/kperf/callstack.h @@ -70,7 +70,8 @@ void kperf_continuation_sample(struct kp_kcallstack *cs, struct kperf_context *) void kperf_backtrace_sample(struct kp_kcallstack *cs, struct kperf_context *context); void kperf_ucallstack_sample(struct kp_ucallstack *cs, struct 
kperf_context *); -int kperf_ucallstack_pend(struct kperf_context *, uint32_t depth); +int kperf_ucallstack_pend(struct kperf_context *, uint32_t depth, + unsigned int actionid); void kperf_ucallstack_log(struct kp_ucallstack *cs); #endif /* !defined(KPERF_CALLSTACK_H) */ diff --git a/osfmk/kperf/kperf.c b/osfmk/kperf/kperf.c index 17a94be8e..3bffd178c 100644 --- a/osfmk/kperf/kperf.c +++ b/osfmk/kperf/kperf.c @@ -250,19 +250,6 @@ kperf_on_cpu_update(void) kperf_lazy_wait_action != 0; } -/* random misc-ish functions */ -uint32_t -kperf_get_thread_flags(thread_t thread) -{ - return thread->kperf_flags; -} - -void -kperf_set_thread_flags(thread_t thread, uint32_t flags) -{ - thread->kperf_flags = flags; -} - unsigned int kperf_sampling_status(void) { diff --git a/osfmk/kperf/kperf.h b/osfmk/kperf/kperf.h index 63434af8f..31f87f6a6 100644 --- a/osfmk/kperf/kperf.h +++ b/osfmk/kperf/kperf.h @@ -41,9 +41,8 @@ extern lck_grp_t kperf_lck_grp; #define TRIGGER_TYPE_LAZY_WAIT (3) #define TRIGGER_TYPE_LAZY_CPU (3) -/* helpers to get and set AST flags on a thread */ -uint32_t kperf_get_thread_flags(thread_t thread); -void kperf_set_thread_flags(thread_t thread, uint32_t flags); +uint32_t kperf_get_thread_ast(thread_t thread); +void kperf_set_thread_ast(thread_t thread, uint32_t flags); /* * Get and set dirtiness of thread, so kperf can track whether the thread diff --git a/osfmk/kperf/kperf_kpc.c b/osfmk/kperf/kperf_kpc.c index 43df937a2..3d3fcb0bf 100644 --- a/osfmk/kperf/kperf_kpc.c +++ b/osfmk/kperf/kperf_kpc.c @@ -42,8 +42,7 @@ kperf_kpc_thread_ast(thread_t thread) { kpc_thread_ast_handler(thread); kperf_thread_ast_handler(thread); - - thread->kperf_flags = 0; + thread->kperf_ast = 0; } void diff --git a/osfmk/kperf/pet.c b/osfmk/kperf/pet.c index 0db17185b..09e73dc9a 100644 --- a/osfmk/kperf/pet.c +++ b/osfmk/kperf/pet.c @@ -363,7 +363,8 @@ pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate) { lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED); - 
uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS | SAMPLE_FLAG_THREAD_ONLY; + uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS | + SAMPLE_FLAG_THREAD_ONLY; BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START); @@ -388,6 +389,8 @@ pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate) thread->kperf_pet_cnt++; kperf_sample(pet_sample, &ctx, pet_action_id, sample_flags); + kperf_sample_user(&pet_sample->usample, &ctx, pet_action_id, + sample_flags); BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END); } diff --git a/osfmk/kperf/sample.h b/osfmk/kperf/sample.h index 9af5ba5b5..941ae53e8 100644 --- a/osfmk/kperf/sample.h +++ b/osfmk/kperf/sample.h @@ -35,18 +35,27 @@ #include "kperf_kpc.h" #include "meminfo.h" +/* + * For data that must be sampled in a fault-able context. + */ +struct kperf_usample { + struct kperf_thread_dispatch th_dispatch; + struct kp_ucallstack ucallstack; + struct kperf_thread_info th_info; +}; + struct kperf_sample { struct kperf_thread_info th_info; struct kperf_thread_scheduling th_scheduling; struct kperf_thread_snapshot th_snapshot; - struct kperf_thread_dispatch th_dispatch; struct kperf_task_snapshot tk_snapshot; struct kp_kcallstack kcallstack; - struct kp_ucallstack ucallstack; struct meminfo meminfo; + struct kperf_usample usample; + #if KPC struct kpcdata kpcdata; #endif /* KPC */ diff --git a/osfmk/kperf/thread_samplers.c b/osfmk/kperf/thread_samplers.c index 901e500f7..ca2d0c67d 100644 --- a/osfmk/kperf/thread_samplers.c +++ b/osfmk/kperf/thread_samplers.c @@ -321,9 +321,11 @@ out: } int -kperf_thread_dispatch_pend(struct kperf_context *context) +kperf_thread_dispatch_pend(struct kperf_context *context, + unsigned int actionid) { - return kperf_ast_pend(context->cur_thread, T_KPERF_AST_DISPATCH); + return kperf_ast_pend(context->cur_thread, T_KPERF_AST_DISPATCH, + actionid); } void diff --git a/osfmk/kperf/thread_samplers.h b/osfmk/kperf/thread_samplers.h index 09a188554..fcd932608 100644 --- 
a/osfmk/kperf/thread_samplers.h +++ b/osfmk/kperf/thread_samplers.h @@ -86,7 +86,7 @@ struct kperf_thread_dispatch { void kperf_thread_dispatch_sample(struct kperf_thread_dispatch *, struct kperf_context *); -int kperf_thread_dispatch_pend(struct kperf_context *); +int kperf_thread_dispatch_pend(struct kperf_context *, unsigned int actionid); void kperf_thread_dispatch_log(struct kperf_thread_dispatch *); void kperf_thread_inscyc_log(struct kperf_context *); diff --git a/osfmk/mach/arm/thread_status.h b/osfmk/mach/arm/thread_status.h index b12c02b5b..13fbdad05 100644 --- a/osfmk/mach/arm/thread_status.h +++ b/osfmk/mach/arm/thread_status.h @@ -35,6 +35,7 @@ #include #include +#include #include /* @@ -277,6 +278,21 @@ const_thread_state64(const arm_unified_thread_state_t *its) #define ARM_SAVED_STATE (THREAD_STATE_NONE + 1) +#if __ARM_VFP__ +#define VFPSAVE_ALIGN 16 +#define VFPSAVE_ATTRIB __attribute__((aligned (VFPSAVE_ALIGN))) +#define THREAD_ALIGN VFPSAVE_ALIGN + +/* + * vector floating point saved state + */ +struct arm_vfpsaved_state { + uint32_t r[64]; + uint32_t fpscr; + uint32_t fpexc; +}; +#endif + struct arm_saved_state { uint32_t r[13]; /* General purpose register r0-r12 */ uint32_t sp; /* Stack pointer r13 */ @@ -286,6 +302,15 @@ struct arm_saved_state { uint32_t fsr; /* Fault status */ uint32_t far; /* Virtual Fault Address */ uint32_t exception; /* exception number */ + +#if __ARM_VFP__ + /* VFP state */ + struct arm_vfpsaved_state VFPdata VFPSAVE_ATTRIB; + // for packing reasons chtread_self and DebugData + // are inside the the PcbData when __ARM_VFP__ is set + arm_debug_state_t *VFPpadding_DebugData; + vm_address_t VFPpadding_cthread_self; +#endif }; typedef struct arm_saved_state arm_saved_state_t; diff --git a/osfmk/mach/shared_region.h b/osfmk/mach/shared_region.h index 0faf73ee8..f6efdcbc1 100644 --- a/osfmk/mach/shared_region.h +++ b/osfmk/mach/shared_region.h @@ -63,10 +63,10 @@ #define SHARED_REGION_NESTING_MIN_PPC64 0x0000000010000000ULL 
#define SHARED_REGION_NESTING_MAX_PPC64 0x0000000010000000ULL -#define SHARED_REGION_BASE_ARM 0x1A000000ULL -#define SHARED_REGION_SIZE_ARM 0x26000000ULL -#define SHARED_REGION_NESTING_BASE_ARM 0x1A000000ULL -#define SHARED_REGION_NESTING_SIZE_ARM 0x26000000ULL +#define SHARED_REGION_BASE_ARM 0x40000000ULL +#define SHARED_REGION_SIZE_ARM 0x40000000ULL +#define SHARED_REGION_NESTING_BASE_ARM 0x40000000ULL +#define SHARED_REGION_NESTING_SIZE_ARM 0x40000000ULL #define SHARED_REGION_NESTING_MIN_ARM ? #define SHARED_REGION_NESTING_MAX_ARM ? diff --git a/osfmk/mach/sync_policy.h b/osfmk/mach/sync_policy.h index 605388fcc..648c6e16b 100644 --- a/osfmk/mach/sync_policy.h +++ b/osfmk/mach/sync_policy.h @@ -51,13 +51,8 @@ typedef int sync_policy_t; #define SYNC_POLICY_PREPOST 0x4 #define SYNC_POLICY_DISABLE_IRQ 0x8 - -/* - * If the waitq is IRQ safe, 0x10 suggests it's a waitq embedded in turnstile. - * If the waitq is not IRQ safe, 0x10 suggests it's a waitq of a port and should use it's turnstile safeq. - */ #define SYNC_POLICY_TURNSTILE 0x10 -#define SYNC_POLICY_PORT 0x10 +#define SYNC_POLICY_TURNSTILE_PROXY 0x20 #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/man/index.html b/osfmk/man/index.html deleted file mode 100644 index 2a9d0ff9e..000000000 --- a/osfmk/man/index.html +++ /dev/null @@ -1,448 +0,0 @@ - - - - Mach Kernel Interface Reference Manual - - -

Mach IPC Interface

-
-

-Mach IPC presents itself in a few forms: message queues, lock-sets, -and semaphores (more may be added in the future).  All share one common -charateristic: the capabilities presented by each are represented through -a handle known as a Mach port.  Specific rights represented in these -Mach port capability handles allow the underlying IPC object to be used and -manipulated in consistent ways.

- -

Mach Message Queue Interface

-
-

-mach_msg - Send and/or receive a message from the target port.
-mach_msg_overwrite - Send and/or receive messages with possible overwrite.
-

-Mach Message Queue Data Structures -

-mach_msg_descriptor - Specifies an element of a complex IPC message.
-mach_msg_header - Specifies the content of an IPC message header.
-

-
- -

Mach Lock-Set Interface

-
-

-lock_acquire - Acquire ownership a lock
-lock_handoff - Hand-off ownership of a lock.
-lock_handoff_accept - Accept lock ownership from a handoff.
-lock_make_stable - Stabilize the state of the specified lock.
-lock_release - Release ownership of a lock.
-lock_set_create - Create a new lock set.
-lock_set_destroy - Destroy a lock set and its associated locks.
-lock_try - Attempt to acquire access rights to a lock.
-

-
- -

Mach Semaphore Interface

-
-

-semaphore_create - Create a new semaphore.
-semaphore_destroy - Destroy a semaphore.
-semaphore_signal - Increments the semaphore count.
-semaphore_signal_all - Wake up all threads blocked on a semaphore.
-semaphore_wait - Wait on the specified semaphore.
-

-
- -

Mach Port Management Interface

-
-

-mach_port_allocate - Create caller-specified type of port right.
-mach_port_allocate_full - Create a port right with full Mach port semantics.
-mach_port_allocate_name - Create a port right with the caller-specified name.
-mach_port_allocate_qos - Allocate a port with specified "quality of service".
-mach_port_allocate_subsystem - Create a port right associated with the caller-specified subsystem.
-mach_port_deallocate - Decrement the target port right's user reference count.
-mach_port_destroy - Deallocate all port rights associated with specified name.
-mach_port_extract_right - Remove the specified right from the target task and return it to the caller.
-mach_port_get_attributes - Return information about target port as specified by the caller.
-mach_port_get_refs - Return the current count of user references on the target port right.
-mach_port_get_set_status - Return the port right names contained in the target port set.
-mach_port_insert_right - Insert the specified port right into the target task.
-mach_port_mod_refs - Modify the specified port right's count of user references.
-mach_port_move_member - Move the specified receive right into or out of the specified port set.
-mach_port_names - Return information about a task's port name space.
-mach_port_request_notification - Request notification of the specified port event type.
-mach_port_set_attributes - Set the target port's attributes.
-mach_port_set_mscount - Change the target port's make-send count.
-mach_port_set_seqno - Change the current value of the target port's sequence number.
-mach_port_type - Return the characteristics of the target port name.
-mach_reply_port - Allocate a new port and insert corresponding receive right in the calling task.
- mach_subsystem_create - Used by a server to register information about an RPC subsystem with the kernel.
-

-Mach Port Data Structures -

-mach_port_limits - Specifies a port's resource and message queue limits.
-mach_port_qos - Specifies a port's attributes with respect to "Quality Of Service."
-mach_port_status - Used to present a port's current status with respect to various important attributes.
-

-Mach Port Notification Callbacks -

-do_mach_notify_dead_name - Handle the current instance of a dead-name notification.
-do_mach_notify_no_senders - Handle the current instance of a no-more-senders notification.
-do_mach_notify_port_deleted - Handle the current instance of a port-deleted notification.
-do_mach_notify_port_destroyed - Handle the current instance of a port-destroyed notification.
-do_mach_notify_send_once - Handle the current instance of a send-once notification.
-

-Mach Port Notification Callback Server Helpers -

-notify_server - Detect and handle a kernel-generated IPC notification.
-

-
- -
- -

Mach Virtual Memory Interface

-
-

Mach Virtual Memory Address Space Manipulation Interface

-
-

-host_page_size - Provide the system's virtual page size.
-vm_allocate - Allocate a region of virtual memory.
-vm_behavior_set - Specify expected access patterns for the target VM region.
-vm_copy - Copy a region of virtual memory.
-vm_deallocate - Deallocate a region of virtual memory.
-vm_inherit - Set a VM region's inheritance attribute.
-vm_machine_attribute - Get/set the target memory region's special attributes.
-vm_map - Map the specified memory object to a region of virtual memory.
-vm_msync - Synchronize the specified region of virtual memory.
-vm_protect - Set access privilege attribute for a region of virtual memory.
-vm_read - Read the specified range of target task's address space.
-vm_region - Return description of a virtual memory region.
-vm_remap - Map memory objects in one address space to that of another's.
- vm_wire - Modify the target region's paging characteristics.
-vm_write - Write data to the specified address in the target address space.
-

-Data Structures -

-vm_region_basic_info - Defines the attributes of a task's memory region.
-vm_statistics - Defines statistics for the kernel's use of virtual memory.
-

-
- -

External Memory Management Interface

-
-The External Memory Management Interface (EMMI) is undergoing significant change in the Darwin system. -For this reason, the interface is not currently available to user-level programs. Even for kernel -extensions, use of these interfaces in not supported. Instead, the BSD filesystem's Universal Buffer Cache (UBC) -mechanism should be used.
-

-memory_object_change_attributes - Modify subset of memory object attributes.
-memory_object_destroy - Shut down a memory object.
-memory_object_get_attributes - Return current attributes for a memory object.
-memory_object_lock_request - Restrict access to memory object data.
-memory_object_synchronize_completed - Synchronized data has been processed.
-

-Data Structures -

-memory_object_attr_info - Defines memory object attributes.
-memory_object_perf_info- Specifies performance-related memory object attributes.
-

-External Memory Manager Interface Callbacks -

-memory_object_create - Assign a new memory object to the default memory manager.
-memory_object_data_initialize - Provide initial data for a new memory object.
-memory_object_data_request - Request that memory manager page-in specified data.
-memory_object_data_return - Return memory object data to the appropriate memory manager.
-memory_object_data_unlock - Request a memory manager release the lock on specific data.
-memory_object_init - Inform a memory manager on first use of a memory object.
-memory_object_synchronize - Request synchronization of data with backing store.
-memory_object_terminate - Relinquish access to a memory object.
-

-EMMI Callback Server Helpers -

-memory_object_default_server - Handle kernel operation request targeted for the default pager.
-memory_object_server - Handle kernel operation request aimed at a given memory manager.
-

-
- -

Default Memory Management Interface

-
-

-default_pager_add_segment - Add additional backing storage for a default pager.
-default_pager_backing_store_create - Create a backing storage object.
- default_pager_backing_store_delete - Delete a backing storage object.
-default_pager_backing_store_info - Return information about a backing storage object.
-default_pager_info - Furnish caller with information about the default pager.
-default_pager_object_create - Initialize a non-persistent memory object.
-host_default_memory_manager - Register/Lookup the host's default pager.
-

-
- -
- -

Process Management Interface

-
- -

Task Interface

-
-

-mach_ports_lookup - Provide caller with an array of the target task's well-known ports.
-mach_ports_register - Register an array of well-known ports on behalf of the target task.
-mach_task_self - Return a send right to the caller's task_self port.
-task_create - Create a new task.
-task_get_emulation_vector - Return an array identifying the target task's user-level system call handlers.
-task_get_exception_ports - Return send rights to the target task's exception ports.
-task_get_special_port - Return a send write to the indicated special port.
-task_info - Return per-task information according to specified flavor.
-task_resume - Decrement the target task's suspend count.
-task_sample - Sample the target task's thread program counters periodically.
-task_set_emulation - Establish a user-level handler for a system call.
-task_set_emulation_vector - Establish the target task's user-level system call handlers.
-task_set_exception_ports - Set target task's exception ports.
-task_set_info - Set task-specific information state.
-task_set_port_space - Set the size of the target task's port name space table.
-task_set_special_port - Set the indicated special port.
-task_suspend - Suspend the target task.
-task_swap_exception_ports - Set target task's exception ports, returning the previous exception ports.
-task_terminate - Terminate the target task and deallocate its resources.
-task_threads - Return the target task's list of threads.
-

-Task Data Structures -

-task_basic_info - Defines basic information for a task.
-task_thread_times_info - Defines thread execution times information for tasks.
-

-
- -

Thread Interface

-
-

-mach_thread_self - Returns the thread self port.
-thread_abort - Abort a thread.
-thread_abort_safely - Abort a thread, restartably.
-thread_create - Create a thread within a task.
-thread_create_running - Optimized creation of a running thread.
-thread_depress_abort - Cancel thread scheduling depression.
-thread_get_exception_ports - Return a send right to an exception port.
-thread_get_special_port - Return a send right to the caller-specified special port.
-thread_get_state - Return the execution state for a thread.
-thread_info - Return information about a thread.
-thread_resume - Resume a thread.
-thread_sample - Perform periodic PC sampling for a thread.
-thread_set_exception_ports - Set exception ports for a thread.
-thread_set_special_port - Set caller-specified special port belonging to the target thread.
-thread_set_state - Set the target thread's user-mode execution state.
-thread_suspend - Suspend a thread.
-thread_swap_exception_ports - Swap exception ports for a thread.
-thread_terminate - Destroy a thread.
-thread_wire - Mark the thread as privileged with respect to kernel resources.
-

-Thread Data Structures -

-thread_basic_info - Defines basic information for a thread.
-

-Thread Exception Callbacks -

-catch_exception_raise - Handles the occurrence of an exception within a thread.
-

-Thread Exception Callback Server Helpers -

-exc_server - Handle kernel-reported thread exception.
-

-
- -

Scheduling Interface

-
-

-task_policy - Set target task's default scheduling policy state.
-task_set_policy - Set target task's default scheduling policy state.
-thread_policy - Set target thread's scheduling policy state.
-thread_set_policy - Set target thread's scheduling policy state.
-thread_switch - Cause context switch with options.
-

-Scheduling Data Structures -

-policy_fifo_info - Specifies information associated with the system's First-In-First-Out scheduling policy.
-policy_rr_info - Specifies information associated with the system's Round Robin scheduling policy.
-policy_timeshare_info - Specifies information associated with the system's Timeshare scheduling policy.
-

-
-
- -

System Management Interface

-
- -

Host Interface

-
-

-host_get_clock_service - Return a send right to a kernel clock's service port.
-host_get_time - Returns the current time as seen by that host.
-host_info - Return information about a host.
-host_kernel_version - Return kernel version information for a host.
-host_statistics - Return statistics for a host.
-mach_host_self - Returns send rights to the task's host self port.
-

-Data Structures -

-host_basic_info - Used to present basic information about a host.
-host_load_info - Used to present a host's processor load information.
-host_sched_info - - Used to present the set of scheduler limits associated with the host.
-kernel_resource_sizes - Used to present the sizes of kernel's major structures.
-

-
- -

Host Control Interface

-
-

-host_adjust_time - Arranges for the time on a specified host to be gradually changed by an adjustment value.
-host_default_memory_manager - Set the default memory manager.
-host_get_boot_info - Return operator boot information.
-host_get_clock_control - Return a send right to a kernel clock's control port.
-host_processor_slots - Return a list of numbers that map processor slots to active processors.
-host_processors - Return a list of send rights representing all processor ports.
-host_reboot - Reboot this host.
-host_set_time - Establishes the time on the specified host.
-

-
- -

Host Security Interface

-
-

-host_security_create_task_token - Create a new task with an explicit security token.
-host_security_set_task_token - Change the target task's security token.
-

-
- -

Resource Accounting Interface

-
- -The Mach resource accounting mechanism is not functional in the current Mac OS X/Darwin system. It will become functional in a future release. - -

-ledger_create - Create a subordinate ledger.
-ledger_read - Return the ledger limit and balance.
-ledger_terminate - Destroy a ledger.
-ledger_transfer - Transfer resources from a parent ledger to a child.
-

-
- -

Processor Management Interface

-
-

-processor_control - Perform caller-specified operation on target processor.
-processor_exit - Exit a processor.
-processor_info - Return information about a processor.
-processor_start - Start a processor.
-

-Processor Data Structures -

-processor_basic_info - Defines the basic information about a processor.
-

-
- -

Processor Set Interface

-
- -The processor set interface allows for the grouping of tasks and -processors for the purpose of exclusive scheduling. These interface -are deprecated and should not be used in code that isn't tied -to a particular release of Mac OS X/Darwin. These will likely change -or disappear in a future release. - -

-host_processor_sets - Return a list of send rights representing all processor set name ports.
-host_processor_set_priv - Translate a processor set name port into a processor set control port.
-processor_assign - Assign a processor to a processor set.
-processor_get_assignment - Get current assignment for a processor.
-processor_set_create - Create a new processor set.
-processor_set_default - Return the default processor set.
-processor_set_destroy - Destroy the target processor set.
-processor_set_info - Return processor set state according to caller-specified flavor.
-processor_set_max_priority - Sets the maximum scheduling priority for a processor set.
-processor_set_policy_control - Set target processor set's scheduling policy state.
-processor_set_policy_disable - Enables a scheduling policy for a processor set.
-processor_set_policy_enable - Enables a scheduling policy for a processor set.
-processor_set_statistics - Return scheduling statistics for a processor set.
-processor_set_tasks - Return all tasks currently assigned to the target processor set.
-processor_set_threads - Return all threads currently assigned to the target processor set.
-task_assign - Assign a task to a processor set.
-task_assign_default - Assign a task to the default processor set.
-task_get_assignment - Create a new task with an explicit security token.
-thread_assign - Assign a thread to a processor set.
-thread_assign_default - Assign a thread to the default processor set.
-thread_get_assignment - Return the processor set to which a thread is assigned.
-

-Processor Set Data Structures -

-processor_set_basic_info - Defines the basic information about a processor set.
-processor_set_load_info - Defines the scheduling statistics for a processor set.
-

-
- -

Clock Interface

-
-

-clock_alarm - Set up an alarm.
-clock_get_attributes - Return attributes of a clock.
-clock_get_time - Return the current time.
-clock_map_time - Return a memory object that maps a clock.
-clock_set_attributes - Set a particular clock's attributes.
-clock_set_time - Set the current time.
-clock_sleep - Delay the invoking thread until a specified time.
-

-Clock Data Structures -

-mapped_tvalspec - Specifies the format the kernel uses to maintain a mapped clock's time.
-tvalspec - Defines format of system time values.
-

-Clock Interface Callbacks -

-clock_alarm_reply - Ring a preset alarm.
-

-Clock Callback Server Helpers -

- clock_reply_server - Handle kernel-generated alarm.
-

-
- -

Multi-Computer Support Interface

-
- -These multi-computer support interfaces are no longer supported by -the Mac OS X/Darwin kernel. If and when multi-computer support is -added back in, something like these will likely be added. - -

-host_page_size - Returns the page size for the given host.
-ledger_get_remote - Return send right to specified host's remote ledger port.
-ledger_set_remote - Set this host's remote ledger port.
-

-
- -
- -

Machine Specific Interface

-
- -

Intel 386 Support

-
-

-i386_get_ldt - Returns per-thread segment descriptors from the local descriptor table (LDT).
-i386_io_port_add - Adds a device to the I/O permission bitmap for a thread.
-i386_io_port_list - Returns a list of the devices named in the thread's I/O permission bitmap.
-i386_io_port_remove - Removes the specified device from the thread's I/O permission bitmap.
-i386_set_ldt - Allows a thread to have a private local descriptor table (LDT).
-

-
- -

PowerPC Support

-
-

-

-
- -
- - - - - diff --git a/osfmk/vm/vm_compressor.c b/osfmk/vm/vm_compressor.c index 6c5a42214..c79a03e57 100644 --- a/osfmk/vm/vm_compressor.c +++ b/osfmk/vm/vm_compressor.c @@ -598,12 +598,7 @@ vm_compressor_init(void) PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof(vm_compression_limit)); #ifdef CONFIG_EMBEDDED -#if XNU_TARGET_OS_WATCH - // rdar://problem/51012698 - vm_compressor_minorcompact_threshold_divisor = 40; -#else vm_compressor_minorcompact_threshold_divisor = 20; -#endif vm_compressor_majorcompact_threshold_divisor = 30; vm_compressor_unthrottle_threshold_divisor = 40; vm_compressor_catchup_threshold_divisor = 60; diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c index 277c96487..e3956937b 100644 --- a/osfmk/vm/vm_fault.c +++ b/osfmk/vm/vm_fault.c @@ -4088,7 +4088,9 @@ FastPmapEnter: } KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info.user_tag << 16) | (caller_prot << 8) | type_of_fault, m->vmp_offset, get_current_unique_pid(), 0); - + if (need_retry == FALSE) { + KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_FAST), get_current_unique_pid(), 0, 0, 0, 0); + } DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info.user_tag); } if (kr == KERN_SUCCESS && @@ -5087,6 +5089,7 @@ handle_copy_delay: } KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info.user_tag << 16) | (caller_prot << 8) | type_of_fault, m->vmp_offset, get_current_unique_pid(), 0); + KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_SLOW), get_current_unique_pid(), 0, 0, 0, 0); DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info.user_tag); } diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index 031cb8298..d130132c3 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ 
-13246,6 +13246,7 @@ protection_failure: *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry); *object = VME_OBJECT(entry); *out_prot = prot; + KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0); if (fault_info) { fault_info->interruptible = THREAD_UNINT; /* for now... */ @@ -17672,6 +17673,7 @@ vm_map_msync( local_map = VME_SUBMAP(entry); local_offset = VME_OFFSET(entry); + vm_map_reference(local_map); vm_map_unlock(map); if (vm_map_msync( local_map, @@ -17680,6 +17682,7 @@ vm_map_msync( sync_flags) == KERN_INVALID_ADDRESS) { had_hole = TRUE; } + vm_map_deallocate(local_map); continue; } object = VME_OBJECT(entry); diff --git a/osfmk/vm/vm_object.h b/osfmk/vm/vm_object.h index eedfb09e4..c4fde23b1 100644 --- a/osfmk/vm/vm_object.h +++ b/osfmk/vm/vm_object.h @@ -184,6 +184,7 @@ struct vm_object { * copy_call. */ struct vm_object *shadow; /* My shadow */ + memory_object_t pager; /* Where to get data */ union { vm_object_offset_t vou_shadow_offset; /* Offset into shadow */ @@ -196,7 +197,6 @@ struct vm_object { */ } vo_un2; - memory_object_t pager; /* Where to get data */ vm_object_offset_t paging_offset; /* Offset into memory object */ memory_object_control_t pager_control; /* Where data comes back */ @@ -328,12 +328,12 @@ struct vm_object { * they are updated via atomic compare and swap */ vm_object_offset_t last_alloc; /* last allocation offset */ + vm_offset_t cow_hint; /* last page present in */ + /* shadow but not in object */ int sequential; /* sequential access size */ uint32_t pages_created; uint32_t pages_used; - vm_offset_t cow_hint; /* last page present in */ - /* shadow but not in object */ /* hold object lock when altering */ unsigned int wimg_bits:8, /* cache WIMG bits */ @@ -373,8 +373,8 @@ struct vm_object { #endif /* VM_OBJECT_ACCESS_TRACKING */ uint8_t scan_collisions; + uint8_t __object4_unused_bits[1]; vm_tag_t wire_tag; - uint8_t __object4_unused_bits[2]; #if 
CONFIG_PHANTOM_CACHE uint32_t phantom_object_id; diff --git a/osfmk/vm/vm_pageout.h b/osfmk/vm/vm_pageout.h index 378c4765c..d9e16eab3 100644 --- a/osfmk/vm/vm_pageout.h +++ b/osfmk/vm/vm_pageout.h @@ -113,6 +113,9 @@ extern boolean_t vm_pressure_events_enabled; #define VM_REAL_FAULT_ADDR_PURGABLE 0x03 #define VM_REAL_FAULT_ADDR_EXTERNAL 0x04 #define VM_REAL_FAULT_ADDR_SHAREDCACHE 0x05 +#define VM_REAL_FAULT_FAST 0x06 +#define VM_REAL_FAULT_SLOW 0x07 +#define VM_MAP_LOOKUP_OBJECT 0x08 diff --git a/osfmk/x86_64/kpc_x86.c b/osfmk/x86_64/kpc_x86.c index ce27db8dd..08fd380f2 100644 --- a/osfmk/x86_64/kpc_x86.c +++ b/osfmk/x86_64/kpc_x86.c @@ -43,6 +43,8 @@ #include #include +#include + /* Fixed counter mask -- three counters, each with OS and USER */ #define IA32_FIXED_CTR_ENABLE_ALL_CTRS_ALL_RINGS (0x333) #define IA32_FIXED_CTR_ENABLE_ALL_PMI (0x888) @@ -67,16 +69,6 @@ IA32_FIXED_CTR_CTRL(void) return rdmsr64( MSR_IA32_PERF_FIXED_CTR_CTRL ); } -static uint64_t -IA32_FIXED_CTRx(uint32_t ctr) -{ -#ifdef USE_RDPMC - return rdpmc64(RDPMC_FIXED_COUNTER_SELECTOR | ctr); -#else /* !USE_RDPMC */ - return rdmsr64(MSR_IA32_PERF_FIXED_CTR0 + ctr); -#endif /* !USE_RDPMC */ -} - #ifdef FIXED_COUNTER_RELOAD static void wrIA32_FIXED_CTRx(uint32_t ctr, uint64_t value) @@ -326,37 +318,13 @@ kpc_set_fixed_config(kpc_config_t *configv) int kpc_get_fixed_counters(uint64_t *counterv) { - int i, n = kpc_fixed_count(); - -#ifdef FIXED_COUNTER_SHADOW - uint64_t status; - - /* snap the counters */ - for (i = 0; i < n; i++) { - counterv[i] = FIXED_SHADOW(ctr) + - (IA32_FIXED_CTRx(i) - FIXED_RELOAD(ctr)); - } - - /* Grab the overflow bits */ - status = rdmsr64(MSR_IA32_PERF_GLOBAL_STATUS); - - /* If the overflow bit is set for a counter, our previous read may or may not have been - * before the counter overflowed. Re-read any counter with it's overflow bit set so - * we know for sure that it has overflowed. 
The reason this matters is that the math - * is different for a counter that has overflowed. */ - for (i = 0; i < n; i++) { - if ((1ull << (i + 32)) & status) { - counterv[i] = FIXED_SHADOW(ctr) + - (kpc_fixed_max() - FIXED_RELOAD(ctr) + 1 /* Wrap */) + IA32_FIXED_CTRx(i); - } - } -#else - for (i = 0; i < n; i++) { - counterv[i] = IA32_FIXED_CTRx(i); - } -#endif - +#if MONOTONIC + mt_fixed_counts(counterv); return 0; +#else /* MONOTONIC */ +#pragma unused(counterv) + return ENOTSUP; +#endif /* !MONOTONIC */ } int diff --git a/tests/Makefile b/tests/Makefile index 78b2cfa4f..4c5e65c90 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -273,16 +273,31 @@ net_tuntests: CODE_SIGN_ENTITLEMENTS = network_entitlements.plist ifneq (osx,$(TARGET_NAME)) EXCLUDED_SOURCES += no32exec_35914211.c no32exec_35914211_helper.c -endif +else # target = osx +CUSTOM_TARGETS += no32exec_35914211_helper no32exec_35914211_helper_binprefs + +no32exec_35914211_helper: INVALID_ARCHS = x86_64 i386 +no32exec_35914211_helper: + $(CC) $(LDFLAGS) $(CFLAGS) -arch i386 no32exec_35914211_helper.c -o $(SYMROOT)/$@; + env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@; + +install-no32exec_35914211_helper: + mkdir -p $(INSTALLDIR) + cp $(SYMROOT)/no32exec_35914211_helper $(INSTALLDIR)/ -no32exec_35914211_helper: INVALID_ARCHS = x86_64 +no32exec_35914211_helper_binprefs: INVALID_ARCHS = x86_64 i386 no32exec_35914211_helper_binprefs: - $(CC) $(OTHER_CFLAGS) $(CFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) -ldarwintest -arch i386 -arch x86_64 \ - no32exec_35914211_helper_binprefs.c -o $(SYMROOT)/no32exec_35914211_helper_binprefs + $(CC) $(OTHER_CFLAGS) $(CFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) -arch i386 -arch x86_64 no32exec_35914211_helper.c -o $(SYMROOT)/$@; + env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@; + +install-no32exec_35914211_helper_binprefs: + mkdir -p $(INSTALLDIR) + cp 
$(SYMROOT)/no32exec_35914211_helper_binprefs $(INSTALLDIR)/ no32exec_35914211: INVALID_ARCHS = i386 no32exec_35914211: no32exec_35914211_helper no32exec_35914211: no32exec_35914211_helper_binprefs +endif # (osx,$(TARGET_NAME))) MIG:=SDKROOT=$(SDKROOT) $(shell xcrun -sdk "$(TARGETSDK)" -find mig) diff --git a/tests/no32exec_35914211.c b/tests/no32exec_35914211.c index b1f87634f..3ce06731d 100644 --- a/tests/no32exec_35914211.c +++ b/tests/no32exec_35914211.c @@ -7,23 +7,14 @@ #include #include -static int binprefs_child_is_64 = 0; - -static void -signal_handler(__unused int sig) -{ - binprefs_child_is_64 = 1; - return; -} - -T_DECL(no32exec_bootarg_with_spawn, "make sure the no32exec boot-arg is honored, using posix_spawn", T_META_BOOTARGS_SET("-no32exec")) +T_DECL(no32exec_bootarg_with_spawn, "make sure we can't posix_spawn 32-bit") { int spawn_ret, pid; char path[1024]; uint32_t size = sizeof(path); T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL); - T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), size, NULL); + T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), (unsigned long)size, NULL); spawn_ret = posix_spawn(&pid, path, NULL, NULL, NULL, NULL); if (spawn_ret == 0) { @@ -34,8 +25,30 @@ T_DECL(no32exec_bootarg_with_spawn, "make sure the no32exec boot-arg is honored, T_ASSERT_EQ(spawn_ret, EBADARCH, NULL); } -T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure the no32exec boot-arg is honored, using posix_spawn" - "with binprefs on a fat i386/x86_64 Mach-O", T_META_BOOTARGS_SET("-no32exec")) +T_DECL(no32_exec_bootarg_with_exec, "make sure we can't fork and exec 32-bit") +{ + int pid; + char path[1024]; + uint32_t size = sizeof(path); + + T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL); + T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), (unsigned long)size, NULL); + + pid = fork(); + T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "fork"); + + if (pid == 0) { /* child */ + execve(path, NULL, NULL); /* this should fail, 
resulting in the call to exit below */ + exit(errno); + } else { /* parent */ + int wait_ret = 0; + waitpid(pid, &wait_ret, 0); + T_QUIET; T_ASSERT_TRUE(WIFEXITED(wait_ret), "child should have called exit()"); + T_ASSERT_EQ(WEXITSTATUS(wait_ret), EBADARCH, "execve should set errno = EBADARCH"); + } +} + +T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure we honor no32exec, using posix_spawn with binprefs on a fat i386/x86_64 Mach-O") { int pid, ret; posix_spawnattr_t spawnattr; @@ -44,9 +57,7 @@ T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure the no32exec boot-arg is char path[1024]; uint32_t size = sizeof(path); T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL); - T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), size, NULL); - - T_QUIET; T_ASSERT_NE(signal(SIGUSR1, signal_handler), SIG_ERR, "signal"); + T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), (unsigned long)size, NULL); ret = posix_spawnattr_init(&spawnattr); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_init"); @@ -57,37 +68,37 @@ T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure the no32exec boot-arg is ret = posix_spawn(&pid, path, NULL, &spawnattr, NULL, NULL); T_ASSERT_EQ(ret, 0, "posix_spawn should succeed despite 32-bit binpref appearing first"); - sleep(1); - ret = kill(pid, SIGUSR1); // ping helper; helper should ping back if running 64-bit - T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kill"); - - ret = wait(NULL); + int wait_ret = 0; + ret = waitpid(pid, &wait_ret, 0); T_QUIET; T_ASSERT_EQ(ret, pid, "child pid"); - T_ASSERT_EQ(binprefs_child_is_64, 1, "child process should be running in 64-bit mode"); + T_QUIET; T_ASSERT_EQ(WIFEXITED(wait_ret), 1, "child process should have called exit()"); + T_ASSERT_EQ(WEXITSTATUS(wait_ret), 8, "child process should be running in 64-bit mode"); ret = posix_spawnattr_destroy(&spawnattr); T_QUIET; T_ASSERT_EQ(ret, 0, "posix_spawnattr_destroy"); } -T_DECL(no32_exec_bootarg_with_exec, "make sure the 
no32exec boot-arg is honored, using fork and exec", T_META_BOOTARGS_SET("-no32exec")) +T_DECL(no32exec_bootarg_with_32only_spawn_binprefs, "make sure we honor no32exec, using posix_spawn with 32-bit only binprefs on a fat i386/x86_64 Mach-O") { - int pid; + int pid, ret, spawn_ret; + posix_spawnattr_t spawnattr; + cpu_type_t cpuprefs[] = { CPU_TYPE_X86 }; + char path[1024]; uint32_t size = sizeof(path); - T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL); - T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), size, NULL); + T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), (unsigned long)size, NULL); - pid = fork(); - T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "fork"); + ret = posix_spawnattr_init(&spawnattr); + T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_init"); - if (pid == 0) { /* child */ - execve(path, NULL, NULL); /* this should fail, resulting in the call to exit below */ - exit(errno); - } else { /* parent */ - int wait_ret = 0; - waitpid(pid, &wait_ret, 0); - T_ASSERT_EQ(WEXITSTATUS(wait_ret), EBADARCH, "execve should set errno = EBADARCH"); - } + ret = posix_spawnattr_setbinpref_np(&spawnattr, sizeof(cpuprefs) / sizeof(cpuprefs[0]), cpuprefs, NULL); + T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_setbinpref_np"); + + spawn_ret = posix_spawn(&pid, path, NULL, &spawnattr, NULL, NULL); + T_ASSERT_EQ(spawn_ret, EBADARCH, "posix_spawn should return EBADARCH since only 32-bit binpref is requested"); + + ret = posix_spawnattr_destroy(&spawnattr); + T_QUIET; T_ASSERT_EQ(ret, 0, "posix_spawnattr_destroy"); } diff --git a/tests/no32exec_35914211_helper.c b/tests/no32exec_35914211_helper.c index 99fb6be2f..04069dcdc 100644 --- a/tests/no32exec_35914211_helper.c +++ b/tests/no32exec_35914211_helper.c @@ -1,6 +1,17 @@ -#include +/* This is a file that compiles as a 32-bit helper to test + * forking of 32-bit programs, now that 32-bit has been + * deprecated on macOS despite still requiring its support in + * the watchOS 
simulator. + */ -T_DECL(null_test, "nothing to see here") +#include +#include + +int +main(int argc __unused, char **argv) { - T_SKIP("nothing to see here"); + (void)argc; + size_t retval = sizeof(void *); + printf("%s(%d): sizeof(void *) = %lu\n", argv[0], getpid(), retval); + return (int)retval; } diff --git a/tests/no32exec_35914211_helper_binprefs.c b/tests/no32exec_35914211_helper_binprefs.c deleted file mode 100644 index 0909633eb..000000000 --- a/tests/no32exec_35914211_helper_binprefs.c +++ /dev/null @@ -1,34 +0,0 @@ -#include -#include -#include - -int can_signal_parent = 0; - -void -signal_handler(int sig) -{ - if (sig == SIGUSR1) { - can_signal_parent = 1; - } - return; -} - -T_DECL(no32exec_bootarg_with_spawn_binprefs_helper, "helper for no32exec_bootarg_with_spawn_binprefs test") -{ - unsigned long ptrSize = sizeof(long); - int ppid = getppid(); - - signal(SIGUSR1, signal_handler); - signal(SIGALRM, signal_handler); - - // parent will signal us if they're no32exec_bootarg_with_spawn_binprefs, otherwise timeout - alarm(3); - pause(); - - /* signal to parent process if we are running in 64-bit mode */ - if (can_signal_parent && ptrSize == 8) { - kill(ppid, SIGUSR1); - } - - T_SKIP("nothing to see here"); -} diff --git a/tools/lldbmacros/core/cvalue.py b/tools/lldbmacros/core/cvalue.py index e58c7752f..bcebeb495 100755 --- a/tools/lldbmacros/core/cvalue.py +++ b/tools/lldbmacros/core/cvalue.py @@ -49,7 +49,10 @@ class value(object): other = long(other) return me.__cmp__(other) if type(other) is value: - return int(self).__cmp__(int(other)) + try: + return int(self).__cmp__(int(other)) + except TypeError: # Try promoting to long + return long(self).__cmp__(long(other)) raise TypeError("Cannot compare value with type {}".format(type(other))) def __str__(self): diff --git a/tools/lldbmacros/ipc.py b/tools/lldbmacros/ipc.py index 6ac2b4ecc..88ea13b5b 100755 --- a/tools/lldbmacros/ipc.py +++ b/tools/lldbmacros/ipc.py @@ -1549,7 +1549,7 @@ def 
ShowMQueue(cmd_args=None, cmd_options={}): pset = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(psetoff) print PrintPortSetSummary.header PrintPortSetSummary(kern.GetValueFromAddress(pset, 'struct ipc_pset *'), space) - elif int(wq_type) == 2: + elif int(wq_type) in [2, 1]: portoff = getfieldoffset('struct ipc_port', 'ip_messages') port = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(portoff) print PrintPortSummary.header diff --git a/tools/lldbmacros/memory.py b/tools/lldbmacros/memory.py index a73dc5b8a..963db7bcc 100755 --- a/tools/lldbmacros/memory.py +++ b/tools/lldbmacros/memory.py @@ -2772,12 +2772,13 @@ def ShowTaskVMEntries(task, show_pager_info, show_all_shadows): return None showmapvme(task.map, 0, 0, show_pager_info, show_all_shadows, False) -@lldb_command("showmapvme", "A:B:PRST") +@lldb_command("showmapvme", "A:B:F:PRST") def ShowMapVME(cmd_args=None, cmd_options={}): """Routine to print out info about the specified vm_map and its vm entries usage: showmapvme [-A start] [-B end] [-S] [-P] Use -A flag to start at virtual address Use -B flag to end at virtual address + Use -F flag to find just the VME containing the given VA Use -S flag to show VM object shadow chains Use -P flag to show pager info (mapped file, compressed pages, ...) 
Use -R flag to reverse order @@ -2796,6 +2797,9 @@ def ShowMapVME(cmd_args=None, cmd_options={}): start_vaddr = unsigned(int(cmd_options['-A'], 16)) if "-B" in cmd_options: end_vaddr = unsigned(int(cmd_options['-B'], 16)) + if "-F" in cmd_options: + start_vaddr = unsigned(int(cmd_options['-F'], 16)) + end_vaddr = start_vaddr if "-P" in cmd_options: show_pager_info = True if "-S" in cmd_options: @@ -3117,7 +3121,7 @@ def showvmtags(cmd_args=None, cmd_options={}): if "-A" in cmd_options: all_tags = True page_size = unsigned(kern.globals.page_size) - nsites = unsigned(kern.globals.vm_allocation_tag_highest) + nsites = unsigned(kern.globals.vm_allocation_tag_highest) + 1 tagcounts = [0] * nsites tagpeaks = [0] * nsites tagmapped = [0] * nsites @@ -3139,7 +3143,7 @@ def showvmtags(cmd_args=None, cmd_options={}): total = 0 totalmapped = 0 - print " vm_allocation_tag_highest: {:<7d} ".format(nsites) + print " vm_allocation_tag_highest: {:<7d} ".format(nsites - 1) print " {:<7s} {:>7s} {:>7s} {:>7s} {:<50s}".format("tag.kmod", "peak", "size", "mapped", "name") for tag in range(nsites): if all_tags or tagcounts[tag] or tagmapped[tag]: @@ -4432,7 +4436,7 @@ def showmemoryentry(entry, idx=0, queue_len=0): if entry.is_sub_map == 1: showmapvme(entry.backing.map, 0, 0, show_pager_info, show_all_shadows) if entry.is_copy == 1: - showmapcopyvme(entry.backing.copy, 0, 0, 0, show_pager_info, show_all_shadows, 0) + showmapcopyvme(entry.backing.copy, 0, 0, show_pager_info, show_all_shadows, 0) if entry.is_sub_map == 0 and entry.is_copy == 0: showvmobject(entry.backing.object, entry.offset, entry.size, show_pager_info, show_all_shadows) diff --git a/tools/lldbmacros/misc.py b/tools/lldbmacros/misc.py index 237927c69..414a4e11d 100755 --- a/tools/lldbmacros/misc.py +++ b/tools/lldbmacros/misc.py @@ -738,6 +738,7 @@ def DumpRawTraceFile(cmd_args=[], cmd_options={}): if lp64 : KDBG_TIMESTAMP_MASK = 0xffffffffffffffff + KDBG_CPU_SHIFT = 0 else : KDBG_TIMESTAMP_MASK = 0x00ffffffffffffff 
KDBG_CPU_SHIFT = 56 @@ -967,7 +968,8 @@ def DumpRawTraceFile(cmd_args=[], cmd_options={}): htab[min_kdbp].kd_prev_timebase += 1 e.timestamp = htab[min_kdbp].kd_prev_timebase & KDBG_TIMESTAMP_MASK - e.timestamp |= (min_cpu << KDBG_CPU_SHIFT) + if not lp64: + e.timestamp |= (min_cpu << KDBG_CPU_SHIFT) else : htab[min_kdbp].kd_prev_timebase = earliest_time diff --git a/tools/lldbmacros/process.py b/tools/lldbmacros/process.py index cf7afc73c..3a107da8b 100755 --- a/tools/lldbmacros/process.py +++ b/tools/lldbmacros/process.py @@ -1547,8 +1547,8 @@ def GetLedgerEntrySummary(ledger_template, ledger, i, show_footprint_interval_ma else: out_str += " - " - if (unsigned(ledger.le_warn_level) != ledger_limit_infinity): - out_str += "{:9d} ".format((unsigned(ledger.le_warn_level) * 100) / unsigned(ledger.le_limit)) + if (unsigned(ledger.le_warn_percent) < 65535): + out_str += "{:9d} ".format(unsigned(ledger.le_warn_percent * 100. / 65536)) else: out_str += " - " diff --git a/tools/lldbmacros/waitq.py b/tools/lldbmacros/waitq.py index 6768635c0..dd3a38eb1 100755 --- a/tools/lldbmacros/waitq.py +++ b/tools/lldbmacros/waitq.py @@ -7,7 +7,7 @@ import sys def GetWaitqStateStr(waitq): wq_types = { 0: 'INV', - 1: '???', + 1: ' TS', 2: ' Q', 3: 'SET' } diff --git a/tools/lldbmacros/xnu.py b/tools/lldbmacros/xnu.py index 7ec9ca7c8..473e06a8c 100755 --- a/tools/lldbmacros/xnu.py +++ b/tools/lldbmacros/xnu.py @@ -984,7 +984,7 @@ def WalkList(cmd_args=[], cmd_options={}): else: print "{0: <#020x}".format(i) -def iotrace_parse_Copt(Copt): +def trace_parse_Copt(Copt): """Parses the -C option argument and returns a list of CPUs """ cpusOpt = Copt @@ -1017,30 +1017,17 @@ def iotrace_parse_Copt(Copt): return chosen_cpus -@lldb_command('iotrace', 'C:N:S:RB') -def IOTrace_cmd(cmd_args=[], cmd_options={}): - """ Prints the iotrace ring buffers for all CPUs by default. 
- Arguments: - -B : Print backtraces for each ring entry - -C [,...,] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a - single CPU number or a range separated by a dash (e.g. "0-3")) - -N : Limit output to the first entries (across all chosen CPUs) - -R : Display results in reverse-sorted order (oldest first; default is newest-first) - -S : Sort output by specified iotrace_entry_t field name (instead of by timestamp) +IDX_CPU = 0 +IDX_RINGPOS = 1 +IDX_RINGENTRY = 2 +def Trace_cmd(cmd_args=[], cmd_options={}, headerString=lambda:"", entryString=lambda x:"", ring=[], entries_per_cpu=0, max_backtraces=0): + """Generic trace dumper helper function """ - IDX_CPU = 0 - IDX_RINGPOS = 1 - IDX_RINGENTRY = 2 - MAX_IOTRACE_BACKTRACES = 16 - - if kern.arch != "x86_64": - print "Sorry, iotrace is an x86-only command." - return if '-S' in cmd_options: field_arg = cmd_options['-S'] try: - getattr(kern.globals.iotrace_ring[0][0], field_arg) + getattr(ring[0][0], field_arg) sort_key_field_name = field_arg except AttributeError: raise ArgumentError("Invalid sort key field name `%s'" % field_arg) @@ -1048,7 +1035,7 @@ def IOTrace_cmd(cmd_args=[], cmd_options={}): sort_key_field_name = 'start_time_abs' if '-C' in cmd_options: - chosen_cpus = iotrace_parse_Copt(cmd_options['-C']) + chosen_cpus = trace_parse_Copt(cmd_options['-C']) else: chosen_cpus = [x for x in range(kern.globals.real_ncpus)] @@ -1066,7 +1053,7 @@ def IOTrace_cmd(cmd_args=[], cmd_options={}): # the original ring index, and the iotrace entry. 
entries = [] for x in chosen_cpus: - ring_slice = [(x, y, kern.globals.iotrace_ring[x][y]) for y in range(kern.globals.iotrace_entries_per_cpu)] + ring_slice = [(x, y, ring[x][y]) for y in range(entries_per_cpu)] entries.extend(ring_slice) total_entries = len(entries) @@ -1086,31 +1073,90 @@ def IOTrace_cmd(cmd_args=[], cmd_options={}): else: entries_to_display = total_entries - print "%-19s %-8s %-10s %-20s SZ %-18s %-17s DATA" % ( - "START TIME", - "DURATION", - "CPU#[RIDX]", - " TYPE", - " VIRT ADDR", - " PHYS ADDR") + print headerString() for x in xrange(entries_to_display): - print "%-20u(%6u) %6s[%02d] %-20s %d 0x%016x 0x%016x 0x%x" % ( - entries[x][IDX_RINGENTRY].start_time_abs, - entries[x][IDX_RINGENTRY].duration, - "CPU%d" % entries[x][IDX_CPU], - entries[x][IDX_RINGPOS], - str(entries[x][IDX_RINGENTRY].iotype).split("=")[1].strip(), - entries[x][IDX_RINGENTRY].size, - entries[x][IDX_RINGENTRY].vaddr, - entries[x][IDX_RINGENTRY].paddr, - entries[x][IDX_RINGENTRY].val) + print entryString(entries[x]) + if backtraces: - for btidx in range(MAX_IOTRACE_BACKTRACES): + for btidx in range(max_backtraces): nextbt = entries[x][IDX_RINGENTRY].backtrace[btidx] if nextbt == 0: break print "\t" + GetSourceInformationForAddress(nextbt) + + +@lldb_command('iotrace', 'C:N:S:RB') +def IOTrace_cmd(cmd_args=[], cmd_options={}): + """ Prints the iotrace ring buffers for all CPUs by default. + Arguments: + -B : Print backtraces for each ring entry + -C [,...,] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a + single CPU number or a range separated by a dash (e.g. "0-3")) + -N : Limit output to the first entries (across all chosen CPUs) + -R : Display results in reverse-sorted order (oldest first; default is newest-first) + -S : Sort output by specified iotrace_entry_t field name (instead of by timestamp) + """ + MAX_IOTRACE_BACKTRACES = 16 + + if kern.arch != "x86_64": + print "Sorry, iotrace is an x86-only command." 
+ return + + hdrString = lambda : "%-19s %-8s %-10s %-20s SZ %-18s %-17s DATA" % ( + "START TIME", + "DURATION", + "CPU#[RIDX]", + " TYPE", + " VIRT ADDR", + " PHYS ADDR") + + entryString = lambda x : "%-20u(%6u) %6s[%02d] %-20s %-2d 0x%016x 0x%016x 0x%x" % ( + x[IDX_RINGENTRY].start_time_abs, + x[IDX_RINGENTRY].duration, + "CPU%d" % x[IDX_CPU], + x[IDX_RINGPOS], + str(x[IDX_RINGENTRY].iotype).split("=")[1].strip(), + x[IDX_RINGENTRY].size, + x[IDX_RINGENTRY].vaddr, + x[IDX_RINGENTRY].paddr, + x[IDX_RINGENTRY].val) + + Trace_cmd(cmd_args, cmd_options, hdrString, entryString, kern.globals.iotrace_ring, kern.globals.iotrace_entries_per_cpu, MAX_IOTRACE_BACKTRACES) + + +@lldb_command('ttrace', 'C:N:S:RB') +def TrapTrace_cmd(cmd_args=[], cmd_options={}): + """ Prints the traptrace ring buffers for all CPUs by default. + Arguments: + -B : Print backtraces for each ring entry + -C [,...,] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a + single CPU number or a range separated by a dash (e.g. "0-3")) + -N : Limit output to the first entries (across all chosen CPUs) + -R : Display results in reverse-sorted order (oldest first; default is newest-first) + -S : Sort output by specified traptrace_entry_t field name (instead of by timestamp) + """ + MAX_TRAPTRACE_BACKTRACES = 8 + + if kern.arch != "x86_64": + print "Sorry, ttrace is an x86-only command." 
+ return + + hdrString = lambda : "%-30s CPU#[RIDX] VECT INTERRUPTED_THREAD PREMLV INTRLV INTERRUPTED_PC" % ( + "START TIME (DURATION [ns])") + entryString = lambda x : "%-20u(%6s) %8s[%02d] 0x%02x 0x%016x %6d %6d %s" % ( + x[IDX_RINGENTRY].start_time_abs, + str(x[IDX_RINGENTRY].duration) if hex(x[IDX_RINGENTRY].duration) != "0xffffffffffffffff" else 'inprog', + "CPU%d" % x[IDX_CPU], + x[IDX_RINGPOS], + int(x[IDX_RINGENTRY].vector), + x[IDX_RINGENTRY].curthread, + x[IDX_RINGENTRY].curpl, + x[IDX_RINGENTRY].curil, + GetSourceInformationForAddress(x[IDX_RINGENTRY].interrupted_pc)) + + Trace_cmd(cmd_args, cmd_options, hdrString, entryString, kern.globals.traptrace_ring, + kern.globals.traptrace_entries_per_cpu, MAX_TRAPTRACE_BACKTRACES) diff --git a/tools/tests/zero-to-n/zero-to-n.c b/tools/tests/zero-to-n/zero-to-n.c index cd1963c56..db9ed81c5 100644 --- a/tools/tests/zero-to-n/zero-to-n.c +++ b/tools/tests/zero-to-n/zero-to-n.c @@ -66,6 +66,7 @@ typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY #define CONSTRAINT_NANOS (20000000ll) /* 20 ms */ #define COMPUTATION_NANOS (10000000ll) /* 10 ms */ +#define RT_CHURN_COMP_NANOS ( 1000000ll) /* 1 ms */ #define TRACEWORTHY_NANOS (10000000ll) /* 10 ms */ #define TRACEWORTHY_NANOS_TEST ( 2000000ll) /* 2 ms */ @@ -105,8 +106,10 @@ static uint32_t g_iteration_sleeptime_us = 0; static uint32_t g_priority = 0; static uint32_t g_churn_pri = 0; static uint32_t g_churn_count = 0; +static uint32_t g_rt_churn_count = 0; static pthread_t* g_churn_threads = NULL; +static pthread_t* g_rt_churn_threads = NULL; /* Threshold for dropping a 'bad run' tracepoint */ static uint64_t g_traceworthy_latency_ns = TRACEWORTHY_NANOS; @@ -126,6 +129,8 @@ static boolean_t g_drop_priority = FALSE; /* Test whether realtime threads are scheduled on the separate CPUs */ static boolean_t g_test_rt = FALSE; +static boolean_t g_rt_churn = FALSE; + /* On SMT machines, test whether realtime threads are scheduled on the correct 
CPUs */ static boolean_t g_test_rt_smt = FALSE; @@ -151,6 +156,8 @@ static semaphore_t g_broadcastsem; static semaphore_t g_leadersem; static semaphore_t g_readysem; static semaphore_t g_donesem; +static semaphore_t g_rt_churn_sem; +static semaphore_t g_rt_churn_start_sem; /* Global variables (chain) */ static semaphore_t *g_semarr; @@ -270,6 +277,129 @@ join_churn_threads(void) } } +/* + * Set policy + */ +static int +rt_churn_thread_setup(void) +{ + kern_return_t kr; + thread_time_constraint_policy_data_t pol; + + /* Hard-coded realtime parameters (similar to what Digi uses) */ + pol.period = 100000; + pol.constraint = (uint32_t) nanos_to_abs(CONSTRAINT_NANOS * 2); + pol.computation = (uint32_t) nanos_to_abs(RT_CHURN_COMP_NANOS * 2); + pol.preemptible = 0; /* Ignored by OS */ + + kr = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, + (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT); + mach_assert_zero_t(0, kr); + + return 0; +} + +static void * +rt_churn_thread(__unused void *arg) +{ + rt_churn_thread_setup(); + + for (uint32_t i = 0; i < g_iterations; i++) { + kern_return_t kr = semaphore_wait_signal(g_rt_churn_start_sem, g_rt_churn_sem); + mach_assert_zero_t(0, kr); + + volatile double x = 0.0; + volatile double y = 0.0; + + uint64_t endspin = mach_absolute_time() + nanos_to_abs(RT_CHURN_COMP_NANOS); + while (mach_absolute_time() < endspin) { + y = y + 1.5 + x; + x = sqrt(y); + } + } + + kern_return_t kr = semaphore_signal(g_rt_churn_sem); + mach_assert_zero_t(0, kr); + + return NULL; +} + +static void +wait_for_rt_churn_threads(void) +{ + for (uint32_t i = 0; i < g_rt_churn_count; i++) { + kern_return_t kr = semaphore_wait(g_rt_churn_sem); + mach_assert_zero_t(0, kr); + } +} + +static void +start_rt_churn_threads(void) +{ + for (uint32_t i = 0; i < g_rt_churn_count; i++) { + kern_return_t kr = semaphore_signal(g_rt_churn_start_sem); + mach_assert_zero_t(0, kr); + } +} + +static void +create_rt_churn_threads(void) +{ + if 
(g_rt_churn_count == 0) { + /* Leave 1 CPU to ensure that the main thread can make progress */ + g_rt_churn_count = g_numcpus - 1; + } + + errno_t err; + + struct sched_param param = { .sched_priority = (int)g_churn_pri }; + pthread_attr_t attr; + + /* Array for churn threads */ + g_rt_churn_threads = (pthread_t*) valloc(sizeof(pthread_t) * g_rt_churn_count); + assert(g_rt_churn_threads); + + if ((err = pthread_attr_init(&attr))) { + errc(EX_OSERR, err, "pthread_attr_init"); + } + + if ((err = pthread_attr_setschedparam(&attr, &param))) { + errc(EX_OSERR, err, "pthread_attr_setschedparam"); + } + + if ((err = pthread_attr_setschedpolicy(&attr, SCHED_RR))) { + errc(EX_OSERR, err, "pthread_attr_setschedpolicy"); + } + + for (uint32_t i = 0; i < g_rt_churn_count; i++) { + pthread_t new_thread; + + if ((err = pthread_create(&new_thread, &attr, rt_churn_thread, NULL))) { + errc(EX_OSERR, err, "pthread_create"); + } + g_rt_churn_threads[i] = new_thread; + } + + if ((err = pthread_attr_destroy(&attr))) { + errc(EX_OSERR, err, "pthread_attr_destroy"); + } + + /* Wait until all threads have checked in */ + wait_for_rt_churn_threads(); +} + +static void +join_rt_churn_threads(void) +{ + /* Rejoin rt churn threads */ + for (uint32_t i = 0; i < g_rt_churn_count; i++) { + errno_t err = pthread_join(g_rt_churn_threads[i], NULL); + if (err) { + errc(EX_OSERR, err, "pthread_join %d", i); + } + } +} + /* * Figure out what thread policy to use */ @@ -828,6 +958,12 @@ main(int argc, char **argv) kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0); mach_assert_zero(kr); + kr = semaphore_create(mach_task_self(), &g_rt_churn_sem, SYNC_POLICY_FIFO, 0); + mach_assert_zero(kr); + + kr = semaphore_create(mach_task_self(), &g_rt_churn_start_sem, SYNC_POLICY_FIFO, 0); + mach_assert_zero(kr); + atomic_store_explicit(&g_done_threads, 0, memory_order_relaxed); /* Create the threads */ @@ -850,6 +986,9 @@ main(int argc, char **argv) if (g_churn_pri) { create_churn_threads(); } +
if (g_rt_churn) { + create_rt_churn_threads(); + } /* Let everyone get settled */ kr = semaphore_wait(g_main_sem); @@ -869,6 +1008,11 @@ main(int argc, char **argv) g_one_long_spin_id = (uint32_t)rand() % g_numthreads; } + if (g_rt_churn) { + start_rt_churn_threads(); + usleep(100); + } + debug_log("%d Main thread reset\n", i); atomic_store_explicit(&g_done_threads, 0, memory_order_seq_cst); @@ -883,6 +1027,10 @@ main(int argc, char **argv) assert(atomic_load_explicit(&g_done_threads, memory_order_relaxed) == g_numthreads); + if (g_rt_churn) { + wait_for_rt_churn_threads(); + } + /* * We report the worst latencies relative to start time * and relative to the lead worker thread. @@ -933,6 +1081,10 @@ main(int argc, char **argv) } } + if (g_rt_churn) { + join_rt_churn_threads(); + } + if (g_churn_pri) { join_churn_threads(); } @@ -1104,6 +1256,7 @@ parse_args(int argc, char *argv[]) OPT_PRIORITY, OPT_CHURN_PRI, OPT_CHURN_COUNT, + OPT_RT_CHURN_COUNT, }; static struct option longopts[] = { @@ -1113,6 +1266,7 @@ parse_args(int argc, char *argv[]) { "priority", required_argument, NULL, OPT_PRIORITY }, { "churn-pri", required_argument, NULL, OPT_CHURN_PRI }, { "churn-count", required_argument, NULL, OPT_CHURN_COUNT }, + { "rt-churn-count", required_argument, NULL, OPT_RT_CHURN_COUNT }, { "switched_apptype", no_argument, (int*)&g_seen_apptype, TRUE }, { "spin-one", no_argument, (int*)&g_do_one_long_spin, TRUE }, { "spin-all", no_argument, (int*)&g_do_all_spin, TRUE }, @@ -1122,6 +1276,7 @@ parse_args(int argc, char *argv[]) { "test-rt", no_argument, (int*)&g_test_rt, TRUE }, { "test-rt-smt", no_argument, (int*)&g_test_rt_smt, TRUE }, { "test-rt-avoid0", no_argument, (int*)&g_test_rt_avoid0, TRUE }, + { "rt-churn", no_argument, (int*)&g_rt_churn, TRUE }, { "histogram", no_argument, (int*)&g_histogram, TRUE }, { "verbose", no_argument, (int*)&g_verbose, TRUE }, { "help", no_argument, NULL, 'h' }, @@ -1153,6 +1308,9 @@ parse_args(int argc, char *argv[]) case OPT_CHURN_COUNT: 
g_churn_count = read_dec_arg(); break; + case OPT_RT_CHURN_COUNT: + g_rt_churn_count = read_dec_arg(); + break; case '?': case 'h': default: diff --git a/tools/trace/ktruss.lua b/tools/trace/ktruss.lua new file mode 100755 index 000000000..514a8b7e6 --- /dev/null +++ b/tools/trace/ktruss.lua @@ -0,0 +1,28 @@ +#!/usr/local/bin/recon + +local ktrace = require 'ktrace' + +if not arg[1] or arg[1] == '-h' then + print[[ +usage: ktruss [ ...] + +Use Kernel TRace to print User Space Syscalls (ktruss).]] + os.exit(arg[1] == nil) +end + +local sess = ktrace.Session.new() + +for i = 1, #arg do + sess:add_callback_pair('BSC_' .. arg[i], function (start, finish) + print(('%s[%d]: %s(0x%x, 0x%x, 0x%x, 0x%x) -> %d'):format( + sess:procname_for_threadid(start.threadid), + sess:pid_for_threadid(start.threadid), arg[i], start[1], start[2], + start[3], start[4], finish[2])) + end) +end + +local ok, err = sess:start() +if not ok then + io.stderr:write('tracing failed: ', err, '\n') + os.exit(1) +end -- 2.47.2