CLOSURE(prf)
CLOSURE(proc_is64bit)
CLOSURE(proc_selfname)
+ CRITICAL(rbtrace_bt)
CRITICAL(register_cpu_setup_func)
CRITICAL(ret64_iret)
CRITICAL(ret_to_user)
ARM_ONLY(timer_state_event)
CRITICAL(tmrCvt)
CRITICAL(trap_from_kernel)
+ CRITICAL(traptrace_)
CRITICAL(tsc_)
CRITICAL(uart_putc)
CRITICAL(unlock_debugger)
0, 0,
spin_in_the_kernel, "I", "Spin forever");
+
+extern int traptrace_enabled;
+SYSCTL_INT(_machdep_misc, OID_AUTO, traptrace_enabled,
+ CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &traptrace_enabled, 0, "Enable/disable trap trace");
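+/*
+ * This OID registers under the machdep.misc node, so on DEVELOPMENT/DEBUG
+ * kernels (per the surrounding #if) it can presumably be toggled at runtime
+ * with: sysctl machdep.misc.traptrace_enabled=1
+ */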
+
#endif /* DEVELOPMENT || DEBUG */
#include <net/restricted_in_port.h> /* for restricted_in_port_init() */
#include <kern/assert.h> /* for assert() */
#include <sys/kern_overrides.h> /* for init_system_override() */
+#include <sys/lockf.h> /* for lf_init() */
#include <net/init.h>
__private_extern__ int bootarg_no_vnode_jetsam = 0;
#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
+__private_extern__ int bootarg_no_vnode_drain = 0;
+
/*
* Prevent kernel-based ASLR from being used, for testing.
*/
bsd_init_kprintf("calling vfsinit\n");
vfsinit();
+ /* Initialize file locks. */
+ bsd_init_kprintf("calling lf_init\n");
+ lf_init();
+
#if CONFIG_PROC_UUID_POLICY
/* Initialize the proc_uuid_policy subsystem */
bsd_init_kprintf("calling proc_uuid_policy_init()\n");
}
#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
+ if (PE_parse_boot_argn("-no_vnode_drain", namep, sizeof(namep))) {
+ bootarg_no_vnode_drain = 1;
+ }
#if CONFIG_EMBEDDED
/*
/*
* IOP(s)
*
- * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html
- *
* IOP(s) are auxiliary cores that want to participate in kdebug event logging.
* They are registered dynamically. Each is assigned a cpu_id at registration.
*
void *userdata;
struct sockaddr_ctl sac;
u_int32_t usecount;
+ u_int32_t kcb_usecount;
};
#ifndef ROUNDUP64
return 0;
}
+/*
+ * Use this function to serialize calls into the kctl subsystem
+ */
+static void
+ctl_kcb_increment_use_count(struct ctl_cb *kcb, lck_mtx_t *mutex_held)
+{
+ LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
+ while (kcb->kcb_usecount > 0) {
+ msleep(&kcb->kcb_usecount, mutex_held, PSOCK | PCATCH, "kcb_usecount", NULL);
+ }
+ kcb->kcb_usecount++;
+}
+
+static void
+clt_kcb_decrement_use_count(struct ctl_cb *kcb)
+{
+ assert(kcb->kcb_usecount != 0);
+ kcb->kcb_usecount--;
+ wakeup_one((caddr_t)&kcb->kcb_usecount);
+}
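+
+/*
+ * Illustrative usage, summarizing the pattern in the handlers below: every
+ * kctl entry point that may drop the socket lock brackets its body with
+ *
+ *     lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ *     ctl_kcb_increment_use_count(kcb, mtx_held);
+ *     ... work that may socket_unlock()/socket_lock() ...
+ *     clt_kcb_decrement_use_count(kcb);
+ *
+ * so that at most one thread runs inside the kctl callbacks for a given
+ * control block at a time.
+ */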
+
static int
ctl_detach(struct socket *so)
{
return 0;
}
+ lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ ctl_kcb_increment_use_count(kcb, mtx_held);
+
if (kcb->kctl != NULL && kcb->kctl->bind != NULL &&
kcb->userdata != NULL && !(so->so_state & SS_ISCONNECTED)) {
// The unit was bound, but not connected
soisdisconnected(so);
so->so_flags |= SOF_PCBCLEARING;
+ clt_kcb_decrement_use_count(kcb);
return 0;
}
panic("ctl_bind so_pcb null\n");
}
+ lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ ctl_kcb_increment_use_count(kcb, mtx_held);
+
error = ctl_setup_kctl(so, nam, p);
if (error) {
- return error;
+ goto out;
}
if (kcb->kctl == NULL) {
}
if (kcb->kctl->bind == NULL) {
- return EINVAL;
+ error = EINVAL;
+ goto out;
}
socket_unlock(so, 0);
error = (*kcb->kctl->bind)(kcb->kctl->kctlref, &kcb->sac, &kcb->userdata);
socket_lock(so, 0);
+out:
+ clt_kcb_decrement_use_count(kcb);
return error;
}
panic("ctl_connect so_pcb null\n");
}
+ lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ ctl_kcb_increment_use_count(kcb, mtx_held);
+
error = ctl_setup_kctl(so, nam, p);
if (error) {
- return error;
+ goto out;
}
if (kcb->kctl == NULL) {
kctlstat.kcs_conn_fail++;
lck_mtx_unlock(ctl_mtx);
}
+out:
+ clt_kcb_decrement_use_count(kcb);
return error;
}
struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb;
if ((kcb = (struct ctl_cb *)so->so_pcb)) {
+ lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ ctl_kcb_increment_use_count(kcb, mtx_held);
struct kctl *kctl = kcb->kctl;
if (kctl && kctl->disconnect) {
kctlstat.kcs_gencnt++;
lck_mtx_unlock(ctl_mtx);
socket_lock(so, 0);
+ clt_kcb_decrement_use_count(kcb);
}
return 0;
}
static int
ctl_usr_rcvd(struct socket *so, int flags)
{
+ int error = 0;
struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb;
struct kctl *kctl;
+ if (kcb == NULL) {
+ return ENOTCONN;
+ }
+
+ lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ ctl_kcb_increment_use_count(kcb, mtx_held);
+
if ((kctl = kcb->kctl) == NULL) {
- return EINVAL;
+ error = EINVAL;
+ goto out;
}
if (kctl->rcvd) {
ctl_sbrcv_trim(so);
- return 0;
+out:
+ clt_kcb_decrement_use_count(kcb);
+ return error;
}
static int
error = ENOTCONN;
}
+ lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ ctl_kcb_increment_use_count(kcb, mtx_held);
+
if (error == 0 && (kctl = kcb->kctl) == NULL) {
error = EINVAL;
}
if (error != 0) {
OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_fail);
}
+ clt_kcb_decrement_use_count(kcb);
+
return error;
}
error = ENOTCONN;
}
+ lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ ctl_kcb_increment_use_count(kcb, mtx_held);
+
if (error == 0 && (kctl = kcb->kctl) == NULL) {
error = EINVAL;
}
if (error != 0) {
OSIncrementAtomic64((SInt64 *)&kctlstat.kcs_send_list_fail);
}
+ clt_kcb_decrement_use_count(kcb);
+
return error;
}
return EINVAL;
}
+ lck_mtx_t *mtx_held = socket_getlock(so, PR_F_WILLUNLOCK);
+ ctl_kcb_increment_use_count(kcb, mtx_held);
+
switch (sopt->sopt_dir) {
case SOPT_SET:
if (kctl->setopt == NULL) {
- return ENOTSUP;
+ error = ENOTSUP;
+ goto out;
}
if (sopt->sopt_valsize != 0) {
MALLOC(data, void *, sopt->sopt_valsize, M_TEMP,
M_WAITOK | M_ZERO);
if (data == NULL) {
- return ENOMEM;
+ error = ENOMEM;
+ goto out;
}
error = sooptcopyin(sopt, data,
sopt->sopt_valsize, sopt->sopt_valsize);
case SOPT_GET:
if (kctl->getopt == NULL) {
- return ENOTSUP;
+ error = ENOTSUP;
+ goto out;
}
if (sopt->sopt_valsize && sopt->sopt_val) {
MALLOC(data, void *, sopt->sopt_valsize, M_TEMP,
M_WAITOK | M_ZERO);
if (data == NULL) {
- return ENOMEM;
+ error = ENOMEM;
+ goto out;
}
/*
* 4108337 - copy user data in case the
}
break;
}
+
+out:
+ clt_kcb_decrement_use_count(kcb);
return error;
}
/*
* The kernel has a wrapper in place that returns the same data
- * as is collected here, in kn_hook64. Any changes to how
+ * as is collected here, in kn_hook32. Any changes to how
* NOTE_EXITSTATUS and NOTE_EXIT_DETAIL are collected
* should also be reflected in the proc_pidnoteexit() wrapper.
*/
if (event == NOTE_EXIT) {
- kn->kn_hook64 = 0;
+ kn->kn_hook32 = 0;
if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) {
kn->kn_fflags |= NOTE_EXITSTATUS;
- kn->kn_hook64 |= (hint & NOTE_PDATAMASK);
+ kn->kn_hook32 |= (hint & NOTE_PDATAMASK);
}
if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) {
kn->kn_fflags |= NOTE_EXIT_DETAIL;
if ((kn->kn_proc->p_lflag &
P_LTERM_DECRYPTFAIL) != 0) {
- kn->kn_hook64 |= NOTE_EXIT_DECRYPTFAIL;
+ kn->kn_hook32 |= NOTE_EXIT_DECRYPTFAIL;
}
if ((kn->kn_proc->p_lflag &
P_LTERM_JETSAM) != 0) {
- kn->kn_hook64 |= NOTE_EXIT_MEMORY;
+ kn->kn_hook32 |= NOTE_EXIT_MEMORY;
switch (kn->kn_proc->p_lflag & P_JETSAM_MASK) {
case P_JETSAM_VMPAGESHORTAGE:
- kn->kn_hook64 |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE;
+ kn->kn_hook32 |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE;
break;
case P_JETSAM_VMTHRASHING:
- kn->kn_hook64 |= NOTE_EXIT_MEMORY_VMTHRASHING;
+ kn->kn_hook32 |= NOTE_EXIT_MEMORY_VMTHRASHING;
break;
case P_JETSAM_FCTHRASHING:
- kn->kn_hook64 |= NOTE_EXIT_MEMORY_FCTHRASHING;
+ kn->kn_hook32 |= NOTE_EXIT_MEMORY_FCTHRASHING;
break;
case P_JETSAM_VNODE:
- kn->kn_hook64 |= NOTE_EXIT_MEMORY_VNODE;
+ kn->kn_hook32 |= NOTE_EXIT_MEMORY_VNODE;
break;
case P_JETSAM_HIWAT:
- kn->kn_hook64 |= NOTE_EXIT_MEMORY_HIWAT;
+ kn->kn_hook32 |= NOTE_EXIT_MEMORY_HIWAT;
break;
case P_JETSAM_PID:
- kn->kn_hook64 |= NOTE_EXIT_MEMORY_PID;
+ kn->kn_hook32 |= NOTE_EXIT_MEMORY_PID;
break;
case P_JETSAM_IDLEEXIT:
- kn->kn_hook64 |= NOTE_EXIT_MEMORY_IDLE;
+ kn->kn_hook32 |= NOTE_EXIT_MEMORY_IDLE;
break;
}
}
if ((kn->kn_proc->p_csflags &
CS_KILLED) != 0) {
- kn->kn_hook64 |= NOTE_EXIT_CSERROR;
+ kn->kn_hook32 |= NOTE_EXIT_CSERROR;
}
}
}
proc_klist_lock();
if (kn->kn_fflags) {
- knote_fill_kevent(kn, kev, kn->kn_hook64);
- kn->kn_hook64 = 0;
+ knote_fill_kevent(kn, kev, kn->kn_hook32);
+ kn->kn_hook32 = 0;
res = 1;
}
proc_klist_unlock();
}
if (kq->kq_state & KQ_WORKLOOP) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_REGISTER),
((struct kqworkloop *)kq)->kqwl_dynamicid,
kev->udata, kev->flags, kev->filter);
} else if (kq->kq_state & KQ_WORKQ) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_REGISTER),
0, kev->udata, kev->flags, kev->filter);
} else {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_REGISTER),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_REGISTER),
VM_KERNEL_UNSLIDE_OR_PERM(kq),
kev->udata, kev->flags, kev->filter);
}
assert(!(kn->kn_status & (KN_DISABLED | KN_SUPPRESSED | KN_DROPPING)));
if (kq->kq_state & KQ_WORKLOOP) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS),
((struct kqworkloop *)kq)->kqwl_dynamicid,
kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
kn->kn_filtid);
} else if (kq->kq_state & KQ_WORKQ) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS),
0, kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
kn->kn_filtid);
} else {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS),
VM_KERNEL_UNSLIDE_OR_PERM(kq), kn->kn_udata,
kn->kn_status | (kn->kn_id << 32), kn->kn_filtid);
}
}
if (kev.flags & EV_VANISHED) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_VANISHED),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KNOTE_VANISHED),
kev.ident, kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
kn->kn_filtid);
}
{
int rc = 0;
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_START,
0, kqr->tr_kq_qos_index);
rc = kqworkq_acknowledge_events(kqwq, kqr, kevent_flags,
KQWQAE_BEGIN_PROCESSING);
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_PROCESS_BEGIN) | DBG_FUNC_END,
thread_tid(kqr_thread(kqr)), kqr->tr_kq_wakeup);
return rc;
kqlock_held(kq);
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_START,
kqwl->kqwl_dynamicid, 0, 0);
/* nobody else should still be processing */
}
done:
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_BEGIN) | DBG_FUNC_END,
kqwl->kqwl_dynamicid, 0, 0);
return rc;
kqlock_held(kq);
assert((kq->kqf_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0);
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_START,
VM_KERNEL_UNSLIDE_OR_PERM(kq), 0);
/* wait to become the exclusive processing thread */
for (;;) {
if (kq->kqf_state & KQ_DRAIN) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
VM_KERNEL_UNSLIDE_OR_PERM(kq), 2);
return EBADF;
}
/* anything left to process? */
if (TAILQ_EMPTY(&kq->kqf_queue)) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
VM_KERNEL_UNSLIDE_OR_PERM(kq), 1);
return -1;
}
/* convert to processing mode */
kq->kqf_state |= KQ_PROCESSING;
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_BEGIN) | DBG_FUNC_END,
VM_KERNEL_UNSLIDE_OR_PERM(kq));
return 0;
kqlock_held(kq);
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_START,
kqwl->kqwl_dynamicid, 0, 0);
if (flags & KQ_PROCESSING) {
kqworkloop_unbind_delayed_override_drop(thread);
}
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_PROCESS_END) | DBG_FUNC_END,
kqwl->kqwl_dynamicid, 0, 0);
return rc;
assert((kq->kqf_state & (KQ_WORKQ | KQ_WORKLOOP)) == 0);
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQ_PROCESS_END),
VM_KERNEL_UNSLIDE_OR_PERM(kq), 0);
/*
trp.trp_value = kqwl->kqwl_params;
if (trp.trp_flags && !(trp.trp_flags & TRP_RELEASED)) {
trp.trp_flags |= TRP_RELEASED;
+ kqwl->kqwl_params = trp.trp_value;
kqworkloop_release_live(kqwl);
} else {
error = EINVAL;
__assert_only struct kqworkloop *kqwl = (struct kqworkloop *)kq;
assert(kqwl->kqwl_owner == THREAD_NULL);
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THREQUEST),
kqwl->kqwl_dynamicid, 0, qos, kqr->tr_kq_wakeup);
ts = kqwl->kqwl_turnstile;
/* Add a thread request reference on the kqueue. */
kqworkloop_retain(kqwl);
} else {
assert(kq->kq_state & KQ_WORKQ);
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_THREQUEST),
-1, 0, qos, kqr->tr_kq_wakeup);
}
turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
}
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_BIND), kqu.kqwl->kqwl_dynamicid,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_BIND), kqu.kqwl->kqwl_dynamicid,
thread_tid(thread), kqr->tr_kq_qos_index,
(kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup);
} else {
assert(kqr->tr_kq_override_index == 0);
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_BIND), -1,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_BIND), -1,
thread_tid(thread), kqr->tr_kq_qos_index,
(kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup);
}
if (kqwl_owner) {
#if 0
/* JMM - need new trace hooks for owner overrides */
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST),
kqwl->kqwl_dynamicid, thread_tid(kqwl_owner), kqr->tr_kq_qos_index,
(kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup);
#endif
}
if (qos_changed) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_THADJUST), kqwl->kqwl_dynamicid,
thread_tid(servicer), kqr->tr_kq_qos_index,
(kqr->tr_kq_override_index << 16) | kqr->tr_kq_wakeup);
}
struct uthread *ut = get_bsdthread_info(thread);
workq_threadreq_t kqr = &kqwl->kqwl_request;
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND), kqwl->kqwl_dynamicid,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWL_UNBIND), kqwl->kqwl_dynamicid,
thread_tid(thread), 0, 0);
kqlock_held(kqwl);
struct uthread *ut = get_bsdthread_info(thread);
kq_index_t old_override = kqr->tr_kq_override_index;
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND), -1,
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KQWQ_UNBIND), -1,
thread_tid(kqr_thread(kqr)), kqr->tr_kq_qos_index, 0);
kqlock_held(kqwq);
knote_mark_active(struct knote *kn)
{
if ((kn->kn_status & KN_ACTIVE) == 0) {
- KDBG_FILTERED(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE),
+ KDBG_DEBUG(KEV_EVTID(BSD_KEVENT_KNOTE_ACTIVATE),
kn->kn_udata, kn->kn_status | (kn->kn_id << 32),
kn->kn_filtid);
}
/*
- * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <security/mac_mach_internal.h>
#endif
+#if CONFIG_AUDIT
+#include <bsm/audit_kevents.h>
+#endif
+
#if CONFIG_ARCADE
#include <kern/arcade.h>
#endif
*/
if (imgp->ip_scriptvp) {
vnode_put(imgp->ip_scriptvp);
+ imgp->ip_scriptvp = NULLVP;
}
if (vnode_getwithref(imgp->ip_vp) == 0) {
imgp->ip_scriptvp = imgp->ip_vp;
proc_t p = vfs_context_proc(imgp->ip_vfs_context);
_posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
int ival[2]; /* dummy retval for system calls */
+#if CONFIG_AUDIT
+ struct uthread *uthread = get_bsdthread_info(current_thread());
+#endif
for (action = 0; action < px_sfap->psfa_act_count; action++) {
_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
mode = ((mode & ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
VATTR_SET(vap, va_mode, mode & ACCESSPERMS);
+ AUDIT_SUBCALL_ENTER(OPEN, p, uthread);
+
NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
imgp->ip_vfs_context);
FREE(bufp, M_TEMP);
+ AUDIT_SUBCALL_EXIT(uthread, error);
+
/*
* If there's an error, or we get the right fd by
* accident, then drop out here. This is easier than
* can ignore that, since if we didn't get the
* fd we wanted, the error will stop us.
*/
+ AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
error = dup2(p, &dup2a, ival);
+ AUDIT_SUBCALL_EXIT(uthread, error);
if (error) {
break;
}
*/
ca.fd = origfd;
+ AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
error = close_nocancel(p, &ca, ival);
+ AUDIT_SUBCALL_EXIT(uthread, error);
}
break;
* can ignore that, since if we didn't get the
* fd we wanted, the error will stop us.
*/
+ AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
error = dup2(p, &dup2a, ival);
+ AUDIT_SUBCALL_EXIT(uthread, error);
}
break;
dup2a.from = ca.fd = ival[0];
dup2a.to = psfa->psfaa_dup2args.psfad_newfiledes;
+ AUDIT_SUBCALL_ENTER(DUP2, p, uthread);
error = dup2(p, &dup2a, ival);
+ AUDIT_SUBCALL_EXIT(uthread, error);
if (error) {
break;
}
+ AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
error = close_nocancel(p, &ca, ival);
+ AUDIT_SUBCALL_EXIT(uthread, error);
}
break;
ca.fd = psfa->psfaa_filedes;
+ AUDIT_SUBCALL_ENTER(CLOSE, p, uthread);
error = close_nocancel(p, &ca, ival);
+ AUDIT_SUBCALL_EXIT(uthread, error);
}
break;
*/
struct nameidata nd;
+ AUDIT_SUBCALL_ENTER(CHDIR, p, uthread);
NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
CAST_USER_ADDR_T(psfa->psfaa_chdirargs.psfac_path),
imgp->ip_vfs_context);
error = chdir_internal(p, imgp->ip_vfs_context, &nd, 0);
+ AUDIT_SUBCALL_EXIT(uthread, error);
}
break;
fchdira.fd = psfa->psfaa_filedes;
+ AUDIT_SUBCALL_ENTER(FCHDIR, p, uthread);
error = fchdir(p, &fchdira, ival);
+ AUDIT_SUBCALL_EXIT(uthread, error);
}
break;
break;
}
}
+
+static inline void
+proc_ios13extended_footprint_entitled(proc_t p, task_t task, const char *caller)
+{
+#pragma unused(p, caller)
+ boolean_t ios13extended_footprint_entitled;
+
+ /* the entitlement grants a footprint limit increase */
+ ios13extended_footprint_entitled = IOTaskHasEntitlement(task,
+ "com.apple.developer.memory.ios13extended_footprint");
+ if (ios13extended_footprint_entitled) {
+ task_set_ios13extended_footprint_limit(task);
+ }
+}
#endif /* __arm64__ */
/*
* The POSIX_SPAWN_CLOEXEC_DEFAULT flag
* is handled in exec_handle_file_actions().
*/
- if ((error = exec_handle_file_actions(imgp,
- imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0) {
+#if CONFIG_AUDIT
+ /*
+ * The file actions auditing can overwrite the upath of
+ * the AUE_POSIX_SPAWN audit record. Save the audit record.
+ */
+ struct kaudit_record *save_uu_ar = uthread->uu_ar;
+ uthread->uu_ar = NULL;
+#endif
+ error = exec_handle_file_actions(imgp,
+ imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0);
+#if CONFIG_AUDIT
+ /* Restore the AUE_POSIX_SPAWN audit record. */
+ uthread->uu_ar = save_uu_ar;
+#endif
+ if (error != 0) {
goto bad;
}
}
/* Has spawn port actions? */
if (imgp->ip_px_spa != NULL) {
- if ((error = exec_handle_port_actions(imgp, &port_actions)) != 0) {
+#if CONFIG_AUDIT
+ /*
+ * Do the same for the port actions as we did for the file
+ * actions. Save the AUE_POSIX_SPAWN audit record.
+ */
+ struct kaudit_record *save_uu_ar = uthread->uu_ar;
+ uthread->uu_ar = NULL;
+#endif
+ error = exec_handle_port_actions(imgp, &port_actions);
+#if CONFIG_AUDIT
+ /* Restore the AUE_POSIX_SPAWN audit record. */
+ uthread->uu_ar = save_uu_ar;
+#endif
+ if (error != 0) {
goto bad;
}
}
#if __arm64__
proc_legacy_footprint_entitled(p, new_task, __FUNCTION__);
+ proc_ios13extended_footprint_entitled(p, new_task, __FUNCTION__);
#endif /* __arm64__ */
}
#if __arm64__
proc_legacy_footprint_entitled(p, new_task, __FUNCTION__);
+ proc_ios13extended_footprint_entitled(p, new_task, __FUNCTION__);
#endif /* __arm64__ */
/* Sever any extant thread affinity */
if (child_proc->p_textvp != NULLVP) {
/* bump references to the text vnode */
/* Need to hold iocount across the ref call */
- if (vnode_getwithref(child_proc->p_textvp) == 0) {
+ if ((error = vnode_getwithref(child_proc->p_textvp)) == 0) {
error = vnode_ref(child_proc->p_textvp);
vnode_put(child_proc->p_textvp);
- if (error != 0) {
- child_proc->p_textvp = NULLVP;
- }
+ }
+
+ if (error != 0) {
+ child_proc->p_textvp = NULLVP;
}
}
static void lf_adjust_assertion(struct lockf *block);
#endif /* IMPORTANCE_INHERITANCE */
+static lck_mtx_t lf_dead_lock;
+static lck_grp_t *lf_dead_lock_grp;
+
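+/*
+ * One-time setup, called from bsd_init() (see the lf_init() call added
+ * earlier in this change), so the global lock exists before the advisory-lock
+ * deadlock scan below can take it.
+ */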
+void
+lf_init(void)
+{
+ lf_dead_lock_grp = lck_grp_alloc_init("lf_dead_lock", LCK_GRP_ATTR_NULL);
+ lck_mtx_init(&lf_dead_lock, lf_dead_lock_grp, LCK_ATTR_NULL);
+}
+
/*
* lf_advlock
*
struct lockf *block;
struct lockf **head = lock->lf_head;
struct lockf **prev, *overlap, *ltmp;
- static char lockstr[] = "lockf";
+ static const char lockstr[] = "lockf";
int priority, needtolink, error;
struct vnode *vp = lock->lf_vnode;
overlap_t ovcase;
*/
if ((lock->lf_flags & F_POSIX) &&
(block->lf_flags & F_POSIX)) {
- struct proc *wproc;
- struct uthread *ut;
+ lck_mtx_lock(&lf_dead_lock);
- /* The block is waiting on something */
- wproc = block->lf_owner;
+ /* The blocked process is waiting on something */
+ struct proc *wproc = block->lf_owner;
proc_lock(wproc);
+
LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p owned by pid %d\n", lock, proc_pid(wproc));
+
+ struct uthread *ut;
TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
/*
- * If the thread is asleep (uu_wchan != 0)
- * in this code (uu_wmesg == lockstr)
- * check to see if the lock is blocked behind
+ * If the thread is (a) asleep (uu_wchan != 0)
+ * and (b) in this code (uu_wmesg == lockstr)
+ * then check to see if the lock is blocked behind
* someone blocked behind us.
+ *
+ * Note: (i) vp->v_lock is held, preventing other
+ * threads from mutating the blocking list for our vnode,
+ * and (ii) the proc_lock is held, i.e. the thread list
+ * is stable.
+ *
+ * HOWEVER some thread in wproc might be sleeping on a lockf
+ * structure for a different vnode, and be woken at any
+ * time. Thus the waitblock list could mutate while
+ * it's being inspected by this thread, and what
+ * ut->uu_wchan was just pointing at could even be freed.
+ *
+ * Nevertheless this is safe here because of lf_dead_lock; if
+ * any thread blocked with uu_wmesg == lockstr wakes (see below)
+ * it will try to acquire lf_dead_lock which is already held
+ * here. Holding that lock prevents the lockf structure being
+ * pointed at by ut->uu_wchan from going away. Thus the vnode
+ * involved can be found and locked, and the corresponding
+ * blocking chain can then be examined safely.
*/
- if ((ut->uu_wchan != NULL) && (ut->uu_wmesg == lockstr)) {
- struct lockf *waitblock = (struct lockf *)ut->uu_wchan;
+ const struct lockf *waitblock = (const void *)ut->uu_wchan;
+ if ((waitblock != NULL) && (ut->uu_wmesg == lockstr)) {
LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode);
vnode_t othervp = NULL;
* v_lock) retry the scan.
*/
proc_unlock(wproc);
+ lck_mtx_unlock(&lf_dead_lock);
static struct timespec ts = {
.tv_sec = 0,
- .tv_nsec = 10 * NSEC_PER_MSEC,
+ .tv_nsec = 2 * NSEC_PER_MSEC,
};
- (void) msleep(lock, &vp->v_lock, priority, lockstr, &ts);
+ static const char pausestr[] = "lockf:pause";
+ (void) msleep(lock, &vp->v_lock, priority, pausestr, &ts);
LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p contention for vp %p => restart\n", lock, othervp);
goto scan;
}
* we successfully acquired the
* proc_lock).
*/
- waitblock = waitblock->lf_next;
- if (waitblock == NULL) {
+ const struct lockf *nextblock = waitblock->lf_next;
+ if (nextblock == NULL) {
if (othervp) {
lck_mtx_unlock(&othervp->v_lock);
}
- LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with no lf_next\n", lock);
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p with waitblock %p and no lf_next; othervp %p\n", lock, waitblock, othervp);
continue;
}
- LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, waitblock, waitblock->lf_vnode);
+ LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is also blocked on lock %p vnode %p\n", lock, nextblock, nextblock->lf_vnode);
/*
* Make sure it's an advisory range
* if we mix lock types, it's our own
* fault.
*/
- if ((waitblock->lf_flags & F_POSIX) == 0) {
+ if ((nextblock->lf_flags & F_POSIX) == 0) {
if (othervp) {
lck_mtx_unlock(&othervp->v_lock);
}
* getting the requested lock, then we
* would deadlock, so error out.
*/
- struct proc *bproc = waitblock->lf_owner;
+ struct proc *bproc = nextblock->lf_owner;
const boolean_t deadlocked = bproc == lock->lf_owner;
if (othervp) {
if (deadlocked) {
LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p which is me, so EDEADLK\n", lock);
proc_unlock(wproc);
+ lck_mtx_unlock(&lf_dead_lock);
FREE(lock, M_LOCKF);
return EDEADLK;
}
LOCKF_DEBUG(LF_DBG_DEADLOCK, "lock %p bottom of thread loop\n", lock);
}
proc_unlock(wproc);
+ lck_mtx_unlock(&lf_dead_lock);
}
/*
#endif /* LOCKF_DEBUGGING */
DTRACE_FSINFO(advlock__wait, vnode_t, vp);
- error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
+ if (lock->lf_flags & F_POSIX) {
+ error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
+ /*
+ * Ensure that 'lock' doesn't get mutated or freed if a
+ * wakeup occurs while hunting for deadlocks (and holding
+ * lf_dead_lock - see above)
+ */
+ lck_mtx_lock(&lf_dead_lock);
+ lck_mtx_unlock(&lf_dead_lock);
+ } else {
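+ /*
+ * Non-POSIX waiters sleep under a distinct wmesg so that the deadlock
+ * scan above, which matches uu_wmesg == lockstr, never mistakes them
+ * for POSIX lockf waiters.
+ */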
+ static const char lockstr_np[] = "lockf:np";
+ error = msleep(lock, &vp->v_lock, priority, lockstr_np, timeout);
+ }
if (error == 0 && (lock->lf_flags & F_ABORT) != 0) {
error = EBADF;
proc_list_unlock();
}
+void
+memorystatus_act_on_ios13extended_footprint_entitlement(proc_t p)
+{
+ int memlimit_mb_active = 0, memlimit_mb_inactive = 0;
+ boolean_t memlimit_active_is_fatal = FALSE, memlimit_inactive_is_fatal = FALSE, use_active_limit = FALSE;
+
+ if (max_mem < 1500ULL * 1024 * 1024 ||
+ max_mem > 2ULL * 1024 * 1024 * 1024) {
+ /* ios13extended_footprint is only for 2GB devices */
+ return;
+ }
+
+ proc_list_lock();
+
+ if (p->p_memstat_memlimit_active > 0) {
+ memlimit_mb_active = p->p_memstat_memlimit_active;
+ } else if (p->p_memstat_memlimit_active == -1) {
+ memlimit_mb_active = max_task_footprint_mb;
+ } else {
+ /*
+ * Nothing to do for '0' which is
+ * a special value only used internally
+ * to test 'no limits'.
+ */
+ proc_list_unlock();
+ return;
+ }
+
+ if (p->p_memstat_memlimit_inactive > 0) {
+ memlimit_mb_inactive = p->p_memstat_memlimit_inactive;
+ } else if (p->p_memstat_memlimit_inactive == -1) {
+ memlimit_mb_inactive = max_task_footprint_mb;
+ } else {
+ /*
+ * Nothing to do for '0' which is
+ * a special value only used internally
+ * to test 'no limits'.
+ */
+ proc_list_unlock();
+ return;
+ }
+
+ /* limit to "almost 2GB" */
+ int ios13extended_footprint_mb = 1800;
+ if (memlimit_mb_active > ios13extended_footprint_mb) {
+ /* do not lower the current limit */
+ proc_list_unlock();
+ return;
+ }
+ memlimit_mb_active = ios13extended_footprint_mb;
+ memlimit_mb_inactive = ios13extended_footprint_mb;
+
+ memlimit_active_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL);
+ memlimit_inactive_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL);
+
+ SET_ACTIVE_LIMITS_LOCKED(p, memlimit_mb_active, memlimit_active_is_fatal);
+ SET_INACTIVE_LIMITS_LOCKED(p, memlimit_mb_inactive, memlimit_inactive_is_fatal);
+
+ if (proc_jetsam_state_is_active_locked(p) == TRUE) {
+ use_active_limit = TRUE;
+ CACHE_ACTIVE_LIMITS_LOCKED(p, memlimit_active_is_fatal);
+ } else {
+ CACHE_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive_is_fatal);
+ }
+
+ if (memorystatus_highwater_enabled) {
+ task_set_phys_footprint_limit_internal(p->task,
+ (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1,
+ NULL, /*return old value */
+ use_active_limit, /*active limit?*/
+ (use_active_limit ? memlimit_active_is_fatal : memlimit_inactive_is_fatal));
+ }
+
+ proc_list_unlock();
+}
+
#endif /* CONFIG_MEMORYSTATUS */
#endif /* __arm64__ */
* then the vm_map_fork is allowed.
*
* And if a process's memory footprint calculates less
- * than or equal to half of the system-wide task limit,
+ * than or equal to a quarter of the system-wide task limit,
* then the vm_map_fork is allowed. This calculation
* is based on the assumption that a process can
* munch memory up to the system-wide task limit.
*/
+extern boolean_t corpse_threshold_system_limit;
boolean_t
memorystatus_allowed_vm_map_fork(task_t task)
{
footprint_in_bytes = get_task_phys_footprint(task);
/*
- * Maximum is 1/4 of the system-wide task limit.
+ * Maximum is 1/4 of the system-wide task limit by default.
*/
max_allowed_bytes = ((uint64_t)max_task_footprint_mb * 1024 * 1024) >> 2;
+#if DEBUG || DEVELOPMENT
+ if (corpse_threshold_system_limit) {
+ max_allowed_bytes = (uint64_t)max_task_footprint_mb * (1UL << 20);
+ }
+#endif /* DEBUG || DEVELOPMENT */
+
if (footprint_in_bytes > max_allowed_bytes) {
printf("memorystatus disallowed vm_map_fork %lld %lld\n", footprint_in_bytes, max_allowed_bytes);
set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED);
}
#if VM_PRESSURE_EVENTS
- memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
+ memorystatus_available_pages_pressure = pressure_threshold_percentage * (atop_64(max_mem) / 100);
#endif
}
if ((thread_continue_t)continuation != THREAD_CONTINUE_NULL) {
ut->uu_continuation = continuation;
ut->uu_pri = pri;
- ut->uu_timo = abstime? 1: 0;
ut->uu_mtx = mtx;
(void) thread_block(_sleep_continue);
/* NOTREACHED */
0x132000c RealFaultAddressPurgeable
0x1320010 RealFaultAddressExternal
0x1320014 RealFaultAddressSharedCache
+0x1320018 vm_fast_fault
+0x132001c vm_slow_fault
+0x1320020 vm_map_lookup_object
0x1400000 MACH_SCHED
0x1400004 MACH_STKATTACH
0x1400008 MACH_STKHANDOFF
switch (dom) {
/*
- * Don't mark Unix domain, system or multipath sockets as
+ * Don't mark Unix domain or system sockets as
* eligible for defunct by default.
*/
case PF_LOCAL:
case PF_SYSTEM:
- case PF_MULTIPATH:
so->so_flags |= SOF_NODEFUNCT;
break;
default:
KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0);
AUDIT_ARG(fd, uap->s);
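+ /*
+ * MSG_SKIPCFIL is meant for in-kernel content-filter re-injection
+ * only, so refuse it if it comes in from userland.
+ */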
+ if (uap->flags & MSG_SKIPCFIL) {
+ error = EPERM;
+ goto done;
+ }
+
auio = uio_create(1, 0,
(IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
UIO_WRITE);
KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0);
AUDIT_ARG(fd, uap->s);
+
+ if (uap->flags & MSG_SKIPCFIL) {
+ error = EPERM;
+ goto done;
+ }
+
if (IS_64BIT_PROCESS(p)) {
msghdrp = (caddr_t)&msg64;
size_of_msghdr = sizeof(msg64);
KERNEL_DEBUG(DBG_FNC_SENDMSG_X | DBG_FUNC_START, 0, 0, 0, 0, 0);
+ if (uap->flags & MSG_SKIPCFIL) {
+ error = EPERM;
+ goto out;
+ }
+
error = file_socket(uap->s, &so);
if (error) {
goto out;
/*
* This function may be called many times to link or re-link the
* underlying vnode to the kqueue. If we've already linked the two,
- * we will have a valid kn_hook64 which ties us to the underlying
+ * we will have a valid kn_hook_waitqid which ties us to the underlying
* device's waitq via the waitq's prepost table object. However,
* devices can abort any select action by calling selthreadclear().
* This is OK because the table object will be invalidated by the
* the table object's ID to us. It will also set the
* waitq_prepost_id field within the waitq structure.
*
- * We can just overwrite kn_hook64 because it's simply a
+ * We can just overwrite kn_hook_waitqid because it's simply a
* table ID used to grab a reference when needed.
*
* We have a reference on the vnode, so we know that the
* device won't go away while we get this ID.
+ *
+ * Note: on 32bit this field is 32bit only.
*/
- kn->kn_hook64 = waitq_get_prepost_id(wq);
+ kn->kn_hook_waitqid = (typeof(kn->kn_hook_waitqid))waitq_get_prepost_id(wq);
} else if (selres == 0) {
/*
* The device indicated that there's no data to read, but didn't call
}
kn->kn_filtid = EVFILTID_SPEC;
- kn->kn_hook64 = 0;
+ kn->kn_hook_waitqid = 0;
knote_markstayactive(kn);
return spec_knote_select_and_link(kn);
* This is potentially tricky: the device's selinfo waitq that was
* tricked into being part of this knote's waitq set may not be a part
* of any other set, and the device itself may have revoked the memory
- * in which the waitq was held. We use the knote's kn_hook64 field
+ * in which the waitq was held. We use the knote's kn_hook_waitqid field
* to keep the ID of the waitq's prepost table object. This
* object keeps a pointer back to the waitq, and gives us a safe way
* to decouple the dereferencing of driver allocated memory: if the
* object will be invalidated. The waitq details are handled in the
* waitq API invoked here.
*/
- if (kn->kn_hook64) {
- waitq_unlink_by_prepost_id(kn->kn_hook64, &(knote_get_kq(kn)->kq_wqs));
- kn->kn_hook64 = 0;
+ if (kn->kn_hook_waitqid) {
+ waitq_unlink_by_prepost_id(kn->kn_hook_waitqid, &(knote_get_kq(kn)->kq_wqs));
+ kn->kn_hook_waitqid = 0;
}
}
}
struct ipsec_pcb *pcb = *unitinfo;
+ if (pcb == NULL) {
+ return EINVAL;
+ }
lck_mtx_lock(&ipsec_lock);
void *data,
size_t len)
{
- struct ipsec_pcb *pcb = unitinfo;
errno_t result = 0;
+ struct ipsec_pcb *pcb = unitinfo;
+ if (pcb == NULL) {
+ return EINVAL;
+ }
/* check for privileges for privileged options */
switch (opt) {
void *data,
size_t *len)
{
- struct ipsec_pcb *pcb = unitinfo;
errno_t result = 0;
+ struct ipsec_pcb *pcb = unitinfo;
+ if (pcb == NULL) {
+ return EINVAL;
+ }
switch (opt) {
case IPSEC_OPT_FLAGS: {
#include <net/if_ports_used.h>
#include <netinet/in_pcb.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_fsm.h>
#include <stdbool.h>
npi.npi_timestamp.tv_usec = wakeuiid_last_check.tv_usec;
if (SOCK_PROTO(so) == IPPROTO_TCP) {
+ struct tcpcb *tp = intotcpcb(inp);
+
npi.npi_flags |= NPIF_TCP;
+ if (tp != NULL && tp->t_state == TCPS_LISTEN) {
+ npi.npi_flags |= NPIF_LISTEN;
+ }
} else if (SOCK_PROTO(so) == IPPROTO_UDP) {
npi.npi_flags |= NPIF_UDP;
} else {
npi.npi_local_port = inp->inp_lport;
npi.npi_foreign_port = inp->inp_fport;
- if (inp->inp_vflag & INP_IPV4) {
+ /*
+ * Take into account IPv4 addresses mapped onto IPv6
+ */
+ if ((inp->inp_vflag & INP_IPV6) != 0 && (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
+ (inp->inp_vflag & (INP_IPV6 | INP_IPV4)) == (INP_IPV6 | INP_IPV4)) {
+ npi.npi_flags |= NPIF_IPV6 | NPIF_IPV4;
+ memcpy(&npi.npi_local_addr_in6,
+ &inp->in6p_laddr, sizeof(struct in6_addr));
+ } else if (inp->inp_vflag & INP_IPV4) {
npi.npi_flags |= NPIF_IPV4;
npi.npi_local_addr_in = inp->inp_laddr;
npi.npi_foreign_addr_in = inp->inp_faddr;
struct in6_addr _in_a_6;
};
-#define NPIF_IPV4 0x00000001
-#define NPIF_IPV6 0x00000002
-#define NPIF_TCP 0x00000004
-#define NPIF_UDP 0x00000008
-#define NPIF_DELEGATED 0x00000010
-#define NPIF_SOCKET 0x00000020
-#define NPIF_CHANNEL 0x00000040
+#define NPIF_IPV4 0x0001
+#define NPIF_IPV6 0x0002
+#define NPIF_TCP 0x0004
+#define NPIF_UDP 0x0008
+#define NPIF_DELEGATED 0x0010
+#define NPIF_SOCKET 0x0020
+#define NPIF_CHANNEL 0x0040
+#define NPIF_LISTEN 0x0080
struct net_port_info {
uint16_t npi_if_index;
u_int32_t flags, u_int8_t *bitfield)
{
u_int32_t ifindex;
- u_int32_t inp_flags = 0;
if (bitfield == NULL) {
return EINVAL;
if_ports_used_update_wakeuuid(ifp);
- inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) ?
- INPCB_GET_PORTS_USED_WILDCARDOK : 0);
- inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) ?
- INPCB_GET_PORTS_USED_NOWAKEUPOK : 0);
- inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) ?
- INPCB_GET_PORTS_USED_RECVANYIFONLY : 0);
- inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) ?
- INPCB_GET_PORTS_USED_EXTBGIDLEONLY : 0);
- inp_flags |= ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) ?
- INPCB_GET_PORTS_USED_ACTIVEONLY : 0);
-
ifindex = (ifp != NULL) ? ifp->if_index : 0;
if (!(flags & IFNET_GET_LOCAL_PORTS_TCPONLY)) {
- udp_get_ports_used(ifindex, protocol, inp_flags,
+ udp_get_ports_used(ifindex, protocol, flags,
bitfield);
}
if (!(flags & IFNET_GET_LOCAL_PORTS_UDPONLY)) {
- tcp_get_ports_used(ifindex, protocol, inp_flags,
+ tcp_get_ports_used(ifindex, protocol, flags,
bitfield);
}
#define IFNET_GET_LOCAL_PORTS_RECVANYIFONLY 0x10
#define IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY 0x20
#define IFNET_GET_LOCAL_PORTS_ACTIVEONLY 0x40
+#define IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK 0x80
/*
* @function ifnet_get_local_ports_extended
* @discussion Returns a bitfield indicating which local ports of the
* IFNET_GET_LOCAL_PORTS_ACTIVEONLY: When bit is set, the
* port is in the list only if the socket is not in a final TCP
* state or the connection is not idle in a final TCP state
+ * IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK: When bit is set, the
+ * port is in the list for all the TCP states except CLOSED
+ * and TIME_WAIT
* @param bitfield A pointer to 8192 bytes.
* @result Returns 0 on success.
*/
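For illustration only (not part of this header): a consumer could test whether a given local port is marked in the returned 8192-byte bitfield as sketched below. The helper name is made up, and the layout assumes the usual bitstring convention of bit N living in byte N >> 3.

static inline int
local_port_is_used(const u_int8_t *bitfield, u_int16_t port)
{
	/* bit 'port' lives in byte (port >> 3), at bit position (port & 7) */
	return (bitfield[port >> 3] & (1 << (port & 7))) != 0;
}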
static unsigned int ndrv_multi_max_count = NDRV_DMUX_MAX_DESCR;
SYSCTL_UINT(_net, OID_AUTO, ndrv_multi_max_count, CTLFLAG_RW | CTLFLAG_LOCKED,
- &ndrv_multi_max_count, 0, "Number of allowed multicast addresses per NRDV socket");
+ &ndrv_multi_max_count, 0, "Number of allowed multicast addresses per NDRV socket");
+
+/*
+ * The locking strategy relies on the PF_NDRV domain mutex that protects both the
+ * PCB list "ndrvl" and the sockets themselves
+ */
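+/*
+ * Concretely, the pattern used below: entry points reached without the
+ * mutex (ndrv_input, ndrv_attach) take it around any access to "ndrvl",
+ *
+ *     LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
+ *     lck_mtx_lock(ndrvdomain->dom_mtx);
+ *     ... walk or modify ndrvl, append to so_rcv ...
+ *     lck_mtx_unlock(ndrvdomain->dom_mtx);
+ *
+ * while paths reached with the mutex already held (ndrv_do_detach,
+ * ndrv_find_inbound) only assert LCK_MTX_ASSERT_OWNED.
+ */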
static int ndrv_do_detach(struct ndrv_cb *);
static int ndrv_do_disconnect(struct ndrv_cb *);
u_int32_t ndrv_sendspace = NDRVSNDQ;
u_int32_t ndrv_recvspace = NDRVRCVQ;
-TAILQ_HEAD(, ndrv_cb) ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl);
+TAILQ_HEAD(, ndrv_cb) ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl);
static struct domain *ndrvdomain = NULL;
extern struct domain ndrvdomain_s;
-#define NDRV_PROTODEMUX_COUNT 10
+#define NDRV_PROTODEMUX_COUNT 10
/*
* Verify these values match.
static int
ndrv_output(struct mbuf *m, struct socket *so)
{
- struct ndrv_cb *np = sotondrvcb(so);
+ struct ndrv_cb *np = sotondrvcb(so);
struct ifnet *ifp = np->nd_if;
- int result = 0;
+ int result = 0;
#if NDRV_DEBUG
printf("NDRV output: %x, %x, %x\n", m, so, np);
/*
* No header is a format error
*/
- if ((m->m_flags&M_PKTHDR) == 0)
- return(EINVAL);
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ return EINVAL;
+ }
/* Unlock before calling ifnet_output */
socket_unlock(so, 0);
/*
- * Call DLIL if we can. DLIL is much safer than calling the
- * ifp directly.
- */
+ * Call DLIL if we can. DLIL is much safer than calling the
+ * ifp directly.
+ */
result = ifnet_output_raw(ifp, np->nd_proto_family, m);
socket_lock(so, 0);
- return (result);
+ return result;
}
/* Our input routine called from DLIL */
static errno_t
ndrv_input(
- ifnet_t ifp,
- protocol_family_t proto_family,
- mbuf_t m,
- char *frame_header)
+ ifnet_t ifp,
+ protocol_family_t proto_family,
+ mbuf_t m,
+ char *frame_header)
{
struct socket *so;
struct sockaddr_dl ndrvsrc;
struct ndrv_cb *np;
int error = 0;
- ndrvsrc.sdl_len = sizeof (struct sockaddr_dl);
- ndrvsrc.sdl_family = AF_NDRV;
- ndrvsrc.sdl_index = 0;
+ ndrvsrc.sdl_len = sizeof(struct sockaddr_dl);
+ ndrvsrc.sdl_family = AF_NDRV;
+ ndrvsrc.sdl_index = 0;
- /* move packet from if queue to socket */
+ /* move packet from if queue to socket */
/* Should be media-independent */
- ndrvsrc.sdl_type = IFT_ETHER;
- ndrvsrc.sdl_nlen = 0;
- ndrvsrc.sdl_alen = 6;
- ndrvsrc.sdl_slen = 0;
- bcopy(frame_header, &ndrvsrc.sdl_data, 6);
+ ndrvsrc.sdl_type = IFT_ETHER;
+ ndrvsrc.sdl_nlen = 0;
+ ndrvsrc.sdl_alen = 6;
+ ndrvsrc.sdl_slen = 0;
+ bcopy(frame_header, &ndrvsrc.sdl_data, 6);
+
+ /* prepend the frame header */
+ m = m_prepend(m, ifnet_hdrlen(ifp), M_NOWAIT);
+ if (m == NULL) {
+ return EJUSTRETURN;
+ }
+ bcopy(frame_header, m->m_data, ifnet_hdrlen(ifp));
+
+ /*
+ * We need to take the domain mutex before the list RW lock
+ */
+ LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
+ lck_mtx_lock(ndrvdomain->dom_mtx);
np = ndrv_find_inbound(ifp, proto_family);
- if (np == NULL)
- {
- return(ENOENT);
+ if (np == NULL) {
+ lck_mtx_unlock(ndrvdomain->dom_mtx);
+ return ENOENT;
}
+
so = np->nd_socket;
- /* prepend the frame header */
- m = m_prepend(m, ifnet_hdrlen(ifp), M_NOWAIT);
- if (m == NULL)
- return EJUSTRETURN;
- bcopy(frame_header, m->m_data, ifnet_hdrlen(ifp));
- LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
- lck_mtx_lock(ndrvdomain->dom_mtx);
if (sbappendaddr(&(so->so_rcv), (struct sockaddr *)&ndrvsrc,
- m, (struct mbuf *)0, &error) != 0) {
+ m, NULL, &error) != 0) {
sorwakeup(so);
}
+
lck_mtx_unlock(ndrvdomain->dom_mtx);
+
return 0; /* radar 4030377 - always return 0 */
}
static int
ndrv_attach(struct socket *so, int proto, __unused struct proc *p)
{
- int error;
+ int error;
struct ndrv_cb *np = sotondrvcb(so);
- if ((so->so_state & SS_PRIV) == 0)
- return(EPERM);
+ if ((so->so_state & SS_PRIV) == 0) {
+ return EPERM;
+ }
#if NDRV_DEBUG
printf("NDRV attach: %x, %x, %x\n", so, proto, np);
#endif
- if ((error = soreserve(so, ndrv_sendspace, ndrv_recvspace)))
- return(error);
+ if ((error = soreserve(so, ndrv_sendspace, ndrv_recvspace))) {
+ return error;
+ }
MALLOC(np, struct ndrv_cb *, sizeof(*np), M_PCB, M_WAITOK);
- if (np == NULL)
- return (ENOMEM);
- so->so_pcb = (caddr_t)np;
- bzero(np, sizeof(*np));
+ if (np == NULL) {
+ return ENOMEM;
+ }
+ so->so_pcb = (caddr_t)np;
+ bzero(np, sizeof(*np));
#if NDRV_DEBUG
printf("NDRV attach: %x, %x, %x\n", so, proto, np);
#endif
np->nd_socket = so;
np->nd_proto.sp_family = SOCK_DOM(so);
np->nd_proto.sp_protocol = proto;
- np->nd_if = NULL;
- np->nd_proto_family = 0;
- np->nd_family = 0;
- np->nd_unit = 0;
- TAILQ_INSERT_TAIL(&ndrvl, np, nd_next);
- return(0);
+ np->nd_if = NULL;
+ np->nd_proto_family = 0;
+ np->nd_family = 0;
+ np->nd_unit = 0;
+
+ /*
+ * Use the domain mutex to protect the list
+ */
+ LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
+ lck_mtx_lock(ndrvdomain->dom_mtx);
+
+ TAILQ_INSERT_TAIL(&ndrvl, np, nd_next);
+
+ lck_mtx_unlock(ndrvdomain->dom_mtx);
+
+ return 0;
}
/*
{
struct ndrv_cb *np = sotondrvcb(so);
- if (np == 0)
+ if (np == 0) {
return EINVAL;
+ }
return ndrv_do_detach(np);
}
{
struct ndrv_cb *np = sotondrvcb(so);
- if (np == 0)
+ if (np == 0) {
return EINVAL;
+ }
- if (np->nd_faddr)
+ if (np->nd_faddr) {
return EISCONN;
+ }
/* Allocate memory to store the remote address */
MALLOC(np->nd_faddr, struct sockaddr_ndrv*,
- nam->sa_len, M_IFADDR, M_WAITOK);
- if (np->nd_faddr == NULL)
+ nam->sa_len, M_IFADDR, M_WAITOK);
+ if (np->nd_faddr == NULL) {
return ENOMEM;
+ }
bcopy((caddr_t) nam, (caddr_t) np->nd_faddr, nam->sa_len);
soisconnected(so);
static void
ndrv_event(struct ifnet *ifp, __unused protocol_family_t protocol,
- const struct kev_msg *event)
+ const struct kev_msg *event)
{
if (event->vendor_code == KEV_VENDOR_APPLE &&
- event->kev_class == KEV_NETWORK_CLASS &&
- event->kev_subclass == KEV_DL_SUBCLASS &&
- event->event_code == KEV_DL_IF_DETACHING) {
+ event->kev_class == KEV_NETWORK_CLASS &&
+ event->kev_subclass == KEV_DL_SUBCLASS &&
+ event->event_code == KEV_DL_IF_DETACHING) {
LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED);
lck_mtx_lock(ndrvdomain->dom_mtx);
ndrv_handle_ifp_detach(ifnet_family(ifp), ifnet_unit(ifp));
static int
ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
{
- struct sockaddr_ndrv *sa = (struct sockaddr_ndrv *) nam;
+ struct sockaddr_ndrv *sa = (struct sockaddr_ndrv *) nam;
char *dname;
struct ndrv_cb *np;
struct ifnet *ifp;
- int result;
+ int result;
- if TAILQ_EMPTY(&ifnet_head)
- return(EADDRNOTAVAIL); /* Quick sanity check */
+ if (TAILQ_EMPTY(&ifnet_head)) {
+ return EADDRNOTAVAIL; /* Quick sanity check */
+ }
np = sotondrvcb(so);
- if (np == 0)
+ if (np == 0) {
return EINVAL;
+ }
- if (np->nd_laddr)
- return EINVAL; /* XXX */
-
+ if (np->nd_laddr) {
+ return EINVAL; /* XXX */
+ }
/* I think we just latch onto a copy here; the caller frees */
np->nd_laddr = _MALLOC(sizeof(struct sockaddr_ndrv), M_IFADDR, M_WAITOK);
- if (np->nd_laddr == NULL)
- return(ENOMEM);
+ if (np->nd_laddr == NULL) {
+ return ENOMEM;
+ }
bcopy((caddr_t) sa, (caddr_t) np->nd_laddr, sizeof(struct sockaddr_ndrv));
dname = (char *) sa->snd_name;
np->nd_laddr->snd_len = sizeof(struct sockaddr_ndrv);
- if (*dname == '\0')
- return(EINVAL);
+ if (*dname == '\0') {
+ return EINVAL;
+ }
#if NDRV_DEBUG
printf("NDRV bind: %x, %x, %s\n", so, np, dname);
#endif
*/
ifnet_head_lock_shared();
TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
- if (name_cmp(ifp, dname) == 0)
+ if (name_cmp(ifp, dname) == 0) {
break;
+ }
}
ifnet_head_done();
- if (ifp == NULL)
- return(EADDRNOTAVAIL);
+ if (ifp == NULL) {
+ return EADDRNOTAVAIL;
+ }
// PPP doesn't support PF_NDRV.
- if (ifnet_family(ifp) != APPLE_IF_FAM_PPP)
- {
+ if (ifnet_family(ifp) != APPLE_IF_FAM_PPP) {
/* NDRV on this interface */
- struct ifnet_attach_proto_param ndrv_proto;
+ struct ifnet_attach_proto_param ndrv_proto;
result = 0;
bzero(&ndrv_proto, sizeof(ndrv_proto));
ndrv_proto.event = ndrv_event;
return result;
}
np->nd_proto_family = PF_NDRV;
- }
- else {
+ } else {
np->nd_proto_family = 0;
}
np->nd_if = ifp;
- np->nd_family = ifnet_family(ifp);
- np->nd_unit = ifnet_unit(ifp);
+ np->nd_family = ifnet_family(ifp);
+ np->nd_unit = ifnet_unit(ifp);
- return(0);
+ return 0;
}
static int
{
struct ndrv_cb *np = sotondrvcb(so);
- if (np == 0)
+ if (np == 0) {
return EINVAL;
+ }
- if (np->nd_faddr == 0)
+ if (np->nd_faddr == 0) {
return ENOTCONN;
+ }
ndrv_do_disconnect(np);
return 0;
*/
static int
ndrv_send(struct socket *so, __unused int flags, struct mbuf *m,
- __unused struct sockaddr *addr, struct mbuf *control,
- __unused struct proc *p)
+ __unused struct sockaddr *addr, struct mbuf *control,
+ __unused struct proc *p)
{
int error;
- if (control)
+ if (control) {
return EOPNOTSUPP;
+ }
error = ndrv_output(m, so);
m = NULL;
{
struct ndrv_cb *np = sotondrvcb(so);
- if (np == 0)
+ if (np == 0) {
return EINVAL;
+ }
ndrv_do_disconnect(np);
return 0;
struct ndrv_cb *np = sotondrvcb(so);
int len;
- if (np == 0)
+ if (np == 0) {
return EINVAL;
+ }
- if (np->nd_laddr == 0)
+ if (np->nd_laddr == 0) {
return EINVAL;
+ }
len = np->nd_laddr->snd_len;
MALLOC(*nam, struct sockaddr *, len, M_SONAME, M_WAITOK);
- if (*nam == NULL)
+ if (*nam == NULL) {
return ENOMEM;
+ }
bcopy((caddr_t)np->nd_laddr, *nam,
- (unsigned)len);
+ (unsigned)len);
return 0;
}
struct ndrv_cb *np = sotondrvcb(so);
int len;
- if (np == 0)
+ if (np == 0) {
return EINVAL;
+ }
- if (np->nd_faddr == 0)
+ if (np->nd_faddr == 0) {
return ENOTCONN;
+ }
len = np->nd_faddr->snd_len;
MALLOC(*nam, struct sockaddr *, len, M_SONAME, M_WAITOK);
- if (*nam == NULL)
+ if (*nam == NULL) {
return ENOMEM;
+ }
bcopy((caddr_t)np->nd_faddr, *nam,
- (unsigned)len);
+ (unsigned)len);
return 0;
}
static int
ndrv_ctloutput(struct socket *so, struct sockopt *sopt)
{
- struct ndrv_cb *np = sotondrvcb(so);
+ struct ndrv_cb *np = sotondrvcb(so);
int error = 0;
- switch(sopt->sopt_name)
- {
- case NDRV_DELDMXSPEC: /* Delete current spec */
- /* Verify no parameter was passed */
- if (sopt->sopt_val != 0 || sopt->sopt_valsize != 0) {
- /*
- * We don't support deleting a specific demux, it's
- * all or nothing.
- */
- return EINVAL;
- }
- error = ndrv_delspec(np);
- break;
- case NDRV_SETDMXSPEC: /* Set protocol spec */
- error = ndrv_setspec(np, sopt);
- break;
- case NDRV_ADDMULTICAST:
- error = ndrv_do_add_multicast(np, sopt);
- break;
- case NDRV_DELMULTICAST:
- error = ndrv_do_remove_multicast(np, sopt);
- break;
- default:
- error = ENOTSUP;
- }
+ switch (sopt->sopt_name) {
+ case NDRV_DELDMXSPEC: /* Delete current spec */
+ /* Verify no parameter was passed */
+ if (sopt->sopt_val != 0 || sopt->sopt_valsize != 0) {
+ /*
+ * We don't support deleting a specific demux, it's
+ * all or nothing.
+ */
+ return EINVAL;
+ }
+ error = ndrv_delspec(np);
+ break;
+ case NDRV_SETDMXSPEC: /* Set protocol spec */
+ error = ndrv_setspec(np, sopt);
+ break;
+ case NDRV_ADDMULTICAST:
+ error = ndrv_do_add_multicast(np, sopt);
+ break;
+ case NDRV_DELMULTICAST:
+ error = ndrv_do_remove_multicast(np, sopt);
+ break;
+ default:
+ error = ENOTSUP;
+ }
#ifdef NDRV_DEBUG
log(LOG_WARNING, "NDRV CTLOUT: %x returns %d\n", sopt->sopt_name,
error);
#endif
- return(error);
+ return error;
}
static int
ndrv_do_detach(struct ndrv_cb *np)
{
- struct ndrv_cb* cur_np = NULL;
- struct socket *so = np->nd_socket;
- int error = 0;
- struct ifnet * ifp;
+ struct ndrv_cb* cur_np = NULL;
+ struct socket *so = np->nd_socket;
+ int error = 0;
+ struct ifnet * ifp;
#if NDRV_DEBUG
printf("NDRV detach: %x, %x\n", so, np);
#endif
- ndrv_remove_all_multicast(np);
+ ndrv_remove_all_multicast(np);
+
+ /* Remove from the linked list of control blocks */
+ LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
+ TAILQ_REMOVE(&ndrvl, np, nd_next);
- ifp = np->nd_if;
- /* Remove from the linked list of control blocks */
- TAILQ_REMOVE(&ndrvl, np, nd_next);
- if (ifp != NULL) {
+ ifp = np->nd_if;
+ if (ifp != NULL) {
u_int32_t proto_family = np->nd_proto_family;
if (proto_family != PF_NDRV && proto_family != 0) {
}
/* Check if this is the last socket attached to this interface */
+ LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
TAILQ_FOREACH(cur_np, &ndrvl, nd_next) {
if (cur_np->nd_family == np->nd_family &&
- cur_np->nd_unit == np->nd_unit) {
+ cur_np->nd_unit == np->nd_unit) {
break;
}
}
socket_lock(so, 0);
}
}
- if (np->nd_laddr != NULL) {
+ if (np->nd_laddr != NULL) {
FREE(np->nd_laddr, M_IFADDR);
np->nd_laddr = NULL;
}
#if NDRV_DEBUG
printf("NDRV disconnect: %x\n", np);
#endif
- if (np->nd_faddr)
- {
- FREE(np->nd_faddr, M_IFADDR);
+ if (np->nd_faddr) {
+ FREE(np->nd_faddr, M_IFADDR);
np->nd_faddr = 0;
}
/*
* so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
* when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
*/
- if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
+ if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
ndrv_do_detach(np);
+ }
soisdisconnected(so);
- return(0);
+ return 0;
}
/* Hackery - return a string version of a decimal number */
static void
sprint_d(u_int n, char *buf, int buflen)
-{ char dbuf[IFNAMSIZ];
- char *cp = dbuf+IFNAMSIZ-1;
+{
+ char dbuf[IFNAMSIZ];
+ char *cp = dbuf + IFNAMSIZ - 1;
- *cp = 0;
- do { buflen--;
+ *cp = 0;
+ do {
+ buflen--;
cp--;
- *cp = "0123456789"[n % 10];
- n /= 10;
- } while (n != 0 && buflen > 0);
- strlcpy(buf, cp, IFNAMSIZ-buflen);
- return;
+ *cp = "0123456789"[n % 10];
+ n /= 10;
+ } while (n != 0 && buflen > 0);
+ strlcpy(buf, cp, IFNAMSIZ - buflen);
+ return;
}
/*
* Try to compare a device name (q) with one of the funky ifnet
* device names (ifp).
*/
-static int name_cmp(struct ifnet *ifp, char *q)
-{ char *r;
+static int
+name_cmp(struct ifnet *ifp, char *q)
+{
+ char *r;
int len;
char buf[IFNAMSIZ];
len = strlen(ifnet_name(ifp));
strlcpy(r, ifnet_name(ifp), IFNAMSIZ);
r += len;
- sprint_d(ifnet_unit(ifp), r, IFNAMSIZ-(r-buf));
+ sprint_d(ifnet_unit(ifp), r, IFNAMSIZ - (r - buf));
#if NDRV_DEBUG
printf("Comparing %s, %s\n", buf, q);
#endif
- return(strncmp(buf, q, IFNAMSIZ));
+ return strncmp(buf, q, IFNAMSIZ);
}
#if 0
void
ndrv_flushq(struct ifqueue *q)
{
- struct mbuf *m;
- for (;;)
- {
+ struct mbuf *m;
+ for (;;) {
IF_DEQUEUE(q, m);
- if (m == NULL)
+ if (m == NULL) {
break;
+ }
IF_DROP(q);
- if (m)
+ if (m) {
m_freem(m);
+ }
}
}
#endif
int
ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt)
{
- struct ifnet_attach_proto_param proto_param;
- struct ndrv_protocol_desc ndrvSpec;
- struct ndrv_demux_desc* ndrvDemux = NULL;
- int error = 0;
- struct socket * so = np->nd_socket;
- user_addr_t user_addr;
+ struct ifnet_attach_proto_param proto_param;
+ struct ndrv_protocol_desc ndrvSpec;
+ struct ndrv_demux_desc* ndrvDemux = NULL;
+ int error = 0;
+ struct socket * so = np->nd_socket;
+ user_addr_t user_addr;
/* Sanity checking */
- if (np->nd_proto_family != PF_NDRV)
+ if (np->nd_proto_family != PF_NDRV) {
return EBUSY;
- if (np->nd_if == NULL)
+ }
+ if (np->nd_if == NULL) {
return EINVAL;
+ }
/* Copy the ndrvSpec */
if (proc_is64bit(sopt->sopt_p)) {
- struct ndrv_protocol_desc64 ndrvSpec64;
+ struct ndrv_protocol_desc64 ndrvSpec64;
- if (sopt->sopt_valsize != sizeof(ndrvSpec64))
+ if (sopt->sopt_valsize != sizeof(ndrvSpec64)) {
return EINVAL;
+ }
error = sooptcopyin(sopt, &ndrvSpec64, sizeof(ndrvSpec64), sizeof(ndrvSpec64));
- if (error != 0)
+ if (error != 0) {
return error;
+ }
ndrvSpec.version = ndrvSpec64.version;
ndrvSpec.protocol_family = ndrvSpec64.protocol_family;
ndrvSpec.demux_count = ndrvSpec64.demux_count;
user_addr = ndrvSpec64.demux_list;
- }
- else {
- struct ndrv_protocol_desc32 ndrvSpec32;
+ } else {
+ struct ndrv_protocol_desc32 ndrvSpec32;
- if (sopt->sopt_valsize != sizeof(ndrvSpec32))
+ if (sopt->sopt_valsize != sizeof(ndrvSpec32)) {
return EINVAL;
+ }
error = sooptcopyin(sopt, &ndrvSpec32, sizeof(ndrvSpec32), sizeof(ndrvSpec32));
- if (error != 0)
+ if (error != 0) {
return error;
+ }
ndrvSpec.version = ndrvSpec32.version;
ndrvSpec.protocol_family = ndrvSpec32.protocol_family;
}
/* Verify the parameter */
- if (ndrvSpec.version > NDRV_PROTOCOL_DESC_VERS)
+ if (ndrvSpec.version > NDRV_PROTOCOL_DESC_VERS) {
return ENOTSUP; // version is too new!
- else if (ndrvSpec.version < 1)
+ } else if (ndrvSpec.version < 1) {
return EINVAL; // version is not valid
- else if (ndrvSpec.demux_count > NDRV_PROTODEMUX_COUNT || ndrvSpec.demux_count == 0)
+ } else if (ndrvSpec.demux_count > NDRV_PROTODEMUX_COUNT || ndrvSpec.demux_count == 0) {
return EINVAL; // demux_count is not valid
-
+ }
bzero(&proto_param, sizeof(proto_param));
proto_param.demux_count = ndrvSpec.demux_count;
/* Allocate storage for demux array */
MALLOC(ndrvDemux, struct ndrv_demux_desc*, proto_param.demux_count *
- sizeof(struct ndrv_demux_desc), M_TEMP, M_WAITOK);
- if (ndrvDemux == NULL)
+ sizeof(struct ndrv_demux_desc), M_TEMP, M_WAITOK);
+ if (ndrvDemux == NULL) {
return ENOMEM;
+ }
/* Allocate enough ifnet_demux_descs */
MALLOC(proto_param.demux_array, struct ifnet_demux_desc*,
- sizeof(*proto_param.demux_array) * ndrvSpec.demux_count,
- M_TEMP, M_WAITOK);
- if (proto_param.demux_array == NULL)
+ sizeof(*proto_param.demux_array) * ndrvSpec.demux_count,
+ M_TEMP, M_WAITOK);
+ if (proto_param.demux_array == NULL) {
error = ENOMEM;
+ }
- if (error == 0)
- {
+ if (error == 0) {
/* Copy the ndrv demux array from userland */
error = copyin(user_addr, ndrvDemux,
- ndrvSpec.demux_count * sizeof(struct ndrv_demux_desc));
+ ndrvSpec.demux_count * sizeof(struct ndrv_demux_desc));
ndrvSpec.demux_list = ndrvDemux;
}
- if (error == 0)
- {
+ if (error == 0) {
/* At this point, we've at least got enough bytes to start looking around */
- u_int32_t demuxOn = 0;
+ u_int32_t demuxOn = 0;
proto_param.demux_count = ndrvSpec.demux_count;
proto_param.input = ndrv_input;
proto_param.event = ndrv_event;
- for (demuxOn = 0; demuxOn < ndrvSpec.demux_count; demuxOn++)
- {
+ for (demuxOn = 0; demuxOn < ndrvSpec.demux_count; demuxOn++) {
/* Convert an ndrv_demux_desc to a ifnet_demux_desc */
error = ndrv_to_ifnet_demux(&ndrvSpec.demux_list[demuxOn],
- &proto_param.demux_array[demuxOn]);
- if (error)
+ &proto_param.demux_array[demuxOn]);
+ if (error) {
break;
+ }
}
}
- if (error == 0)
- {
+ if (error == 0) {
/* We've got all our ducks lined up...lets attach! */
socket_unlock(so, 0);
error = ifnet_attach_protocol(np->nd_if, ndrvSpec.protocol_family,
- &proto_param);
+ &proto_param);
socket_lock(so, 0);
- if (error == 0)
+ if (error == 0) {
np->nd_proto_family = ndrvSpec.protocol_family;
+ }
}
/* Free any memory we've allocated */
- if (proto_param.demux_array)
+ if (proto_param.demux_array) {
FREE(proto_param.demux_array, M_TEMP);
- if (ndrvDemux)
+ }
+ if (ndrvDemux) {
FREE(ndrvDemux, M_TEMP);
+ }
return error;
}
int
ndrv_to_ifnet_demux(struct ndrv_demux_desc* ndrv, struct ifnet_demux_desc* ifdemux)
{
- bzero(ifdemux, sizeof(*ifdemux));
+ bzero(ifdemux, sizeof(*ifdemux));
- if (ndrv->type < DLIL_DESC_ETYPE2)
- {
- /* using old "type", not supported */
- return ENOTSUP;
- }
+ if (ndrv->type < DLIL_DESC_ETYPE2) {
+ /* using old "type", not supported */
+ return ENOTSUP;
+ }
- if (ndrv->length > 28)
- {
- return EINVAL;
- }
+ if (ndrv->length > 28) {
+ return EINVAL;
+ }
- ifdemux->type = ndrv->type;
- ifdemux->data = ndrv->data.other;
- ifdemux->datalen = ndrv->length;
+ ifdemux->type = ndrv->type;
+ ifdemux->data = ndrv->data.other;
+ ifdemux->datalen = ndrv->length;
- return 0;
+ return 0;
}
int
ndrv_delspec(struct ndrv_cb *np)
{
- int result = 0;
+ int result = 0;
- if (np->nd_proto_family == PF_NDRV ||
- np->nd_proto_family == 0)
- return EINVAL;
+ if (np->nd_proto_family == PF_NDRV ||
+ np->nd_proto_family == 0) {
+ return EINVAL;
+ }
- /* Detach the protocol */
- result = ifnet_detach_protocol(np->nd_if, np->nd_proto_family);
- np->nd_proto_family = PF_NDRV;
+ /* Detach the protocol */
+ result = ifnet_detach_protocol(np->nd_if, np->nd_proto_family);
+ np->nd_proto_family = PF_NDRV;
return result;
}
struct ndrv_cb *
ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol)
{
- struct ndrv_cb* np;
+ struct ndrv_cb* np;
- if (protocol == PF_NDRV) return NULL;
+ LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
- TAILQ_FOREACH(np, &ndrvl, nd_next) {
- if (np->nd_proto_family == protocol &&
- np->nd_if == ifp) {
- return np;
- }
- }
+ if (protocol == PF_NDRV) {
+ return NULL;
+ }
+
+ TAILQ_FOREACH(np, &ndrvl, nd_next) {
+ if (np->nd_proto_family == protocol &&
+ np->nd_if == ifp) {
+ return np;
+ }
+ }
return NULL;
}
static void
ndrv_handle_ifp_detach(u_int32_t family, short unit)
{
- struct ndrv_cb* np;
- struct ifnet *ifp = NULL;
- struct socket *so;
-
- /* Find all sockets using this interface. */
- TAILQ_FOREACH(np, &ndrvl, nd_next) {
- if (np->nd_family == family &&
- np->nd_unit == unit)
- {
- /* This cb is using the detaching interface, but not for long. */
- /* Let the protocol go */
- ifp = np->nd_if;
- if (np->nd_proto_family != 0)
- ndrv_delspec(np);
-
- /* Delete the multicasts first */
- ndrv_remove_all_multicast(np);
-
- /* Disavow all knowledge of the ifp */
- np->nd_if = NULL;
- np->nd_unit = 0;
- np->nd_family = 0;
-
- so = np->nd_socket;
- /* Make sure sending returns an error */
- LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
- socantsendmore(so);
- socantrcvmore(so);
- }
- }
-
- /* Unregister our protocol */
- if (ifp) {
- ifnet_detach_protocol(ifp, PF_NDRV);
- }
+ struct ndrv_cb* np;
+ struct ifnet *ifp = NULL;
+ struct socket *so;
+
+ /* Find all sockets using this interface. */
+ TAILQ_FOREACH(np, &ndrvl, nd_next) {
+ if (np->nd_family == family &&
+ np->nd_unit == unit) {
+ /* This cb is using the detaching interface, but not for long. */
+ /* Let the protocol go */
+ ifp = np->nd_if;
+ if (np->nd_proto_family != 0) {
+ ndrv_delspec(np);
+ }
+
+ /* Delete the multicasts first */
+ ndrv_remove_all_multicast(np);
+
+ /* Disavow all knowledge of the ifp */
+ np->nd_if = NULL;
+ np->nd_unit = 0;
+ np->nd_family = 0;
+
+ so = np->nd_socket;
+ /* Make sure sending returns an error */
+ LCK_MTX_ASSERT(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED);
+ socantsendmore(so);
+ socantrcvmore(so);
+ }
+ }
+
+ /* Unregister our protocol */
+ if (ifp) {
+ ifnet_detach_protocol(ifp, PF_NDRV);
+ }
}
static int
ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt)
{
- struct ndrv_multiaddr* ndrv_multi;
- int result;
-
- if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
- sopt->sopt_level != SOL_NDRVPROTO || sopt->sopt_valsize > SOCK_MAXADDRLEN)
- return EINVAL;
- if (np->nd_if == NULL)
- return ENXIO;
- if (!(np->nd_dlist_cnt < ndrv_multi_max_count))
+ struct ndrv_multiaddr* ndrv_multi;
+ int result;
+
+ if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
+ sopt->sopt_level != SOL_NDRVPROTO || sopt->sopt_valsize > SOCK_MAXADDRLEN) {
+ return EINVAL;
+ }
+ if (np->nd_if == NULL) {
+ return ENXIO;
+ }
+ if (!(np->nd_dlist_cnt < ndrv_multi_max_count)) {
return EPERM;
+ }
+
+ // Allocate storage
+ MALLOC(ndrv_multi, struct ndrv_multiaddr*, sizeof(struct ndrv_multiaddr) -
+ sizeof(struct sockaddr) + sopt->sopt_valsize, M_IFADDR, M_WAITOK);
+ if (ndrv_multi == NULL) {
+ return ENOMEM;
+ }
+
+ // Copy in the address
+ result = copyin(sopt->sopt_val, &ndrv_multi->addr, sopt->sopt_valsize);
+
+ // Validate the sockaddr
+ if (result == 0 && sopt->sopt_valsize != ndrv_multi->addr.sa_len) {
+ result = EINVAL;
+ }
+
+ if (result == 0 && ndrv_have_multicast(np, &ndrv_multi->addr)) {
+ result = EEXIST;
+ }
- // Allocate storage
- MALLOC(ndrv_multi, struct ndrv_multiaddr*, sizeof(struct ndrv_multiaddr) -
- sizeof(struct sockaddr) + sopt->sopt_valsize, M_IFADDR, M_WAITOK);
- if (ndrv_multi == NULL)
- return ENOMEM;
-
- // Copy in the address
- result = copyin(sopt->sopt_val, &ndrv_multi->addr, sopt->sopt_valsize);
-
- // Validate the sockaddr
- if (result == 0 && sopt->sopt_valsize != ndrv_multi->addr.sa_len)
- result = EINVAL;
-
- if (result == 0 && ndrv_have_multicast(np, &ndrv_multi->addr))
- result = EEXIST;
-
- if (result == 0)
- {
- // Try adding the multicast
- result = ifnet_add_multicast(np->nd_if, &ndrv_multi->addr,
- &ndrv_multi->ifma);
- }
-
- if (result == 0)
- {
- // Add to our linked list
- ndrv_multi->next = np->nd_multiaddrs;
- np->nd_multiaddrs = ndrv_multi;
+ if (result == 0) {
+ // Try adding the multicast
+ result = ifnet_add_multicast(np->nd_if, &ndrv_multi->addr,
+ &ndrv_multi->ifma);
+ }
+
+ if (result == 0) {
+ // Add to our linked list
+ ndrv_multi->next = np->nd_multiaddrs;
+ np->nd_multiaddrs = ndrv_multi;
np->nd_dlist_cnt++;
- }
- else
- {
- // Free up the memory, something went wrong
- FREE(ndrv_multi, M_IFADDR);
- }
-
- return result;
+ } else {
+ // Free up the memory, something went wrong
+ FREE(ndrv_multi, M_IFADDR);
+ }
+
+ return result;
}
static int
ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt)
{
- struct sockaddr* multi_addr;
- struct ndrv_multiaddr* ndrv_entry = NULL;
- int result;
-
- if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
- sopt->sopt_level != SOL_NDRVPROTO)
- return EINVAL;
- if (np->nd_if == NULL || np->nd_dlist_cnt == 0)
- return ENXIO;
-
- // Allocate storage
- MALLOC(multi_addr, struct sockaddr*, sopt->sopt_valsize,
- M_TEMP, M_WAITOK);
- if (multi_addr == NULL)
- return ENOMEM;
-
- // Copy in the address
- result = copyin(sopt->sopt_val, multi_addr, sopt->sopt_valsize);
-
- // Validate the sockaddr
- if (result == 0 && sopt->sopt_valsize != multi_addr->sa_len)
- result = EINVAL;
-
- if (result == 0)
- {
- /* Find the old entry */
- ndrv_entry = ndrv_have_multicast(np, multi_addr);
-
- if (ndrv_entry == NULL)
- result = ENOENT;
- }
-
- if (result == 0)
- {
- // Try deleting the multicast
- result = ifnet_remove_multicast(ndrv_entry->ifma);
- }
-
- if (result == 0)
- {
- // Remove from our linked list
- struct ndrv_multiaddr* cur = np->nd_multiaddrs;
-
- ifmaddr_release(ndrv_entry->ifma);
-
- if (cur == ndrv_entry)
- {
- np->nd_multiaddrs = cur->next;
- }
- else
- {
- for (cur = cur->next; cur != NULL; cur = cur->next)
- {
- if (cur->next == ndrv_entry)
- {
- cur->next = cur->next->next;
- break;
- }
- }
- }
+ struct sockaddr* multi_addr;
+ struct ndrv_multiaddr* ndrv_entry = NULL;
+ int result;
+
+ if (sopt->sopt_val == 0 || sopt->sopt_valsize < 2 ||
+ sopt->sopt_level != SOL_NDRVPROTO) {
+ return EINVAL;
+ }
+ if (np->nd_if == NULL || np->nd_dlist_cnt == 0) {
+ return ENXIO;
+ }
+
+ // Allocate storage
+ MALLOC(multi_addr, struct sockaddr*, sopt->sopt_valsize,
+ M_TEMP, M_WAITOK);
+ if (multi_addr == NULL) {
+ return ENOMEM;
+ }
+
+ // Copy in the address
+ result = copyin(sopt->sopt_val, multi_addr, sopt->sopt_valsize);
+
+ // Validate the sockaddr
+ if (result == 0 && sopt->sopt_valsize != multi_addr->sa_len) {
+ result = EINVAL;
+ }
+
+ if (result == 0) {
+ /* Find the old entry */
+ ndrv_entry = ndrv_have_multicast(np, multi_addr);
+
+ if (ndrv_entry == NULL) {
+ result = ENOENT;
+ }
+ }
+
+ if (result == 0) {
+ // Try deleting the multicast
+ result = ifnet_remove_multicast(ndrv_entry->ifma);
+ }
+
+ if (result == 0) {
+ // Remove from our linked list
+ struct ndrv_multiaddr* cur = np->nd_multiaddrs;
+
+ ifmaddr_release(ndrv_entry->ifma);
+
+ if (cur == ndrv_entry) {
+ np->nd_multiaddrs = cur->next;
+ } else {
+ for (cur = cur->next; cur != NULL; cur = cur->next) {
+ if (cur->next == ndrv_entry) {
+ cur->next = cur->next->next;
+ break;
+ }
+ }
+ }
np->nd_dlist_cnt--;
- // Free the memory
- FREE(ndrv_entry, M_IFADDR);
- }
- FREE(multi_addr, M_TEMP);
+ // Free the memory
+ FREE(ndrv_entry, M_IFADDR);
+ }
+ FREE(multi_addr, M_TEMP);
- return result;
+ return result;
}
static struct ndrv_multiaddr*
ndrv_have_multicast(struct ndrv_cb *np, struct sockaddr* inAddr)
{
- struct ndrv_multiaddr* cur;
- for (cur = np->nd_multiaddrs; cur != NULL; cur = cur->next)
- {
-
- if ((inAddr->sa_len == cur->addr.sa_len) &&
- (bcmp(&cur->addr, inAddr, inAddr->sa_len) == 0))
- {
- // Found a match
- return cur;
- }
- }
-
- return NULL;
+ struct ndrv_multiaddr* cur;
+ for (cur = np->nd_multiaddrs; cur != NULL; cur = cur->next) {
+ if ((inAddr->sa_len == cur->addr.sa_len) &&
+ (bcmp(&cur->addr, inAddr, inAddr->sa_len) == 0)) {
+ // Found a match
+ return cur;
+ }
+ }
+
+ return NULL;
}
static void
ndrv_remove_all_multicast(struct ndrv_cb* np)
{
- struct ndrv_multiaddr* cur;
-
- if (np->nd_if != NULL)
- {
- while (np->nd_multiaddrs != NULL)
- {
- cur = np->nd_multiaddrs;
- np->nd_multiaddrs = cur->next;
-
- ifnet_remove_multicast(cur->ifma);
- ifmaddr_release(cur->ifma);
- FREE(cur, M_IFADDR);
- }
- }
+ struct ndrv_multiaddr* cur;
+
+ if (np->nd_if != NULL) {
+ while (np->nd_multiaddrs != NULL) {
+ cur = np->nd_multiaddrs;
+ np->nd_multiaddrs = cur->next;
+
+ ifnet_remove_multicast(cur->ifma);
+ ifmaddr_release(cur->ifma);
+ FREE(cur, M_IFADDR);
+ }
+ }
}
static struct pr_usrreqs ndrv_usrreqs = {
- .pru_abort = ndrv_abort,
- .pru_attach = ndrv_attach,
- .pru_bind = ndrv_bind,
- .pru_connect = ndrv_connect,
- .pru_detach = ndrv_detach,
- .pru_disconnect = ndrv_disconnect,
- .pru_peeraddr = ndrv_peeraddr,
- .pru_send = ndrv_send,
- .pru_shutdown = ndrv_shutdown,
- .pru_sockaddr = ndrv_sockaddr,
- .pru_sosend = sosend,
- .pru_soreceive = soreceive,
+ .pru_abort = ndrv_abort,
+ .pru_attach = ndrv_attach,
+ .pru_bind = ndrv_bind,
+ .pru_connect = ndrv_connect,
+ .pru_detach = ndrv_detach,
+ .pru_disconnect = ndrv_disconnect,
+ .pru_peeraddr = ndrv_peeraddr,
+ .pru_send = ndrv_send,
+ .pru_shutdown = ndrv_shutdown,
+ .pru_sockaddr = ndrv_sockaddr,
+ .pru_sosend = sosend,
+ .pru_soreceive = soreceive,
};
static struct protosw ndrvsw[] = {
-{
- .pr_type = SOCK_RAW,
- .pr_protocol = NDRVPROTO_NDRV,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_output = ndrv_output,
- .pr_ctloutput = ndrv_ctloutput,
- .pr_usrreqs = &ndrv_usrreqs,
-}
+ {
+ .pr_type = SOCK_RAW,
+ .pr_protocol = NDRVPROTO_NDRV,
+ .pr_flags = PR_ATOMIC | PR_ADDR,
+ .pr_output = ndrv_output,
+ .pr_ctloutput = ndrv_ctloutput,
+ .pr_usrreqs = &ndrv_usrreqs,
+ }
};
-static int ndrv_proto_count = (sizeof (ndrvsw) / sizeof (struct protosw));
+static int ndrv_proto_count = (sizeof(ndrvsw) / sizeof(struct protosw));
struct domain ndrvdomain_s = {
- .dom_family = PF_NDRV,
- .dom_name = "NetDriver",
- .dom_init = ndrv_dominit,
+ .dom_family = PF_NDRV,
+ .dom_name = "NetDriver",
+ .dom_init = ndrv_dominit,
};
static void
ndrvdomain = dp;
- for (i = 0, pr = &ndrvsw[0]; i < ndrv_proto_count; i++, pr++)
+ for (i = 0, pr = &ndrvsw[0]; i < ndrv_proto_count; i++, pr++) {
net_add_proto(pr, dp, 1);
+ }
}
static int necp_addr_compare(struct sockaddr *sa1, struct sockaddr *sa2, int check_port);
static bool necp_buffer_compare_with_bit_prefix(u_int8_t *p1, u_int8_t *p2, u_int32_t bits);
static bool necp_addr_is_empty(struct sockaddr *addr);
-static bool necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet);
+static bool necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet, u_int32_t bound_interface_index);
static bool necp_is_intcoproc(struct inpcb *inp, struct mbuf *packet);
struct necp_uuid_id_mapping {
offset += sizeof(u_int8_t) + sizeof(u_int32_t) + length;
}
+ // Check for loopback exception
+ if (necp_pass_loopback > 0 && necp_is_loopback(&local_addr.sa, &remote_addr.sa, NULL, NULL, bound_interface_index)) {
+ returned_result->policy_id = NECP_KERNEL_POLICY_ID_NO_MATCH;
+ returned_result->routing_result = NECP_KERNEL_POLICY_RESULT_PASS;
+ returned_result->routed_interface_index = lo_ifp->if_index;
+ *flags |= (NECP_CLIENT_RESULT_FLAG_IS_LOCAL | NECP_CLIENT_RESULT_FLAG_IS_DIRECT);
+ return 0;
+ }
+
// Lock
lck_rw_lock_shared(&necp_kernel_policy_lock);
static inline bool
necp_socket_bypass(struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, struct inpcb *inp)
{
- if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL)) {
+ if (necp_pass_loopback > 0 && necp_is_loopback(override_local_addr, override_remote_addr, inp, NULL, IFSCOPE_NONE)) {
return true;
} else if (necp_is_intcoproc(inp, NULL)) {
return true;
static inline bool
necp_output_bypass(struct mbuf *packet)
{
- if (necp_pass_loopback > 0 && necp_is_loopback(NULL, NULL, NULL, packet)) {
+ if (necp_pass_loopback > 0 && necp_is_loopback(NULL, NULL, NULL, packet, IFSCOPE_NONE)) {
return true;
}
if (necp_pass_keepalives > 0 && necp_get_is_keepalive_from_packet(packet)) {
}
static bool
-necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet)
+necp_is_loopback(struct sockaddr *local_addr, struct sockaddr *remote_addr, struct inpcb *inp, struct mbuf *packet, u_int32_t bound_interface_index)
{
// Note: This function only checks for the loopback addresses.
// In the future, we may want to expand to also allow any traffic
return TRUE;
}
}
+ } else if (bound_interface_index != IFSCOPE_NONE && lo_ifp->if_index == bound_interface_index) {
+ return TRUE;
}
if (packet != NULL) {
&flow->local_addr, &flow->remote_addr, NULL, NULL,
NULL, ignore_address, true);
+ // Check for blocking agents
+ for (int i = 0; i < NECP_MAX_NETAGENTS; i++) {
+ if (uuid_is_null(result.netagents[i])) {
+ // Passed end of valid agents
+ break;
+ }
+
+ u_int32_t flags = netagent_get_flags(result.netagents[i]);
+ if ((flags & NETAGENT_FLAG_REGISTERED) &&
+ !(flags & NETAGENT_FLAG_VOLUNTARY) &&
+ !(flags & NETAGENT_FLAG_ACTIVE) &&
+ !(flags & NETAGENT_FLAG_SPECIFIC_USE_ONLY)) {
+ // A required agent is not active, so mark the flow as non-viable
+ return false;
+ }
+ }
+
return error == 0 &&
result.routed_interface_index != IFSCOPE_NONE &&
result.routing_result != NECP_KERNEL_POLICY_RESULT_DROP;
/* KEV_MPTCP_SUBCLASS event codes */
#define KEV_MPTCP_CELLUSE 1
+#define KEV_IPSEC_SUBCLASS 13 /* IPsec event subclass */
+#define KEV_IPSEC_WAKE_PACKET 1 /* IPsec wake packet available, the
+ * first packet processed after a wake event */
+
#endif /* PRIVATE */
#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
#endif /* _NET_NETKEV_H_ */
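For context, a minimal userland sketch (not part of this change) of how a daemon could listen for the new KEV_IPSEC_WAKE_PACKET event over the standard PF_SYSTEM kernel-event socket. The structures come from <sys/kern_event.h>; the subclass and event constants come from the private net/netkev.h shown above, so the include path and availability outside the kernel tree are assumptions.

#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <sys/kern_event.h>
#include <net/netkev.h>        /* private: KEV_IPSEC_SUBCLASS, KEV_IPSEC_WAKE_PACKET */

int
main(void)
{
	int s = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
	struct kev_request req = {
		.vendor_code  = KEV_VENDOR_APPLE,
		.kev_class    = KEV_NETWORK_CLASS,
		.kev_subclass = KEV_IPSEC_SUBCLASS,
	};
	char buf[1024];

	if (s < 0 || ioctl(s, SIOCSKEVFILT, &req) < 0) {
		perror("kernel event socket");
		return 1;
	}
	for (;;) {
		ssize_t n = recv(s, buf, sizeof(buf), 0);
		if (n < (ssize_t)sizeof(struct kern_event_msg)) {
			break;
		}
		struct kern_event_msg *ev = (struct kern_event_msg *)(void *)buf;
		if (ev->event_code == KEV_IPSEC_WAKE_PACKET) {
			printf("IPsec wake packet event received\n");
		}
	}
	close(s);
	return 0;
}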
/*
- * Copyright (c) 2007-2016 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
"(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX);
return;
}
- memcpy(pfi_buffer, p, pfi_buffer_cnt * sizeof(*pfi_buffer));
+ memcpy(p, pfi_buffer, pfi_buffer_max * sizeof(*pfi_buffer));
/* no need to zero buffer */
_FREE(pfi_buffer, PFI_MTYPE);
pfi_buffer = p;
#define satosin(sa) SIN(sa)
#define sintosa(sin) ((struct sockaddr *)(void *)(sin))
#define SINIFSCOPE(s) ((struct sockaddr_inifscope *)(void *)(s))
+
+#define IPTOS_UNSPEC (-1) /* TOS byte not set */
+#define IPTOS_MASK 0xFF /* TOS byte mask */
#endif /* BSD_KERNEL_PRIVATE */
#ifdef KERNEL_PRIVATE
#endif
extern int get_pcblist_n(short, struct sysctl_req *, struct inpcbinfo *);
-#define INPCB_GET_PORTS_USED_WILDCARDOK 0x01
-#define INPCB_GET_PORTS_USED_NOWAKEUPOK 0x02
-#define INPCB_GET_PORTS_USED_RECVANYIFONLY 0x04
-#define INPCB_GET_PORTS_USED_EXTBGIDLEONLY 0x08
-#define INPCB_GET_PORTS_USED_ACTIVEONLY 0x10
extern void inpcb_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *,
struct inpcbinfo *);
bool iswildcard, wildcardok, nowakeok;
bool recvanyifonly, extbgidleok;
bool activeonly;
+ bool anytcpstateok;
- wildcardok = ((flags & INPCB_GET_PORTS_USED_WILDCARDOK) != 0);
- nowakeok = ((flags & INPCB_GET_PORTS_USED_NOWAKEUPOK) != 0);
- recvanyifonly = ((flags & INPCB_GET_PORTS_USED_RECVANYIFONLY) != 0);
- extbgidleok = ((flags & INPCB_GET_PORTS_USED_EXTBGIDLEONLY) != 0);
- activeonly = ((flags & INPCB_GET_PORTS_USED_ACTIVEONLY) != 0);
+ wildcardok = ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) != 0);
+ nowakeok = ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) != 0);
+ recvanyifonly = ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) != 0);
+ extbgidleok = ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) != 0);
+ activeonly = ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) != 0);
+ anytcpstateok = ((flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) != 0);
lck_rw_lock_shared(pcbinfo->ipi_lock);
gencnt = pcbinfo->ipi_gencnt;
for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp;
inp = LIST_NEXT(inp, inp_list)) {
- uint16_t port;
-
if (inp->inp_gencnt > gencnt ||
inp->inp_state == INPCB_STATE_DEAD ||
inp->inp_wantcnt == WNT_STOPUSING) {
continue;
}
- if ((so = inp->inp_socket) == NULL ||
- (so->so_state & SS_DEFUNCT) ||
- (so->so_state & SS_ISDISCONNECTED)) {
+ if ((so = inp->inp_socket) == NULL || inp->inp_lport == 0) {
+ continue;
+ }
+
+ /*
+ * ANYTCPSTATEOK means incoming packets cannot be filtered on
+ * reception, so cast a wide net of possibilities
+ */
+ if (!anytcpstateok &&
+ ((so->so_state & SS_DEFUNCT) ||
+ (so->so_state & SS_ISDISCONNECTED))) {
continue;
}
switch (tp->t_state) {
case TCPS_CLOSED:
+ if (anytcpstateok && inp->inp_fport != 0) {
+ /*
+ * A foreign port means we had a 4-tuple, or at
+ * least a connection attempt, so packets may
+ * still be received for the 4-tuple after the
+ * connection is gone
+ */
+ break;
+ }
continue;
/* NOT REACHED */
case TCPS_LISTEN:
case TCPS_FIN_WAIT_2:
/*
* In the closing states, the connection
- * is not idle when there is outgoing
+ * is active when there is outgoing
* data having to be acknowledged
*/
- if (activeonly && so->so_snd.sb_cc == 0) {
+ if (!anytcpstateok &&
+ (activeonly && so->so_snd.sb_cc == 0)) {
continue;
}
break;
case TCPS_TIME_WAIT:
+ if (anytcpstateok) {
+ /*
+ * Packets may still be received for the 4 tuple
+ * after the connection is gone
+ */
+ break;
+ }
continue;
/* NOT REACHED */
}
}
- /*
- * Final safeguard to exclude unspecified local port
- */
- port = ntohs(inp->inp_lport);
- if (port == 0) {
- continue;
- }
- bitstr_set(bitfield, port);
+
+ bitstr_set(bitfield, ntohs(inp->inp_lport));
if_ports_used_add_inpcb(ifindex, inp);
}
return sotc;
}
+__private_extern__ int
+so_tos_from_control(struct mbuf *control)
+{
+ struct cmsghdr *cm;
+ int tos = IPTOS_UNSPEC;
+
+ for (cm = M_FIRST_CMSGHDR(control);
+ is_cmsg_valid(control, cm);
+ cm = M_NXT_CMSGHDR(control, cm)) {
+ if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
+ continue;
+ }
+
+ if ((cm->cmsg_level == IPPROTO_IP &&
+ cm->cmsg_type == IP_TOS) ||
+ (cm->cmsg_level == IPPROTO_IPV6 &&
+ cm->cmsg_type == IPV6_TCLASS)) {
+ tos = *(int *)(void *)CMSG_DATA(cm) & IPTOS_MASK;
+ /* The first valid option wins */
+ break;
+ }
+ }
+
+ return tos;
+}
+
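For reference, a minimal userland sketch (not part of this change) of how a per-packet TOS value ends up in the control mbuf that so_tos_from_control() parses: the sender attaches an IP_TOS (or IPV6_TCLASS) ancillary message to sendmsg(2). The helper name and the caller-supplied socket and address are hypothetical.

#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>

/* Sketch: send one UDP datagram carrying an explicit TOS byte as ancillary data. */
static ssize_t
send_with_tos(int s, const struct sockaddr_in *dst, const void *buf, size_t len, int tos)
{
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	union {
		char space[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} cbuf;
	struct msghdr msg = {
		.msg_name = (void *)dst,
		.msg_namelen = sizeof(*dst),
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = cbuf.space,
		.msg_controllen = sizeof(cbuf.space),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

	cm->cmsg_level = IPPROTO_IP;            /* IPPROTO_IPV6 + IPV6_TCLASS for v6 */
	cm->cmsg_type = IP_TOS;
	cm->cmsg_len = CMSG_LEN(sizeof(int));   /* must match the kernel's exact-length check */
	memcpy(CMSG_DATA(cm), &tos, sizeof(int));

	return sendmsg(s, &msg, 0);
}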
__private_extern__ void
so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
{
ovbcopy((caddr_t)(&cp[IPOPT_OFFSET + 1] +
sizeof(struct in_addr)),
(caddr_t)&cp[IPOPT_OFFSET + 1],
- (unsigned)cnt + sizeof(struct in_addr));
+ (unsigned)cnt - (IPOPT_MINOFF - 1));
break;
}
}
goto out;
}
+ if (IFNET_IS_COMPANION_LINK(ifp)) {
+ goto out;
+ }
+
if (IFNET_IS_EXPENSIVE(ifp) &&
(mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
goto out;
static void mptcp_send_dfin(struct socket *so);
static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts);
-static void mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val);
static int mptcp_freeq(struct mptcb *mp_tp);
/*
static symptoms_advisory_t mptcp_advisory;
uint32_t mptcp_cellicon_refcount = 0;
-#define MPTCP_CELLICON_TOGGLE_RATE (5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */
/*
* XXX The order of the event handlers below is really
static boolean_t
mptcp_subflow_disconnecting(struct mptsub *mpts)
{
- /* Split out in if-statements for readability. Compile should
- * optimize that.
- */
if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) {
return true;
}
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
{
- struct socket *so;
+ struct socket *so, *mp_so;
struct mptcb *mp_tp;
int send_dfin = 0;
- socket_lock_assert_owned(mptetoso(mpte));
+ so = mpts->mpts_socket;
+ mp_tp = mpte->mpte_mptcb;
+ mp_so = mptetoso(mpte);
+
+ socket_lock_assert_owned(mp_so);
if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
return;
mpts->mpts_flags |= MPTSF_DISCONNECTING;
- so = mpts->mpts_socket;
- mp_tp = mpte->mpte_mptcb;
if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
send_dfin = 1;
}
if (send_dfin) {
mptcp_send_dfin(so);
}
- (void) soshutdownlock(so, SHUT_RD);
- (void) soshutdownlock(so, SHUT_WR);
- (void) sodisconnectlocked(so);
+
+ if (mp_so->so_flags & SOF_DEFUNCT) {
+ errno_t ret;
+
+ ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
+ if (ret == 0) {
+ ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
+
+ if (ret != 0) {
+ os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
+ }
+ } else {
+ os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
+ }
+ } else {
+ (void) soshutdownlock(so, SHUT_RD);
+ (void) soshutdownlock(so, SHUT_WR);
+ (void) sodisconnectlocked(so);
+ }
}
+
/*
* Generate a disconnect event for this subflow socket, in case
* the lower layer doesn't do it; this is needed because the
static void
mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts)
{
+ struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
int error;
/* First-party apps (Siri) don't flip the cellicon */
return;
}
+ /* Connections that have fallen back to TCP do not trigger the cellicon */
+ if (mpte->mpte_mptcb->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
+ return;
+ }
+
/* Remember the last time we set the cellicon. Needed for debouncing */
mpte->mpte_last_cellicon_set = tcp_now;
+ tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
+ tcp_sched_timers(tp);
+
if (mpts->mpts_flags & MPTSF_CELLICON_SET &&
mpte->mpte_cellicon_increments != 0) {
if (mptcp_cellicon_refcount == 0) {
return true;
}
-static void
-mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val)
+void
+mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val)
{
/* First-party apps (Siri) don't flip the cellicon */
if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
mpts->mpts_flags &= ~MPTSF_CELLICON_SET;
}
- mpte->mpte_cellicon_increments--;
+ if (mpte->mpte_cellicon_increments < val) {
+ os_log_error(mptcp_log_handle, "%s - %lx: cellicon increments is %u but asked to decrement by %u\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments, val);
+ val = mpte->mpte_cellicon_increments;
+ }
+
+ mpte->mpte_cellicon_increments -= val;
if (__mptcp_unset_cellicon(val) == false) {
return;
case SO_NOADDRERR: /* MP */
case SO_LABEL: /* MP */
case SO_PEERLABEL: /* MP */
+ case SO_DEFUNCTIT: /* MP */
case SO_DEFUNCTOK: /* MP */
case SO_ISDEFUNCT: /* MP */
case SO_TRAFFIC_CLASS_DBG: /* MP */
return "SO_TRAFFIC_CLASS_DBG";
case SO_PRIVILEGED_TRAFFIC_CLASS:
return "SO_PRIVILEGED_TRAFFIC_CLASS";
+ case SO_DEFUNCTIT:
+ return "SO_DEFUNCTIT";
case SO_DEFUNCTOK:
return "SO_DEFUNCTOK";
case SO_ISDEFUNCT:
extern int mptcp_developer_mode; /* Allow aggregation mode */
extern uint32_t mptcp_cellicon_refcount;
+#define MPTCP_CELLICON_TOGGLE_RATE (5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */
+
extern int tcp_jack_rxmt; /* Join ACK retransmission value in msecs */
__BEGIN_DECLS
boolean_t has_v6, boolean_t has_v4);
extern void mptcp_set_restrictions(struct socket *mp_so);
extern void mptcp_clear_cellicon(void);
+extern void mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val);
extern void mptcp_reset_rexmit_state(struct tcpcb *tp);
extern void mptcp_reset_keepalive(struct tcpcb *tp);
extern int mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
struct ip_out_args ipoa;
struct ip_moptions *imo;
+ int tos = IPTOS_UNSPEC;
int error = 0;
bzero(&ipoa, sizeof(ipoa));
if (control != NULL) {
+ tos = so_tos_from_control(control);
sotc = so_tc_from_control(control, &netsvctype);
m_freem(control);
return ENOBUFS;
}
ip = mtod(m, struct ip *);
- ip->ip_tos = inp->inp_ip_tos;
+ if (tos != IPTOS_UNSPEC) {
+ ip->ip_tos = (uint8_t)(tos & IPTOS_MASK);
+ } else {
+ ip->ip_tos = inp->inp_ip_tos;
+ }
ip->ip_off = 0;
ip->ip_p = inp->inp_ip_p;
ip->ip_len = m->m_pkthdr.len;
static void tcp_sched_timerlist(uint32_t offset);
static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode,
u_int16_t probe_if_index);
-static void tcp_sched_timers(struct tcpcb *tp);
static inline void tcp_set_lotimer_index(struct tcpcb *);
__private_extern__ void tcp_remove_from_time_wait(struct inpcb *inp);
static inline void tcp_update_mss_core(struct tcpcb *tp, struct ifnet *ifp);
(void) tcp_output(tp);
}
break;
+ case TCPT_CELLICON:
+ {
+ struct mptses *mpte = tptomptp(tp)->mpt_mpte;
+
+ tp->t_timer[TCPT_CELLICON] = 0;
+
+ if (mpte->mpte_cellicon_increments == 0) {
+ /* Cell-icon not set by this connection */
+ break;
+ }
+
+ if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
+ mptcp_unset_cellicon(mpte, NULL, 1);
+ }
+
+ if (mpte->mpte_cellicon_increments) {
+ tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
+ }
+
+ break;
+ }
#endif /* MPTCP */
case TCPT_PTO:
#define TCPT_2MSL 6 /* 2*msl quiet time timer */
#if MPTCP
#define TCPT_JACK_RXMT 7 /* retransmit timer for join ack */
-#define TCPT_MAX 7
+#define TCPT_CELLICON 8 /* Timer to check for cell-activity */
+#define TCPT_MAX 8
#else /* MPTCP */
#define TCPT_MAX 6
#endif /* !MPTCP */
void tcp_itimer(struct inpcbinfo *ipi);
void tcp_check_timer_state(struct tcpcb *tp);
void tcp_run_timerlist(void *arg1, void *arg2);
+void tcp_sched_timers(struct tcpcb *tp);
struct tcptemp *tcp_maketemplate(struct tcpcb *);
void tcp_fillheaders(struct tcpcb *, void *, void *);
#if IPSEC
#include <netinet6/ipsec.h>
#include <netinet6/esp.h>
+#include <netkey/key.h>
extern int ipsec_bypass;
extern int esp_udp_encap_port;
#endif /* IPSEC */
if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 &&
(uh->uh_dport == ntohs((u_short)esp_udp_encap_port) ||
uh->uh_sport == ntohs((u_short)esp_udp_encap_port))) {
- int payload_len = len - sizeof(struct udphdr) > 4 ? 4 :
- len - sizeof(struct udphdr);
+ /*
+ * Check if ESP or keepalive:
+ * 1. the destination port of the incoming packet is 4500, or
+ * 2. the source port of the incoming packet is 4500; in that case,
+ * check the SADB for a matching IP address and port.
+ */
+ bool check_esp = true;
+ if (uh->uh_dport != ntohs((u_short)esp_udp_encap_port)) {
+ check_esp = key_checksa_present(AF_INET, (caddr_t)&ip->ip_dst,
+ (caddr_t)&ip->ip_src, uh->uh_dport,
+ uh->uh_sport);
+ }
+
+ if (check_esp) {
+ int payload_len = len - sizeof(struct udphdr) > 4 ? 4 :
+ len - sizeof(struct udphdr);
+
+ if (m->m_len < iphlen + sizeof(struct udphdr) + payload_len) {
+ if ((m = m_pullup(m, iphlen + sizeof(struct udphdr) +
+ payload_len)) == NULL) {
+ udpstat.udps_hdrops++;
+ KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
+ 0, 0, 0, 0, 0);
+ return;
+ }
+ /*
+ * Expect 32-bit aligned data pointer on strict-align
+ * platforms.
+ */
+ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
- if (m->m_len < iphlen + sizeof(struct udphdr) + payload_len) {
- if ((m = m_pullup(m, iphlen + sizeof(struct udphdr) +
- payload_len)) == NULL) {
- udpstat.udps_hdrops++;
+ ip = mtod(m, struct ip *);
+ uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
+ }
+ /* Check for NAT keepalive packet */
+ if (payload_len == 1 && *(u_int8_t *)
+ ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
+ m_freem(m);
+ KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
+ 0, 0, 0, 0, 0);
+ return;
+ } else if (payload_len == 4 && *(u_int32_t *)(void *)
+ ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
+ /* UDP encapsulated IPsec packet to pass through NAT */
KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
0, 0, 0, 0, 0);
+ /* preserve the udp header */
+ esp4_input(m, iphlen + sizeof(struct udphdr));
return;
}
- /*
- * Expect 32-bit aligned data pointer on strict-align
- * platforms.
- */
- MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
-
- ip = mtod(m, struct ip *);
- uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
- }
- /* Check for NAT keepalive packet */
- if (payload_len == 1 && *(u_int8_t *)
- ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
- m_freem(m);
- KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
- 0, 0, 0, 0, 0);
- return;
- } else if (payload_len == 4 && *(u_int32_t *)(void *)
- ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
- /* UDP encapsulated IPsec packet to pass through NAT */
- KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
- 0, 0, 0, 0, 0);
- /* preserve the udp header */
- esp4_input(m, iphlen + sizeof(struct udphdr));
- return;
}
}
#endif /* IPSEC */
int netsvctype = _NET_SERVICE_TYPE_UNSPEC;
struct ifnet *origoutifp = NULL;
int flowadv = 0;
+ int tos = IPTOS_UNSPEC;
/* Enable flow advisory only when connected */
flowadv = (so->so_state & SS_ISCONNECTED) ? 1 : 0;
#endif
if (control != NULL) {
+ tos = so_tos_from_control(control);
sotc = so_tc_from_control(control, &netsvctype);
VERIFY(outif == NULL);
error = udp_check_pktinfo(control, &outif, &pi_laddr);
}
((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len;
((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
- ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
+ if (tos != IPTOS_UNSPEC) {
+ ((struct ip *)ui)->ip_tos = (uint8_t)(tos & IPTOS_MASK);
+ } else {
+ ((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
+ }
udpstat.udps_opackets++;
KERNEL_DEBUG(DBG_LAYER_OUT_END, ui->ui_dport, ui->ui_sport,
*/
if (siz1 < siz) {
ipseclog((LOG_NOTICE, "sum length too short in IPv4 AH input "
- "(%lu, should be at least %lu): %s\n",
+ "(%u, should be at least %u): %s\n",
(u_int32_t)siz1, (u_int32_t)siz,
ipsec4_logpacketstr(ip, spi)));
IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
}
if ((ah->ah_len << 2) - sizoff != siz1) {
ipseclog((LOG_NOTICE, "sum length mismatch in IPv4 AH input "
- "(%d should be %lu): %s\n",
+ "(%d should be %u): %s\n",
(ah->ah_len << 2) - sizoff, (u_int32_t)siz1,
ipsec4_logpacketstr(ip, spi)));
IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
*/
if (siz1 < siz) {
ipseclog((LOG_NOTICE, "sum length too short in IPv6 AH input "
- "(%lu, should be at least %lu): %s\n",
+ "(%u, should be at least %u): %s\n",
(u_int32_t)siz1, (u_int32_t)siz,
ipsec6_logpacketstr(ip6, spi)));
IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
}
if ((ah->ah_len << 2) - sizoff != siz1) {
ipseclog((LOG_NOTICE, "sum length mismatch in IPv6 AH input "
- "(%d should be %lu): %s\n",
+ "(%d should be %u): %s\n",
(ah->ah_len << 2) - sizoff, (u_int32_t)siz1,
ipsec6_logpacketstr(ip6, spi)));
IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
// check if total packet length is enough to contain ESP + IV
if (m->m_pkthdr.len < bodyoff) {
- esp_packet_log_err("ChaChaPoly Packet too short %d < %zu, SPI 0x%08x",
+ esp_packet_log_err("ChaChaPoly Packet too short %d < %u, SPI 0x%08x",
m->m_pkthdr.len, bodyoff, ntohl(sav->spi));
m_freem(m);
return EINVAL;
break;
default:
ipseclog((LOG_ERR,
- "esp_gcm_mature %s: invalid algo %d.\n", sav->alg_enc));
+ "esp_gcm_mature %s: invalid algo %d.\n", algo->name, sav->alg_enc));
return 1;
}
}
if (m->m_pkthdr.len < bodyoff) {
- ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n",
+ ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%u\n",
algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));
m_freem(m);
return EINVAL;
}
if (m->m_pkthdr.len < bodyoff) {
- ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n",
+ ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%u\n",
algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));
m_freem(m);
return EINVAL;
}
if ((m->m_pkthdr.len - bodyoff) % blocklen) {
ipseclog((LOG_ERR, "esp_cbc_encrypt %s: "
- "payload length must be multiple of %lu\n",
+ "payload length must be multiple of %u\n",
algo->name, (u_int32_t)algo->padbound));
m_freem(m);
return EINVAL;
siz = (((*algo->sumsiz)(sav) + 3) & ~(4 - 1));
if (sizeof(sumbuf) < siz) {
ipseclog((LOG_DEBUG,
- "esp_auth: AH_MAXSUMSIZE is too small: siz=%lu\n",
+ "esp_auth: AH_MAXSUMSIZE is too small: siz=%u\n",
(u_int32_t)siz));
KERNEL_DEBUG(DBG_FNC_ESPAUTH | DBG_FUNC_END, 4, 0, 0, 0, 0);
return EINVAL;
}
if (AH_MAXSUMSIZE < siz) {
ipseclog((LOG_DEBUG,
- "internal error: AH_MAXSUMSIZE must be larger than %lu\n",
+ "internal error: AH_MAXSUMSIZE must be larger than %u\n",
(u_int32_t)siz));
IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
goto bad;
int mlen;
if ((mlen = m_length2(m, NULL)) < hlen) {
ipseclog((LOG_DEBUG,
- "IPv4 ESP input: decrypted packet too short %d < %d\n",
+ "IPv4 ESP input: decrypted packet too short %d < %zu\n",
mlen, hlen));
IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
ifnet_release(ipsec_if);
}
if (AH_MAXSUMSIZE < siz) {
ipseclog((LOG_DEBUG,
- "internal error: AH_MAXSUMSIZE must be larger than %lu\n",
+ "internal error: AH_MAXSUMSIZE must be larger than %u\n",
(u_int32_t)siz));
IPSEC_STAT_INCREMENT(ipsec6stat.in_inval);
goto bad;
}
if (m->m_pkthdr.len < bodyoff) {
- ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n",
+ ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%u\n",
algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));
m_freem(m);
return EINVAL;
ivp = (u_int8_t *) sav->iv;
if (m->m_pkthdr.len < bodyoff) {
- ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n",
+ ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%u\n",
algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));
m_freem(m);
return EINVAL;
}
if ((m->m_pkthdr.len - bodyoff) % AES_BLOCKLEN) {
ipseclog((LOG_ERR, "esp_cbc_encrypt %s: "
- "payload length must be multiple of %lu\n",
+ "payload length must be multiple of %d\n",
algo->name, AES_BLOCKLEN));
m_freem(m);
return EINVAL;
bzero(nonce, ESP_GCM_SALT_LEN + ivlen);
if (m->m_pkthdr.len < bodyoff) {
- ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__,
+ ipseclog((LOG_ERR, "%s: bad len %d/%u\n", __FUNCTION__,
m->m_pkthdr.len, (u_int32_t)bodyoff));
m_freem(m);
return EINVAL;
}
if (m->m_pkthdr.len < bodyoff) {
- ipseclog((LOG_ERR, "%s: bad len %d/%lu\n", __FUNCTION__,
+ ipseclog((LOG_ERR, "%s: bad len %d/%u\n", __FUNCTION__,
m->m_pkthdr.len, (u_int32_t)bodyoff));
m_freem(m);
return EINVAL;
#include <sys/priv.h>
#include <kern/locks.h>
#include <sys/kauth.h>
+#include <sys/bitstring.h>
+
#include <libkern/OSAtomic.h>
#include <net/if.h>
#include <net/route.h>
#include <net/if_ipsec.h>
+#include <net/if_ports_used.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <IOKit/pwr_mgt/IOPM.h>
+#include <os/log_private.h>
+
#if IPSEC_DEBUG
int ipsec_debug = 1;
#else
}
int result = sysctl_io_opaque(req, &ipsec_wake_pkt, sizeof(ipsec_wake_pkt), NULL);
+
+ ipseclog((LOG_NOTICE, "%s: uuid %s spi %u seq %u len %u result %d",
+ __func__,
+ ipsec_wake_pkt.wake_uuid,
+ ipsec_wake_pkt.wake_pkt_spi,
+ ipsec_wake_pkt.wake_pkt_seq,
+ ipsec_wake_pkt.wake_pkt_len,
+ result));
+
return result;
}
ipsec_wake_pkt.wake_pkt_spi = spi;
ipsec_wake_pkt.wake_pkt_seq = seq;
+ ipseclog((LOG_NOTICE, "%s: uuid %s spi %u seq %u len %u",
+ __func__,
+ ipsec_wake_pkt.wake_uuid,
+ ipsec_wake_pkt.wake_pkt_spi,
+ ipsec_wake_pkt.wake_pkt_seq,
+ ipsec_wake_pkt.wake_pkt_len));
+
+ struct kev_msg ev_msg = { 0 };
+ ev_msg.vendor_code = KEV_VENDOR_APPLE;
+ ev_msg.kev_class = KEV_NETWORK_CLASS;
+ ev_msg.kev_subclass = KEV_IPSEC_SUBCLASS;
+ ev_msg.event_code = KEV_IPSEC_WAKE_PACKET;
+ int result = kev_post_msg(&ev_msg);
+ if (result != 0) {
+ os_log_error(OS_LOG_DEFAULT, "%s: kev_post_msg() failed with error %d for wake uuid %s",
+ __func__, result, ipsec_wake_pkt.wake_uuid);
+ }
+
ipsec_save_wake_pkt = false;
done:
lck_mtx_unlock(sadb_mutex);
return;
}
+static void
+ipsec_get_local_ports(void)
+{
+ errno_t error;
+ ifnet_t *ifp_list;
+ uint32_t count, i;
+ static uint8_t port_bitmap[bitstr_size(IP_PORTRANGE_SIZE)];
+
+ error = ifnet_list_get_all(IFNET_FAMILY_IPSEC, &ifp_list, &count);
+ if (error != 0) {
+ os_log_error(OS_LOG_DEFAULT, "%s: ifnet_list_get_all() failed %d",
+ __func__, error);
+ return;
+ }
+ for (i = 0; i < count; i++) {
+ ifnet_t ifp = ifp_list[i];
+
+ /*
+ * Get all the TCP and UDP ports for IPv4 and IPv6
+ */
+ error = ifnet_get_local_ports_extended(ifp, PF_UNSPEC,
+ IFNET_GET_LOCAL_PORTS_WILDCARDOK |
+ IFNET_GET_LOCAL_PORTS_NOWAKEUPOK |
+ IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK,
+ port_bitmap);
+ if (error != 0) {
+ os_log_error(OS_LOG_DEFAULT, "%s: ifnet_get_local_ports_extended(%s) failed %d",
+ __func__, if_name(ifp), error);
+ }
+ }
+ ifnet_list_free(ifp_list);
+}
+
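A small sketch of how the port_bitmap filled in by ifnet_get_local_ports_extended() above could be consumed. It assumes the classic BSD bitstring packing (8 ports per byte, LSB first) that bitstr_size()/bitstr_set() use elsewhere in this change; the helper name is hypothetical.

/*
 * Sketch (assumption: classic BSD bitstring packing, 8 bits per byte, LSB first).
 * Returns non-zero when the given host-order local port is marked in the bitmap.
 */
static int
ipsec_port_is_marked(const uint8_t *port_bitmap, uint16_t port)
{
	return (port_bitmap[port >> 3] & (1U << (port & 0x7))) != 0;
}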
static IOReturn
ipsec_sleep_wake_handler(void *target, void *refCon, UInt32 messageType,
void *provider, void *messageArgument, vm_size_t argSize)
#pragma unused(target, refCon, provider, messageArgument, argSize)
switch (messageType) {
case kIOMessageSystemWillSleep:
+ ipsec_get_local_ports();
memset(&ipsec_wake_pkt, 0, sizeof(ipsec_wake_pkt));
IOPMCopySleepWakeUUIDKey(ipsec_wake_pkt.wake_uuid,
sizeof(ipsec_wake_pkt.wake_uuid));
- ipseclog((LOG_INFO,
- "ipsec: system will sleep"));
+ ipseclog((LOG_NOTICE,
+ "ipsec: system will sleep, uuid: %s", ipsec_wake_pkt.wake_uuid));
break;
- case kIOMessageSystemHasPoweredOn:
+ case kIOMessageSystemWillPowerOn:
ipsec_save_wake_pkt = true;
- ipseclog((LOG_INFO,
- "ipsec: system has powered on"));
+ ipseclog((LOG_NOTICE,
+ "ipsec: system will powered on, uuid: %s", ipsec_wake_pkt.wake_uuid));
break;
default:
break;
#include <netkey/keydb.h>
#include <netinet/ip_var.h>
+#include <os/log.h>
+
/* lock for IPsec stats */
extern lck_grp_t *sadb_stat_mutex_grp;
extern lck_grp_attr_t *sadb_stat_mutex_grp_attr;
extern bool ipsec_save_wake_pkt;
-#define ipseclog(x) do { if (ipsec_debug) log x; } while (0)
+#define _ipsec_log(level, fmt, ...) do { \
+ os_log_type_t type; \
+ switch (level) { \
+ default: \
+ type = OS_LOG_TYPE_DEFAULT; \
+ break; \
+ case LOG_INFO: \
+ type = OS_LOG_TYPE_INFO; \
+ break; \
+ case LOG_DEBUG: \
+ type = OS_LOG_TYPE_DEBUG; \
+ break; \
+ case LOG_ERR: \
+ type = OS_LOG_TYPE_ERROR; \
+ break; \
+ } \
+ os_log_with_type(OS_LOG_DEFAULT, type, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ipseclog(x) do { if (ipsec_debug != 0) _ipsec_log x; } while (0)
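Call sites keep the historical double-parenthesis form: ipseclog() still takes a single parenthesized argument list, which is forwarded to the variadic _ipsec_log(). For example (taken from the ESP code in this same change):

	ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%u\n",
	    algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff));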
extern struct secpolicy *ipsec4_getpolicybysock(struct mbuf *, u_int,
struct socket *, int *);
#include <netinet6/ipsec.h>
#include <netinet6/ipsec6.h>
#include <netinet6/esp6.h>
+#include <netkey/key.h>
extern int ipsec_bypass;
extern int esp_udp_encap_port;
#endif /* IPSEC */
if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 &&
(uh->uh_dport == ntohs((u_short)esp_udp_encap_port) ||
uh->uh_sport == ntohs((u_short)esp_udp_encap_port))) {
- int payload_len = ulen - sizeof(struct udphdr) > 4 ? 4 :
- ulen - sizeof(struct udphdr);
+ /*
+ * Check if ESP or keepalive:
+ * 1. the destination port of the incoming packet is 4500, or
+ * 2. the source port of the incoming packet is 4500; in that case,
+ * check the SADB for a matching IP address and port.
+ */
+ bool check_esp = true;
+ if (uh->uh_dport != ntohs((u_short)esp_udp_encap_port)) {
+ check_esp = key_checksa_present(AF_INET6, (caddr_t)&ip6->ip6_dst,
+ (caddr_t)&ip6->ip6_src, uh->uh_dport,
+ uh->uh_sport);
+ }
+
+ if (check_esp) {
+ int payload_len = ulen - sizeof(struct udphdr) > 4 ? 4 :
+ ulen - sizeof(struct udphdr);
+
+ if (m->m_len < off + sizeof(struct udphdr) + payload_len) {
+ if ((m = m_pullup(m, off + sizeof(struct udphdr) +
+ payload_len)) == NULL) {
+ udpstat.udps_hdrops++;
+ goto bad;
+ }
+ /*
+ * Expect 32-bit aligned data pointer on strict-align
+ * platforms.
+ */
+ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
- if (m->m_len < off + sizeof(struct udphdr) + payload_len) {
- if ((m = m_pullup(m, off + sizeof(struct udphdr) +
- payload_len)) == NULL) {
- udpstat.udps_hdrops++;
+ ip6 = mtod(m, struct ip6_hdr *);
+ uh = (struct udphdr *)(void *)((caddr_t)ip6 + off);
+ }
+ /* Check for NAT keepalive packet */
+ if (payload_len == 1 && *(u_int8_t*)
+ ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
goto bad;
+ } else if (payload_len == 4 && *(u_int32_t*)(void *)
+ ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
+ /* UDP encapsulated IPsec packet to pass through NAT */
+ /* preserve the udp header */
+ *offp = off + sizeof(struct udphdr);
+ return esp6_input(mp, offp, IPPROTO_UDP);
}
- /*
- * Expect 32-bit aligned data pointer on strict-align
- * platforms.
- */
- MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
-
- ip6 = mtod(m, struct ip6_hdr *);
- uh = (struct udphdr *)(void *)((caddr_t)ip6 + off);
- }
- /* Check for NAT keepalive packet */
- if (payload_len == 1 && *(u_int8_t*)
- ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
- goto bad;
- } else if (payload_len == 4 && *(u_int32_t*)(void *)
- ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
- /* UDP encapsulated IPsec packet to pass through NAT */
- /* preserve the udp header */
- *offp = off + sizeof(struct udphdr);
- return esp6_input(mp, offp, IPPROTO_UDP);
}
}
#endif /* IPSEC */
return match;
}
+/*
+ * This function checks whether a UDP packet with a random local port
+ * and a remote port of 4500 matches an SA in the kernel. If it does match,
+ * send the packet to the ESP engine. If not, send the packet to the UDP protocol.
+ */
+bool
+key_checksa_present(u_int family,
+ caddr_t local_addr,
+ caddr_t remote_addr,
+ u_int16_t local_port,
+ u_int16_t remote_port)
+{
+ LCK_MTX_ASSERT(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+ /* sanity check */
+ if (local_addr == NULL || remote_addr == NULL) {
+ panic("key_allocsa: NULL pointer is passed.\n");
+ }
+
+ /*
+ * Search the SAD.
+ * XXX: the inner IP header should be checked somewhere, including
+ * when an IPsec tunnel packet is received; but ESP tunnel mode is
+ * encrypted, so we can't check the inner IP header.
+ */
+ /*
+ * search a valid state list for inbound packet.
+ * the search order is not important.
+ */
+ struct secashead *sah = NULL;
+ bool found_sa = false;
+
+ lck_mtx_lock(sadb_mutex);
+ LIST_FOREACH(sah, &sahtree, chain) {
+ if (sah->state == SADB_SASTATE_DEAD) {
+ continue;
+ }
+
+ if (sah->dir != IPSEC_DIR_OUTBOUND) {
+ continue;
+ }
+
+ if (family != sah->saidx.src.ss_family) {
+ continue;
+ }
+
+ struct sockaddr_in src_in = {};
+ struct sockaddr_in6 src_in6 = {};
+
+ /* check src address */
+ switch (family) {
+ case AF_INET:
+ src_in.sin_family = AF_INET;
+ src_in.sin_len = sizeof(src_in);
+ memcpy(&src_in.sin_addr, local_addr, sizeof(src_in.sin_addr));
+ if (key_sockaddrcmp((struct sockaddr*)&src_in,
+ (struct sockaddr *)&sah->saidx.src, 0) != 0) {
+ continue;
+ }
+ break;
+ case AF_INET6:
+ src_in6.sin6_family = AF_INET6;
+ src_in6.sin6_len = sizeof(src_in6);
+ memcpy(&src_in6.sin6_addr, local_addr, sizeof(src_in6.sin6_addr));
+ if (IN6_IS_SCOPE_LINKLOCAL(&src_in6.sin6_addr)) {
+ /* kame fake scopeid */
+ src_in6.sin6_scope_id =
+ ntohs(src_in6.sin6_addr.s6_addr16[1]);
+ src_in6.sin6_addr.s6_addr16[1] = 0;
+ }
+ if (key_sockaddrcmp((struct sockaddr*)&src_in6,
+ (struct sockaddr *)&sah->saidx.src, 0) != 0) {
+ continue;
+ }
+ break;
+ default:
+ ipseclog((LOG_DEBUG, "key_checksa_present: "
+ "unknown address family=%d.\n",
+ family));
+ continue;
+ }
+
+ struct sockaddr_in dest_in = {};
+ struct sockaddr_in6 dest_in6 = {};
+
+ /* check dst address */
+ switch (family) {
+ case AF_INET:
+ dest_in.sin_family = AF_INET;
+ dest_in.sin_len = sizeof(dest_in);
+ memcpy(&dest_in.sin_addr, remote_addr, sizeof(dest_in.sin_addr));
+ if (key_sockaddrcmp((struct sockaddr*)&dest_in,
+ (struct sockaddr *)&sah->saidx.dst, 0) != 0) {
+ continue;
+ }
+
+ break;
+ case AF_INET6:
+ dest_in6.sin6_family = AF_INET6;
+ dest_in6.sin6_len = sizeof(dest_in6);
+ memcpy(&dest_in6.sin6_addr, remote_addr, sizeof(dest_in6.sin6_addr));
+ if (IN6_IS_SCOPE_LINKLOCAL(&dest_in6.sin6_addr)) {
+ /* kame fake scopeid */
+ dest_in6.sin6_scope_id =
+ ntohs(dest_in6.sin6_addr.s6_addr16[1]);
+ dest_in6.sin6_addr.s6_addr16[1] = 0;
+ }
+ if (key_sockaddrcmp((struct sockaddr*)&dest_in6,
+ (struct sockaddr *)&sah->saidx.dst, 0) != 0) {
+ continue;
+ }
+
+ break;
+ default:
+ ipseclog((LOG_DEBUG, "key_checksa_present: "
+ "unknown address family=%d.\n", family));
+ continue;
+ }
+
+ struct secasvar *nextsav = NULL;
+ for (u_int stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) {
+ u_int state = saorder_state_alive[stateidx];
+ for (struct secasvar *sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) {
+ nextsav = LIST_NEXT(sav, chain);
+ /* sanity check */
+ if (sav->state != state) {
+ ipseclog((LOG_DEBUG, "key_checksa_present: "
+ "invalid sav->state "
+ "(state: %d SA: %d)\n",
+ state, sav->state));
+ continue;
+ }
+
+ if (sav->remote_ike_port != ntohs(remote_port)) {
+ continue;
+ }
+
+ if (sav->natt_encapsulated_src_port != local_port) {
+ continue;
+ }
+ found_sa = true;
+ break;
+ }
+ }
+ }
+
+ /* return whether a matching SA was found */
+ lck_mtx_unlock(sadb_mutex);
+ return found_sa;
+}
+
u_int16_t
key_natt_get_translated_port(
struct secasvar *outsav)
paddr = (struct sockaddr *)(xisr + 1);
uint8_t src_len = paddr->sa_len;
- if (xisr->sadb_x_ipsecrequest_len < src_len) {
+ /* +sizeof(uint8_t) for dst_len below */
+ if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr) + src_len + sizeof(uint8_t)) {
ipseclog((LOG_DEBUG, "key_msg2sp: invalid request "
"invalid source address length.\n"));
key_freesp(newsp, KEY_SADB_UNLOCKED);
paddr = (struct sockaddr *)((caddr_t)paddr + paddr->sa_len);
uint8_t dst_len = paddr->sa_len;
- if (xisr->sadb_x_ipsecrequest_len < (src_len + dst_len)) {
+ if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr) + src_len + dst_len) {
ipseclog((LOG_DEBUG, "key_msg2sp: invalid request "
"invalid dest address length.\n"));
key_freesp(newsp, KEY_SADB_UNLOCKED);
/* remove from SA header */
if (__LIST_CHAINED(sav)) {
LIST_REMOVE(sav, chain);
+ ipsec_sav_count--;
}
- ipsec_sav_count--;
if (sav->spihash.le_prev || sav->spihash.le_next) {
LIST_REMOVE(sav, spihash);
struct secasvar *
key_allocsa_extended(u_int family, caddr_t src, caddr_t dst,
u_int proto, u_int32_t spi, ifnet_t interface);
+extern bool key_checksa_present(u_int family, caddr_t src, caddr_t dst, u_int16_t src_port, u_int16_t dst_port);
extern u_int16_t key_natt_get_translated_port(struct secasvar *);
extern void key_freesp(struct secpolicy *, int);
extern void key_freesav(struct secasvar *, int);
return nx;
}
+struct nfsrv_getvfs_by_mntonname_callback_args {
+ const char *path; /* IN */
+ mount_t mp; /* OUT */
+};
+
+static int
+nfsrv_getvfs_by_mntonname_callback(mount_t mp, void *v)
+{
+ struct nfsrv_getvfs_by_mntonname_callback_args * const args = v;
+ char real_mntonname[MAXPATHLEN];
+ int pathbuflen = MAXPATHLEN;
+ vnode_t rvp;
+ int error;
+
+ error = VFS_ROOT(mp, &rvp, vfs_context_current());
+ if (error) {
+ goto out;
+ }
+ error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
+ VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
+ vnode_put(rvp);
+ if (error) {
+ goto out;
+ }
+ if (strcmp(args->path, real_mntonname) == 0) {
+ error = vfs_busy(mp, LK_NOWAIT);
+ if (error == 0) {
+ args->mp = mp;
+ }
+ return VFS_RETURNED_DONE;
+ }
+out:
+ return VFS_RETURNED;
+}
+
+static mount_t
+nfsrv_getvfs_by_mntonname(char *path)
+{
+ struct nfsrv_getvfs_by_mntonname_callback_args args = {
+ .path = path,
+ .mp = NULL,
+ };
+ mount_t mp;
+ int error;
+
+ mp = vfs_getvfs_by_mntonname(path);
+ if (mp) {
+ error = vfs_busy(mp, LK_NOWAIT);
+ mount_iterdrop(mp);
+ if (error) {
+ mp = NULL;
+ }
+ } else if (vfs_iterate(0, nfsrv_getvfs_by_mntonname_callback,
+ &args) == 0) {
+ mp = args.mp;
+ }
+ return mp;
+}
+
/*
* nfsrv_fhtovp() - convert FH to vnode and export info
*/
}
/* find mount structure */
- mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path);
- if (mp) {
- error = vfs_busy(mp, LK_NOWAIT);
- mount_iterdrop(mp);
- if (error) {
- mp = NULL;
- }
- }
+ mp = nfsrv_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path);
if (!mp) {
/*
* We have an export, but no mount?
* getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
*/
+static struct nfs_exportfs *
+nfsrv_find_exportfs(const char *ptr)
+{
+ struct nfs_exportfs *nxfs;
+
+ LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
+ if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
+ break;
+ }
+ }
+ if (nxfs && strncmp(nxfs->nxfs_path, ptr, strlen(nxfs->nxfs_path))) {
+ nxfs = NULL;
+ }
+
+ return nxfs;
+}
+
/*
* Get file handle system call
*/
struct nfs_filehandle nfh;
int error, fhlen, fidlen;
struct nameidata nd;
- char path[MAXPATHLEN], *ptr;
+ char path[MAXPATHLEN], real_mntonname[MAXPATHLEN], *ptr;
size_t pathlen;
struct nfs_exportfs *nxfs;
struct nfs_export *nx;
// find exportfs that matches f_mntonname
lck_rw_lock_shared(&nfsrv_export_rwlock);
ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
- LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
- if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) {
- break;
+ if ((nxfs = nfsrv_find_exportfs(ptr)) == NULL) {
+ /*
+ * The f_mntonname might be a firmlink path. Resolve
+ * it into a physical path and try again.
+ */
+ int pathbuflen = MAXPATHLEN;
+ vnode_t rvp;
+
+ error = VFS_ROOT(vnode_mount(vp), &rvp, vfs_context_current());
+ if (error) {
+ goto out;
+ }
+ error = vn_getpath_ext(rvp, NULLVP, real_mntonname, &pathbuflen,
+ VN_GETPATH_FSENTER | VN_GETPATH_NO_FIRMLINK);
+ vnode_put(rvp);
+ if (error) {
+ goto out;
}
+ ptr = real_mntonname;
+ nxfs = nfsrv_find_exportfs(ptr);
}
- if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
+ if (nxfs == NULL) {
error = EINVAL;
goto out;
}
if ((cnp->cn_namelen == dp->d_namlen) && !strcmp(cnp->cn_nameptr, dp->d_name)) {
fhlen = dp->d_name[dp->d_namlen + 1];
nvattrp = NFS_DIR_BUF_NVATTR(bp, i);
- if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhp->fh_len == 0) ||
+ if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhlen == 0) ||
(nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) {
/* entry is not valid */
error = ENOENT;
}
}
+
done:
if (qos_rv && voucher_rv) {
/* Both failed, give that a unique error. */
workq_thread_reset_pri(wq, uth, req, /*unpark*/ true);
+ thread_unfreeze_base_pri(uth->uu_thread);
+#if 0 // <rdar://problem/55259863> to turn this back on
if (__improbable(thread_unfreeze_base_pri(uth->uu_thread) && !is_creator)) {
if (req_ts) {
workq_perform_turnstile_operation_locked(wq, ^{
WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 3, 0, 0, 0);
goto park_thawed;
}
+#endif
/*
* We passed all checks, dequeue the request, bind to it, and set it up
park:
thread_unfreeze_base_pri(uth->uu_thread);
+#if 0 // <rdar://problem/55259863>
park_thawed:
+#endif
workq_park_and_unlock(p, wq, uth, setup_flags);
}
void workq_kern_threadreq_redrive(struct proc *p, workq_kern_threadreq_flags_t flags);
+// This enum matches _pthread_set_flags in libpthread's qos_private.h
enum workq_set_self_flags {
- WORKQ_SET_SELF_QOS_FLAG = 0x1,
- WORKQ_SET_SELF_VOUCHER_FLAG = 0x2,
- WORKQ_SET_SELF_FIXEDPRIORITY_FLAG = 0x4,
- WORKQ_SET_SELF_TIMESHARE_FLAG = 0x8,
- WORKQ_SET_SELF_WQ_KEVENT_UNBIND = 0x10,
+ WORKQ_SET_SELF_QOS_FLAG = 0x01,
+ WORKQ_SET_SELF_VOUCHER_FLAG = 0x02,
+ WORKQ_SET_SELF_FIXEDPRIORITY_FLAG = 0x04,
+ WORKQ_SET_SELF_TIMESHARE_FLAG = 0x08,
+ WORKQ_SET_SELF_WQ_KEVENT_UNBIND = 0x10,
+ WORKQ_SET_SELF_ALTERNATE_AMX = 0x20,
};
void workq_proc_suspended(struct proc *p);
/*-
- * Copyright (c) 1999-2009 Apple Inc.
+ * Copyright (c) 1999-2019 Apple Inc.
* Copyright (c) 2006-2007 Robert N. M. Watson
* All rights reserved.
*
uthread->uu_ar = NULL;
}
+/*
+ * For system calls such as posix_spawn(2), the sub-operations (i.e., file
+ * actions and port actions) need to be audited as their own events. As with
+ * system calls, we determine whether a sub-operation needs to be audited by
+ * examining the preselection masks.
+ */
+void
+audit_subcall_enter(au_event_t event, proc_t proc, struct uthread *uthread)
+{
+ struct au_mask *aumask;
+ au_class_t class;
+ au_id_t auid;
+ kauth_cred_t cred;
+
+ /*
+ * Check which audit mask to use; either the kernel non-attributable
+ * event mask or the process audit mask.
+ */
+ cred = kauth_cred_proc_ref(proc);
+ auid = cred->cr_audit.as_aia_p->ai_auid;
+ if (auid == AU_DEFAUDITID) {
+ aumask = &audit_nae_mask;
+ } else {
+ aumask = &cred->cr_audit.as_mask;
+ }
+
+ /*
+ * Allocate an audit record, if preselection allows it, and store in
+ * the thread for later use.
+ */
+ class = au_event_class(event);
+
+ if (au_preselect(event, class, aumask, AU_PRS_BOTH)) {
+ /*
+ * If we're out of space and need to suspend unprivileged
+ * processes, do that here rather than trying to allocate
+ * another audit record.
+ *
+ * Note: we might wish to be able to continue here in the
+ * future, if the system recovers. That should be possible
+ * by means of checking the condition in a loop around
+ * cv_wait(). It might be desirable to reevaluate whether an
+ * audit record is still required for this event by
+ * re-calling au_preselect().
+ */
+ if (audit_in_failure &&
+ suser(cred, &proc->p_acflag) != 0) {
+ cv_wait(&audit_fail_cv, &audit_mtx);
+ panic("audit_failing_stop: thread continued");
+ }
+ if (uthread->uu_ar == NULL) {
+ uthread->uu_ar = audit_new(event, proc, uthread);
+ }
+ } else if (audit_pipe_preselect(auid, event, class, AU_PRS_BOTH, 0)) {
+ if (uthread->uu_ar == NULL) {
+ uthread->uu_ar = audit_new(event, proc, uthread);
+ }
+ }
+
+ kauth_cred_unref(&cred);
+}
+
+void
+audit_subcall_exit(int error, struct uthread *uthread)
+{
+ /* A subcall doesn't have a return value, so always pass zero. */
+ audit_commit(uthread->uu_ar, error, 0 /* retval */);
+
+ uthread->uu_ar = NULL;
+}
+
/*
* Calls to set up and tear down audit structures used during Mach system
* calls.
/*-
- * Copyright (c) 2004-2016 Apple Inc.
+ * Copyright (c) 2004-2019 Apple Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
void audit_mach_syscall_enter(unsigned short audit_event);
void audit_mach_syscall_exit(int retval, struct uthread *uthread);
+void audit_subcall_enter(au_event_t event,
+ struct proc *proc, struct uthread *uthread);
+void audit_subcall_exit(int error,
+ struct uthread *uthread);
+
extern struct auditinfo_addr *audit_default_aia_p;
/*
audit_syscall_exit(code, error, proc, uthread); \
} while (0)
+#define AUDIT_SUBCALL_ENTER(event, proc, uthread) do { \
+ if (AUDIT_ENABLED()) \
+ audit_subcall_enter(AUE_ ## event, proc, uthread); \
+} while (0)
+
+#define AUDIT_SUBCALL_EXIT(uthread, error) do { \
+ if (AUDIT_AUDITING(uthread->uu_ar)) \
+ audit_subcall_exit(error, uthread); \
+} while (0)
+
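A hedged sketch of the intended call pattern for the new subcall macros, inside a kernel file that already includes the audit headers. The helper function and the choice of AUE_OPEN_RWTC as the event are hypothetical; the macro pastes the AUE_ prefix onto its event argument.

/* Sketch (hypothetical helper): audit one posix_spawn(2) file action as its own event. */
static int
spawn_file_action_audited(proc_t p, struct uthread *uth)
{
	int error = 0;

	AUDIT_SUBCALL_ENTER(OPEN_RWTC, p, uth);  /* expands to audit_subcall_enter(AUE_OPEN_RWTC, ...) */
	/* ... perform the open-style file action here, setting error ... */
	AUDIT_SUBCALL_EXIT(uth, error);

	return error;
}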
/*
* Wrap the audit_mach_syscall_enter() and audit_mach_syscall_exit()
* functions in a manner similar to other system call enter/exit functions.
#define AUDIT_SYSCALL_EXIT(code, proc, uthread, error) do { \
} while (0)
+#define AUDIT_SUBCALL_ENTER(event, proc, uthread) do { \
+} while (0)
+
+#define AUDIT_SUBCALL_EXIT(uthread, error) do { \
+} while (0)
+
#define AUDIT_MACH_SYSCALL_ENTER(args...) do { \
} while (0)
union {
void *kn_hook;
uint32_t kn_hook32;
- uint64_t kn_hook64;
+#if __LP64__
+ uint64_t kn_hook_waitqid;
+#else
+ uint32_t kn_hook_waitqid;
+#endif
};
/* per filter pointer to the resource being watched */
#define KQWL_STAYACTIVE_FIRED_BIT (1 << 0)
uint8_t kqwl_wakeup_indexes; /* QoS/override levels that woke */
kq_index_t kqwl_stayactive_qos; /* max QoS of statyactive knotes */
+ struct turnstile *kqwl_turnstile; /* turnstile for sync IPC/waiters */
kqueue_id_t kqwl_dynamicid; /* dynamic identity */
uint64_t kqwl_params; /* additional parameters */
- struct turnstile *kqwl_turnstile; /* turnstile for sync IPC/waiters */
LIST_ENTRY(kqworkloop) kqwl_hashlink; /* linkage for search list */
#if CONFIG_WORKLOOP_DEBUG
#define KQWL_HISTORY_COUNT 32
#define DBG_MT_TMPCPU 0xff
/* The Kernel Debug Sub Classes for DBG_MISC */
-#define DBG_EVENT 0x10
-#define DBG_MISC_INSTRUMENTS 0x11
-#define DBG_MISC_INSTRUMENTSBT 0x12
-#define DBG_MISC_LAYOUT 0x1a
-#define DBG_BUFFER 0x20
+#define DBG_MISC_COREBRIGHTNESS 0x01
+#define DBG_EVENT 0x10
+#define DBG_MISC_INSTRUMENTS 0x11
+#define DBG_MISC_INSTRUMENTSBT 0x12
+#define DBG_MISC_LAYOUT 0x1a
+#define DBG_BUFFER 0x20
/* The Kernel Debug Sub Classes for DBG_DYLD */
#define DBG_DYLD_UUID (5)
#if __arm64__
void memorystatus_act_on_legacy_footprint_entitlement(proc_t p, boolean_t footprint_increase);
+void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t p);
#endif /* __arm64__ */
#endif /* CONFIG_MEMORYSTATUS */
__BEGIN_DECLS
#ifdef KERNEL_PRIVATE
+void lf_init(void);
int lf_advlock(struct vnop_advlock_args *);
int lf_assert(struct vnop_advlock_args *, void **);
void lf_commit(void *, int);
mbuf_svc_class_t, u_int32_t);
extern void so_tc_update_stats(struct mbuf *, struct socket *,
mbuf_svc_class_t);
+extern int so_tos_from_control(struct mbuf *);
extern int so_tc_from_control(struct mbuf *, int *);
extern mbuf_svc_class_t so_tc2msc(int);
extern int so_svc2tc(mbuf_svc_class_t);
* relies on single copy atomicity and cannot be changed to a bitfield.
*/
bool uu_workq_pthread_kill_allowed;
- unsigned int syscall_code; /* current syscall code */
+ uint16_t syscall_code; /* current syscall code */
/* thread exception handling */
int uu_exception;
uint nbytes; /* number of bytes in ibits and obits */
} uu_select; /* saved state for select() */
- /* internal support for continuation framework */
- int (*uu_continuation)(int);
- int uu_pri;
- int uu_timo;
- caddr_t uu_wchan; /* sleeping thread wait channel */
- const char *uu_wmesg; /* ... wait message */
struct proc *uu_proc;
thread_t uu_thread;
void * uu_userstate;
lck_spin_t uu_rethrottle_lock; /* locks was_rethrottled and is_throttled */
TAILQ_ENTRY(uthread) uu_throttlelist; /* List of uthreads currently throttled */
void * uu_throttle_info; /* pointer to throttled I/Os info */
- int uu_on_throttlelist;
- int uu_lowpri_window;
+ int8_t uu_on_throttlelist;
+ bool uu_lowpri_window;
/* These boolean fields are protected by different locks */
bool uu_was_rethrottled;
bool uu_is_throttled;
bool uu_throttle_bc;
+ bool uu_defer_reclaims;
+
+ /* internal support for continuation framework */
+ uint16_t uu_pri; /* pri | PCATCH | PVFS, ... */
+ caddr_t uu_wchan; /* sleeping thread wait channel */
+ int (*uu_continuation)(int);
+ const char *uu_wmesg; /* ... wait message */
u_int32_t uu_network_marks; /* network control flow marks */
vnode_t uu_vreclaims;
vnode_t uu_cdir; /* per thread CWD */
int uu_dupfd; /* fd in fdesc_open/dupfdopen */
- int uu_defer_reclaims;
/*
* Bound kqueue request. This field is only cleared by the current thread,
*/
struct workq_threadreq_s *uu_kqr_bound;
TAILQ_ENTRY(uthread) uu_workq_entry;
- mach_vm_offset_t uu_workq_stackaddr;
+ vm_offset_t uu_workq_stackaddr;
mach_port_name_t uu_workq_thport;
struct uu_workq_policy {
uint16_t qos_req : 4; /* requested QoS */
int vfs_context_iskernel(vfs_context_t);
vfs_context_t vfs_context_kernel(void); /* get from 1st kernel thread */
vnode_t vfs_context_cwd(vfs_context_t);
+vnode_t vfs_context_get_cwd(vfs_context_t); /* get cwd with iocount */
int vnode_isnoflush(vnode_t);
void vnode_setnoflush(vnode_t);
void vnode_clearnoflush(vnode_t);
+++ /dev/null
-/*
- * Copyright (c) 2018 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/sysctl.h>
-
return cwd;
}
+/*
+ * vfs_context_get_cwd
+ *
+ * Description: Returns a vnode for the current working directory for the
+ * supplied context. The returned vnode has an iocount on it
+ * which must be released with a vnode_put().
+ *
+ * Parameters: vfs_context_t The context to use
+ *
+ * Returns: vnode_t The current working directory
+ * for this context
+ *
+ * Notes: The function first attempts to obtain the current directory
+ * from the thread, and if it is not present there, falls back
+ * to obtaining it from the process instead. If it can't be
+ * obtained from either place, we return NULLVP.
+ */
+vnode_t
+vfs_context_get_cwd(vfs_context_t ctx)
+{
+ vnode_t cwd = NULLVP;
+
+ if (ctx != NULL && ctx->vc_thread != NULL) {
+ uthread_t uth = get_bsdthread_info(ctx->vc_thread);
+ proc_t proc;
+
+ /*
+ * Get the cwd from the thread; if there isn't one, get it
+ * from the process, instead.
+ */
+ cwd = uth->uu_cdir;
+
+ if (cwd) {
+ if ((vnode_get(cwd) != 0)) {
+ cwd = NULLVP;
+ }
+ } else if ((proc = (proc_t)get_bsdthreadtask_info(ctx->vc_thread)) != NULL &&
+ proc->p_fd != NULL) {
+ proc_fdlock(proc);
+ cwd = proc->p_fd->fd_cdir;
+ if (cwd && (vnode_get(cwd) != 0)) {
+ cwd = NULLVP;
+ }
+ proc_fdunlock(proc);
+ }
+ }
+
+ return cwd;
+}
+
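A hedged usage sketch of the new accessor; the caller is an assumption for illustration, but the vnode_put() obligation follows from the description above:

/* Hypothetical caller -- not from the patch. */
static int
cwd_usage_example(vfs_context_t ctx)
{
	vnode_t cwd = vfs_context_get_cwd(ctx);

	if (cwd == NULLVP) {
		return ENOENT;
	}
	/* ... use cwd; the iocount prevents it from being recycled ... */
	vnode_put(cwd); /* release the iocount taken by vfs_context_get_cwd() */
	return 0;
}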
/*
* vfs_context_create
*
#endif
static int lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
-static int lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx);
+static int handle_symlink_for_namei(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx);
static int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
static void lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
static int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
{
struct filedesc *fdp; /* pointer to file descriptor state */
struct vnode *dp; /* the directory we are searching */
+ struct vnode *rootdir_with_usecount = NULLVP;
+ struct vnode *startdir_with_usecount = NULLVP;
struct vnode *usedvp = ndp->ni_dvp; /* store pointer to vp in case we must loop due to
* heavy vnode pressure */
u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
/*
* determine the starting point for the translation.
+ *
+ * We may need up to 2 usecounts on vnodes before starting the translation.
+ * We need to have a usecount on the root directory for the process
+ * for the entire duration of the lookup. This is because symlink
+ * translation can restart translation at / if a symlink is encountered.
+ *
+ * For the duration of this lookup, the rootdir is the one we fetch now
+ * under the proc_fdlock, even if the proc rootdir changes once we let
+ * go of the proc_fdlock.
+ *
+ * In the future we may consider holding off a chroot till we complete
+ * in-progress lookups.
+ *
+ * If the starting directory is not the process rootdir then we need
+ * a usecount on the starting directory as well for the duration of the
+ * lookup.
+ *
+ * Getting an additional usecount involves first getting an iocount under
+ * the lock that ensures that a usecount is on the directory. Once we
+ * get an iocount we can release the lock and we will be free to get a
+ * usecount without the vnode getting recycled. Once we get the usecount
+ * we can release the iocount which we used to get our usecount.
*/
+ proc_fdlock(p);
+
if ((ndp->ni_rootdir = fdp->fd_rdir) == NULLVP) {
if (!(fdp->fd_flags & FD_CHROOT)) {
ndp->ni_rootdir = rootvnode;
+ } else {
+ proc_fdunlock(p);
+ /* This should be a panic */
+ printf("proc is chrooted but does not have a root directory set\n");
+ error = ENOENT;
+ goto error_out;
}
}
+
+ /*
+ * We have the proc_fdlock here so we still have a usecount
+ * on ndp->ni_rootdir.
+ *
+ * However we need to get our own usecount on it in order to
+ * ensure that the vnode isn't recycled to something else.
+ *
+ * Note: It's fine if the vnode is force reclaimed, but with
+ * a usecount it won't be reused until we release the reference.
+ *
+ * In order to get that usecount, however, we need to first
+ * get a non-blocking iocount since we'll be doing this under
+ * the proc_fdlock.
+ */
+ if (vnode_get(ndp->ni_rootdir) != 0) {
+ proc_fdunlock(p);
+ error = ENOENT;
+ goto error_out;
+ }
+
+ proc_fdunlock(p);
+
+ /* Now we can safely get our own ref on ni_rootdir */
+ error = vnode_ref_ext(ndp->ni_rootdir, O_EVTONLY, 0);
+ vnode_put(ndp->ni_rootdir);
+ if (error) {
+ ndp->ni_rootdir = NULLVP;
+ goto error_out;
+ }
+
+ rootdir_with_usecount = ndp->ni_rootdir;
+
cnp->cn_nameptr = cnp->cn_pnbuf;
ndp->ni_usedvp = NULLVP;
+ bool dp_needs_put = false;
if (*(cnp->cn_nameptr) == '/') {
while (*(cnp->cn_nameptr) == '/') {
cnp->cn_nameptr++;
dp = ndp->ni_dvp;
ndp->ni_usedvp = dp;
} else {
- dp = vfs_context_cwd(ctx);
+ dp = vfs_context_get_cwd(ctx);
+ if (dp) {
+ dp_needs_put = true;
+ }
}
if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
+ if (dp_needs_put) {
+ vnode_put(dp);
+ dp_needs_put = false;
+ }
+ dp = NULLVP;
error = ENOENT;
goto error_out;
}
+
+ if (dp != rootdir_with_usecount) {
+ error = vnode_ref_ext(dp, O_EVTONLY, 0);
+ if (error) {
+ if (dp_needs_put) {
+ vnode_put(dp);
+ dp_needs_put = false;
+ }
+ dp = NULLVP;
+ goto error_out;
+ }
+ startdir_with_usecount = dp;
+ }
+
+ if (dp_needs_put) {
+ vnode_put(dp);
+ dp_needs_put = false;
+ }
+
ndp->ni_dvp = NULLVP;
ndp->ni_vp = NULLVP;
#endif
ndp->ni_startdir = dp;
+ dp = NULLVP;
if ((error = lookup(ndp))) {
goto error_out;
* Check for symbolic link
*/
if ((cnp->cn_flags & ISSYMLINK) == 0) {
+ if (startdir_with_usecount) {
+ vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0);
+ startdir_with_usecount = NULLVP;
+ }
+ if (rootdir_with_usecount) {
+ vnode_rele_ext(rootdir_with_usecount, O_EVTONLY, 0);
+ rootdir_with_usecount = NULLVP;
+ }
return 0;
}
continue_symlink:
- /* Gives us a new path to process, and a starting dir */
- error = lookup_handle_symlink(ndp, &dp, ctx);
+ /*
+ * Gives us a new path to process, and a starting dir (with an iocount).
+ * The iocount is needed to take a usecount on the vnode returned
+ * (if it is not a vnode we already have a usecount on).
+ */
+ error = handle_symlink_for_namei(ndp, &dp, ctx);
if (error != 0) {
break;
}
+
+ if (dp == ndp->ni_rootdir && startdir_with_usecount) {
+ vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0);
+ startdir_with_usecount = NULLVP;
+ } else if (dp != startdir_with_usecount) {
+ if (startdir_with_usecount) {
+ vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0);
+ startdir_with_usecount = NULLVP;
+ }
+ error = vnode_ref_ext(dp, O_EVTONLY, 0);
+ if (error) {
+ vnode_put(dp);
+ dp = NULLVP;
+ goto error_out;
+ }
+ startdir_with_usecount = dp;
+ }
+ /* iocount not required on dp anymore */
+ vnode_put(dp);
}
/*
* only come here if we fail to handle a SYMLINK...
ndp->ni_vp = NULLVP;
ndp->ni_dvp = NULLVP;
+ if (startdir_with_usecount) {
+ vnode_rele_ext(startdir_with_usecount, O_EVTONLY, 0);
+ startdir_with_usecount = NULLVP;
+ }
+ if (rootdir_with_usecount) {
+ vnode_rele_ext(rootdir_with_usecount, O_EVTONLY, 0);
+ rootdir_with_usecount = NULLVP;
+ }
+
#if CONFIG_VOLFS
/*
* Deal with volfs fallout.
/*
* Takes ni_vp and ni_dvp non-NULL. Returns with *new_dp set to the location
- * at which to start a lookup with a resolved path, and all other iocounts dropped.
+ * at which to start a lookup with a resolved path and with an iocount.
*/
static int
-lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
+handle_symlink_for_namei(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
{
int error;
char *cp; /* pointer into pathname argument */
/*
* starting point for 'relative'
* symbolic link path
+ *
+ * If the starting point is not the root we have to return an iocounted
+ * dp to namei so we don't release the iocount here.
*/
dp = ndp->ni_dvp;
+ ndp->ni_dvp = NULLVP;
/*
* get rid of references returned via 'lookup'
*/
vnode_put(ndp->ni_vp);
- vnode_put(ndp->ni_dvp); /* ALWAYS have a dvp for a symlink */
-
ndp->ni_vp = NULLVP;
- ndp->ni_dvp = NULLVP;
/*
* Check if symbolic link restarts us at the root
cnp->cn_nameptr++;
ndp->ni_pathlen--;
}
+ vnode_put(dp);
if ((dp = ndp->ni_rootdir) == NULLVP) {
return ENOENT;
}
+ if (vnode_get(dp) != 0) {
+ return ENOENT;
+ }
+ }
+
+ if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
+ if (dp) {
+ vnode_put(dp);
+ }
+ return ENOENT;
}
*new_dp = dp;
extern int bootarg_no_vnode_jetsam; /* from bsd_init.c default value is 0 */
#endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
+extern int bootarg_no_vnode_drain; /* from bsd_init.c default value is 0 */
+
boolean_t root_is_CF_drive = FALSE;
#if CONFIG_TRIGGERS
struct timeval rage_tv;
int rage_limit = 0;
int ragevnodes = 0;
+static int vfs_unmountall_started = 0;
#define RAGE_LIMIT_MIN 100
#define RAGE_TIME_LIMIT 5
int mounts, sec = 1;
struct unmount_info ui;
+ vfs_unmountall_started = 1;
+
retry:
ui.u_errs = ui.u_busy = 0;
vfs_iterate(VFS_ITERATE_CB_DROPREF | VFS_ITERATE_TAIL_FIRST, unmount_callback, &ui);
if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, ctx) == 0) {
if (isvirtual) {
mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
+ mp->mnt_flag |= MNT_REMOVABLE;
}
}
if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, ctx) == 0) {
if (VNOP_IOCTL(devvp, DKIOCGETLOCATION, (caddr_t)&location, 0, ctx) == 0) {
if (location & DK_LOCATION_EXTERNAL) {
mp->mnt_ioflags |= MNT_IOFLAGS_PERIPHERAL_DRIVE;
- /* This must be called after MNTK_VIRTUALDEV has been determined via DKIOCISVIRTUAL */
- if ((MNTK_VIRTUALDEV & mp->mnt_kern_flag)) {
- mp->mnt_flag |= MNT_REMOVABLE;
- }
+ mp->mnt_flag |= MNT_REMOVABLE;
}
}
vp->v_owner = current_thread();
while (vp->v_iocount > 1) {
- msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", NULL);
+ if (bootarg_no_vnode_drain) {
+ struct timespec ts = {.tv_sec = 10, .tv_nsec = 0};
+ int error;
+
+ if (vfs_unmountall_started) {
+ ts.tv_sec = 1;
+ }
+
+ error = msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain_with_timeout", &ts);
+
+ /* Try to deal with leaked iocounts when the bootarg is set and we are shutting down */
+ if (vp->v_iocount > 1 && error == EWOULDBLOCK &&
+ ts.tv_sec == 1 && vp->v_numoutput == 0) {
+ vp->v_iocount = 1;
+ break;
+ }
+ } else {
+ msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", NULL);
+ }
}
vp->v_lflag &= ~VL_DRAIN;
vnode_t olddp = cdrp->olddp;
vnode_t newdp = cdrp->newdp;
struct filedesc *fdp;
- vnode_t tvp;
- vnode_t fdp_cvp;
- vnode_t fdp_rvp;
- int cdir_changed = 0;
- int rdir_changed = 0;
+ vnode_t new_cvp = newdp;
+ vnode_t new_rvp = newdp;
+ vnode_t old_cvp = NULL;
+ vnode_t old_rvp = NULL;
/*
* XXX Also needs to iterate each thread in the process to see if it
* XXX update that as well.
*/
+ /*
+ * First, with the proc_fdlock held, check to see if we will need
+ * to do any work. If not, we will get out fast.
+ */
proc_fdlock(p);
fdp = p->p_fd;
- if (fdp == (struct filedesc *)0) {
+ if (fdp == NULL ||
+ (fdp->fd_cdir != olddp && fdp->fd_rdir != olddp)) {
proc_fdunlock(p);
return PROC_RETURNED;
}
- fdp_cvp = fdp->fd_cdir;
- fdp_rvp = fdp->fd_rdir;
proc_fdunlock(p);
- if (fdp_cvp == olddp) {
- vnode_ref(newdp);
- tvp = fdp->fd_cdir;
- fdp_cvp = newdp;
- cdir_changed = 1;
- vnode_rele(tvp);
+ /*
+ * Ok, we will have to do some work. Always take two refs
+ * because we might need that many. We'll dispose of whatever
+ * we ended up not using.
+ */
+ if (vnode_ref(newdp) != 0) {
+ return PROC_RETURNED;
}
- if (fdp_rvp == olddp) {
- vnode_ref(newdp);
- tvp = fdp->fd_rdir;
- fdp_rvp = newdp;
- rdir_changed = 1;
- vnode_rele(tvp);
+ if (vnode_ref(newdp) != 0) {
+ vnode_rele(newdp);
+ return PROC_RETURNED;
}
- if (cdir_changed || rdir_changed) {
- proc_fdlock(p);
- fdp->fd_cdir = fdp_cvp;
- fdp->fd_rdir = fdp_rvp;
- proc_fdunlock(p);
+
+ /*
+ * Now do the work. Note: we dropped the proc_fdlock, so we
+ * have to do all of the checks again.
+ */
+ proc_fdlock(p);
+ fdp = p->p_fd;
+ if (fdp != NULL) {
+ if (fdp->fd_cdir == olddp) {
+ old_cvp = olddp;
+ fdp->fd_cdir = newdp;
+ new_cvp = NULL;
+ }
+ if (fdp->fd_rdir == olddp) {
+ old_rvp = olddp;
+ fdp->fd_rdir = newdp;
+ new_rvp = NULL;
+ }
}
+ proc_fdunlock(p);
+
+ /*
+ * Dispose of any references that are no longer needed.
+ */
+ if (old_cvp != NULL) {
+ vnode_rele(old_cvp);
+ }
+ if (old_rvp != NULL) {
+ vnode_rele(old_rvp);
+ }
+ if (new_cvp != NULL) {
+ vnode_rele(new_cvp);
+ }
+ if (new_rvp != NULL) {
+ vnode_rele(new_rvp);
+ }
+
return PROC_RETURNED;
}
kIOPMOverTemp = (1 << 9),// system dangerously hot
kIOPMClamshellOpened = (1 << 10),// clamshell was opened
kIOPMDWOverTemp = (1 << 11),// DarkWake thermal limits exceeded.
- kIOPMPowerButtonUp = (1 << 12) // Power button up
+ kIOPMPowerButtonUp = (1 << 12),// Power button up
+ kIOPMProModeEngaged = (1 << 13),// Fans entered 'ProMode'
+ kIOPMProModeDisengaged = (1 << 14) // Fans exited 'ProMode'
};
#define kIOPMSettingTimeZoneOffsetKey "TimeZoneOffsetSeconds"
#define kIOPMSettingMobileMotionModuleKey "MobileMotionModule"
#define kIOPMSettingGraphicsSwitchKey "GPUSwitch"
+#define kIOPMSettingProModeControl "ProModeControl"
+#define kIOPMSettingProModeDefer "ProModeDefer"
// Setting controlling drivers can register to receive scheduled wake data
// Either in "CF seconds" type, or structured calendar data in a formatted
#define kIOPMMessageLaunchBootSpinDump \
iokit_family_msg(sub_iokit_powermanagement, 0x440)
+#define kIOPMMessageProModeStateChange \
+ iokit_family_msg(sub_iokit_powermanagement, 0x450)
+
/* @enum SystemSleepReasons
* @abstract The potential causes for system sleep as logged in the system event record.
*/
#define kIOPMWakeEventReasonKey "Reason"
#define kIOPMWakeEventDetailsKey "Details"
+/* kIOPMFeatureProModeKey
+ * Feature published if ProMode is supported
+ */
+#define kIOPMFeatureProModeKey "ProMode"
+
/*****************************************************************************
*
* Wake event flags reported to IOPMrootDomain::claimSystemWakeEvent()
}
if (vector->interruptDisabledHard) {
vector->interruptDisabledHard = 0;
-
+#if !defined(__i386__) && !defined(__x86_64__)
+ OSMemoryBarrier();
+#endif
enableVector(vectorNumber, vector);
}
}
for (ptrIdx = 0; ptrIdx < (page_size / sizeof(uintptr_t)); ptrIdx++) {
ptr = ((uintptr_t *)vphysaddr)[ptrIdx];
+#if defined(HAS_APPLE_PAC)
+ // strip possible ptrauth signature from candidate data pointer
+ ptr = (uintptr_t)ptrauth_strip((void*)ptr, ptrauth_key_process_independent_data);
+#endif /* defined(HAS_APPLE_PAC) */
for (lim = count, baseIdx = 0; lim; lim >>= 1) {
inst = instances[baseIdx + (lim >> 1)];
//
//******************************************************************************
-#define kRootDomainSettingsCount 17
+#define kRootDomainSettingsCount 19
+#define kRootDomainNoPublishSettingsCount 3
bool
IOPMrootDomain::start( IOService * nub )
OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey),
OSSymbol::withCString(kIOPMSettingGraphicsSwitchKey),
OSSymbol::withCString(kIOPMStateConsoleShutdown),
- gIOPMSettingSilentRunningKey
+ OSSymbol::withCString(kIOPMSettingProModeControl),
+ OSSymbol::withCString(kIOPMSettingProModeDefer),
+ gIOPMSettingSilentRunningKey,
+ };
+
+ const OSSymbol *noPublishSettingsArr[kRootDomainNoPublishSettingsCount] =
+ {
+ OSSymbol::withCString(kIOPMSettingProModeControl),
+ OSSymbol::withCString(kIOPMSettingProModeDefer),
+ gIOPMSettingSilentRunningKey,
};
PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags));
// List of PM settings that should not automatically publish themselves
// as a feature when registered by a listener.
noPublishPMSettings = OSArray::withObjects(
- (const OSObject **) &gIOPMSettingSilentRunningKey, 1, 0);
+ (const OSObject **)noPublishSettingsArr,
+ kRootDomainNoPublishSettingsCount,
+ 0);
fPMSettingsDict = OSDictionary::withCapacity(5);
preventIdleSleepList = OSSet::withCapacity(8);
evaluatePolicy( kStimulusDarkWakeEvaluate );
}
}
+
+ if (msg & kIOPMProModeEngaged) {
+ int newState = 1;
+ DLOG("ProModeEngaged\n");
+ messageClient(kIOPMMessageProModeStateChange, systemCapabilityNotifier, &newState, sizeof(newState));
+ }
+
+ if (msg & kIOPMProModeDisengaged) {
+ int newState = 0;
+ DLOG("ProModeDisengaged\n");
+ messageClient(kIOPMMessageProModeStateChange, systemCapabilityNotifier, &newState, sizeof(newState));
+ }
}
//******************************************************************************
}
if (doPhase2) {
if (kIOServiceNeedWillTerminate & victim->__state[1]) {
- _workLoopAction((IOWorkLoop::Action) &actionWillStop,
- victim, (void *)(uintptr_t) options, NULL );
+ if (NULL == victim->reserved->uvars) {
+ _workLoopAction((IOWorkLoop::Action) &actionWillStop,
+ victim, (void *)(uintptr_t) options);
+ } else {
+ actionWillStop(victim, options, NULL, NULL, NULL);
+ }
}
OSArray * notifiers;
return kIOReturnBadArgument;
}
options &= kIOMemoryDirectionOutIn;
+ options |= kIOMemoryKernelUserShared;
bmd = IOBufferMemoryDescriptor::inTaskWithOptions(
kernel_task, options, capacity, alignment);
OS_REASON_LIBSYSTEM_CODE_WORKLOOP_OWNERSHIP_LEAK = 1,
OS_REASON_LIBSYSTEM_CODE_FAULT = 2, /* generated by os_log_fault */
OS_REASON_LIBSYSTEM_CODE_SECINIT_INITIALIZER = 3,
+ OS_REASON_LIBSYSTEM_CODE_PTHREAD_CORRUPTION = 4,
);
#ifndef KERNEL
int
kpersona_get(uid_t *id)
{
- /* persona is a process-static identifier: cache it in a global */
- static uid_t p_id = PERSONA_ID_NONE;
- if (p_id == PERSONA_ID_NONE) {
- int ret = 0;
- size_t idlen = 1;
- ret = __persona(PERSONA_OP_GET, 0, NULL, &p_id, &idlen, NULL);
- if (ret != 0) {
- return ret;
- }
- }
- *id = p_id;
- return 0;
+ size_t idlen = 1;
+ return __persona(PERSONA_OP_GET, 0, NULL, id, &idlen, NULL);
}
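A hedged userspace sketch of the now-uncached call; the surrounding code is an assumption for illustration:

/* Hypothetical illustration -- not from the patch. */
uid_t persona_id = PERSONA_ID_NONE;
if (kpersona_get(&persona_id) == 0) {
	/*
	 * persona_id reflects the caller's current persona on every call,
	 * since the process-static cache has been removed.
	 */
}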
int
and r2, r2, #3 // Extract cpu number
orr r1, r1, r2 //
mcr p15, 0, r1, c13, c0, 3 // Write TPIDRURO
- ldr r1, [r0, TH_CTH_DATA]
+ mov r1, #0
mcr p15, 0, r1, c13, c0, 2 // Write TPIDRURW
mov r7, #0 // Clear frame pointer
ldr r3, [r0, TH_KSTACKPTR] // Get kernel stack top
bne switch_threads // No need to save GPR/NEON state if we are
#if __ARM_VFP__
mov r1, r2 // r2 will be clobbered by the save, so preserve it
- add r3, r0, ACT_KVFP // Get the kernel VFP save area for the old thread...
+ ldr r3, [r0, TH_KSTACKPTR] // Get old kernel stack top
+ add r3, r3, SS_KVFP // Get the kernel VFP save area for the old thread...
save_vfp_registers // ...and save our VFP state to it
mov r2, r1 // Restore r2 (the new thread pointer)
#endif /* __ARM_VFP__ */
and r5, r5, #3 // Extract cpu number
orr r6, r6, r5
mcr p15, 0, r6, c13, c0, 3 // Write TPIDRURO
- ldr r6, [r2, TH_CTH_DATA]
+ mov r6, #0
mcr p15, 0, r6, c13, c0, 2 // Write TPIDRURW
load_reg:
add r3, r3, SS_R4
ldmia r3!, {r4-r14} // Restore new thread status
#if __ARM_VFP__
- add r3, r2, ACT_KVFP // Get the kernel VFP save area for the new thread...
+ ldr r3, [r2, TH_KSTACKPTR] // get kernel stack top
+ add r3, r3, SS_KVFP // Get the kernel VFP save area for the new thread...
load_vfp_registers // ...and load the saved state
#endif /* __ARM_VFP__ */
bx lr // Return
LEXT(Shutdown_context)
mrc p15, 0, r9, c13, c0, 4 // Read TPIDRPRW
#if __ARM_VFP__
- add r3, r9, ACT_KVFP // Get the kernel VFP save area for the current thread...
+ ldr r3, [r9, TH_KSTACKPTR] // get kernel stack top
+ add r3, r3, SS_KVFP // Get the kernel VFP save area for the current thread...
save_vfp_registers // ...and save our VFP state to it
#endif
ldr r3, [r9, TH_KSTACKPTR] // Get kernel stack top
mrc p15, 0, r9, c13, c0, 4 // Read TPIDRPRW
#if __ARM_VFP__
- add r3, r9, ACT_KVFP // Get the kernel VFP save area for the current thread...
+ ldr r3, [r9, TH_KSTACKPTR] // get kernel stack top
+ add r3, r3, SS_KVFP // Get the kernel VFP save area for the current thread...
save_vfp_registers // ...and save our VFP state to it
#endif
ldr r3, [r9, TH_KSTACKPTR] // Get kernel stack top
add r3, r3, SS_R4
ldmia r3!, {r4-r14} // Restore new thread status
#if __ARM_VFP__
- add r3, r9, ACT_KVFP // Get the kernel VFP save area for the current thread...
+ ldr r3, [r9, TH_KSTACKPTR] // get kernel stack top
+ add r3, r3, SS_KVFP // Get the kernel VFP save area for the current thread...
load_vfp_registers // ...and load the saved state
#endif
bx lr // Return
DECLARE("ACT_TASK", offsetof(struct thread, task));
DECLARE("ACT_PCBDATA", offsetof(struct thread, machine.PcbData));
#if __ARM_VFP__
- DECLARE("ACT_UVFP", offsetof(struct thread, machine.uVFPdata));
- DECLARE("ACT_KVFP", offsetof(struct thread, machine.kVFPdata));
+ DECLARE("ACT_UVFP", offsetof(struct thread, machine.PcbData.VFPdata));
#endif
DECLARE("TH_CTH_SELF", offsetof(struct thread, machine.cthread_self));
- DECLARE("TH_CTH_DATA", offsetof(struct thread, machine.cthread_data));
DECLARE("ACT_PCBDATA_PC", offsetof(struct thread, machine.PcbData.pc));
DECLARE("ACT_PCBDATA_R0", offsetof(struct thread, machine.PcbData.r[0]));
DECLARE("ACT_PREEMPT_CNT", offsetof(struct thread, machine.preemption_count));
DECLARE("SS_EXC", offsetof(struct arm_saved_state, exception));
#if __ARM_VFP__
+ DECLARE("SS_KVFP", offsetof(struct arm_saved_state, VFPdata));
DECLARE("VSS_SIZE", sizeof(struct arm_vfpsaved_state));
DECLARE("VSS_FPSCR", offsetof(struct arm_vfpsaved_state, fpscr));
DECLARE("VSS_FPEXC", offsetof(struct arm_vfpsaved_state, fpexc));
and r2, r2, #3 // Extract cpu number
orr r1, r1, r2 //
mcr p15, 0, r1, c13, c0, 3 // Write TPIDRURO
- ldr r1, [r0, TH_CTH_DATA]
+ mov r1, #0
mcr p15, 0, r1, c13, c0, 2 // Write TPIDRURW
bx lr
}
thread->machine.preemption_count = 0;
thread->machine.cthread_self = 0;
- thread->machine.cthread_data = 0;
#if __ARM_USER_PROTECT__
{
struct pmap *new_pmap = vm_map_pmap(task->map);
savestate->r[7] = 0x0UL;
savestate->r[9] = (uint32_t) NULL;
savestate->cpsr = PSR_SVC_MODE | PSR_INTMASK;
+ vfp_state_initialize(&savestate->VFPdata);
machine_stack_attach_kprintf("thread = %x pc = %x, sp = %x\n", thread, savestate->lr, savestate->sp);
}
#ifdef __ARM64_PMAP_SUBPAGE_L1__
#if (__ARM_VMSA__ <= 7)
#error This is not supported for old-style page tables
-#endif
+#endif /* (__ARM_VMSA__ <= 7) */
#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
-#else
+#else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
+#if (__ARM_VMSA__ <= 7)
+#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
+#else /* (__ARM_VMSA__ > 7) */
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
-#endif
+#endif /* (__ARM_VMSA__ > 7) */
+#endif /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
const unsigned int arm_hardware_page_size = ARM_PGBYTES;
const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
#if (__ARM_VMSA__ == 7)
kernel_pmap->tte_index_max = 4 * NTTES;
#endif
- kernel_pmap->prev_tte = (tt_entry_t *) NULL;
kernel_pmap->hw_asid = 0;
kernel_pmap->sw_asid = 0;
PMAP_LOCK_INIT(kernel_pmap);
-#if (__ARM_VMSA__ == 7)
- simple_lock_init(&kernel_pmap->tt1_lock, 0);
- kernel_pmap->cpu_ref = 0;
-#endif
memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
/* allocate space for and initialize the bookkeeping structures */
p->ledger = ledger;
PMAP_LOCK_INIT(p);
-#if (__ARM_VMSA__ == 7)
- simple_lock_init(&p->tt1_lock, 0);
- p->cpu_ref = 0;
-#endif
memset((void *) &p->stats, 0, sizeof(p->stats));
p->tt_entry_free = (tt_entry_t *)0;
+ tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t);
+
+#if (__ARM_VMSA__ == 7)
+ p->tte_index_max = tte_index_max;
+#endif
p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
if (!(p->tte)) {
p->ttep = ml_static_vtop((vm_offset_t)p->tte);
PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
-#if (__ARM_VMSA__ == 7)
- tte_index_max = p->tte_index_max = NTTES;
-#else
- tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t));
-#endif
- p->prev_tte = (tt_entry_t *) NULL;
-
/* nullify the translation table */
for (i = 0; i < tte_index_max; i++) {
p->tte[i] = ARM_TTE_TYPE_FAULT;
queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
pmap_simple_unlock(&pmaps_lock);
-#if (__ARM_VMSA__ == 7)
- if (pmap->cpu_ref != 0) {
- panic("%s: cpu_ref=%u, "
- "pmap=%p",
- __FUNCTION__, pmap->cpu_ref,
- pmap);
- }
-#endif /* (__ARM_VMSA__ == 7) */
-
pmap_trim_self(pmap);
/*
pmap->ttep = 0;
}
-#if (__ARM_VMSA__ == 7)
- if (pmap->prev_tte) {
- pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
- pmap->prev_tte = (tt_entry_t *) NULL;
- }
-#endif /* (__ARM_VMSA__ == 7) */
-
assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
asid_index >>= 1;
#endif
-#if (__ARM_VMSA__ == 7)
- assert(not_in_kdp);
- pmap_simple_lock(&pmap->tt1_lock);
-#else
+#if (__ARM_VMSA__ > 7)
pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
#endif
os_atomic_inc(&pmap_asid_flushes, relaxed);
#endif
}
-
-#if (__ARM_VMSA__ == 7)
- pmap_simple_unlock(&pmap->tt1_lock);
-#endif
}
void
tt_entry_t *tt_p;
unsigned int i;
- while (tte_index(pmap, pt_attr, v) >= pmap->tte_index_max) {
- tte_p = pmap_tt1_allocate(pmap, 2 * ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
- if (tte_p == (tt_entry_t *)0) {
- return KERN_RESOURCE_SHORTAGE;
- }
-
- PMAP_LOCK(pmap);
- if (pmap->tte_index_max > NTTES) {
- pmap_tt1_deallocate(pmap, tte_p, 2 * ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
- PMAP_UNLOCK(pmap);
- break;
- }
-
- pmap_simple_lock(&pmap->tt1_lock);
- for (i = 0; i < pmap->tte_index_max; i++) {
- tte_p[i] = pmap->tte[i];
- }
- for (i = NTTES; i < 2 * NTTES; i++) {
- tte_p[i] = ARM_TTE_TYPE_FAULT;
- }
-
- FLUSH_PTE_RANGE(tte_p, tte_p + (2 * NTTES)); // DMB
-
- /* Order is important here, so that pmap_switch_user_ttb() sees things
- * in the correct sequence.
- * --update of pmap->tte[p] must happen prior to updating pmap->tte_index_max,
- * separated by at least a DMB, so that context switch does not see a 1 GB
- * L1 table with a 2GB size.
- * --update of pmap->tte[p] must also happen prior to setting pmap->prev_tte,
- * separated by at least a DMB, so that context switch does not see an L1
- * table to be freed without also seeing its replacement.*/
-
- tt_entry_t *prev_tte = pmap->tte;
-
- pmap->tte = tte_p;
- pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);
-
- __builtin_arm_dmb(DMB_ISH);
-
- pmap->tte_index_max = 2 * NTTES;
- pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
-
- for (i = 0; i < NTTES; i++) {
- prev_tte[i] = ARM_TTE_TYPE_FAULT;
- }
-
- /* We need a strong flush here because a TLB flush will be
- * issued from pmap_switch_user_ttb() as soon as this pmap
- * is no longer active on any CPU. We need to ensure all
- * prior stores to the TTE region have retired before that. */
- FLUSH_PTE_RANGE_STRONG(prev_tte, prev_tte + NTTES); // DSB
- pmap->prev_tte = prev_tte;
+#if DEVELOPMENT || DEBUG
+ /*
+ * We no longer support root level expansion; panic in case something
+ * still attempts to trigger it.
+ */
+ i = tte_index(pmap, pt_attr, v);
- pmap_simple_unlock(&pmap->tt1_lock);
- PMAP_UNLOCK(pmap);
- if (current_pmap() == pmap) {
- pmap_set_pmap(pmap, current_thread());
- }
+ if (i >= pmap->tte_index_max) {
+ panic("%s: index out of range, index=%u, max=%u, "
+ "pmap=%p, addr=%p, options=%u, level=%u",
+ __func__, i, pmap->tte_index_max,
+ pmap, (void *)v, options, level);
}
+#endif /* DEVELOPMENT || DEBUG */
if (level == 1) {
return KERN_SUCCESS;
cpu_data_ptr = pmap_get_cpu_data();
#if (__ARM_VMSA__ == 7)
-
- if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL)
- && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) {
- unsigned int c;
- tt_entry_t *tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte;
-
- c = os_atomic_dec(&cpu_data_ptr->cpu_user_pmap->cpu_ref, acq_rel);
- if ((c == 0) && (tt_entry != NULL)) {
- /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
- * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/
-
- cpu_data_ptr->cpu_user_pmap->prev_tte = NULL;
-#if !__ARM_USER_PROTECT__
- set_mmu_ttb(kernel_pmap->ttep);
- set_context_id(kernel_pmap->hw_asid);
-#endif
- /* Now that we can guarantee the old 1-page L1 table is no longer active on any CPU,
- * flush any cached intermediate translations that may point to it. Note that to be truly
- * safe from prefetch-related issues, this table PA must have been cleared from TTBR0 prior
- * to this call. __ARM_USER_PROTECT__ effectively guarantees that for all current configurations.*/
- flush_mmu_tlb_asid(cpu_data_ptr->cpu_user_pmap->hw_asid);
- pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
- }
- }
cpu_data_ptr->cpu_user_pmap = pmap;
cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
- os_atomic_inc(&pmap->cpu_ref, acq_rel);
#if MACH_ASSERT && __ARM_USER_PROTECT__
{
if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
max_offset_ret = arm_pmap_max_offset_default;
} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
- max_offset_ret = 0x66000000;
+ max_offset_ret = 0x80000000;
} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
max_offset_ret = VM_MAX_ADDRESS;
} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
} else if (max_mem > 0x20000000) {
max_offset_ret = 0x80000000;
} else {
- max_offset_ret = 0x66000000;
+ max_offset_ret = 0x80000000;
}
} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
max_offset_ret = 0x80000000;
#define PMAP_GC_WAIT 2
#if DEVELOPMENT || DEBUG
-#define pmap_cs_log(msg, args...) printf("PMAP_CS: " msg "\n", args)
#define pmap_cs_log_h(msg, args...) { if(pmap_cs_log_hacks) printf("PMAP_CS: " msg "\n", args); }
+#define pmap_cs_log pmap_cs_log_h
#define PMAP_CS_EXCEPTION_LIST_HACK 1
struct pmap_statistics stats; /* map statistics */
queue_chain_t pmaps; /* global list of pmaps */
tt_entry_t *tt_entry_free; /* free translation table entries */
- tt_entry_t *prev_tte; /* previous translation table */
struct pmap *nested_pmap; /* nested pmap */
vm_map_address_t nested_region_grand_addr;
vm_map_address_t nested_region_subord_addr;
unsigned int *nested_region_asid_bitmap;
#if (__ARM_VMSA__ <= 7)
- decl_simple_lock_data(, tt1_lock); /* lock on tt1 */
- unsigned int cpu_ref; /* number of cpus using pmap */
unsigned int tte_index_max; /* max tte index in translation table entries */
#endif
savestate->cpsr = PSR_USERDFLT;
#if __ARM_VFP__
- vfp_state_initialize(&thread->machine.uVFPdata);
- vfp_state_initialize(&thread->machine.kVFPdata);
+ vfp_state_initialize(&thread->machine.PcbData.VFPdata);
#endif
thread->machine.DebugData = NULL;
#endif
target->machine.cthread_self = self->machine.cthread_self;
- target->machine.cthread_data = self->machine.cthread_data;
self_saved_state = &self->machine.PcbData;
target_saved_state = &target->machine.PcbData;
bcopy(self_saved_state, target_saved_state, sizeof(struct arm_saved_state));
#if __ARM_VFP__
- self_vfp_state = &self->machine.uVFPdata;
- target_vfp_state = &target->machine.uVFPdata;
+ self_vfp_state = &self->machine.PcbData.VFPdata;
+ target_vfp_state = &target->machine.PcbData.VFPdata;
bcopy(self_vfp_state, target_vfp_state, sizeof(struct arm_vfpsaved_state));
#endif
find_user_vfp(
thread_t thread)
{
- return &thread->machine.uVFPdata;
+ return &thread->machine.PcbData.VFPdata;
}
#endif /* __ARM_VFP__ */
#include <arm/proc_reg.h>
#endif
-#if __ARM_VFP__
-
-#define VFPSAVE_ALIGN 16
-#define VFPSAVE_ATTRIB __attribute__((aligned (VFPSAVE_ALIGN)))
-#define THREAD_ALIGN VFPSAVE_ALIGN
-
-/*
- * vector floating point saved state
- */
-struct arm_vfpsaved_state {
- uint32_t r[64];
- uint32_t fpscr;
- uint32_t fpexc;
-};
-#endif
-
struct perfcontrol_state {
uint64_t opaque[8] __attribute__((aligned(8)));
};
#include <kern/thread_kernel_state.h>
struct machine_thread {
+#if __ARM_USER_PROTECT__
+ unsigned int uptw_ttc;
+ unsigned int uptw_ttb;
+ unsigned int kptw_ttb;
+ unsigned int asid;
+#endif
+
#if __arm64__
arm_context_t * contextData; /* allocated user context */
arm_saved_state_t * upcb; /* pointer to user GPR state */
arm_neon_saved_state_t * uNeon; /* pointer to user VFP state */
#elif __arm__
struct arm_saved_state PcbData;
-#if __ARM_VFP__
- struct arm_vfpsaved_state uVFPdata VFPSAVE_ATTRIB;
- struct arm_vfpsaved_state kVFPdata VFPSAVE_ATTRIB;
-#endif /* __ARM_VFP__ */
-
#else
#error Unknown arch
#endif
-#if __ARM_USER_PROTECT__
- unsigned int uptw_ttc;
- unsigned int uptw_ttb;
- unsigned int kptw_ttb;
- unsigned int asid;
+#if defined(__arm__) && defined(__ARM_VFP__)
+ // for packing reasons cthread_self and DebugData
+ // are inside the PcbData when __ARM_VFP__ is set
+#define DebugData PcbData.VFPpadding_DebugData
+#define cthread_self PcbData.VFPpadding_cthread_self
+#else
+ arm_debug_state_t *DebugData;
+ vm_address_t cthread_self; /* for use of cthread package */
#endif
vm_offset_t kstackptr; /* top of kernel stack */
uint8_t machine_thread_flags;
#endif /* __ARM_SMP__ */
- arm_debug_state_t * DebugData;
- mach_vm_address_t cthread_self; /* for use of cthread package */
- mach_vm_address_t cthread_data; /* for use of cthread package */
-
struct perfcontrol_state perfctrl_state;
#if __arm64__
uint64_t energy_estimate_nj;
and $2, $2, #(MACHDEP_CPUNUM_MASK)
orr $2, $1, $2 // Save new cthread/cpu to TPIDRRO_EL0
msr TPIDRRO_EL0, $2
- ldr $1, [$0, TH_CTH_DATA] // Get new cthread data pointer
- msr TPIDR_EL0, $1 // Save data pointer to TPIDRRW_EL0
+ msr TPIDR_EL0, xzr // Clear TPIDR_EL0 (cthread data pointer no longer kept there)
/* ARM64_TODO Reserve x18 until we decide what to do with it */
mov x18, $1 // ... and trash reserved x18
.endmacro
/* These fields are being added on demand */
DECLARE("ACT_CONTEXT", offsetof(struct thread, machine.contextData));
DECLARE("TH_CTH_SELF", offsetof(struct thread, machine.cthread_self));
- DECLARE("TH_CTH_DATA", offsetof(struct thread, machine.cthread_data));
DECLARE("ACT_PREEMPT_CNT", offsetof(struct thread, machine.preemption_count));
DECLARE("ACT_CPUDATAP", offsetof(struct thread, machine.CpuDatap));
DECLARE("ACT_DEBUGDATA", offsetof(struct thread, machine.DebugData));
mov sp, x21 // Reload the pcb pointer
/* ARM64_TODO Reserve x18 until we decide what to do with it */
- ldr x0, [x3, TH_CTH_DATA] // Load cthread data pointer
- str x0, [sp, SS64_X18] // and use it to trash x18
+ str xzr, [sp, SS64_X18] // Zero the saved x18 (reserved register)
#if __ARM_KERNEL_PROTECT__
/*
bzero_phys(src64, bytes);
}
+extern void *secure_memset(void *, int, size_t);
+
/* Zero bytes starting at a physical address */
void
bzero_phys(addr64_t src, vm_size_t bytes)
boolean_t use_copy_window = !pmap_valid_address(src);
pn = (ppnum_t)(src >> PAGE_SHIFT);
+ wimg_bits = pmap_cache_attributes(pn);
#if !defined(__ARM_COHERENT_IO__) && !__ARM_PTE_PHYSMAP__
count = PAGE_SIZE - offset;
- wimg_bits = pmap_cache_attributes(pn);
if ((wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
use_copy_window = TRUE;
}
#else
if (use_copy_window) {
- wimg_bits = pmap_cache_attributes(pn);
count = PAGE_SIZE - offset;
}
#endif
count = bytes;
}
- bzero(buf, count);
+ switch (wimg_bits & VM_WIMG_MASK) {
+ case VM_WIMG_DEFAULT:
+ case VM_WIMG_WCOMB:
+ case VM_WIMG_INNERWBACK:
+ case VM_WIMG_WTHRU:
+ bzero(buf, count);
+ break;
+ default:
+ /* 'dc zva' performed by bzero is not safe for device memory */
+ secure_memset((void*)buf, 0, count);
+ }
if (use_copy_window) {
pmap_unmap_cpu_windows_copy(index);
LEXT(set_aux_control)
msr ACTLR_EL1, x0
// Synchronize system
- dsb sy
isb sy
ret
}
thread->machine.preemption_count = 0;
thread->machine.cthread_self = 0;
- thread->machine.cthread_data = 0;
#if defined(HAS_APPLE_PAC)
thread->machine.rop_pid = task->rop_pid;
thread->machine.disable_user_jop = task->disable_user_jop;
lck_rw_done(<_rwlock);
}
-const int limit = 1000000;
-static int lt_stress_local_counters[MAX_CPUS];
-
-static void
-lt_stress_hw_lock()
-{
- int local_counter = 0;
-
- uint cpuid = current_processor()->cpu_id;
-
- kprintf("%s>cpu %d starting\n", __FUNCTION__, cpuid);
-
- hw_lock_lock(<_hw_lock, LCK_GRP_NULL);
- lt_counter++;
- local_counter++;
- hw_lock_unlock(<_hw_lock);
-
- while (lt_counter < lt_target_done_threads) {
- ;
- }
-
- kprintf("%s>cpu %d started\n", __FUNCTION__, cpuid);
-
- while (lt_counter < limit) {
- hw_lock_lock(<_hw_lock, LCK_GRP_NULL);
- if (lt_counter < limit) {
- lt_counter++;
- local_counter++;
- }
- hw_lock_unlock(<_hw_lock);
- }
-
- lt_stress_local_counters[cpuid] = local_counter;
-
- kprintf("%s>final counter %d cpu %d incremented the counter %d times\n", __FUNCTION__, lt_counter, cpuid, local_counter);
-}
static void
lt_grab_hw_lock()
OSIncrementAtomic((volatile SInt32*) <_done_threads);
}
-static void
-lt_bound_thread(void *arg, wait_result_t wres __unused)
-{
- void (*func)(void) = (void (*)(void))arg;
-
- int cpuid = OSIncrementAtomic((volatile SInt32 *)<_cpu_bind_id);
-
- processor_t processor = processor_list;
- while ((processor != NULL) && (processor->cpu_id != cpuid)) {
- processor = processor->processor_list;
- }
-
- if (processor != NULL) {
- thread_bind(processor);
- }
-
- thread_block(THREAD_CONTINUE_NULL);
-
- func();
-
- OSIncrementAtomic((volatile SInt32*) <_done_threads);
-}
-
static void
lt_start_lock_thread(thread_continue_t func)
{
}
-static void
-lt_start_lock_thread_bound(thread_continue_t func)
-{
- thread_t thread;
- kern_return_t kr;
-
- kr = kernel_thread_start(lt_bound_thread, func, &thread);
- assert(kr == KERN_SUCCESS);
-
- thread_deallocate(thread);
-}
-
static kern_return_t
lt_test_locks()
{
lt_wait_for_lock_test_threads();
T_EXPECT_EQ_UINT(lt_counter, LOCK_TEST_ITERATIONS * lt_target_done_threads, NULL);
- /* HW locks stress test */
- T_LOG("Running HW locks stress test with hw_lock_lock()");
- extern unsigned int real_ncpus;
- lt_reset();
- lt_target_done_threads = real_ncpus;
- for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) {
- lt_start_lock_thread_bound(lt_stress_hw_lock);
- }
- lt_wait_for_lock_test_threads();
- bool starvation = false;
- uint total_local_count = 0;
- for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) {
- starvation = starvation || (lt_stress_local_counters[processor->cpu_id] < 10);
- total_local_count += lt_stress_local_counters[processor->cpu_id];
- }
- if (total_local_count != lt_counter) {
- T_FAIL("Lock failure\n");
- } else if (starvation) {
- T_FAIL("Lock starvation found\n");
- } else {
- T_PASS("HW locks stress test with hw_lock_lock()");
- }
-
/* HW locks: trylocks */
T_LOG("Running test with hw_lock_try()");
#define APCTL_EL1_KernKeyEn (1ULL << 2)
#endif
+#define ACTLR_EL1_DisHWP_OFFSET 3
+#define ACTLR_EL1_DisHWP_MASK (1ULL << ACTLR_EL1_DisHWP_OFFSET)
+#define ACTLR_EL1_DisHWP ACTLR_EL1_DisHWP_MASK
#if defined(HAS_APPLE_PAC)
struct arm_saved_state *target_saved_state;
target->machine.cthread_self = self->machine.cthread_self;
- target->machine.cthread_data = self->machine.cthread_data;
self_saved_state = self->machine.upcb;
target_saved_state = target->machine.upcb;
unsigned long total_corpses_created = 0;
boolean_t corpse_enabled_config = TRUE;
+/* bootarg to generate corpse with size up to max_footprint_mb */
+boolean_t corpse_threshold_system_limit = FALSE;
+
/* bootarg to turn on corpse forking for EXC_RESOURCE */
int exc_via_corpse_forking = 1;
if (PE_parse_boot_argn("corpse_for_fatal_memkill", &fatal_memkill, sizeof(fatal_memkill))) {
corpse_for_fatal_memkill = fatal_memkill;
}
+#if DEBUG || DEVELOPMENT
+ if (PE_parse_boot_argn("-corpse_threshold_system_limit", &corpse_threshold_system_limit, sizeof(corpse_threshold_system_limit))) {
+ corpse_threshold_system_limit = TRUE;
+ }
+#endif /* DEBUG || DEVELOPMENT */
}
/*
uint64_t plbt[MAX_TRACE_BTFRAMES];
} plrecord_t;
+#if DEVELOPMENT || DEBUG
typedef enum {
IOTRACE_PHYS_READ = 1,
IOTRACE_PHYS_WRITE,
uint64_t backtrace[MAX_TRACE_BTFRAMES];
} iotrace_entry_t;
-#if DEVELOPMENT || DEBUG
+typedef struct {
+ int vector; /* Vector number of interrupt */
+ thread_t curthread; /* Current thread at the time of the interrupt */
+ uint64_t interrupted_pc; /* PC at the time of the trap/interrupt */
+ int curpl; /* Current preemption level */
+ int curil; /* Current interrupt level */
+ uint64_t start_time_abs; /* Start timestamp (absolute time) */
+ uint64_t duration; /* Set by traptrace_end(); ~0ULL while in flight */
+ uint64_t backtrace[MAX_TRACE_BTFRAMES]; /* Kernel backtrace captured at entry */
+} traptrace_entry_t;
+
#define DEFAULT_IOTRACE_ENTRIES_PER_CPU (64)
#define IOTRACE_MAX_ENTRIES_PER_CPU (256)
extern volatile int mmiotrace_enabled;
extern int *iotrace_next;
extern iotrace_entry_t **iotrace_ring;
-extern void init_iotrace_bufs(int cpucnt, int entries_per_cpu);
+#define TRAPTRACE_INVALID_INDEX (~0U)
+#define DEFAULT_TRAPTRACE_ENTRIES_PER_CPU (16)
+#define TRAPTRACE_MAX_ENTRIES_PER_CPU (256)
+extern volatile int traptrace_enabled;
+extern int traptrace_generators;
+extern int traptrace_entries_per_cpu;
+extern int *traptrace_next;
+extern traptrace_entry_t **traptrace_ring;
#endif /* DEVELOPMENT || DEBUG */
/*
*/
#if DEVELOPMENT || DEBUG
static inline void
-rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata)
+rbtrace_bt(uint64_t *rets, int maxframes, cpu_data_t *cdata, uint64_t frameptr, bool use_cursp)
{
extern uint32_t low_intstack[]; /* bottom */
extern uint32_t low_eintstack[]; /* top */
extern char mp_slave_stack[];
+ int btidx = 0;
uint64_t kstackb, kstackt;
* element. This will also indicate if we were unable to
* trace further up the stack for some reason
*/
- __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
- : "=m" (rets[0])
- :
- : "rax");
-
+ if (use_cursp) {
+ __asm__ volatile ("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:"
+ : "=m" (rets[btidx++])
+ :
+ : "rax");
+ }
thread_t cplthread = cdata->cpu_active_thread;
if (cplthread) {
uintptr_t csp;
- __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
+ if (use_cursp == true) {
+ __asm__ __volatile__ ("movq %%rsp, %0": "=r" (csp):);
+ } else {
+ csp = frameptr;
+ }
/* Determine which stack we're on to populate stack bounds.
* We don't need to trace across stack boundaries for this
* routine.
}
if (__probable(kstackb && kstackt)) {
- uint64_t *cfp = (uint64_t *) __builtin_frame_address(0);
+ uint64_t *cfp = (uint64_t *) frameptr;
int rbbtf;
- for (rbbtf = 1; rbbtf < maxframes; rbbtf++) {
+ for (rbbtf = btidx; rbbtf < maxframes; rbbtf++) {
if (((uint64_t)cfp == 0) || (((uint64_t)cfp < kstackb) || ((uint64_t)cfp > kstackt))) {
rets[rbbtf] = 0;
continue;
cdata->cpu_plri = cplrecord;
- rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata);
+ rbtrace_bt(plbts, MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
}
extern int plctrace_enabled;
iotrace_next[cpu_num] = ((nextidx + 1) >= iotrace_entries_per_cpu) ? 0 : (nextidx + 1);
rbtrace_bt(&cur_iotrace_ring[nextidx].backtrace[0],
- MAX_TRACE_BTFRAMES - 1, cdata);
+ MAX_TRACE_BTFRAMES - 1, cdata, (uint64_t)__builtin_frame_address(0), true);
+}
+
+static inline uint32_t
+traptrace_start(int vecnum, uint64_t ipc, uint64_t sabs, uint64_t frameptr)
+{
+ cpu_data_t *cdata;
+ int cpu_num, nextidx;
+ traptrace_entry_t *cur_traptrace_ring;
+
+ if (__improbable(traptrace_enabled == 0 || traptrace_generators == 0)) {
+ return TRAPTRACE_INVALID_INDEX;
+ }
+
+ assert(ml_get_interrupts_enabled() == FALSE);
+ cdata = current_cpu_datap();
+ cpu_num = cdata->cpu_number;
+ nextidx = traptrace_next[cpu_num];
+ /* prevent nested interrupts from clobbering this record */
+ traptrace_next[cpu_num] = ((nextidx + 1) >= traptrace_entries_per_cpu) ? 0 : (nextidx + 1);
+
+ cur_traptrace_ring = traptrace_ring[cpu_num];
+
+ cur_traptrace_ring[nextidx].vector = vecnum;
+ cur_traptrace_ring[nextidx].curthread = current_thread();
+ cur_traptrace_ring[nextidx].interrupted_pc = ipc;
+ cur_traptrace_ring[nextidx].curpl = cdata->cpu_preemption_level;
+ cur_traptrace_ring[nextidx].curil = cdata->cpu_interrupt_level;
+ cur_traptrace_ring[nextidx].start_time_abs = sabs;
+ cur_traptrace_ring[nextidx].duration = ~0ULL;
+
+ rbtrace_bt(&cur_traptrace_ring[nextidx].backtrace[0],
+ MAX_TRACE_BTFRAMES - 1, cdata, frameptr, false);
+
+ assert(nextidx <= 0xFFFF);
+
+ return ((unsigned)cpu_num << 16) | nextidx;
+}
+
+static inline void
+traptrace_end(uint32_t index, uint64_t eabs)
+{
+ if (index != TRAPTRACE_INVALID_INDEX) {
+ traptrace_entry_t *ttentp = &traptrace_ring[index >> 16][index & 0xFFFF];
+
+ ttentp->duration = eabs - ttentp->start_time_abs;
+ }
}
+
#endif /* DEVELOPMENT || DEBUG */
static inline void
#if DEVELOPMENT || DEBUG
void iotrace_init(int ncpus);
+void traptrace_init(int ncpus);
#endif /* DEVELOPMENT || DEBUG */
#if DEVELOPMENT || DEBUG
iotrace_init(ncpus);
+ traptrace_init(ncpus);
#endif /* DEVELOPMENT || DEBUG */
/*
int *iotrace_next;
iotrace_entry_t **iotrace_ring;
-void
-init_iotrace_bufs(int cpucnt, int entries_per_cpu)
+volatile int traptrace_enabled = 1;
+int traptrace_generators = 0;
+int traptrace_entries_per_cpu = 0;
+int *traptrace_next;
+traptrace_entry_t **traptrace_ring;
+
+static void
+init_trace_bufs(int cpucnt, int entries_per_cpu, void ***ring, int entry_size,
+ int **next_array, int *allocated_entries_per_cpu, int *allocated_generator_count)
{
int i;
- iotrace_next = kalloc_tag(cpucnt * sizeof(int), VM_KERN_MEMORY_DIAG);
- if (__improbable(iotrace_next == NULL)) {
- iotrace_generators = 0;
+ *next_array = kalloc_tag(cpucnt * sizeof(int), VM_KERN_MEMORY_DIAG);
+ if (__improbable(*next_array == NULL)) {
+ *allocated_generator_count = 0;
return;
} else {
- bzero(iotrace_next, cpucnt * sizeof(int));
+ bzero(*next_array, cpucnt * sizeof(int));
}
- iotrace_ring = kalloc_tag(cpucnt * sizeof(iotrace_entry_t *), VM_KERN_MEMORY_DIAG);
- if (__improbable(iotrace_ring == NULL)) {
- kfree(iotrace_next, cpucnt * sizeof(int));
- iotrace_generators = 0;
+ *ring = kalloc_tag(cpucnt * sizeof(void *), VM_KERN_MEMORY_DIAG);
+ if (__improbable(*ring == NULL)) {
+ kfree(*next_array, cpucnt * sizeof(int));
+ *next_array = NULL;
+ *allocated_generator_count = 0;
return;
}
for (i = 0; i < cpucnt; i++) {
- iotrace_ring[i] = kalloc_tag(entries_per_cpu * sizeof(iotrace_entry_t), VM_KERN_MEMORY_DIAG);
- if (__improbable(iotrace_ring[i] == NULL)) {
- kfree(iotrace_next, cpucnt * sizeof(int));
- iotrace_next = NULL;
+ (*ring)[i] = kalloc_tag(entries_per_cpu * entry_size, VM_KERN_MEMORY_DIAG);
+ if (__improbable((*ring)[i] == NULL)) {
+ kfree(*next_array, cpucnt * sizeof(int));
+ *next_array = NULL;
for (int j = 0; j < i; j++) {
- kfree(iotrace_ring[j], entries_per_cpu * sizeof(iotrace_entry_t));
+ kfree((*ring)[j], entries_per_cpu * entry_size);
}
- kfree(iotrace_ring, cpucnt * sizeof(iotrace_entry_t *));
- iotrace_ring = NULL;
+ kfree(*ring, cpucnt * sizeof(void *));
+ *ring = NULL;
return;
}
- bzero(iotrace_ring[i], entries_per_cpu * sizeof(iotrace_entry_t));
+ bzero((*ring)[i], entries_per_cpu * entry_size);
}
- iotrace_entries_per_cpu = entries_per_cpu;
- iotrace_generators = cpucnt;
+ *allocated_entries_per_cpu = entries_per_cpu;
+ *allocated_generator_count = cpucnt;
}
-void
-iotrace_init(int ncpus)
+
+static void
+init_iotrace_bufs(int cpucnt, int entries_per_cpu)
{
- int iot, epc;
- int entries_per_cpu;
+ init_trace_bufs(cpucnt, entries_per_cpu, (void ***)&iotrace_ring, sizeof(iotrace_entry_t),
+ &iotrace_next, &iotrace_entries_per_cpu, &iotrace_generators);
+}
+
+static void
+init_traptrace_bufs(int cpucnt, int entries_per_cpu)
+{
+ init_trace_bufs(cpucnt, entries_per_cpu, (void ***)&traptrace_ring, sizeof(traptrace_entry_t),
+ &traptrace_next, &traptrace_entries_per_cpu, &traptrace_generators);
+}
- if (PE_parse_boot_argn("iotrace", &iot, sizeof(iot))) {
- mmiotrace_enabled = iot;
+static void
+gentrace_configure_from_bootargs(const char *ena_prop, int *ena_valp, const char *epc_prop,
+ int *epcp, int max_epc, int def_epc, int override)
+{
+ if (kern_feature_override(override)) {
+ *ena_valp = 0;
}
- if (mmiotrace_enabled == 0) {
+ (void) PE_parse_boot_argn(ena_prop, ena_valp, sizeof(*ena_valp));
+
+ if (*ena_valp == 0) {
return;
}
- if (PE_parse_boot_argn("iotrace_epc", &epc, sizeof(epc)) &&
- epc >= 1 && epc <= IOTRACE_MAX_ENTRIES_PER_CPU) {
- entries_per_cpu = epc;
- } else {
- entries_per_cpu = DEFAULT_IOTRACE_ENTRIES_PER_CPU;
+ if (PE_parse_boot_argn(epc_prop, epcp, sizeof(*epcp)) &&
+ (*epcp < 1 || *epcp > max_epc)) {
+ *epcp = def_epc;
}
+}
+
+void
+iotrace_init(int ncpus)
+{
+ int entries_per_cpu = DEFAULT_IOTRACE_ENTRIES_PER_CPU;
+ int enable = mmiotrace_enabled;
- init_iotrace_bufs(ncpus, entries_per_cpu);
+ gentrace_configure_from_bootargs("iotrace", &enable, "iotrace_epc", &entries_per_cpu,
+ IOTRACE_MAX_ENTRIES_PER_CPU, DEFAULT_IOTRACE_ENTRIES_PER_CPU, KF_IOTRACE_OVRD);
+
+ mmiotrace_enabled = enable;
+
+ if (mmiotrace_enabled) {
+ init_iotrace_bufs(ncpus, entries_per_cpu);
+ }
}
+
+void
+traptrace_init(int ncpus)
+{
+ int entries_per_cpu = DEFAULT_TRAPTRACE_ENTRIES_PER_CPU;
+ int enable = traptrace_enabled;
+
+ gentrace_configure_from_bootargs("traptrace", &enable, "traptrace_epc", &entries_per_cpu,
+ TRAPTRACE_MAX_ENTRIES_PER_CPU, DEFAULT_TRAPTRACE_ENTRIES_PER_CPU, KF_TRAPTRACE_OVRD);
+
+ traptrace_enabled = enable;
+
+ if (traptrace_enabled) {
+ init_traptrace_bufs(ncpus, entries_per_cpu);
+ }
+}
+
#endif /* DEVELOPMENT || DEBUG */
sizeof(cpuid_cache_descriptor_t))
static void do_cwas(i386_cpu_info_t *cpuinfo, boolean_t on_slave);
+static void cpuid_do_precpuid_was(void);
static inline cpuid_cache_descriptor_t *
cpuid_leaf2_find(uint8_t value)
* enumerated, lest we #GP when forced to access it.)
*/
if (cpuid_wa_required(CPU_INTEL_TSXFA) == CWA_ON) {
+ /* This must be executed on all logical processors */
wrmsr64(MSR_IA32_TSX_FORCE_ABORT,
rdmsr64(MSR_IA32_TSX_FORCE_ABORT) | MSR_IA32_TSXFA_RTM_FORCE_ABORT);
}
i386_cpu_info_t *info_p = &cpuid_cpu_info;
boolean_t enable_x86_64h = TRUE;
+ /* Perform pre-cpuid workarounds (since their effects impact values returned via cpuid) */
+ cpuid_do_precpuid_was();
+
cpuid_set_generic_info(info_p);
/* verify we are running on a supported CPU */
cwa_classifier_e
cpuid_wa_required(cpu_wa_e wa)
{
+ i386_cpu_info_t *info_p = &cpuid_cpu_info;
static uint64_t bootarg_cpu_wa_enables = 0;
static uint64_t bootarg_cpu_wa_disables = 0;
static int bootargs_overrides_processed = 0;
- i386_cpu_info_t *info_p = &cpuid_cpu_info;
if (!bootargs_overrides_processed) {
if (!PE_parse_boot_argn("cwae", &bootarg_cpu_wa_enables, sizeof(bootarg_cpu_wa_enables))) {
case CPU_INTEL_TSXFA:
/*
- * If this CPU supports RTM and supports FORCE_ABORT, return that
+ * Otherwise, if the CPU supports both TSX(HLE) and FORCE_ABORT, return that
* the workaround should be enabled.
*/
if ((info_p->cpuid_leaf7_extfeatures & CPUID_LEAF7_EXTFEATURE_TSXFA) != 0 &&
return CWA_OFF;
}
+
+static void
+cpuid_do_precpuid_was(void)
+{
+ /*
+ * Note that care must be taken not to use any of the cached cpuid data, since it is
+ * likely uninitialized at this point. That includes calling functions that make use of
+ * that data.
+ */
+
+}
typedef enum {
CPU_INTEL_SEGCHK = 1,
- CPU_INTEL_TSXFA
+ CPU_INTEL_TSXFA = 2
} cpu_wa_e;
typedef enum {
intr = ml_set_interrupts_enabled(FALSE);
if (get_interrupt_level()) {
- panic("FPU segment overrun exception at interrupt context\n");
+ panic("FPU segment overrun exception at interrupt context\n");
}
if (current_task() == kernel_task) {
panic("FPU segment overrun exception in kernel thread context\n");
if (ifps) {
fp_state_free(ifps, xstate);
}
-
- /*
- * Raise exception.
- */
- i386_exception(EXC_BAD_ACCESS, VM_PROT_READ | VM_PROT_EXECUTE, 0);
- /*NOTREACHED*/
}
extern void fpxlog(int, uint32_t, uint32_t, uint32_t);
const uint32_t xcpt = ~mask & (ifps->fx_status &
(FPS_IE | FPS_DE | FPS_ZE | FPS_OE | FPS_UE | FPS_PE));
fpxlog(EXC_I386_EXTERR, ifps->fx_status, ifps->fx_control, xcpt);
- /*
- * Raise FPU exception.
- * Locking not needed on pcb->ifps,
- * since thread is running.
- */
- i386_exception(EXC_ARITHMETIC,
- EXC_I386_EXTERR,
- ifps->fx_status);
-
- /*NOTREACHED*/
}
/*
const uint32_t xcpt = ~mask & (ifps->fx_MXCSR &
(FPS_IE | FPS_DE | FPS_ZE | FPS_OE | FPS_UE | FPS_PE));
fpxlog(EXC_I386_SSEEXTERR, ifps->fx_MXCSR, ifps->fx_MXCSR, xcpt);
-
- i386_exception(EXC_ARITHMETIC,
- EXC_I386_SSEEXTERR,
- ifps->fx_MXCSR);
- /*NOTREACHED*/
}
* return directly via thread_exception_return().
* Otherwise simply return.
*/
-#define MAX_X86_INSN_LENGTH (16)
-void
+#define MAX_X86_INSN_LENGTH (15)
+int
fpUDflt(user_addr_t rip)
{
uint8_t instruction_prefix;
* rather than issue multiple copyins
*/
if (copyin(rip, (char *) &instruction_prefix, 1)) {
- return;
+ return 1;
}
DBG("fpUDflt(0x%016llx) prefix: 0x%x\n",
rip, instruction_prefix);
/* Skip optional prefixes */
rip++;
if ((rip - original_rip) > MAX_X86_INSN_LENGTH) {
- return;
+ return 1;
}
break;
case 0x62: /* EVEX */
is_AVX512_instruction = TRUE;
break;
default:
- return;
+ return 1;
}
} while (!is_AVX512_instruction);
* Fail if this machine doesn't support AVX512
*/
if (fpu_capability != AVX512) {
- return;
+ return 1;
}
assert(xgetbv(XCR0) == AVX_XMASK);
DBG("fpUDflt() switching xstate to AVX512\n");
(void) fpu_thread_promote_avx512(current_thread());
- thread_exception_return();
- /* NOT REACHED */
+ return 0;
}
#endif /* !defined(RC_HIDE_XNU_J137) */
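Because fpUDflt() now reports failure instead of raising the exception or returning to user space itself, callers are expected to branch on its result; a hedged sketch (the exact trap-handler wiring is not shown in this hunk, and the variable name rip is an assumption):

/* Hypothetical caller sketch -- not from the patch. */
if (fpUDflt(rip) == 0) {
	/* #UD handled: thread promoted to AVX512 state; the faulting instruction can be retried */
} else {
	/* not a recoverable AVX512 #UD; take the usual exception path */
}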
extern xstate_t fpu_default;
extern xstate_t fpu_capability;
extern xstate_t current_xstate(void);
-extern void fpUDflt(user_addr_t rip);
+extern int fpUDflt(user_addr_t rip);
#ifdef MACH_KERNEL_PRIVATE
extern uint32_t thread_fpsimd_hash(thread_t);
extern void vzeroall(void);
#endif
/* update CPU microcode */
ucode_update_wake();
-
- /* Do CPU workarounds after the microcode update */
- cpuid_do_was();
} else {
init_param = FAST_SLAVE_INIT;
}
#define MSR_IA32_BBL_CR_CTL 0x119
+
#define MSR_IA32_SYSENTER_CS 0x174
#define MSR_IA32_SYSENTER_ESP 0x175
#define MSR_IA32_SYSENTER_EIP 0x176
/*
* Forward declarations
*/
-static void user_page_fault_continue(kern_return_t kret);
static void panic_trap(x86_saved_state64_t *saved_state, uint32_t pl, kern_return_t fault_result) __dead2;
static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip);
/*NOTREACHED*/
}
-
-static inline void
-user_page_fault_continue(
- kern_return_t kr)
-{
- thread_t thread = current_thread();
- user_addr_t vaddr;
-
- if (thread_is_64bit_addr(thread)) {
- x86_saved_state64_t *uregs;
-
- uregs = USER_REGS64(thread);
-
- vaddr = (user_addr_t)uregs->cr2;
- } else {
- x86_saved_state32_t *uregs;
-
- uregs = USER_REGS32(thread);
-
- vaddr = uregs->cr2;
- }
-
-
- /* PAL debug hook */
- pal_dbg_page_fault( thread, vaddr, kr );
-
- i386_exception(EXC_BAD_ACCESS, kr, vaddr);
- /*NOTREACHED*/
-}
-
/*
* Fault recovery in copyin/copyout routines.
*/
user_mode = TRUE;
}
+#if DEVELOPMENT || DEBUG
+ uint64_t frameptr = is_saved_state64(state) ? state64->rbp : saved_state32(state)->ebp;
+ uint32_t traptrace_index = traptrace_start(interrupt_num, rip, mach_absolute_time(), frameptr);
+#endif
+
if (cpu_data_ptr[cnum]->lcpu.package->num_idle == topoParms.nLThreadsPerPackage) {
cpu_data_ptr[cnum]->cpu_hwIntpexits[interrupt_num]++;
}
interrupt_num);
assert(ml_get_interrupts_enabled() == FALSE);
+
+#if DEVELOPMENT || DEBUG
+ if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
+ traptrace_end(traptrace_index, mach_absolute_time());
+ }
+#endif
}
static inline void
is_user = (vaddr < VM_MAX_USER_PAGE_ADDRESS);
+#if DEVELOPMENT || DEBUG
+ uint32_t traptrace_index = traptrace_start(type, kern_ip, mach_absolute_time(), saved_state->rbp);
+#endif
+
#if CONFIG_DTRACE
/*
* Is there a DTrace hook?
/*
* If it succeeds, we are done...
*/
- return;
+ goto common_return;
}
}
#endif /* CONFIG_DTRACE */
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
(MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
0, 0, 0, VM_KERNEL_UNSLIDE(kern_ip), 0);
- return;
+
+ goto common_return;
}
user_addr_t kd_vaddr = is_user ? vaddr : VM_KERNEL_UNSLIDE(vaddr);
switch (type) {
case T_NO_FPU:
fpnoextflt();
- return;
+ goto common_return;
case T_FPU_FAULT:
fpextovrflt();
- return;
+ goto common_return;
case T_FLOATING_POINT_ERROR:
fpexterrflt();
- return;
+ goto common_return;
case T_SSE_FLOAT_ERROR:
fpSSEexterrflt();
- return;
+ goto common_return;
case T_INVALID_OPCODE:
fpUDflt(kern_ip);
* This isn't supposed to happen.
*/
reset_dr7();
- return;
+ goto common_return;
}
goto debugger_entry;
case T_INT3:
(void) ml_set_interrupts_enabled(intr);
}
#endif /* NCOPY_WINDOWS > 0 */
- return;
+ goto common_return;
}
/*
* fall through
for (rp = recover_table; rp < recover_table_end; rp++) {
if (kern_ip == rp->fault_addr) {
set_recovery_ip(saved_state, rp->recover_addr);
- return;
+ goto common_return;
}
}
if (thread != THREAD_NULL && thread->recover) {
set_recovery_ip(saved_state, thread->recover);
thread->recover = 0;
- return;
+ goto common_return;
}
/*
* Unanticipated page-fault errors in kernel
*/
if (type == 15) {
kprintf("kernel_trap() ignoring spurious trap 15\n");
- return;
+ goto common_return;
}
debugger_entry:
/* Ensure that the i386_kernel_state at the base of the
sync_iss_to_iks(state);
#if MACH_KDP
if (kdp_i386_trap(type, saved_state, result, (vm_offset_t)vaddr)) {
- return;
+ goto common_return;
}
#endif
}
/*
* NO RETURN
*/
+
+common_return:
+#if DEVELOPMENT || DEBUG
+ if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
+ traptrace_end(traptrace_index, mach_absolute_time());
+ }
+#endif
+ return;
}
static void
kern_return_t kret;
user_addr_t rip;
unsigned long dr6 = 0; /* 32 bit for i386, 64 bit for x86_64 */
-
+#if DEVELOPMENT || DEBUG
+ uint32_t traptrace_index;
+#endif
assert((is_saved_state32(saved_state) && !thread_is_64bit_addr(thread)) ||
(is_saved_state64(saved_state) && thread_is_64bit_addr(thread)));
err = (int)regs->isf.err & 0xffff;
vaddr = (user_addr_t)regs->cr2;
rip = (user_addr_t)regs->isf.rip;
+#if DEVELOPMENT || DEBUG
+ traptrace_index = traptrace_start(type, rip, mach_absolute_time(), regs->rbp);
+#endif
} else {
x86_saved_state32_t *regs;
err = regs->err & 0xffff;
vaddr = (user_addr_t)regs->cr2;
rip = (user_addr_t)regs->eip;
+#if DEVELOPMENT || DEBUG
+ traptrace_index = traptrace_start(type, rip, mach_absolute_time(), regs->ebp);
+#endif
}
+
if ((type == T_DEBUG) && thread->machine.ids) {
unsigned long clear = 0;
/* Stash and clear this processor's DR6 value, in the event
break;
case T_INVALID_OPCODE:
-#if !defined(RC_HIDE_XNU_J137)
- fpUDflt(rip); /* May return from exception directly */
-#endif
- exc = EXC_BAD_INSTRUCTION;
- code = EXC_I386_INVOP;
+ if (fpUDflt(rip) == 1) {
+ exc = EXC_BAD_INSTRUCTION;
+ code = EXC_I386_INVOP;
+ }
break;
case T_NO_FPU:
fpnoextflt();
- return;
+ break;
case T_FPU_FAULT:
- fpextovrflt(); /* Propagates exception directly, doesn't return */
- return;
+ fpextovrflt();
+ /*
+ * Raise exception.
+ */
+ exc = EXC_BAD_ACCESS;
+ code = VM_PROT_READ | VM_PROT_EXECUTE;
+ subcode = 0;
+ break;
case T_INVALID_TSS: /* invalid TSS == iret with NT flag set */
exc = EXC_BAD_INSTRUCTION;
}
#endif
if (__probable((kret == KERN_SUCCESS) || (kret == KERN_ABORTED))) {
- thread_exception_return();
- /*NOTREACHED*/
- }
-
- /*
- * For a user trap, vm_fault() should never return KERN_FAILURE.
- * If it does, we're leaking preemption disables somewhere in the kernel.
- */
- if (__improbable(kret == KERN_FAILURE)) {
+ break;
+ } else if (__improbable(kret == KERN_FAILURE)) {
+ /*
+ * For a user trap, vm_fault() should never return KERN_FAILURE.
+ * If it does, we're leaking preemption disables somewhere in the kernel.
+ */
panic("vm_fault() KERN_FAILURE from user fault on thread %p", thread);
}
- user_page_fault_continue(kret);
- } /* NOTREACHED */
+ /* PAL debug hook (empty on x86) */
+ pal_dbg_page_fault(thread, vaddr, kret);
+ exc = EXC_BAD_ACCESS;
+ code = kret;
+ subcode = vaddr;
+ }
break;
case T_SSE_FLOAT_ERROR:
- fpSSEexterrflt(); /* Propagates exception directly, doesn't return */
- return;
+ fpSSEexterrflt();
+ exc = EXC_ARITHMETIC;
+ code = EXC_I386_SSEEXTERR;
+ subcode = ((struct x86_fx_thread_state *)thread->machine.ifps)->fx_MXCSR;
+ break;
case T_FLOATING_POINT_ERROR:
- fpexterrflt(); /* Propagates exception directly, doesn't return */
- return;
+ fpexterrflt();
+ exc = EXC_ARITHMETIC;
+ code = EXC_I386_EXTERR;
+ subcode = ((struct x86_fx_thread_state *)thread->machine.ifps)->fx_status;
+ break;
case T_DTRACE_RET:
#if CONFIG_DTRACE
default:
panic("Unexpected user trap, type %d", type);
}
- /* Note: Codepaths that directly return from user_trap() have pending
- * ASTs processed in locore
- */
- i386_exception(exc, code, subcode);
- /* NOTREACHED */
+
+#if DEVELOPMENT || DEBUG
+ if (traptrace_index != TRAPTRACE_INVALID_INDEX) {
+ traptrace_end(traptrace_index, mach_absolute_time());
+ }
+#endif
+
+ if (exc != 0) {
+ /*
+ * Note: Codepaths that directly return from user_trap() have pending
+ * ASTs processed in locore
+ */
+ i386_exception(exc, code, subcode);
+ /* NOTREACHED */
+ }
}
/*
void
panic_double_fault64(x86_saved_state_t *sp)
{
+#if DEVELOPMENT || DEBUG
+ uint64_t frameptr = is_saved_state64(sp) ? saved_state64(sp)->rbp : saved_state32(sp)->ebp;
+ (void) traptrace_start(T_DOUBLE_FAULT, saved_state64(sp)->isf.rip, mach_absolute_time(), frameptr);
+#endif
(void)OSCompareAndSwap((UInt32) - 1, (UInt32) cpu_number(), (volatile UInt32 *)&panic_double_fault_cpu);
panic_64(sp, PANIC_DOUBLE_FAULT, "Double fault", FALSE);
}
void
panic_machine_check64(x86_saved_state_t *sp)
{
+#if DEVELOPMENT || DEBUG
+ uint64_t frameptr = is_saved_state64(sp) ? saved_state64(sp)->rbp : saved_state32(sp)->ebp;
+ (void) traptrace_start(T_MACHINE_CHECK, saved_state64(sp)->isf.rip, mach_absolute_time(), frameptr);
+#endif
panic_64(sp, PANIC_MACHINE_CHECK, "Machine Check", TRUE);
}
cpu_apply_microcode();
/* Update the cpuid info */
ucode_cpuid_set_info();
- /* Now apply workarounds */
- cpuid_do_was();
mp_enable_preemption();
/* Get all other CPUs to perform the update */
*/
void
ipc_mqueue_init(
- ipc_mqueue_t mqueue,
- boolean_t is_set)
+ ipc_mqueue_t mqueue,
+ ipc_mqueue_kind_t kind)
{
- if (is_set) {
+ switch (kind) {
+ case IPC_MQUEUE_KIND_SET:
waitq_set_init(&mqueue->imq_set_queue,
SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST,
NULL, NULL);
- } else {
- waitq_init(&mqueue->imq_wait_queue, SYNC_POLICY_FIFO | SYNC_POLICY_PORT);
+ break;
+ case IPC_MQUEUE_KIND_NONE: /* cheat: we really should have "no" mqueue */
+ case IPC_MQUEUE_KIND_PORT:
+ waitq_init(&mqueue->imq_wait_queue,
+ SYNC_POLICY_FIFO | SYNC_POLICY_TURNSTILE_PROXY);
ipc_kmsg_queue_init(&mqueue->imq_messages);
mqueue->imq_seqno = 0;
mqueue->imq_msgcount = 0;
#if MACH_FLIPC
mqueue->imq_fport = FPORT_NULL;
#endif
+ break;
}
klist_init(&mqueue->imq_klist);
}
imq_unlock(port_mq);
return THREAD_NOT_WAITING;
}
- } else if (imq_is_queue(mqueue)) {
+ } else if (imq_is_queue(mqueue) || imq_is_turnstile_proxy(mqueue)) {
ipc_kmsg_queue_t kmsgs;
/*
}
/*
- * Threads waiting on a special reply port
- * (not portset or regular ports)
+ * Threads waiting on a reply port (not portset)
* will wait on its receive turnstile.
*
* Donate waiting thread's turnstile and
 * will be converted to the turnstile waitq
* in waitq_assert_wait instead of global waitqs.
*/
- if (imq_is_queue(mqueue) && ip_from_mq(mqueue)->ip_specialreply) {
+ if (imq_is_turnstile_proxy(mqueue)) {
ipc_port_t port = ip_from_mq(mqueue);
rcv_turnstile = turnstile_prepare((uintptr_t)port,
port_rcv_turnstile_address(port),
#define imq_set_queue data.pset.setq
#define imq_is_set(mq) waitqs_is_set(&(mq)->imq_set_queue)
#define imq_is_queue(mq) waitq_is_queue(&(mq)->imq_wait_queue)
+#define imq_is_turnstile_proxy(mq) \
+ waitq_is_turnstile_proxy(&(mq)->imq_wait_queue)
#define imq_is_valid(mq) waitq_is_valid(&(mq)->imq_wait_queue)
#define imq_unlock(mq) waitq_unlock(&(mq)->imq_wait_queue)
* Exported interfaces
*/
+__enum_closed_decl(ipc_mqueue_kind_t, int, {
+ IPC_MQUEUE_KIND_NONE, /* this mqueue really isn't used */
+ IPC_MQUEUE_KIND_PORT, /* this queue is a regular port queue */
+ IPC_MQUEUE_KIND_SET, /* this queue is a portset queue */
+});
+
/* Initialize a newly-allocated message queue */
extern void ipc_mqueue_init(
ipc_mqueue_t mqueue,
- boolean_t is_set);
+ ipc_mqueue_kind_t kind);
/* de-initialize / cleanup an mqueue (specifically waitq resources) */
extern void ipc_mqueue_deinit(
ipc_notify_send_once(
ipc_port_t port)
{
- ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN, FALSE);
+ ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN);
(void)mach_notify_send_once(port);
/* send-once right consumed */
ipc_port_init(
ipc_port_t port,
ipc_space_t space,
+ ipc_port_init_flags_t flags,
mach_port_name_t name)
{
/* port->ip_kobject doesn't have to be initialized */
port->ip_mscount = 0;
port->ip_srights = 0;
port->ip_sorights = 0;
+ if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) {
+ port->ip_srights = 1;
+ port->ip_mscount = 1;
+ }
port->ip_nsrequest = IP_NULL;
port->ip_pdrequest = IP_NULL;
port->ip_immovable_send = 0;
port->ip_impcount = 0;
- port->ip_specialreply = 0;
+ port->ip_specialreply = (flags & IPC_PORT_INIT_SPECIAL_REPLY) != 0;
port->ip_sync_link_state = PORT_SYNC_LINK_ANY;
port->ip_sync_bootstrap_checkin = 0;
- port->ip_watchport_elem = NULL;
ipc_special_reply_port_bits_reset(port);
port->ip_send_turnstile = TURNSTILE_NULL;
- ipc_mqueue_init(&port->ip_messages,
- FALSE /* !set */);
+ ipc_mqueue_kind_t kind = IPC_MQUEUE_KIND_NONE;
+ if (flags & IPC_PORT_INIT_MESSAGE_QUEUE) {
+ kind = IPC_MQUEUE_KIND_PORT;
+ }
+ ipc_mqueue_init(&port->ip_messages, kind);
}
/*
kern_return_t
ipc_port_alloc(
ipc_space_t space,
- bool make_send_right,
+ ipc_port_init_flags_t flags,
mach_port_name_t *namep,
ipc_port_t *portp)
{
ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);
#endif /* MACH_ASSERT */
- if (make_send_right) {
+ if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) {
type |= MACH_PORT_TYPE_SEND;
urefs = 1;
}
}
/* port and space are locked */
- ipc_port_init(port, space, name);
-
- if (make_send_right) {
- /* ipc_object_alloc() already made the entry reference */
- port->ip_srights++;
- port->ip_mscount++;
- }
+ ipc_port_init(port, space, flags, name);
#if MACH_ASSERT
ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
kern_return_t
ipc_port_alloc_name(
ipc_space_t space,
+ ipc_port_init_flags_t flags,
mach_port_name_t name,
ipc_port_t *portp)
{
ipc_port_t port;
kern_return_t kr;
+ mach_port_type_t type = MACH_PORT_TYPE_RECEIVE;
+ mach_port_urefs_t urefs = 0;
#if MACH_ASSERT
uintptr_t buf[IP_CALLSTACK_MAX];
ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX);
#endif /* MACH_ASSERT */
- kr = ipc_object_alloc_name(space, IOT_PORT,
- MACH_PORT_TYPE_RECEIVE, 0,
+ if (flags & IPC_PORT_INIT_MAKE_SEND_RIGHT) {
+ type |= MACH_PORT_TYPE_SEND;
+ urefs = 1;
+ }
+ kr = ipc_object_alloc_name(space, IOT_PORT, type, urefs,
name, (ipc_object_t *) &port);
if (kr != KERN_SUCCESS) {
return kr;
/* port is locked */
- ipc_port_init(port, space, name);
+ ipc_port_init(port, space, flags, name);
#if MACH_ASSERT
ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
if (special_reply) {
ipc_port_adjust_special_reply_port(port,
- IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE, FALSE);
+ IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE);
}
if (watchport_elem) {
/* unlink the kmsg from special reply port */
if (special_reply) {
ipc_port_adjust_special_reply_port(port,
- IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE, FALSE);
+ IPC_PORT_ADJUST_SR_ALLOW_SYNC_LINKAGE);
}
/* throw away no-senders request */
#endif /* !IMPORTANCE_INHERITANCE */
}
+/*
+ * Routine: ipc_port_watchport_elem
+ * Purpose:
+ * Get the port's watchport elem field
+ *
+ * Conditions:
+ * mqueue locked
+ */
+static struct task_watchport_elem *
+ipc_port_watchport_elem(ipc_port_t port)
+{
+ return port->ip_messages.imq_wait_queue.waitq_tspriv;
+}
+
+/*
+ * Routine: ipc_port_update_watchport_elem
+ * Purpose:
+ * Set the port's watchport elem field
+ *
+ * Conditions:
+ * mqueue locked
+ */
+static inline struct task_watchport_elem *
+ipc_port_update_watchport_elem(ipc_port_t port, struct task_watchport_elem *we)
+{
+ struct task_watchport_elem *old_we = ipc_port_watchport_elem(port);
+ port->ip_messages.imq_wait_queue.waitq_tspriv = we;
+ return old_we;
+}
+
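/*
 * With ip_watchport_elem removed from struct ipc_port, the watchport
 * element now lives in the mqueue's waitq_tspriv slot (part of the new
 * WQT_TSPROXY union in waitq.h further down), and these two accessors are
 * the only places that touch it, keeping the "mqueue locked" requirement
 * in one spot.
 */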
/*
* Update the recv turnstile inheritor for a port.
*
port->ip_destination != NULL) {
/* Case 2. */
inheritor = port_send_turnstile(port->ip_destination);
- } else if (port->ip_watchport_elem != NULL) {
+ } else if (ipc_port_watchport_elem(port) != NULL) {
/* Case 3. */
if (prioritize_launch) {
assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
static struct turnstile *
ipc_port_rcv_turnstile(ipc_port_t port)
{
- return turnstile_lookup_by_proprietor((uintptr_t)port, TURNSTILE_SYNC_IPC);
-}
-
-
-/*
- * Routine: ipc_port_rcv_turnstile_waitq
- * Purpose:
- * Given the mqueue's waitq, find the port's
- * rcv turnstile and return its waitq.
- *
- * Conditions:
- * mqueue locked or thread waiting on turnstile is locked.
- */
-struct waitq *
-ipc_port_rcv_turnstile_waitq(struct waitq *waitq)
-{
- struct waitq *safeq;
-
- ipc_mqueue_t mqueue = imq_from_waitq(waitq);
- ipc_port_t port = ip_from_mq(mqueue);
- struct turnstile *rcv_turnstile = ipc_port_rcv_turnstile(port);
-
- /* Check if the port has a rcv turnstile */
- if (rcv_turnstile != TURNSTILE_NULL) {
- safeq = &rcv_turnstile->ts_waitq;
- } else {
- safeq = global_eventq(waitq);
- }
- return safeq;
+ return *port_rcv_turnstile_address(port);
}
turnstile_inheritor_t inheritor = TURNSTILE_INHERITOR_NULL;
struct turnstile *ts = TURNSTILE_NULL;
- assert(special_reply_port->ip_specialreply);
-
ip_lock_held(special_reply_port); // ip_sync_link_state is touched
imq_lock(&special_reply_port->ip_messages);
+ if (!special_reply_port->ip_specialreply) {
+ // only mach_msg_receive_results_complete() calls this with any port
+ assert(get_turnstile);
+ goto not_special;
+ }
+
if (flags & IPC_PORT_ADJUST_SR_RECEIVED_MSG) {
ipc_special_reply_port_msg_sent_reset(special_reply_port);
}
/* Check if the special reply port is marked non-special */
if (special_reply_port->ip_sync_link_state == PORT_SYNC_LINK_ANY) {
+not_special:
if (get_turnstile) {
turnstile_complete((uintptr_t)special_reply_port,
port_rcv_turnstile_address(special_reply_port), NULL, TURNSTILE_SYNC_IPC);
*/
void
ipc_port_adjust_special_reply_port(
- ipc_port_t special_reply_port,
- uint8_t flags,
- boolean_t get_turnstile)
+ ipc_port_t port,
+ uint8_t flags)
{
- if (special_reply_port->ip_specialreply) {
- ip_lock(special_reply_port);
- ipc_port_adjust_special_reply_port_locked(special_reply_port, NULL,
- flags, get_turnstile);
- /* special_reply_port unlocked */
- }
- if (get_turnstile) {
- assert(current_thread()->turnstile != TURNSTILE_NULL);
+ if (port->ip_specialreply) {
+ ip_lock(port);
+ ipc_port_adjust_special_reply_port_locked(port, NULL, flags, FALSE);
}
}
ipc_port_adjust_sync_link_state_locked(port, PORT_SYNC_LINK_ANY, NULL);
}
- *old_elem = port->ip_watchport_elem;
- port->ip_watchport_elem = watchport_elem;
+ *old_elem = ipc_port_update_watchport_elem(port, watchport_elem);
ipc_port_send_turnstile_recompute_push_locked(port);
/* port and mqueue unlocked */
ip_lock_held(port);
imq_held(&port->ip_messages);
- if (port->ip_watchport_elem != watchport_elem) {
+ if (ipc_port_watchport_elem(port) != watchport_elem) {
imq_unlock(&port->ip_messages);
ip_unlock(port);
return KERN_FAILURE;
ip_lock_held(port);
imq_held(&port->ip_messages);
- if (port->ip_watchport_elem != old_watchport_elem) {
+ if (ipc_port_watchport_elem(port) != old_watchport_elem) {
imq_unlock(&port->ip_messages);
ip_unlock(port);
return KERN_FAILURE;
}
- port->ip_watchport_elem = new_watchport_elem;
+ ipc_port_update_watchport_elem(port, new_watchport_elem);
ipc_port_send_turnstile_recompute_push_locked(port);
/* port and mqueue unlocked */
return KERN_SUCCESS;
ipc_port_clear_watchport_elem_internal(
ipc_port_t port)
{
- struct task_watchport_elem *watchport_elem;
-
ip_lock_held(port);
imq_held(&port->ip_messages);
- watchport_elem = port->ip_watchport_elem;
- port->ip_watchport_elem = NULL;
-
- return watchport_elem;
+ return ipc_port_update_watchport_elem(port, NULL);
}
/*
ipc_port_t port)
{
imq_held(&port->ip_messages);
- return port->ip_watchport_elem->twe_task->watchports->tw_thread;
+ return ipc_port_watchport_elem(port)->twe_task->watchports->tw_thread;
}
/*
return;
}
- ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN, FALSE);
+ ipc_port_adjust_special_reply_port(port, IPC_PORT_ADJUST_RESET_BOOSTRAP_CHECKIN);
ip_lock(port);
ipc_port_t
ipc_port_alloc_special(
- ipc_space_t space)
+ ipc_space_t space,
+ ipc_port_init_flags_t flags)
{
ipc_port_t port;
port->ip_references = 1;
port->ip_object.io_bits = io_makebits(TRUE, IOT_PORT, 0);
- ipc_port_init(port, space, 1);
+ ipc_port_init(port, space, flags, 1);
#if MACH_ASSERT
ipc_port_init_debug(port, &buf[0], IP_CALLSTACK_MAX);
ipc_port_request_t requests = port->ip_requests;
assert(port_send_turnstile(port) == TURNSTILE_NULL);
- assert(ipc_port_rcv_turnstile(port) == TURNSTILE_NULL);
+ if (imq_is_turnstile_proxy(&port->ip_messages)) {
+ assert(ipc_port_rcv_turnstile(port) == TURNSTILE_NULL);
+ }
if (ip_active(port)) {
panic("Trying to free an active port. port %p", port);
* taken when the port was destroyed.
*/
+struct task_watchport_elem;
+
typedef unsigned int ipc_port_timestamp_t;
struct ipc_port {
struct turnstile *send_turnstile;
} kdata2;
- struct task_watchport_elem *ip_watchport_elem;
-
mach_vm_address_t ip_context;
natural_t ip_sprequests:1, /* send-possible requests outstanding */
(IP_PREALLOC(port) ? &((port)->ip_premsg->ikm_turnstile) : &((port)->ip_send_turnstile))
#define port_rcv_turnstile_address(port) \
- (NULL)
+ &(port)->ip_messages.imq_wait_queue.waitq_ts
/*
ipc_port_t port,
boolean_t should_destroy);
+__options_decl(ipc_port_init_flags_t, uint32_t, {
+ IPC_PORT_INIT_NONE = 0x00000000,
+ IPC_PORT_INIT_MAKE_SEND_RIGHT = 0x00000001,
+ IPC_PORT_INIT_MESSAGE_QUEUE = 0x00000002,
+ IPC_PORT_INIT_SPECIAL_REPLY = 0x00000004,
+});
+
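/*
 * Callers compose these flags; e.g. the MPO_INSERT_SEND_RIGHT handling
 * below maps onto IPC_PORT_INIT_MESSAGE_QUEUE | IPC_PORT_INIT_MAKE_SEND_RIGHT,
 * and the thread special-reply port allocation additionally passes
 * IPC_PORT_INIT_SPECIAL_REPLY.
 */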
/* Initialize a newly-allocated port */
extern void ipc_port_init(
ipc_port_t port,
ipc_space_t space,
+ ipc_port_init_flags_t flags,
mach_port_name_t name);
/* Allocate a port */
extern kern_return_t ipc_port_alloc(
ipc_space_t space,
- bool make_send_right,
+ ipc_port_init_flags_t flags,
mach_port_name_t *namep,
ipc_port_t *portp);
/* Allocate a port, with a specific name */
extern kern_return_t ipc_port_alloc_name(
ipc_space_t space,
+ ipc_port_init_flags_t flags,
mach_port_name_t name,
ipc_port_t *portp);
void
ipc_port_adjust_special_reply_port(
ipc_port_t special_reply_port,
- uint8_t flags,
- boolean_t get_turnstile);
+ uint8_t flags);
void
ipc_port_adjust_port_locked(
/* Allocate a port in a special space */
extern ipc_port_t ipc_port_alloc_special(
- ipc_space_t space);
+ ipc_space_t space,
+ ipc_port_init_flags_t flags);
/* Deallocate a port in a special space */
extern void ipc_port_dealloc_special(
turnstile_update_flags_t flags);
#define ipc_port_alloc_kernel() \
- ipc_port_alloc_special(ipc_space_kernel)
+ ipc_port_alloc_special(ipc_space_kernel, IPC_PORT_INIT_NONE)
#define ipc_port_dealloc_kernel(port) \
ipc_port_dealloc_special((port), ipc_space_kernel)
#define ipc_port_alloc_reply() \
- ipc_port_alloc_special(ipc_space_reply)
+ ipc_port_alloc_special(ipc_space_reply, IPC_PORT_INIT_MESSAGE_QUEUE)
#define ipc_port_dealloc_reply(port) \
ipc_port_dealloc_special((port), ipc_space_reply)
}
/* pset and space are locked */
- ipc_mqueue_init(&pset->ips_messages, TRUE /* set */);
+ ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
is_write_unlock(space);
*namep = name;
}
/* pset is locked */
- ipc_mqueue_init(&pset->ips_messages, TRUE /* set */);
+ ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
*psetp = pset;
return KERN_SUCCESS;
pset->ips_references = 1;
pset->ips_object.io_bits = io_makebits(TRUE, IOT_PORT_SET, 0);
- ipc_mqueue_init(&pset->ips_messages, TRUE /* set */);
+ ipc_mqueue_init(&pset->ips_messages, IPC_MQUEUE_KIND_SET);
return pset;
}
{
thread_t self = current_thread();
ipc_port_t port = IPC_PORT_NULL;
- boolean_t get_turnstile = self->turnstile ? FALSE : TRUE;
+ boolean_t get_turnstile = (self->turnstile == TURNSTILE_NULL);
if (io_otype(object) == IOT_PORT) {
port = ip_object_to_port(object);
flags |= IPC_PORT_ADJUST_SR_RECEIVED_MSG;
}
- ipc_port_adjust_special_reply_port(port,
- flags, get_turnstile);
+ if (port->ip_specialreply || get_turnstile) {
+ ip_lock(port);
+ ipc_port_adjust_special_reply_port_locked(port, NULL,
+ flags, get_turnstile);
+ }
+ assert(self->turnstile != TURNSTILE_NULL);
/* thread now has a turnstile */
}
}
if (qosp->name) {
- kr = ipc_port_alloc_name(space, *namep, &port);
+ kr = ipc_port_alloc_name(space, IPC_PORT_INIT_MESSAGE_QUEUE,
+ *namep, &port);
} else {
- kr = ipc_port_alloc(space, FALSE, namep, &port);
+ kr = ipc_port_alloc(space, IPC_PORT_INIT_MESSAGE_QUEUE,
+ namep, &port);
}
if (kr == KERN_SUCCESS) {
if (kmsg != IKM_NULL) {
{
kern_return_t kr;
ipc_port_t port;
+ ipc_port_init_flags_t init_flags = IPC_PORT_INIT_MESSAGE_QUEUE;
if (space == IS_NULL) {
return KERN_INVALID_TASK;
}
+ if (options->flags & MPO_INSERT_SEND_RIGHT) {
+ init_flags |= IPC_PORT_INIT_MAKE_SEND_RIGHT;
+ }
+
/* Allocate a new port in the IPC space */
- kr = ipc_port_alloc(space, (options->flags & MPO_INSERT_SEND_RIGHT),
- name, &port);
+ kr = ipc_port_alloc(space, init_flags, name, &port);
if (kr != KERN_SUCCESS) {
return kr;
}
assert(ml_get_interrupts_enabled() == TRUE);
if (!ml_get_interrupts_enabled()) {
- return EINVAL;
+ goto out;
}
union {
if (thread != current_thread()) {
map = get_task_map_reference(get_threadtask(thread));
if (map == NULL) {
- return EINVAL;
+ goto out;
}
old_map = vm_map_switch(map);
} else {
#define KF_STACKSHOT_OVRD (0x10)
#define KF_COMPRSV_OVRD (0x20)
#define KF_INTERRUPT_MASKED_DEBUG_OVRD (0x40)
+#define KF_TRAPTRACE_OVRD (0x80)
+#define KF_IOTRACE_OVRD (0x100)
boolean_t kern_feature_override(uint32_t fmask);
host_lock(host_priv);
old_port = host_priv->special[id];
+ if ((id == HOST_AMFID_PORT) && (task_pid(current_task()) != 1)) {
+ host_unlock(host_priv);
+ return KERN_NO_ACCESS;
+ }
host_priv->special[id] = port;
host_unlock(host_priv);
mach_port_name_t name;
kern_return_t kr;
- kr = ipc_port_alloc(current_task()->itk_space, FALSE, &name, &port);
+ kr = ipc_port_alloc(current_task()->itk_space, IPC_PORT_INIT_MESSAGE_QUEUE,
+ &name, &port);
if (kr == KERN_SUCCESS) {
ip_unlock(port);
} else {
mach_port_name_t name;
kern_return_t kr;
thread_t thread = current_thread();
+ ipc_port_init_flags_t flags = IPC_PORT_INIT_MESSAGE_QUEUE |
+ IPC_PORT_INIT_MAKE_SEND_RIGHT | IPC_PORT_INIT_SPECIAL_REPLY;
/* unbind the thread special reply port */
if (IP_VALID(thread->ith_special_reply_port)) {
}
}
- kr = ipc_port_alloc(current_task()->itk_space, TRUE, &name, &port);
+ kr = ipc_port_alloc(current_task()->itk_space, flags, &name, &port);
if (kr == KERN_SUCCESS) {
ipc_port_bind_special_reply_port_locked(port);
ip_unlock(port);
{
thread_t thread = current_thread();
assert(thread->ith_special_reply_port == NULL);
+ assert(port->ip_specialreply);
+ assert(port->ip_sync_link_state == PORT_SYNC_LINK_ANY);
ip_reference(port);
thread->ith_special_reply_port = port;
- port->ip_specialreply = 1;
- port->ip_sync_link_state = PORT_SYNC_LINK_ANY;
port->ip_messages.imq_srp_owner_thread = thread;
ipc_special_reply_port_bits_reset(port);
return KERN_SUCCESS;
}
+extern zone_t task_zone;
+
kern_return_t
task_conversion_eval(task_t caller, task_t victim)
{
return KERN_INVALID_SECURITY;
}
+ zone_require(victim, task_zone);
+
#if CONFIG_EMBEDDED
/*
* On embedded platforms, only a platform binary can resolve the task port
* 6144 N N N
* 8192 Y N N
* 12288 N X X
- * 16384 N N N
- * 32768 X N N
+ * 16384 N X N
+ * 32768 X X N
*
*/
static const struct kalloc_zone_config {
KZC_ENTRY(4096, false),
KZC_ENTRY(6144, false),
KZC_ENTRY(8192, false),
+ /* To limit internal fragmentation, only add the following zones if the
+ * page size is greater than 4K.
+ * Note that we use ARM_PGBYTES here (instead of one of the VM macros)
+ * since it's guaranteed to be a compile time constant.
+ */
+#if ARM_PGBYTES > 4096
KZC_ENTRY(16384, false),
KZC_ENTRY(32768, false),
+#endif /* ARM_PGBYTES > 4096 */
#else
#error missing or invalid zone size parameters for kalloc
/* and schedule an AST for this thread... */
if (!current_thread()->kpc_buf) {
- current_thread()->kperf_flags |= T_KPC_ALLOC;
+ current_thread()->kperf_ast |= T_KPC_ALLOC;
act_set_kperf(current_thread());
}
}
/* schedule any necessary allocations */
if (!current_thread()->kpc_buf) {
- current_thread()->kperf_flags |= T_KPC_ALLOC;
+ current_thread()->kperf_ast |= T_KPC_ALLOC;
act_set_kperf(current_thread());
}
kpc_counterbuf_free(buf);
}
-/* ast callback on a thread */
void
-kpc_thread_ast_handler( thread_t thread )
+kpc_thread_ast_handler(thread_t thread)
{
- /* see if we want an alloc */
- if (thread->kperf_flags & T_KPC_ALLOC) {
+ if (thread->kperf_ast & T_KPC_ALLOC) {
thread->kpc_buf = kpc_counterbuf_alloc();
}
}
le->le_credit = 0;
le->le_debit = 0;
le->le_limit = LEDGER_LIMIT_INFINITY;
- le->le_warn_level = LEDGER_LIMIT_INFINITY;
+ le->le_warn_percent = LEDGER_PERCENT_NONE;
le->_le.le_refill.le_refill_period = 0;
le->_le.le_refill.le_last_refill = 0;
}
* use positive limits.
*/
balance = le->le_credit - le->le_debit;
- if ((le->le_warn_level != LEDGER_LIMIT_INFINITY) && (balance > le->le_warn_level)) {
+ if (le->le_warn_percent != LEDGER_PERCENT_NONE &&
+ ((balance > (le->le_limit * le->le_warn_percent) >> 16))) {
return 1;
}
return 0;
assert(warn_level_percentage <= 100);
assert(limit > 0); /* no negative limit support for warnings */
assert(limit != LEDGER_LIMIT_INFINITY); /* warn % without limit makes no sense */
- le->le_warn_level = (le->le_limit * warn_level_percentage) / 100;
+ le->le_warn_percent = warn_level_percentage * (1u << 16) / 100;
} else {
- le->le_warn_level = LEDGER_LIMIT_INFINITY;
+ le->le_warn_percent = LEDGER_PERCENT_NONE;
}
return KERN_SUCCESS;
}
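/*
 * Worked example of the 16.16 fixed-point warning encoding above, with
 * warn_level_percentage = 80 and a limit of 100 MB (104857600 bytes):
 *
 *	le_warn_percent = 80 * 65536 / 100          = 52428  (~0.7999)
 *	threshold       = (104857600 * 52428) >> 16 = 83884800 bytes
 *
 * i.e. the warning fires just under 80 MB.  Because the check multiplies
 * by the *current* le_limit, the threshold now tracks later changes to the
 * limit instead of being frozen at the value le_limit had when the warning
 * was configured.
 */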
/*
- * le_warn_level is used to indicate *if* this ledger has a warning configured,
+ * le_warn_percent is used to indicate *if* this ledger has a warning configured,
* in addition to what that warning level is set to.
* This means a side-effect of ledger_disable_callback() is that the
* warning level is forgotten.
*/
- ledger->l_entries[entry].le_warn_level = LEDGER_LIMIT_INFINITY;
+ ledger->l_entries[entry].le_warn_percent = LEDGER_PERCENT_NONE;
flag_clear(&ledger->l_entries[entry].le_flags, LEDGER_ACTION_CALLBACK);
return KERN_SUCCESS;
}
*/
struct ledger_entry {
volatile uint32_t le_flags;
+#define LEDGER_PERCENT_NONE UINT16_MAX
+ uint16_t le_warn_percent;
ledger_amount_t le_limit;
- ledger_amount_t le_warn_level;
volatile ledger_amount_t le_credit __attribute__((aligned(8)));
volatile ledger_amount_t le_debit __attribute__((aligned(8)));
union {
proxy_space->is_node_id = nid;
/* Create the bootstrap proxy port for this remote node */
- bs_port = ipc_port_alloc_special(proxy_space);
+ bs_port = ipc_port_alloc_special(proxy_space, IPC_PORT_INIT_MESSAGE_QUEUE);
if (bs_port == MACH_PORT_NULL) {
kr = KERN_RESOURCE_SHORTAGE;
goto out;
}
/* Create the control (ack) port for this remote node */
- ack_port = ipc_port_alloc_special(proxy_space);
+ ack_port = ipc_port_alloc_special(proxy_space, IPC_PORT_INIT_MESSAGE_QUEUE);
if (ack_port == MACH_PORT_NULL) {
kr = KERN_RESOURCE_SHORTAGE;
goto out;
return '0';
}
#endif /* DEVELOPMENT || DEBUG */
+
+
+__private_extern__ void
+thread_bind_cluster_type(char cluster_type)
+{
+ (void)cluster_type;
+}
#ifdef XNU_KERNEL_PRIVATE
+extern void thread_bind_cluster_type(char cluster_type);
+
/* Toggles a global override to turn off CPU Throttling */
extern void sys_override_cpu_throttle(boolean_t enable_override);
unsigned int new_nkdbufs = 0;
unsigned int wake_nkdbufs = 0;
unsigned int write_trace_on_panic = 0;
-static char trace_typefilter[64] = { 0 };
unsigned int trace_wrap = 0;
boolean_t trace_serial = FALSE;
boolean_t early_boot_complete = FALSE;
PE_parse_boot_argn("trace", &new_nkdbufs, sizeof(new_nkdbufs));
PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof(wake_nkdbufs));
PE_parse_boot_argn("trace_panic", &write_trace_on_panic, sizeof(write_trace_on_panic));
- PE_parse_boot_arg_str("trace_typefilter", trace_typefilter, sizeof(trace_typefilter));
PE_parse_boot_argn("trace_wrap", &trace_wrap, sizeof(trace_wrap));
scale_setup();
kernel_bootstrap_thread_log("ktrace_init");
ktrace_init();
+ char trace_typefilter[256] = {};
+ PE_parse_boot_arg_str("trace_typefilter", trace_typefilter,
+ sizeof(trace_typefilter));
kdebug_init(new_nkdbufs, trace_typefilter, trace_wrap);
#ifdef MACH_BSD
#if __arm64__
new_task->task_legacy_footprint = FALSE;
new_task->task_extra_footprint_limit = FALSE;
+ new_task->task_ios13extended_footprint_limit = FALSE;
#endif /* __arm64__ */
new_task->task_region_footprint = FALSE;
new_task->task_has_crossed_thread_limit = FALSE;
#if __arm64__
extern int legacy_footprint_entitlement_mode;
extern void memorystatus_act_on_legacy_footprint_entitlement(proc_t, boolean_t);
+extern void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t);
void
task_set_legacy_footprint(
return;
}
task_lock(task);
- if (!task->task_extra_footprint_limit) {
- memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
- task->task_extra_footprint_limit = TRUE;
+ if (task->task_extra_footprint_limit) {
+ task_unlock(task);
+ return;
+ }
+ task->task_extra_footprint_limit = TRUE;
+ task_unlock(task);
+ memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
+}
+
+void
+task_set_ios13extended_footprint_limit(
+ task_t task)
+{
+ if (task->task_ios13extended_footprint_limit) {
+ return;
+ }
+ task_lock(task);
+ if (task->task_ios13extended_footprint_limit) {
+ task_unlock(task);
+ return;
}
+ task->task_ios13extended_footprint_limit = TRUE;
task_unlock(task);
+ memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
}
#endif /* __arm64__ */
#if __arm64__
unsigned int task_legacy_footprint:1;
unsigned int task_extra_footprint_limit:1;
+ unsigned int task_ios13extended_footprint_limit:1;
#endif /* __arm64__ */
unsigned int task_region_footprint:1;
unsigned int task_has_crossed_thread_limit:1;
#if __arm64__
extern void task_set_legacy_footprint(task_t task);
extern void task_set_extra_footprint_limit(task_t task);
+extern void task_set_ios13extended_footprint_limit(task_t task);
#endif /* __arm64__ */
#if CONFIG_MACF
#endif /* CONFIG_DTRACE */
#if KPERF
- thread_template.kperf_flags = 0;
+ thread_template.kperf_ast = 0;
thread_template.kperf_pet_gen = 0;
thread_template.kperf_c_switch = 0;
thread_template.kperf_pet_cnt = 0;
struct priority_queue_entry wait_prioq_links; /* priority ordered waitq links */
};
- processor_t runq; /* run queue assignment */
-
event64_t wait_event; /* wait queue event */
+ processor_t runq; /* run queue assignment */
struct waitq *waitq; /* wait queue this thread is enqueued on */
struct turnstile *turnstile; /* thread's turnstile, protected by primitives interlock */
void *inheritor; /* inheritor of the primitive the thread will block on */
decl_simple_lock_data(, sched_lock); /* scheduling lock (thread_lock()) */
decl_simple_lock_data(, wake_lock); /* for thread stop / wait (wake_lock()) */
#endif
- integer_t options; /* options set by thread itself */
+ uint16_t options; /* options set by thread itself */
#define TH_OPT_INTMASK 0x0003 /* interrupt / abort level */
#define TH_OPT_VMPRIV 0x0004 /* may allocate reserved memory */
#define TH_OPT_SYSTEM_CRITICAL 0x0010 /* Thread must always be allowed to run - even under heavy load */
#define TH_OPT_SEND_IMPORTANCE 0x0800 /* Thread will allow importance donation from kernel rpc */
#define TH_OPT_ZONE_GC 0x1000 /* zone_gc() called on this thread */
- boolean_t wake_active; /* wake event on stop */
- int at_safe_point; /* thread_abort_safely allowed */
+ bool wake_active; /* wake event on stop */
+ bool at_safe_point; /* thread_abort_safely allowed */
ast_t reason; /* why we blocked */
uint32_t quantum_remaining;
wait_result_t wait_result; /* outcome of wait -
uint64_t safe_release; /* when to release fail-safe */
/* Call out from scheduler */
- void (*sched_call)(
- int type,
- thread_t thread);
+ void (*sched_call)(int type, thread_t thread);
+
#if defined(CONFIG_SCHED_PROTO)
uint32_t runqueue_generation; /* last time runqueue was drained */
#endif
uint64_t wait_sfi_begin_time; /* start time for thread waiting in SFI */
#endif
- /* Timed wait expiration */
- timer_call_data_t wait_timer;
- integer_t wait_timer_active;
- boolean_t wait_timer_is_set;
-
-
/*
* Processor/cache affinity
* - affinity_threads links task threads with the same affinity set
*/
- affinity_set_t affinity_set;
queue_chain_t affinity_threads;
+ affinity_set_t affinity_set;
+
+#if CONFIG_EMBEDDED
+ task_watch_t * taskwatch; /* task watch */
+#endif /* CONFIG_EMBEDDED */
/* Various bits of state to stash across a continuation, exclusive to the current thread block point */
union {
mach_msg_return_t state; /* receive state */
mach_port_seqno_t seqno; /* seqno of recvd message */
ipc_object_t object; /* object received on */
- mach_vm_address_t msg_addr; /* receive buffer pointer */
+ vm_address_t msg_addr; /* receive buffer pointer */
mach_msg_size_t rsize; /* max size for recvd msg */
mach_msg_size_t msize; /* actual size for recvd msg */
mach_msg_option_t option; /* options for receive */
struct ipc_kmsg_queue ith_messages; /* messages to reap */
mach_port_t ith_rpc_reply; /* reply port for kernel RPCs */
+ /* Pending thread ast(s) */
+ ast_t ast;
+
/* Ast/Halt data structures */
- vm_offset_t recover; /* page fault recover(copyin/out) */
+ vm_offset_t recover; /* page fault recover(copyin/out) */
queue_chain_t threads; /* global list of all threads */
/* Activation */
- queue_chain_t task_threads;
+ queue_chain_t task_threads;
/* Task membership */
struct task *task;
vm_map_t map;
#if DEVELOPMENT || DEBUG
- boolean_t pmap_footprint_suspended;
+ bool pmap_footprint_suspended;
#endif /* DEVELOPMENT || DEBUG */
- decl_lck_mtx_data(, mutex);
-
-
- /* Pending thread ast(s) */
- ast_t ast;
+ /* Timed wait expiration */
+ timer_call_data_t wait_timer;
+ uint16_t wait_timer_active;
+ bool wait_timer_is_set;
/* Miscellaneous bits guarded by mutex */
uint32_t
corpse_dup:1, /* TRUE when thread is an inactive duplicate in a corpse */
:0;
+ decl_lck_mtx_data(, mutex);
+
/* Ports associated with this thread */
struct ipc_port *ith_self; /* not a right, doesn't hold ref */
struct ipc_port *ith_sself; /* a send right */
#define T_KPERF_CALLSTACK_DEPTH_OFFSET (24)
#define T_KPERF_SET_CALLSTACK_DEPTH(DEPTH) (((uint32_t)(DEPTH)) << T_KPERF_CALLSTACK_DEPTH_OFFSET)
#define T_KPERF_GET_CALLSTACK_DEPTH(FLAGS) ((FLAGS) >> T_KPERF_CALLSTACK_DEPTH_OFFSET)
+#define T_KPERF_ACTIONID_OFFSET (18)
+#define T_KPERF_SET_ACTIONID(AID) (((uint32_t)(AID)) << T_KPERF_ACTIONID_OFFSET)
+#define T_KPERF_GET_ACTIONID(FLAGS) ((FLAGS) >> T_KPERF_ACTIONID_OFFSET)
#endif
-#define T_KPERF_AST_CALLSTACK (1U << 0) /* dump a callstack on thread's next AST */
-#define T_KPERF_AST_DISPATCH (1U << 1) /* dump a name on thread's next AST */
-#define T_KPC_ALLOC (1U << 2) /* thread needs a kpc_buf allocated */
-/* only go up to T_KPERF_CALLSTACK_DEPTH_OFFSET - 1 */
+#define T_KPERF_AST_CALLSTACK 0x1 /* dump a callstack on thread's next AST */
+#define T_KPERF_AST_DISPATCH 0x2 /* dump a name on thread's next AST */
+#define T_KPC_ALLOC 0x4 /* thread needs a kpc_buf allocated */
+
+#define T_KPERF_AST_ALL \
+ (T_KPERF_AST_CALLSTACK | T_KPERF_AST_DISPATCH | T_KPC_ALLOC)
+/* only go up to T_KPERF_ACTIONID_OFFSET - 1 */
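/*
 * Resulting layout of the 32-bit kperf_ast word (example values only):
 *
 *	bits  0-17	AST flags (0x1 callstack, 0x2 dispatch, 0x4 kpc alloc)
 *	bits 18-23	action id:        T_KPERF_SET_ACTIONID(3)        == 0x000c0000
 *	bits 24-31	callstack depth:  T_KPERF_SET_CALLSTACK_DEPTH(64) == 0x40000000
 *
 * so a thread pended for action 3 with a 64-frame user callstack and a
 * dispatch sample carries kperf_ast == 0x400c0003.
 */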
#ifdef KPERF
- uint32_t kperf_flags;
+ uint32_t kperf_ast;
uint32_t kperf_pet_gen; /* last generation of PET that sampled this thread*/
uint32_t kperf_c_switch; /* last dispatch detection */
uint32_t kperf_pet_cnt; /* how many times a thread has been sampled by PET */
void *hv_thread_target;
#endif /* HYPERVISOR */
- uint64_t thread_id; /*system wide unique thread-id*/
-
/* Statistics accumulated per-thread and aggregated per-task */
uint32_t syscalls_unix;
uint32_t syscalls_mach;
uint64_t t_deduct_bank_ledger_time; /* cpu time to be deducted from bank ledger */
uint64_t t_deduct_bank_ledger_energy; /* energy to be deducted from bank ledger */
+ uint64_t thread_id; /*system wide unique thread-id*/
+
#if MONOTONIC
struct mt_thread t_monotonic;
#endif /* MONOTONIC */
} *overrides;
uint32_t kevent_overrides;
- uint16_t user_promotion_basepri;
- uint16_t kern_promotion_schedpri;
+ uint8_t user_promotion_basepri;
+ uint8_t kern_promotion_schedpri;
_Atomic uint16_t kevent_ast_bits;
io_stat_info_t thread_io_stats; /* per-thread I/O statistics */
-#if CONFIG_EMBEDDED
- task_watch_t * taskwatch; /* task watch */
-#endif /* CONFIG_EMBEDDED */
-
uint32_t thread_callout_interrupt_wakeups;
uint32_t thread_callout_platform_idle_wakeups;
uint32_t thread_timer_wakeups_bin_1;
* Definitions for high resolution timers.
*/
+#if __LP64__
+#define TIMER_ALIGNMENT
+#else
+#define TIMER_ALIGNMENT __attribute__((packed, aligned(4)))
+#endif
+
struct timer {
uint64_t tstamp;
#if defined(__LP64__)
uint32_t high_bits;
uint32_t high_bits_check;
#endif /* !defined(__LP64__) */
-};
+} TIMER_ALIGNMENT;
typedef struct timer timer_data_t, *timer_t;
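/*
 * What TIMER_ALIGNMENT buys on !__LP64__ (illustrative, assuming the 32-bit
 * ARM ABI): a struct starting with a uint64_t normally inherits 8-byte
 * alignment and tail padding, e.g.
 *
 *	struct { uint64_t a; uint32_t b; };                  sizeof == 16, align 8
 *	struct { uint64_t a; uint32_t b; }
 *	    __attribute__((packed, aligned(4)));             sizeof == 12, align 4
 *
 * so the attribute presumably keeps the timers, and the structures that
 * embed them, from growing or shifting offsets on 32-bit kernels.
 */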
return turnstile;
}
- /* Get the safeq if the waitq is a port queue */
- if (waitq_is_port_queue(waitq)) {
- waitq = waitq_get_safeq(waitq);
+ if (waitq_is_turnstile_proxy(waitq)) {
+ return waitq->waitq_ts;
}
/* Check if the waitq is a turnstile queue */
}
/* Get the safeq if the waitq is a port queue */
- if (waitq_is_port_queue(waitq)) {
- waitq = waitq_get_safeq(waitq);
+ if (waitq_is_turnstile_proxy(waitq)) {
+ if (waitq->waitq_ts) {
+ return TSU_NO_PRI_CHANGE_NEEDED;
+ }
+ return TSU_NO_TURNSTILE;
}
/* Check if the waitq is a turnstile queue */
return waitq->waitq_irq;
}
-struct waitq *
-waitq_get_safeq(struct waitq *waitq)
+static inline bool
+waitq_empty(struct waitq *wq)
{
- struct waitq *safeq;
+ if (waitq_is_turnstile_queue(wq)) {
+ return priority_queue_empty(&wq->waitq_prio_queue);
+ } else if (waitq_is_turnstile_proxy(wq)) {
+ struct turnstile *ts = wq->waitq_ts;
+ return ts == TURNSTILE_NULL ||
+ priority_queue_empty(&ts->ts_waitq.waitq_prio_queue);
+ } else {
+ return queue_empty(&wq->waitq_queue);
+ }
+}
+static struct waitq *
+waitq_get_safeq(struct waitq *waitq)
+{
/* Check if it's a port waitq */
- if (waitq_is_port_queue(waitq)) {
- assert(!waitq_irq_safe(waitq));
- safeq = ipc_port_rcv_turnstile_waitq(waitq);
- } else {
- safeq = global_eventq(waitq);
+ if (waitq_is_turnstile_proxy(waitq)) {
+ struct turnstile *ts = waitq->waitq_ts;
+ return ts ? &ts->ts_waitq : NULL;
}
- return safeq;
+ return global_eventq(waitq);
}
static uint32_t
/* JMM - add flag to waitq to avoid global lookup if no waiters */
eventmask = _CAST_TO_EVENT_MASK(waitq);
safeq = waitq_get_safeq(waitq);
+ if (safeq == NULL) {
+ /*
+ * in the WQT_TSPROXY case, if there's no turnstile,
+ * there's no queue and no waiters, so we can move straight
+ * to the waitq set recursion
+ */
+ goto handle_waitq_set;
+ }
+
if (*nthreads == 0) {
spl = splsched();
}
return;
}
+handle_waitq_set:
/*
* wait queues that are not in any sets
* are the bottom of the recursion
kern_return_t kr;
spl_t s;
- s = splsched();
-
/* Find and lock the interrupts disabled queue the thread is actually on */
if (!waitq_irq_safe(waitq)) {
safeq = waitq_get_safeq(waitq);
+ if (safeq == NULL) {
+ /*
+ * in the WQT_TSPROXY case, if there's no turnstile,
+ * there's no queue and no waiters, so we can move straight
+ * to the waitq set recursion
+ */
+ goto handle_waitq_set;
+ }
+
+ s = splsched();
waitq_lock(safeq);
} else {
+ s = splsched();
safeq = waitq;
}
splx(s);
+handle_waitq_set:
if (!waitq->waitq_set_id) {
return KERN_NOT_WAITING;
}
*/
if (!waitq_irq_safe(waitq)) {
safeq = waitq_get_safeq(waitq);
+ if (__improbable(safeq == NULL)) {
+ panic("Trying to assert_wait on a turnstile proxy "
+ "that hasn't been donated one (waitq: %p)", waitq);
+ }
eventmask = _CAST_TO_EVENT_MASK(waitq);
waitq_lock(safeq);
} else {
/* Find the interrupts disabled queue thread is waiting on */
if (!waitq_irq_safe(waitq)) {
safeq = waitq_get_safeq(waitq);
+ if (__improbable(safeq == NULL)) {
+ panic("Trying to clear_wait on a turnstile proxy "
+ "that hasn't been donated one (waitq: %p)", waitq);
+ }
} else {
safeq = waitq;
}
waitq->waitq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0);
waitq->waitq_irq = !!(policy & SYNC_POLICY_DISABLE_IRQ);
waitq->waitq_prepost = 0;
- waitq->waitq_type = WQT_QUEUE;
- waitq->waitq_turnstile_or_port = !!(policy & SYNC_POLICY_TURNSTILE);
+ if (policy & SYNC_POLICY_TURNSTILE_PROXY) {
+ waitq->waitq_type = WQT_TSPROXY;
+ } else {
+ waitq->waitq_type = WQT_QUEUE;
+ }
+ waitq->waitq_turnstile = !!(policy & SYNC_POLICY_TURNSTILE);
waitq->waitq_eventmask = 0;
waitq->waitq_set_id = 0;
priority_queue_init(&waitq->waitq_prio_queue,
PRIORITY_QUEUE_BUILTIN_MAX_HEAP);
assert(waitq->waitq_fifo == 0);
+ } else if (policy & SYNC_POLICY_TURNSTILE_PROXY) {
+ waitq->waitq_ts = TURNSTILE_NULL;
+ waitq->waitq_tspriv = NULL;
} else {
queue_init(&waitq->waitq_queue);
}
{
spl_t s;
- if (!waitq || !waitq_is_queue(waitq)) {
+ assert(waitq);
+ if (!waitq_is_valid(waitq)) {
+ return;
+ }
+
+ if (!waitq_is_queue(waitq) && !waitq_is_turnstile_proxy(waitq)) {
return;
}
s = splsched();
}
waitq_lock(waitq);
- if (!waitq_valid(waitq)) {
- waitq_unlock(waitq);
- if (waitq_irq_safe(waitq)) {
- splx(s);
+
+ if (waitq_valid(waitq)) {
+ waitq->waitq_isvalid = 0;
+ if (!waitq_irq_safe(waitq)) {
+ waitq_unlink_all_unlock(waitq);
+ /* waitq unlocked and set links deallocated */
+ goto out;
}
- return;
}
- waitq->waitq_isvalid = 0;
-
- if (!waitq_irq_safe(waitq)) {
- waitq_unlink_all_unlock(waitq);
- /* waitq unlocked and set links deallocated */
- } else {
- waitq_unlock(waitq);
+ waitq_unlock(waitq);
+ if (waitq_irq_safe(waitq)) {
splx(s);
}
- assert(waitq_empty(waitq));
+out:
+#if MACH_ASSERT
+ if (waitq_is_turnstile_queue(waitq)) {
+ assert(priority_queue_empty(&waitq->waitq_prio_queue));
+ } else if (waitq_is_turnstile_proxy(waitq)) {
+ assert(waitq->waitq_ts == TURNSTILE_NULL);
+ } else {
+ assert(queue_empty(&waitq->waitq_queue));
+ }
+#else
+ (void)0;
+#endif // MACH_ASSERT
}
void
enum waitq_type {
WQT_INVALID = 0,
+ WQT_TSPROXY = 0x1,
WQT_QUEUE = 0x2,
WQT_SET = 0x3,
};
waitq_prepost:1, /* waitq supports prepost? */
waitq_irq:1, /* waitq requires interrupts disabled */
waitq_isvalid:1, /* waitq structure is valid */
- waitq_turnstile_or_port:1, /* waitq is embedded in a turnstile (if irq safe), or port (if not irq safe) */
+ waitq_turnstile:1, /* waitq is embedded in a turnstile */
waitq_eventmask:_EVENT_MASK_BITS;
/* the wait queue set (set-of-sets) to which this queue belongs */
#if __arm64__
uint64_t waitq_set_id;
uint64_t waitq_prepost_id;
union {
- queue_head_t waitq_queue; /* queue of elements */
- struct priority_queue waitq_prio_queue; /* priority ordered queue of elements */
+ queue_head_t waitq_queue; /* queue of elements */
+ struct priority_queue waitq_prio_queue; /* priority ordered queue of elements */
+ struct {
+ struct turnstile *waitq_ts; /* turnstile for WQT_TSPROXY */
+ void *waitq_tspriv; /* private field for clients use */
+ };
};
};
#define waitq_is_queue(wq) \
((wq)->waitq_type == WQT_QUEUE)
-#define waitq_is_turnstile_queue(wq) \
- (((wq)->waitq_irq) && (wq)->waitq_turnstile_or_port)
+#define waitq_is_turnstile_proxy(wq) \
+ ((wq)->waitq_type == WQT_TSPROXY)
-#define waitq_is_port_queue(wq) \
- (!((wq)->waitq_irq) && (wq)->waitq_turnstile_or_port)
+#define waitq_is_turnstile_queue(wq) \
+ (((wq)->waitq_irq) && (wq)->waitq_turnstile)
#define waitq_is_set(wq) \
((wq)->waitq_type == WQT_SET && ((struct waitq_set *)(wq))->wqset_id != 0)
*/
extern void waitq_invalidate_locked(struct waitq *wq);
-static inline boolean_t
-waitq_empty(struct waitq *wq)
-{
- if (waitq_is_turnstile_queue(wq)) {
- return priority_queue_empty(&(wq->waitq_prio_queue));
- } else {
- return queue_empty(&(wq->waitq_queue));
- }
-}
-
extern lck_grp_t waitq_lck_grp;
#if __arm64__
extern int waitq_irq_safe(struct waitq *waitq);
-extern struct waitq * waitq_get_safeq(struct waitq *waitq);
-
#if CONFIG_WAITQ_STATS
/*
* waitq statistics
(uintptr_t)VM_PAGE_COMPRESSOR_COUNT);
}
+static void
+kperf_sample_user_internal(struct kperf_usample *sbuf,
+ struct kperf_context *context, unsigned int actionid,
+ unsigned int sample_what)
+{
+ if (sample_what & SAMPLER_USTACK) {
+ kperf_ucallstack_sample(&sbuf->ucallstack, context);
+ }
+ if (sample_what & SAMPLER_TH_DISPATCH) {
+ kperf_thread_dispatch_sample(&sbuf->th_dispatch, context);
+ }
+ if (sample_what & SAMPLER_TH_INFO) {
+ kperf_thread_info_sample(&sbuf->th_info, context);
+ }
+
+ boolean_t intren = ml_set_interrupts_enabled(FALSE);
+
+ /*
+ * No userdata or sample_flags for this one.
+ */
+ BUF_DATA(PERF_GEN_EVENT | DBG_FUNC_START, sample_what, actionid);
+
+ if (sample_what & SAMPLER_USTACK) {
+ kperf_ucallstack_log(&sbuf->ucallstack);
+ }
+ if (sample_what & SAMPLER_TH_DISPATCH) {
+ kperf_thread_dispatch_log(&sbuf->th_dispatch);
+ }
+ if (sample_what & SAMPLER_TH_INFO) {
+ kperf_thread_info_log(&sbuf->th_info);
+ }
+
+ BUF_DATA(PERF_GEN_EVENT | DBG_FUNC_END, sample_what);
+
+ ml_set_interrupts_enabled(intren);
+}
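/*
 * Note the ordering above: the ustack/dispatch/info samplers run before
 * interrupts are disabled, so they are free to fault; only the trace
 * emission sits in the interrupts-off window.  With the inline
 * SAMPLER_USTACK / SAMPLER_TH_DISPATCH paths removed from
 * kperf_sample_internal() below, user-space data is now always gathered
 * here, either via kperf_sample_user() (e.g. from PET) or from the thread
 * AST handler.
 */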
+
+void
+kperf_sample_user(struct kperf_usample *sbuf, struct kperf_context *context,
+ unsigned int actionid, unsigned int sample_flags)
+{
+ if (actionid == 0 || actionid > actionc) {
+ return;
+ }
+
+ unsigned int sample_what = actionv[actionid - 1].sample;
+ unsigned int ucallstack_depth = actionv[actionid - 1].ucallstack_depth;
+
+	/* callstacks can be explicitly suppressed by the caller */
+ if (sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) {
+ sample_what &= ~(SAMPLER_KSTACK | SAMPLER_USTACK);
+ }
+ if (sample_flags & SAMPLE_FLAG_ONLY_SYSTEM) {
+ sample_what &= SAMPLER_SYS_MEM;
+ }
+ assert((sample_flags & (SAMPLE_FLAG_THREAD_ONLY | SAMPLE_FLAG_TASK_ONLY))
+ != (SAMPLE_FLAG_THREAD_ONLY | SAMPLE_FLAG_TASK_ONLY));
+ if (sample_flags & SAMPLE_FLAG_THREAD_ONLY) {
+ sample_what &= SAMPLER_THREAD_MASK;
+ }
+ if (sample_flags & SAMPLE_FLAG_TASK_ONLY) {
+ sample_what &= SAMPLER_TASK_MASK;
+ }
+
+ if (sample_what == 0) {
+ return;
+ }
+
+ sbuf->ucallstack.kpuc_nframes = ucallstack_depth ?:
+ MAX_UCALLSTACK_FRAMES;
+
+ kperf_sample_user_internal(sbuf, context, actionid, sample_what);
+}
+
static kern_return_t
kperf_sample_internal(struct kperf_sample *sbuf,
struct kperf_context *context,
uint32_t userdata = actionid;
bool task_only = false;
- /* not much point continuing here, but what to do ? return
- * Shutdown? cut a tracepoint and continue?
- */
if (sample_what == 0) {
return SAMPLE_CONTINUE;
}
sbuf->kcallstack.kpkc_nframes = MAX_KCALLSTACK_FRAMES;
}
- if (ucallstack_depth) {
- sbuf->ucallstack.kpuc_nframes = ucallstack_depth;
- } else {
- sbuf->ucallstack.kpuc_nframes = MAX_UCALLSTACK_FRAMES;
- }
-
+ ucallstack_depth = ucallstack_depth ?: MAX_UCALLSTACK_FRAMES;
sbuf->kcallstack.kpkc_flags = 0;
- sbuf->ucallstack.kpuc_flags = 0;
+ sbuf->usample.ucallstack.kpuc_flags = 0;
if (sample_what & SAMPLER_TH_INFO) {
kperf_thread_info_sample(&sbuf->th_info, context);
if (sample_what & SAMPLER_KSTACK) {
if (sample_flags & SAMPLE_FLAG_CONTINUATION) {
kperf_continuation_sample(&(sbuf->kcallstack), context);
- /* outside of interrupt context, backtrace the current thread */
} else if (sample_flags & SAMPLE_FLAG_NON_INTERRUPT) {
+ /* outside of interrupt context, backtrace the current thread */
kperf_backtrace_sample(&(sbuf->kcallstack), context);
} else {
kperf_kcallstack_sample(&(sbuf->kcallstack), context);
kperf_task_snapshot_sample(context->cur_task, &(sbuf->tk_snapshot));
}
- /* sensitive ones */
if (!is_kernel) {
if (sample_what & SAMPLER_MEMINFO) {
kperf_meminfo_sample(context->cur_task, &(sbuf->meminfo));
if (sample_flags & SAMPLE_FLAG_PEND_USER) {
if (sample_what & SAMPLER_USTACK) {
- pended_ucallstack = kperf_ucallstack_pend(context, sbuf->ucallstack.kpuc_nframes);
- }
-
- if (sample_what & SAMPLER_TH_DISPATCH) {
- pended_th_dispatch = kperf_thread_dispatch_pend(context);
- }
- } else {
- if (sample_what & SAMPLER_USTACK) {
- kperf_ucallstack_sample(&(sbuf->ucallstack), context);
+ pended_ucallstack = kperf_ucallstack_pend(context,
+ ucallstack_depth, actionid);
}
if (sample_what & SAMPLER_TH_DISPATCH) {
- kperf_thread_dispatch_sample(&(sbuf->th_dispatch), context);
+ pended_th_dispatch =
+ kperf_thread_dispatch_pend(context, actionid);
}
}
}
if (pended_th_dispatch) {
BUF_INFO(PERF_TI_DISPPEND);
}
- } else {
- if (sample_what & SAMPLER_USTACK) {
- kperf_ucallstack_log(&(sbuf->ucallstack));
- }
-
- if (sample_what & SAMPLER_TH_DISPATCH) {
- kperf_thread_dispatch_log(&(sbuf->th_dispatch));
- }
}
}
/* the samplers to run */
unsigned int sample_what = actionv[actionid - 1].sample;
+ unsigned int ucallstack_depth = actionv[actionid - 1].ucallstack_depth;
/* do the actual sample operation */
return kperf_sample_internal(sbuf, context, sample_what,
- sample_flags, actionid,
- actionv[actionid - 1].ucallstack_depth);
+ sample_flags, actionid, ucallstack_depth);
}
void
void
kperf_thread_ast_handler(thread_t thread)
{
- BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_START, thread, kperf_get_thread_flags(thread));
+ uint32_t ast = thread->kperf_ast;
- /* ~2KB of the stack for the sample since this is called from AST */
- struct kperf_sample sbuf;
- memset(&sbuf, 0, sizeof(struct kperf_sample));
+ BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_START, thread, ast);
+
+ struct kperf_usample sbuf = {};
task_t task = get_threadtask(thread);
return;
}
- /* make a context, take a sample */
struct kperf_context ctx = {
.cur_thread = thread,
.cur_task = task,
.cur_pid = task_pid(task),
};
- /* decode the flags to determine what to sample */
unsigned int sample_what = 0;
- uint32_t flags = kperf_get_thread_flags(thread);
-
- if (flags & T_KPERF_AST_DISPATCH) {
+ if (ast & T_KPERF_AST_DISPATCH) {
sample_what |= SAMPLER_TH_DISPATCH;
}
- if (flags & T_KPERF_AST_CALLSTACK) {
- sample_what |= SAMPLER_USTACK;
- sample_what |= SAMPLER_TH_INFO;
+ if (ast & T_KPERF_AST_CALLSTACK) {
+ /* TH_INFO for backwards compatibility */
+ sample_what |= SAMPLER_USTACK | SAMPLER_TH_INFO;
}
- uint32_t ucallstack_depth = T_KPERF_GET_CALLSTACK_DEPTH(flags);
-
- int r = kperf_sample_internal(&sbuf, &ctx, sample_what, 0, 0, ucallstack_depth);
+ sbuf.ucallstack.kpuc_nframes =
+ T_KPERF_GET_CALLSTACK_DEPTH(ast) ?: MAX_UCALLSTACK_FRAMES;
+ unsigned int actionid = T_KPERF_GET_ACTIONID(ast);
+ kperf_sample_user_internal(&sbuf, &ctx, actionid, sample_what);
- BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_END, r);
+ BUF_INFO(PERF_AST_HNDLR | DBG_FUNC_END);
}
-/* register AST bits */
int
-kperf_ast_pend(thread_t thread, uint32_t set_flags)
+kperf_ast_pend(thread_t thread, uint32_t set_flags, unsigned int set_actionid)
{
- /* can only pend on the current thread */
if (thread != current_thread()) {
- panic("pending to non-current thread");
+ panic("kperf: pending AST to non-current thread");
}
- /* get our current bits */
- uint32_t flags = kperf_get_thread_flags(thread);
+ uint32_t ast = thread->kperf_ast;
+ unsigned int actionid = T_KPERF_GET_ACTIONID(ast);
+ uint32_t flags = ast & T_KPERF_AST_ALL;
- /* see if it's already been done or pended */
- if (!(flags & set_flags)) {
- /* set the bit on the thread */
- flags |= set_flags;
- kperf_set_thread_flags(thread, flags);
+ if ((flags | set_flags) != flags || actionid != set_actionid) {
+ ast &= ~T_KPERF_SET_ACTIONID(actionid);
+ ast |= T_KPERF_SET_ACTIONID(set_actionid);
+ ast |= set_flags;
+
+ thread->kperf_ast = ast;
/* set the actual AST */
act_set_kperf(thread);
void
kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth)
{
- uint32_t ast_flags = kperf_get_thread_flags(thread);
- uint32_t existing_callstack_depth =
- T_KPERF_GET_CALLSTACK_DEPTH(ast_flags);
-
- if (existing_callstack_depth < depth) {
- ast_flags &= ~T_KPERF_SET_CALLSTACK_DEPTH(depth);
- ast_flags |= T_KPERF_SET_CALLSTACK_DEPTH(depth);
- kperf_set_thread_flags(thread, ast_flags);
+ uint32_t ast = thread->kperf_ast;
+ uint32_t existing_depth = T_KPERF_GET_CALLSTACK_DEPTH(ast);
+ if (existing_depth < depth) {
+ ast &= ~T_KPERF_SET_CALLSTACK_DEPTH(existing_depth);
+ ast |= T_KPERF_SET_CALLSTACK_DEPTH(depth);
+ thread->kperf_ast = ast;
}
}
if (depth > MAX_UCALLSTACK_FRAMES) {
return EINVAL;
}
+ if (depth < 2) {
+ return EINVAL;
+ }
actionv[action_id - 1].ucallstack_depth = depth;
if (depth > MAX_KCALLSTACK_FRAMES) {
return EINVAL;
}
+ if (depth < 1) {
+ return EINVAL;
+ }
actionv[action_id - 1].kcallstack_depth = depth;
#include <stdint.h>
#include <stdbool.h>
-/* fwd decl */
struct kperf_sample;
+struct kperf_usample;
struct kperf_context;
/* bits for defining what to do on an action */
unsigned actionid,
unsigned sample_flags);
+/*
+ * Sample user space.
+ */
+void kperf_sample_user(struct kperf_usample *sbuf, struct kperf_context *ctx,
+ unsigned int actionid, unsigned int sample_flags);
+
/* Whether the action provided samples non-system values. */
bool kperf_action_has_non_system(unsigned actionid);
bool kperf_action_has_thread(unsigned int actionid);
/*
- * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2011-2019 Apple Computer, Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
-/* pend ast bits on a thread */
-extern int kperf_ast_pend(thread_t thread, uint32_t flags);
-extern void kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth);
+/*
+ * Ensure that kperf is informed the next time this thread goes back to user
+ * space, to handle an action.
+ */
+int kperf_ast_pend(thread_t thread, uint32_t flags, unsigned int actionid);
+
+/*
+ * Set the depth for the user callstack sample.
+ */
+void kperf_ast_set_callstack_depth(thread_t thread, uint32_t depth);
cs->kpkc_nframes += 1;
}
if (trunc) {
- cs->kpkc_nframes |= CALLSTACK_TRUNCATED;
+ cs->kpkc_flags |= CALLSTACK_TRUNCATED;
}
BUF_VERB(PERF_CS_BACKTRACE | DBG_FUNC_END, cs->kpkc_nframes);
}
int
-kperf_ucallstack_pend(struct kperf_context * context, uint32_t depth)
+kperf_ucallstack_pend(struct kperf_context * context, uint32_t depth,
+ unsigned int actionid)
{
- int did_pend = kperf_ast_pend(context->cur_thread, T_KPERF_AST_CALLSTACK);
+ if (depth < 2) {
+		panic("kperf: invalid user callstack depth %u", depth);
+ }
kperf_ast_set_callstack_depth(context->cur_thread, depth);
-
- return did_pend;
+ return kperf_ast_pend(context->cur_thread, T_KPERF_AST_CALLSTACK,
+ actionid);
}
static kern_return_t
void kperf_backtrace_sample(struct kp_kcallstack *cs, struct kperf_context *context);
void kperf_ucallstack_sample(struct kp_ucallstack *cs, struct kperf_context *);
-int kperf_ucallstack_pend(struct kperf_context *, uint32_t depth);
+int kperf_ucallstack_pend(struct kperf_context *, uint32_t depth,
+ unsigned int actionid);
void kperf_ucallstack_log(struct kp_ucallstack *cs);
#endif /* !defined(KPERF_CALLSTACK_H) */
kperf_lazy_wait_action != 0;
}
-/* random misc-ish functions */
-uint32_t
-kperf_get_thread_flags(thread_t thread)
-{
- return thread->kperf_flags;
-}
-
-void
-kperf_set_thread_flags(thread_t thread, uint32_t flags)
-{
- thread->kperf_flags = flags;
-}
-
unsigned int
kperf_sampling_status(void)
{
#define TRIGGER_TYPE_LAZY_WAIT (3)
#define TRIGGER_TYPE_LAZY_CPU (3)
-/* helpers to get and set AST flags on a thread */
-uint32_t kperf_get_thread_flags(thread_t thread);
-void kperf_set_thread_flags(thread_t thread, uint32_t flags);
+uint32_t kperf_get_thread_ast(thread_t thread);
+void kperf_set_thread_ast(thread_t thread, uint32_t flags);
/*
* Get and set dirtiness of thread, so kperf can track whether the thread
{
kpc_thread_ast_handler(thread);
kperf_thread_ast_handler(thread);
-
- thread->kperf_flags = 0;
+ thread->kperf_ast = 0;
}
void
{
lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
- uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS | SAMPLE_FLAG_THREAD_ONLY;
+ uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS |
+ SAMPLE_FLAG_THREAD_ONLY;
BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);
thread->kperf_pet_cnt++;
kperf_sample(pet_sample, &ctx, pet_action_id, sample_flags);
+ kperf_sample_user(&pet_sample->usample, &ctx, pet_action_id,
+ sample_flags);
BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
}
#include "kperf_kpc.h"
#include "meminfo.h"
+/*
+ * For data that must be sampled in a fault-able context.
+ */
+struct kperf_usample {
+ struct kperf_thread_dispatch th_dispatch;
+ struct kp_ucallstack ucallstack;
+ struct kperf_thread_info th_info;
+};
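+/*
+ * Editorial note (not part of the diff): with the sample split this way, a
+ * sampler takes the interrupt-safe portion first and the fault-able
+ * user-space portion separately, as the PET path above does:
+ *
+ *	kperf_sample(sample, &ctx, actionid, sample_flags);
+ *	kperf_sample_user(&sample->usample, &ctx, actionid, sample_flags);
+ */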
+
struct kperf_sample {
struct kperf_thread_info th_info;
struct kperf_thread_scheduling th_scheduling;
struct kperf_thread_snapshot th_snapshot;
- struct kperf_thread_dispatch th_dispatch;
struct kperf_task_snapshot tk_snapshot;
struct kp_kcallstack kcallstack;
- struct kp_ucallstack ucallstack;
struct meminfo meminfo;
+ struct kperf_usample usample;
+
#if KPC
struct kpcdata kpcdata;
#endif /* KPC */
}
int
-kperf_thread_dispatch_pend(struct kperf_context *context)
+kperf_thread_dispatch_pend(struct kperf_context *context,
+ unsigned int actionid)
{
- return kperf_ast_pend(context->cur_thread, T_KPERF_AST_DISPATCH);
+ return kperf_ast_pend(context->cur_thread, T_KPERF_AST_DISPATCH,
+ actionid);
}
void
void kperf_thread_dispatch_sample(struct kperf_thread_dispatch *,
struct kperf_context *);
-int kperf_thread_dispatch_pend(struct kperf_context *);
+int kperf_thread_dispatch_pend(struct kperf_context *, unsigned int actionid);
void kperf_thread_dispatch_log(struct kperf_thread_dispatch *);
void kperf_thread_inscyc_log(struct kperf_context *);
#include <mach/machine/_structs.h>
#include <mach/message.h>
+#include <mach/vm_types.h>
#include <mach/arm/thread_state.h>
/*
#define ARM_SAVED_STATE (THREAD_STATE_NONE + 1)
+#if __ARM_VFP__
+#define VFPSAVE_ALIGN 16
+#define VFPSAVE_ATTRIB __attribute__((aligned (VFPSAVE_ALIGN)))
+#define THREAD_ALIGN VFPSAVE_ALIGN
+
+/*
+ * vector floating point saved state
+ */
+struct arm_vfpsaved_state {
+ uint32_t r[64];
+ uint32_t fpscr;
+ uint32_t fpexc;
+};
+#endif
+
struct arm_saved_state {
uint32_t r[13]; /* General purpose register r0-r12 */
uint32_t sp; /* Stack pointer r13 */
uint32_t fsr; /* Fault status */
uint32_t far; /* Virtual Fault Address */
uint32_t exception; /* exception number */
+
+#if __ARM_VFP__
+ /* VFP state */
+ struct arm_vfpsaved_state VFPdata VFPSAVE_ATTRIB;
+	// For packing reasons, cthread_self and DebugData
+	// are inside the PcbData when __ARM_VFP__ is set.
+ arm_debug_state_t *VFPpadding_DebugData;
+ vm_address_t VFPpadding_cthread_self;
+#endif
};
typedef struct arm_saved_state arm_saved_state_t;
#define SHARED_REGION_NESTING_MIN_PPC64 0x0000000010000000ULL
#define SHARED_REGION_NESTING_MAX_PPC64 0x0000000010000000ULL
-#define SHARED_REGION_BASE_ARM 0x1A000000ULL
-#define SHARED_REGION_SIZE_ARM 0x26000000ULL
-#define SHARED_REGION_NESTING_BASE_ARM 0x1A000000ULL
-#define SHARED_REGION_NESTING_SIZE_ARM 0x26000000ULL
+#define SHARED_REGION_BASE_ARM 0x40000000ULL
+#define SHARED_REGION_SIZE_ARM 0x40000000ULL
+#define SHARED_REGION_NESTING_BASE_ARM 0x40000000ULL
+#define SHARED_REGION_NESTING_SIZE_ARM 0x40000000ULL
#define SHARED_REGION_NESTING_MIN_ARM ?
#define SHARED_REGION_NESTING_MAX_ARM ?
#define SYNC_POLICY_PREPOST 0x4
#define SYNC_POLICY_DISABLE_IRQ 0x8
-
-/*
- * If the waitq is IRQ safe, 0x10 suggests it's a waitq embedded in turnstile.
- * If the waitq is not IRQ safe, 0x10 suggests it's a waitq of a port and should use it's turnstile safeq.
- */
#define SYNC_POLICY_TURNSTILE 0x10
-#define SYNC_POLICY_PORT 0x10
+#define SYNC_POLICY_TURNSTILE_PROXY 0x20
#endif /* KERNEL_PRIVATE */
+++ /dev/null
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-<head>
- <title>Mach Kernel Interface Reference Manual</title>
-</head>
-<body>
-<h3>Mach IPC Interface</h3>
-<blockquote>
-<p>
-Mach IPC presents itself in a few forms: message queues, lock-sets,
-and semaphores (more may be added in the future). All share one common
-charateristic: the capabilities presented by each are represented through
-a handle known as a Mach port. Specific rights represented in these
-Mach port capability handles allow the underlying IPC object to be used and
-manipulated in consistent ways.</p>
-
-<h4>Mach Message Queue Interface</h4>
-<blockquote>
-<p>
-<a href="mach_msg.html">mach_msg</a> - Send and/or receive a message from the target port.<br>
-<a href="mach_msg.html">mach_msg_overwrite</a> - Send and/or receive messages with possible overwrite.<br>
-</p>
-Mach Message Queue Data Structures
-<p>
-<a href="mach_msg_descriptor.html">mach_msg_descriptor</a> - Specifies an element of a complex IPC message.<br>
-<a href="mach_msg_header.html">mach_msg_header</a> - Specifies the content of an IPC message header.<br>
-</p>
-</blockquote>
-
-<h4>Mach Lock-Set Interface</h4>
-<blockquote>
-<p>
-<a href="lock_acquire.html">lock_acquire</a> - Acquire ownership a lock<br>
-<a href="lock_handoff.html">lock_handoff</a> - Hand-off ownership of a lock.<br>
-<a href="lock_handoff_accept.html">lock_handoff_accept</a> - Accept lock ownership from a handoff.<br>
-<a href="lock_make_stable.html">lock_make_stable</a> - Stabilize the state of the specified lock.<br>
-<a href="lock_release.html">lock_release</a> - Release ownership of a lock.<br>
-<a href="lock_set_create.html">lock_set_create</a> - Create a new lock set.<br>
-<a href="lock_set_destroy.html">lock_set_destroy</a> - Destroy a lock set and its associated locks.<br>
-<a href="lock_try.html">lock_try</a> - Attempt to acquire access rights to a lock.<br>
-</p>
-</blockquote>
-
-<h4>Mach Semaphore Interface</h4>
-<blockquote>
-<p>
-<a href="semaphore_create.html">semaphore_create</a> - Create a new semaphore.<br>
-<a href="semaphore_destroy.html">semaphore_destroy</a> - Destroy a semaphore.<br>
-<a href="semaphore_signal.html">semaphore_signal</a> - Increments the semaphore count.<br>
-<a href="semaphore_signal_all.html">semaphore_signal_all</a> - Wake up all threads blocked on a semaphore.<br>
-<a href="semaphore_wait.html">semaphore_wait</a> - Wait on the specified semaphore.<br>
-</p>
-</blockquote>
-
-<h4>Mach Port Management Interface</h4>
-<blockquote>
-<p>
-<a href="mach_port_allocate.html">mach_port_allocate</a> - Create caller-specified type of port right.<br>
-<a href="mach_port_allocate_full.html">mach_port_allocate_full</a> - Create a port right with full Mach port semantics.<br>
-<a href="mach_port_allocate_name.html">mach_port_allocate_name</a> - Create a port right with the caller-specified name.<br>
-<a href="mach_port_allocate_qos.html">mach_port_allocate_qos</a> - Allocate a port with specified "quality of service".<br>
-<a href="MP_allocate_subsystem.html">mach_port_allocate_subsystem</a> - Create a port right associated with the caller-specified subsystem.<br>
-<a href="mach_port_deallocate.html">mach_port_deallocate</a> - Decrement the target port right's user reference count.<br>
-<a href="mach_port_destroy.html">mach_port_destroy</a> - Deallocate all port rights associated with specified name.<br>
-<a href="mach_port_extract_right.html">mach_port_extract_right</a> - Remove the specified right from the target task and return it to the caller.<br>
-<a href="mach_port_get_attributes.html">mach_port_get_attributes</a> - Return information about target port as specified by the caller.<br>
-<a href="mach_port_get_refs.html">mach_port_get_refs</a> - Return the current count of user references on the target port right.<br>
-<a href="mach_port_get_set_status.html">mach_port_get_set_status</a> - Return the port right names contained in the target port set.<br>
-<a href="mach_port_insert_right.html">mach_port_insert_right</a> - Insert the specified port right into the target task.<br>
-<a href="mach_port_mod_refs.html">mach_port_mod_refs</a> - Modify the specified port right's count of user references.<br>
-<a href="mach_port_move_member.html">mach_port_move_member</a> - Move the specified receive right into or out of the specified port set.<br>
-<a href="mach_port_names.html">mach_port_names</a> - Return information about a task's port name space.<br>
-<a href="MP_request_notification.html">mach_port_request_notification</a> - Request notification of the specified port event type.<br>
-<a href="mach_port_set_attributes.html">mach_port_set_attributes</a> - Set the target port's attributes.<br>
-<a href="mach_port_set_mscount.html">mach_port_set_mscount</a> - Change the target port's make-send count.<br>
-<a href="mach_port_set_seqno.html">mach_port_set_seqno</a> - Change the current value of the target port's sequence number.<br>
-<a href="mach_port_type.html">mach_port_type</a> - Return the characteristics of the target port name.<br>
-<a href="mach_reply_port.html">mach_reply_port</a> - Allocate a new port and insert corresponding receive right in the calling task.<br>
-<a href="mach_subsystem_create.html"> mach_subsystem_create</a> - Used by a server to register information about an RPC subsystem with the kernel.<br>
-</p>
-Mach Port Data Structures
-<p>
-<a href="mach_port_limits.html">mach_port_limits</a> - Specifies a port's resource and message queue limits.<br>
-<a href="mach_port_qos.html">mach_port_qos</a> - Specifies a port's attributes with respect to "Quality Of Service."<br>
-<a href="mach_port_status.html">mach_port_status</a> - Used to present a port's current status with respect to various important attributes.<br>
-</p>
-Mach Port Notification Callbacks
-<p>
-<a href="do_mach_notify_dead_name.html">do_mach_notify_dead_name</a> - Handle the current instance of a dead-name notification.<br>
-<a href="do_mach_notify_no_senders.html">do_mach_notify_no_senders</a> - Handle the current instance of a no-more-senders notification.<br>
-<a href="DMN_port_deleted.html">do_mach_notify_port_deleted</a> - Handle the current instance of a port-deleted notification.<br>
-<a href="DMN_port_destroyed.html">do_mach_notify_port_destroyed</a> - Handle the current instance of a port-destroyed notification.<br>
-<a href="do_mach_notify_send_once.html">do_mach_notify_send_once</a> - Handle the current instance of a send-once notification.<br>
-</p>
-Mach Port Notification Callback Server Helpers
-<p>
-<a href="notify_server.html">notify_server</a> - Detect and handle a kernel-generated IPC notification.<br>
-</p>
-</blockquote>
-
-</blockquote>
-
-<h3>Mach Virtual Memory Interface</h3>
-<blockquote>
-<h4>Mach Virtual Memory Address Space Manipulation Interface</h4>
-<blockquote>
-<p>
-<a href="host_page_size.html">host_page_size</a> - Provide the system's virtual page size.<br>
-<a href="vm_allocate.html">vm_allocate</a> - Allocate a region of virtual memory.<br>
-<a href="vm_behavior_set.html">vm_behavior_set</a> - Specify expected access patterns for the target VM region.<br>
-<a href="vm_copy.html">vm_copy</a> - Copy a region of virtual memory.<br>
-<a href="vm_deallocate.html">vm_deallocate</a> - Deallocate a region of virtual memory.<br>
-<a href="vm_inherit.html">vm_inherit</a> - Set a VM region's inheritance attribute.<br>
-<a href="vm_machine_attribute.html">vm_machine_attribute</a> - Get/set the target memory region's special attributes.<br>
-<a href="vm_map.html">vm_map</a> - Map the specified memory object to a region of virtual memory.<br>
-<a href="vm_msync.html">vm_msync</a> - Synchronize the specified region of virtual memory.<br>
-<a href="vm_protect.html">vm_protect</a> - Set access privilege attribute for a region of virtual memory.<br>
-<a href="vm_read.html">vm_read</a> - Read the specified range of target task's address space.<br>
-<a href="vm_region.html">vm_region</a> - Return description of a virtual memory region.<br>
-<a href="vm_remap.html">vm_remap</a> - Map memory objects in one address space to that of another's.<br>
-<a href="vm_wire.html"> vm_wire</a> - Modify the target region's paging characteristics.<br>
-<a href="vm_write.html">vm_write</a> - Write data to the specified address in the target address space.<br>
-</p>
-Data Structures
-<p>
-<a href="vm_region_basic_info.html">vm_region_basic_info</a> - Defines the attributes of a task's memory region.<br>
-<a href="vm_statistics.html">vm_statistics</a> - Defines statistics for the kernel's use of virtual memory.<br>
-</p>
-</blockquote>
-
-<h4>External Memory Management Interface</h4>
-<blockquote>
-The External Memory Management Interface (EMMI) is undergoing significant change in the Darwin system.
-For this reason, the interface is not currently available to user-level programs. Even for kernel
-extensions, use of these interfaces in not supported. Instead, the BSD filesystem's Universal Buffer Cache (UBC)
-mechanism should be used.<br>
-<p>
-<a href="MO_change_attributes.html">memory_object_change_attributes</a> - Modify subset of memory object attributes.<br>
-<a href="memory_object_destroy.html">memory_object_destroy</a> - Shut down a memory object.<br>
-<a href="MO_get_attributes.html">memory_object_get_attributes</a> - Return current attributes for a memory object.<br>
-<a href="memory_object_lock_request.html">memory_object_lock_request</a> - Restrict access to memory object data.<br>
-<a href="MO_SY_completed.html">memory_object_synchronize_completed</a> - Synchronized data has been processed.<br>
-</p>
-Data Structures
-<p>
-<a href="memory_object_attr_info.html">memory_object_attr_info</a> - Defines memory object attributes.<br>
-<a href="memory_object_perf_info.html">memory_object_perf_info</a>- Specifies performance-related memory object attributes.<br>
-</p>
-External Memory Manager Interface Callbacks
-<p>
-<a href="memory_object_create.html">memory_object_create</a> - Assign a new memory object to the default memory manager.<br>
-<a href="MO_data_initialize.html">memory_object_data_initialize</a> - Provide initial data for a new memory object.<br>
-<a href="memory_object_data_request.html">memory_object_data_request</a> - Request that memory manager page-in specified data.<br>
-<a href="memory_object_data_return.html">memory_object_data_return</a> - Return memory object data to the appropriate memory manager.<br>
-<a href="memory_object_data_unlock.html">memory_object_data_unlock</a> - Request a memory manager release the lock on specific data.<br>
-<a href="memory_object_init.html">memory_object_init</a> - Inform a memory manager on first use of a memory object.<br>
-<a href="memory_object_synchronize.html">memory_object_synchronize</a> - Request synchronization of data with backing store.<br>
-<a href="memory_object_terminate.html">memory_object_terminate</a> - Relinquish access to a memory object.<br>
-</p>
-EMMI Callback Server Helpers
-<p>
-<a href="MO_default_server.html">memory_object_default_server</a> - Handle kernel operation request targeted for the default pager.<br>
-<a href="memory_object_server.html">memory_object_server</a> - Handle kernel operation request aimed at a given memory manager.<br>
-</p>
-</blockquote>
-
-<h4>Default Memory Management Interface</h4>
-<blockquote>
-<p>
-<a href="default_pager_add_segment.html">default_pager_add_segment</a> - Add additional backing storage for a default pager.<br>
-<a href="DP_backing_store_create.html">default_pager_backing_store_create</a> - Create a backing storage object.<br>
-<a href="DP_backing_store_delete.html"> default_pager_backing_store_delete</a> - Delete a backing storage object.<br>
-<a href="DP_backing_store_info.html">default_pager_backing_store_info</a> - Return information about a backing storage object.<br>
-<a href="default_pager_info.html">default_pager_info</a> - Furnish caller with information about the default pager.<br>
-<a href="DP_object_create.html">default_pager_object_create</a> - Initialize a non-persistent memory object.<br>
-<a href="HD_memory_manager.html">host_default_memory_manager</a> - Register/Lookup the host's default pager.<br>
-</p>
-</blockquote>
-
-</blockquote>
-
-<h3>Process Management Interface</h3>
-<blockquote>
-
-<h4>Task Interface</h4>
-<blockquote>
-<p>
-<a href="mach_ports_lookup.html">mach_ports_lookup</a> - Provide caller with an array of the target task's well-known ports.<br>
-<a href="mach_ports_register.html">mach_ports_register</a> - Register an array of well-known ports on behalf of the target task.<br>
-<a href="mach_task_self.html">mach_task_self</a> - Return a send right to the caller's task_self port.<br>
-<a href="task_create.html">task_create</a> - Create a new task.<br>
-<a href="task_get_emulation_vector.html">task_get_emulation_vector</a> - Return an array identifying the target task's user-level system call handlers.<br>
-<a href="task_get_exception_ports.html">task_get_exception_ports</a> - Return send rights to the target task's exception ports.<br>
-<a href="task_get_special_port.html">task_get_special_port</a> - Return a send write to the indicated special port.<br>
-<a href="task_info.html">task_info</a> - Return per-task information according to specified flavor.<br>
-<a href="task_resume.html">task_resume</a> - Decrement the target task's suspend count.<br>
-<a href="task_sample.html">task_sample</a> - Sample the target task's thread program counters periodically.<br>
-<a href="task_set_emulation.html">task_set_emulation</a> - Establish a user-level handler for a system call.<br>
-<a href="task_set_emulation_vector.html">task_set_emulation_vector</a> - Establish the target task's user-level system call handlers.<br>
-<a href="task_set_exception_ports.html">task_set_exception_ports</a> - Set target task's exception ports.<br>
-<a href="task_set_info.html">task_set_info</a> - Set task-specific information state.<br>
-<a href="task_set_port_space.html">task_set_port_space</a> - Set the size of the target task's port name space table.<br>
-<a href="task_set_special_port.html">task_set_special_port</a> - Set the indicated special port.<br>
-<a href="task_suspend.html">task_suspend</a> - Suspend the target task.<br>
-<a href="task_swap_exception_ports.html">task_swap_exception_ports</a> - Set target task's exception ports, returning the previous exception ports.<br>
-<a href="task_terminate.html">task_terminate</a> - Terminate the target task and deallocate its resources.<br>
-<a href="task_threads.html">task_threads</a> - Return the target task's list of threads.<br>
-</p>
-Task Data Structures
-<p>
-<a href="task_basic_info.html">task_basic_info</a> - Defines basic information for a task.<br>
-<a href="task_thread_times_info.html">task_thread_times_info</a> - Defines thread execution times information for tasks.<br>
-</p>
-</blockquote>
-
-<h4>Thread Interface</h4>
-<blockquote>
-<p>
-<a href="mach_thread_self.html">mach_thread_self</a> - Returns the thread self port.<br>
-<a href="thread_abort.html">thread_abort</a> - Abort a thread.<br>
-<a href="thread_abort_safely.html">thread_abort_safely</a> - Abort a thread, restartably.<br>
-<a href="thread_create.html">thread_create</a> - Create a thread within a task.<br>
-<a href="thread_create_running.html">thread_create_running</a> - Optimized creation of a running thread.<br>
-<a href="thread_depress_abort.html">thread_depress_abort</a> - Cancel thread scheduling depression.<br>
-<a href="thread_get_exception_ports.html">thread_get_exception_ports</a> - Return a send right to an exception port.<br>
-<a href="thread_get_special_port.html">thread_get_special_port</a> - Return a send right to the caller-specified special port.<br>
-<a href="thread_get_state.html">thread_get_state</a> - Return the execution state for a thread.<br>
-<a href="thread_info.html">thread_info</a> - Return information about a thread.<br>
-<a href="thread_resume.html">thread_resume</a> - Resume a thread.<br>
-<a href="thread_sample.html">thread_sample</a> - Perform periodic PC sampling for a thread.<br>
-<a href="thread_set_exception_ports.html">thread_set_exception_ports</a> - Set exception ports for a thread.<br>
-<a href="thread_set_special_port.html">thread_set_special_port</a> - Set caller-specified special port belonging to the target thread.<br>
-<a href="thread_set_state.html">thread_set_state</a> - Set the target thread's user-mode execution state.<br>
-<a href="thread_suspend.html">thread_suspend</a> - Suspend a thread.<br>
-<a href="TS_exception_ports.html">thread_swap_exception_ports</a> - Swap exception ports for a thread.<br>
-<a href="thread_terminate.html">thread_terminate</a> - Destroy a thread.<br>
-<a href="thread_wire.html">thread_wire</a> - Mark the thread as privileged with respect to kernel resources.<br>
-</p>
-Thread Data Structures
-<p>
-<a href="thread_basic_info.html">thread_basic_info</a> - Defines basic information for a thread.<br>
-</p>
-Thread Exception Callbacks
-<p>
-<a href="catch_exception_raise.html">catch_exception_raise</a> - Handles the occurrence of an exception within a thread.<br>
-</p>
-Thread Exception Callback Server Helpers
-<p>
-<a href="exc_server.html">exc_server</a> - Handle kernel-reported thread exception.<br>
-</p>
-</blockquote>
-
-<h4>Scheduling Interface</h4>
-<blockquote>
-<p>
-<a href="task_policy.html">task_policy</a> - Set target task's default scheduling policy state.<br>
-<a href="task_set_policy.html">task_set_policy</a> - Set target task's default scheduling policy state.<br>
-<a href="thread_policy.html">thread_policy</a> - Set target thread's scheduling policy state.<br>
-<a href="thread_set_policy.html">thread_set_policy</a> - Set target thread's scheduling policy state.<br>
-<a href="thread_switch.html">thread_switch</a> - Cause context switch with options.<br>
-</p>
-Scheduling Data Structures
-<p>
-<a href="policy_fifo_info.html">policy_fifo_info</a> - Specifies information associated with the system's First-In-First-Out scheduling policy.<br>
-<a href="policy_rr_info.html">policy_rr_info</a> - Specifies information associated with the system's Round Robin scheduling policy.<br>
-<a href="policy_timeshare_info.html">policy_timeshare_info</a> - Specifies information associated with the system's Timeshare scheduling policy.<br>
-</p>
-</blockquote>
-</blockquote>
-
-<h3>System Management Interface</h3>
-<blockquote>
-
-<h4>Host Interface</h4>
-<blockquote>
-<p>
-<a href="host_get_clock_service.html">host_get_clock_service</a> - Return a send right to a kernel clock's service port.<br>
-<a href="host_get_time.html">host_get_time</a> - Returns the current time as seen by that host.<br>
-<a href="host_info.html">host_info</a> - Return information about a host.<br>
-<a href="host_kernel_version.html">host_kernel_version</a> - Return kernel version information for a host.<br>
-<a href="host_statistics.html">host_statistics</a> - Return statistics for a host.<br>
-<a href="mach_host_self.html">mach_host_self</a> - Returns send rights to the task's host self port.<br>
-</p>
-Data Structures
-<p>
-<a href="host_basic_info.html">host_basic_info</a> - Used to present basic information about a host.<br>
-<a href="host_load_info.html">host_load_info</a> - Used to present a host's processor load information.<br>
-<a href="host_sched_info.html">host_sched_info</a> - - Used to present the set of scheduler limits associated with the host.<br>
-<a href="kernel_resource_sizes.html">kernel_resource_sizes</a> - Used to present the sizes of kernel's major structures.<br>
-</p>
-</blockquote>
-
-<h4>Host Control Interface</h4>
-<blockquote>
-<p>
-<a href="host_adjust_time.html">host_adjust_time</a> - Arranges for the time on a specified host to be gradually changed by an adjustment value.<br>
-<a href="HD_memory_manager.html">host_default_memory_manager</a> - Set the default memory manager.<br>
-<a href="host_get_boot_info.html">host_get_boot_info</a> - Return operator boot information.<br>
-<a href="host_get_clock_control.html">host_get_clock_control</a> - Return a send right to a kernel clock's control port.<br>
-<a href="host_processor_slots.html">host_processor_slots</a> - Return a list of numbers that map processor slots to active processors.<br>
-<a href="host_processors.html">host_processors</a> - Return a list of send rights representing all processor ports.<br>
-<a href="host_reboot.html">host_reboot</a> - Reboot this host.<br>
-<a href="host_set_time.html">host_set_time</a> - Establishes the time on the specified host.<br>
-</p>
-</blockquote>
-
-<h4>Host Security Interface</h4>
-<blockquote>
-<p>
-<a href="host_security_create_task_token.html">host_security_create_task_token</a> - Create a new task with an explicit security token.<br>
-<a href="host_security_set_task_token.html">host_security_set_task_token</a> - Change the target task's security token.<br>
-</p>
-</blockquote>
-
-<h4>Resource Accounting Interface</h4>
-<blockquote>
-<i>
-The Mach resource accounting mechanism is not functional in the current Mac OS X/Darwin system. It will become functional in a future release.
-</i>
-<p>
-<a href="ledger_create.html">ledger_create</a> - Create a subordinate ledger.<br>
-<a href="ledger_read.html">ledger_read</a> - Return the ledger limit and balance.<br>
-<a href="ledger_terminate.html">ledger_terminate</a> - Destroy a ledger.<br>
-<a href="ledger_transfer.html">ledger_transfer</a> - Transfer resources from a parent ledger to a child.<br>
-</p>
-</blockquote>
-
-<h4>Processor Management Interface</h4>
-<blockquote>
-<p>
-<a href="processor_control.html">processor_control</a> - Perform caller-specified operation on target processor.<br>
-<a href="processor_exit.html">processor_exit</a> - Exit a processor.<br>
-<a href="processor_info.html">processor_info</a> - Return information about a processor.<br>
-<a href="processor_start.html">processor_start</a> - Start a processor.<br>
-</p>
-Processor Data Structures
-<p>
-<a href="processor_basic_info.html">processor_basic_info</a> - Defines the basic information about a processor.<br>
-</p>
-</blockquote>
-
-<h4>Processor Set Interface</h4>
-<blockquote>
-<i>
-The processor set interface allows for the grouping of tasks and
-processors for the purpose of exclusive scheduling. These interface
-are <b>deprecated</b> and should not be used in code that isn't tied
-to a particular release of Mac OS X/Darwin. These will likely change
-or disappear in a future release.
-</i>
-<p>
-<a href="host_processor_sets.html">host_processor_sets</a> - Return a list of send rights representing all processor set name ports.<br>
-<a href="host_processor_set_priv.html">host_processor_set_priv</a> - Translate a processor set name port into a processor set control port.<br>
-<a href="processor_assign.html">processor_assign</a> - Assign a processor to a processor set.<br>
-<a href="processor_get_assignment.html">processor_get_assignment</a> - Get current assignment for a processor.<br>
-<a href="processor_set_create.html">processor_set_create</a> - Create a new processor set.<br>
-<a href="processor_set_default.html">processor_set_default</a> - Return the default processor set.<br>
-<a href="processor_set_destroy.html">processor_set_destroy</a> - Destroy the target processor set.<br>
-<a href="processor_set_info.html">processor_set_info</a> - Return processor set state according to caller-specified flavor.<br>
-<a href="processor_set_max_priority.html">processor_set_max_priority</a> - Sets the maximum scheduling priority for a processor set.<br>
-<a href="P_set_policy_control.html">processor_set_policy_control</a> - Set target processor set's scheduling policy state.<br>
-<a href="P_set_policy_disable.html">processor_set_policy_disable</a> - Enables a scheduling policy for a processor set.<br>
-<a href="P_set_policy_enable.html">processor_set_policy_enable</a> - Enables a scheduling policy for a processor set.<br>
-<a href="processor_set_statistics.html">processor_set_statistics</a> - Return scheduling statistics for a processor set.<br>
-<a href="processor_set_tasks.html">processor_set_tasks</a> - Return all tasks currently assigned to the target processor set.<br>
-<a href="processor_set_threads.html">processor_set_threads</a> - Return all threads currently assigned to the target processor set.<br>
-<a href="task_assign.html">task_assign</a> - Assign a task to a processor set.<br>
-<a href="task_assign_default.html">task_assign_default</a> - Assign a task to the default processor set.<br>
-<a href="task_get_assignment.html">task_get_assignment</a> - Create a new task with an explicit security token.<br>
-<a href="thread_assign.html">thread_assign</a> - Assign a thread to a processor set.<br>
-<a href="thread_assign_default.html">thread_assign_default</a> - Assign a thread to the default processor set.<br>
-<a href="thread_get_assignment.html">thread_get_assignment</a> - Return the processor set to which a thread is assigned.<br>
-</p>
-Processor Set Data Structures
-<p>
-<a href="processor_set_basic_info.html">processor_set_basic_info</a> - Defines the basic information about a processor set.<br>
-<a href="processor_set_load_info.html">processor_set_load_info</a> - Defines the scheduling statistics for a processor set.<br>
-</p>
-</blockquote>
-
-<h4>Clock Interface</h4>
-<blockquote>
-<p>
-<a href="clock_alarm.html">clock_alarm</a> - Set up an alarm.<br>
-<a href="clock_get_attributes.html">clock_get_attributes</a> - Return attributes of a clock.<br>
-<a href="clock_get_time.html">clock_get_time</a> - Return the current time.<br>
-<a href="clock_map_time.html">clock_map_time</a> - Return a memory object that maps a clock.<br>
-<a href="clock_set_attributes.html">clock_set_attributes</a> - Set a particular clock's attributes.<br>
-<a href="clock_set_time.html">clock_set_time</a> - Set the current time.<br>
-<a href="clock_sleep.html">clock_sleep</a> - Delay the invoking thread until a specified time.<br>
-</p>
-Clock Data Structures
-<p>
-<a href="mapped_tvalspec.html">mapped_tvalspec</a> - Specifies the format the kernel uses to maintain a mapped clock's time.<br>
-<a href="tvalspec.html">tvalspec</a> - Defines format of system time values.<br>
-</p>
-Clock Interface Callbacks
-<p>
-<a href="clock_alarm_reply.html">clock_alarm_reply</a> - Ring a preset alarm.<br>
-</p>
-Clock Callback Server Helpers
-<p>
-<a href="clock_reply_server.html"> clock_reply_server</a> - Handle kernel-generated alarm.<br>
-</p>
-</blockquote>
-
-<h4>Multi-Computer Support Interface</h4>
-<blockquote>
-<i>
-These multi-computer support interfaces are no longer supported by
-the Mac OS X/Darwin kernel. If and when multi-computer support is
-added back in, something like these will likely be added.
-</i>
-<p>
-<a href="host_page_size.html">host_page_size</a> - Returns the page size for the given host.<br>
-<a href="ledger_get_remote.html">ledger_get_remote</a> - Return send right to specified host's remote ledger port.<br>
-<a href="ledger_set_remote.html">ledger_set_remote</a> - Set this host's remote ledger port.<br>
-</p>
-</blockquote>
-
-</blockquote>
-
-<h3>Machine Specific Interface</h3>
-<blockquote>
-
-<h4>Intel 386 Support</h4>
-<blockquote>
-<p>
-<a href="i386_get_ldt.html">i386_get_ldt</a> - Returns per-thread segment descriptors from the local descriptor table (LDT).<br>
-<a href="i386_io_port_add.html">i386_io_port_add</a> - Adds a device to the I/O permission bitmap for a thread. <br>
-<a href="i386_io_port_list.html">i386_io_port_list</a> - Returns a list of the devices named in the thread's I/O permission bitmap.<br>
-<a href="i386_io_port_remove.html">i386_io_port_remove</a> - Removes the specified device from the thread's I/O permission bitmap.<br>
-<a href="i386_set_ldt.html">i386_set_ldt</a> - Allows a thread to have a private local descriptor table (LDT).<br>
-</p>
-</blockquote>
-
-<h4>PowerPC Support</h4>
-<blockquote>
-<p>
-</p>
-</blockquote>
-
-</blockquote>
-
-</BODY>
-
-</HTML>
-
PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof(vm_compression_limit));
#ifdef CONFIG_EMBEDDED
-#if XNU_TARGET_OS_WATCH
- // rdar://problem/51012698
- vm_compressor_minorcompact_threshold_divisor = 40;
-#else
vm_compressor_minorcompact_threshold_divisor = 20;
-#endif
vm_compressor_majorcompact_threshold_divisor = 30;
vm_compressor_unthrottle_threshold_divisor = 40;
vm_compressor_catchup_threshold_divisor = 60;
}
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info.user_tag << 16) | (caller_prot << 8) | type_of_fault, m->vmp_offset, get_current_unique_pid(), 0);
-
+ if (need_retry == FALSE) {
+ KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_FAST), get_current_unique_pid(), 0, 0, 0, 0);
+ }
DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info.user_tag);
}
if (kr == KERN_SUCCESS &&
}
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, event_code, trace_real_vaddr, (fault_info.user_tag << 16) | (caller_prot << 8) | type_of_fault, m->vmp_offset, get_current_unique_pid(), 0);
+ KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_REAL_FAULT_SLOW), get_current_unique_pid(), 0, 0, 0, 0);
DTRACE_VM6(real_fault, vm_map_offset_t, real_vaddr, vm_map_offset_t, m->vmp_offset, int, event_code, int, caller_prot, int, type_of_fault, int, fault_info.user_tag);
}
*offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
*object = VME_OBJECT(entry);
*out_prot = prot;
+ KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);
if (fault_info) {
fault_info->interruptible = THREAD_UNINT; /* for now... */
local_map = VME_SUBMAP(entry);
local_offset = VME_OFFSET(entry);
+ vm_map_reference(local_map);
vm_map_unlock(map);
if (vm_map_msync(
local_map,
sync_flags) == KERN_INVALID_ADDRESS) {
had_hole = TRUE;
}
+ vm_map_deallocate(local_map);
continue;
}
object = VME_OBJECT(entry);
* copy_call.
*/
struct vm_object *shadow; /* My shadow */
+ memory_object_t pager; /* Where to get data */
union {
vm_object_offset_t vou_shadow_offset; /* Offset into shadow */
*/
} vo_un2;
- memory_object_t pager; /* Where to get data */
vm_object_offset_t paging_offset; /* Offset into memory object */
memory_object_control_t pager_control; /* Where data comes back */
* they are updated via atomic compare and swap
*/
vm_object_offset_t last_alloc; /* last allocation offset */
+ vm_offset_t cow_hint; /* last page present in */
+ /* shadow but not in object */
int sequential; /* sequential access size */
uint32_t pages_created;
uint32_t pages_used;
- vm_offset_t cow_hint; /* last page present in */
- /* shadow but not in object */
/* hold object lock when altering */
unsigned int
wimg_bits:8, /* cache WIMG bits */
#endif /* VM_OBJECT_ACCESS_TRACKING */
uint8_t scan_collisions;
+ uint8_t __object4_unused_bits[1];
vm_tag_t wire_tag;
- uint8_t __object4_unused_bits[2];
#if CONFIG_PHANTOM_CACHE
uint32_t phantom_object_id;
#define VM_REAL_FAULT_ADDR_PURGABLE 0x03
#define VM_REAL_FAULT_ADDR_EXTERNAL 0x04
#define VM_REAL_FAULT_ADDR_SHAREDCACHE 0x05
+#define VM_REAL_FAULT_FAST 0x06
+#define VM_REAL_FAULT_SLOW 0x07
+#define VM_MAP_LOOKUP_OBJECT 0x08
#include <kperf/context.h>
#include <kperf/action.h>
+#include <kern/monotonic.h>
+
/* Fixed counter mask -- three counters, each with OS and USER */
#define IA32_FIXED_CTR_ENABLE_ALL_CTRS_ALL_RINGS (0x333)
#define IA32_FIXED_CTR_ENABLE_ALL_PMI (0x888)
return rdmsr64( MSR_IA32_PERF_FIXED_CTR_CTRL );
}
-static uint64_t
-IA32_FIXED_CTRx(uint32_t ctr)
-{
-#ifdef USE_RDPMC
- return rdpmc64(RDPMC_FIXED_COUNTER_SELECTOR | ctr);
-#else /* !USE_RDPMC */
- return rdmsr64(MSR_IA32_PERF_FIXED_CTR0 + ctr);
-#endif /* !USE_RDPMC */
-}
-
#ifdef FIXED_COUNTER_RELOAD
static void
wrIA32_FIXED_CTRx(uint32_t ctr, uint64_t value)
int
kpc_get_fixed_counters(uint64_t *counterv)
{
- int i, n = kpc_fixed_count();
-
-#ifdef FIXED_COUNTER_SHADOW
- uint64_t status;
-
- /* snap the counters */
- for (i = 0; i < n; i++) {
- counterv[i] = FIXED_SHADOW(ctr) +
- (IA32_FIXED_CTRx(i) - FIXED_RELOAD(ctr));
- }
-
- /* Grab the overflow bits */
- status = rdmsr64(MSR_IA32_PERF_GLOBAL_STATUS);
-
- /* If the overflow bit is set for a counter, our previous read may or may not have been
- * before the counter overflowed. Re-read any counter with it's overflow bit set so
- * we know for sure that it has overflowed. The reason this matters is that the math
- * is different for a counter that has overflowed. */
- for (i = 0; i < n; i++) {
- if ((1ull << (i + 32)) & status) {
- counterv[i] = FIXED_SHADOW(ctr) +
- (kpc_fixed_max() - FIXED_RELOAD(ctr) + 1 /* Wrap */) + IA32_FIXED_CTRx(i);
- }
- }
-#else
- for (i = 0; i < n; i++) {
- counterv[i] = IA32_FIXED_CTRx(i);
- }
-#endif
-
+#if MONOTONIC
+ mt_fixed_counts(counterv);
return 0;
+#else /* MONOTONIC */
+#pragma unused(counterv)
+ return ENOTSUP;
+#endif /* !MONOTONIC */
}
int
ifneq (osx,$(TARGET_NAME))
EXCLUDED_SOURCES += no32exec_35914211.c no32exec_35914211_helper.c
-endif
+else # target = osx
+CUSTOM_TARGETS += no32exec_35914211_helper no32exec_35914211_helper_binprefs
+
+no32exec_35914211_helper: INVALID_ARCHS = x86_64 i386
+no32exec_35914211_helper:
+ $(CC) $(LDFLAGS) $(CFLAGS) -arch i386 no32exec_35914211_helper.c -o $(SYMROOT)/$@;
+ env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@;
+
+install-no32exec_35914211_helper:
+ mkdir -p $(INSTALLDIR)
+ cp $(SYMROOT)/no32exec_35914211_helper $(INSTALLDIR)/
-no32exec_35914211_helper: INVALID_ARCHS = x86_64
+no32exec_35914211_helper_binprefs: INVALID_ARCHS = x86_64 i386
no32exec_35914211_helper_binprefs:
- $(CC) $(OTHER_CFLAGS) $(CFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) -ldarwintest -arch i386 -arch x86_64 \
- no32exec_35914211_helper_binprefs.c -o $(SYMROOT)/no32exec_35914211_helper_binprefs
+ $(CC) $(OTHER_CFLAGS) $(CFLAGS) $(OTHER_LDFLAGS) $(LDFLAGS) -arch i386 -arch x86_64 no32exec_35914211_helper.c -o $(SYMROOT)/$@;
+ env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) $(CODESIGN) --force --sign - --timestamp=none $(SYMROOT)/$@;
+
+install-no32exec_35914211_helper_binprefs:
+ mkdir -p $(INSTALLDIR)
+ cp $(SYMROOT)/no32exec_35914211_helper_binprefs $(INSTALLDIR)/
no32exec_35914211: INVALID_ARCHS = i386
no32exec_35914211: no32exec_35914211_helper
no32exec_35914211: no32exec_35914211_helper_binprefs
+endif # (osx,$(TARGET_NAME)))
MIG:=SDKROOT=$(SDKROOT) $(shell xcrun -sdk "$(TARGETSDK)" -find mig)
#include <stdlib.h>
#include <signal.h>
-static int binprefs_child_is_64 = 0;
-
-static void
-signal_handler(__unused int sig)
-{
- binprefs_child_is_64 = 1;
- return;
-}
-
-T_DECL(no32exec_bootarg_with_spawn, "make sure the no32exec boot-arg is honored, using posix_spawn", T_META_BOOTARGS_SET("-no32exec"))
+T_DECL(no32exec_bootarg_with_spawn, "make sure we can't posix_spawn 32-bit")
{
int spawn_ret, pid;
char path[1024];
uint32_t size = sizeof(path);
T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL);
- T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), size, NULL);
+ T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), (unsigned long)size, NULL);
spawn_ret = posix_spawn(&pid, path, NULL, NULL, NULL, NULL);
if (spawn_ret == 0) {
T_ASSERT_EQ(spawn_ret, EBADARCH, NULL);
}
-T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure the no32exec boot-arg is honored, using posix_spawn"
- "with binprefs on a fat i386/x86_64 Mach-O", T_META_BOOTARGS_SET("-no32exec"))
+T_DECL(no32_exec_bootarg_with_exec, "make sure we can't fork and exec 32-bit")
+{
+ int pid;
+ char path[1024];
+ uint32_t size = sizeof(path);
+
+ T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL);
+ T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), (unsigned long)size, NULL);
+
+ pid = fork();
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "fork");
+
+ if (pid == 0) { /* child */
+ execve(path, NULL, NULL); /* this should fail, resulting in the call to exit below */
+ exit(errno);
+ } else { /* parent */
+ int wait_ret = 0;
+ waitpid(pid, &wait_ret, 0);
+ T_QUIET; T_ASSERT_TRUE(WIFEXITED(wait_ret), "child should have called exit()");
+ T_ASSERT_EQ(WEXITSTATUS(wait_ret), EBADARCH, "execve should set errno = EBADARCH");
+ }
+}
+
+T_DECL(no32exec_bootarg_with_spawn_binprefs, "make sure we honor no32exec, using posix_spawn with binprefs on a fat i386/x86_64 Mach-O")
{
int pid, ret;
posix_spawnattr_t spawnattr;
char path[1024];
uint32_t size = sizeof(path);
T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL);
- T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), size, NULL);
-
- T_QUIET; T_ASSERT_NE(signal(SIGUSR1, signal_handler), SIG_ERR, "signal");
+ T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), (unsigned long)size, NULL);
ret = posix_spawnattr_init(&spawnattr);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_init");
ret = posix_spawn(&pid, path, NULL, &spawnattr, NULL, NULL);
T_ASSERT_EQ(ret, 0, "posix_spawn should succeed despite 32-bit binpref appearing first");
- sleep(1);
- ret = kill(pid, SIGUSR1); // ping helper; helper should ping back if running 64-bit
- T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kill");
-
- ret = wait(NULL);
+ int wait_ret = 0;
+ ret = waitpid(pid, &wait_ret, 0);
T_QUIET; T_ASSERT_EQ(ret, pid, "child pid");
- T_ASSERT_EQ(binprefs_child_is_64, 1, "child process should be running in 64-bit mode");
+ T_QUIET; T_ASSERT_EQ(WIFEXITED(wait_ret), 1, "child process should have called exit()");
+ T_ASSERT_EQ(WEXITSTATUS(wait_ret), 8, "child process should be running in 64-bit mode");
ret = posix_spawnattr_destroy(&spawnattr);
T_QUIET; T_ASSERT_EQ(ret, 0, "posix_spawnattr_destroy");
}
-T_DECL(no32_exec_bootarg_with_exec, "make sure the no32exec boot-arg is honored, using fork and exec", T_META_BOOTARGS_SET("-no32exec"))
+T_DECL(no32exec_bootarg_with_32only_spawn_binprefs, "make sure we honor no32exec, using posix_spawn with 32-bit only binprefs on a fat i386/x86_64 Mach-O")
{
- int pid;
+ int pid, ret, spawn_ret;
+ posix_spawnattr_t spawnattr;
+ cpu_type_t cpuprefs[] = { CPU_TYPE_X86 };
+
char path[1024];
uint32_t size = sizeof(path);
-
T_QUIET; T_ASSERT_EQ(_NSGetExecutablePath(path, &size), 0, NULL);
- T_QUIET; T_ASSERT_LT(strlcat(path, "_helper", size), size, NULL);
+ T_QUIET; T_ASSERT_LT(strlcat(path, "_helper_binprefs", size), (unsigned long)size, NULL);
- pid = fork();
- T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "fork");
+ ret = posix_spawnattr_init(&spawnattr);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_init");
- if (pid == 0) { /* child */
- execve(path, NULL, NULL); /* this should fail, resulting in the call to exit below */
- exit(errno);
- } else { /* parent */
- int wait_ret = 0;
- waitpid(pid, &wait_ret, 0);
- T_ASSERT_EQ(WEXITSTATUS(wait_ret), EBADARCH, "execve should set errno = EBADARCH");
- }
+ ret = posix_spawnattr_setbinpref_np(&spawnattr, sizeof(cpuprefs) / sizeof(cpuprefs[0]), cpuprefs, NULL);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawnattr_setbinpref_np");
+
+ spawn_ret = posix_spawn(&pid, path, NULL, &spawnattr, NULL, NULL);
+ T_ASSERT_EQ(spawn_ret, EBADARCH, "posix_spawn should return EBADARCH since only 32-bit binpref is requested");
+
+ ret = posix_spawnattr_destroy(&spawnattr);
+ T_QUIET; T_ASSERT_EQ(ret, 0, "posix_spawnattr_destroy");
}
-#include <darwintest.h>
+/* This file compiles as a 32-bit helper used to test forking and
+ * spawning of 32-bit programs, now that 32-bit support has been
+ * deprecated on macOS (though the watchOS simulator still
+ * requires it).
+ */
-T_DECL(null_test, "nothing to see here")
+#include <stdio.h>
+#include <unistd.h>
+
+int
+main(int argc __unused, char **argv)
{
- T_SKIP("nothing to see here");
+ (void)argc;
+ size_t retval = sizeof(void *);
+ printf("%s(%d): sizeof(void *) = %lu\n", argv[0], getpid(), retval);
+ return (int)retval;
}
+++ /dev/null
-#include <darwintest.h>
-#include <unistd.h>
-#include <signal.h>
-
-int can_signal_parent = 0;
-
-void
-signal_handler(int sig)
-{
- if (sig == SIGUSR1) {
- can_signal_parent = 1;
- }
- return;
-}
-
-T_DECL(no32exec_bootarg_with_spawn_binprefs_helper, "helper for no32exec_bootarg_with_spawn_binprefs test")
-{
- unsigned long ptrSize = sizeof(long);
- int ppid = getppid();
-
- signal(SIGUSR1, signal_handler);
- signal(SIGALRM, signal_handler);
-
- // parent will signal us if they're no32exec_bootarg_with_spawn_binprefs, otherwise timeout
- alarm(3);
- pause();
-
- /* signal to parent process if we are running in 64-bit mode */
- if (can_signal_parent && ptrSize == 8) {
- kill(ppid, SIGUSR1);
- }
-
- T_SKIP("nothing to see here");
-}
other = long(other)
return me.__cmp__(other)
if type(other) is value:
- return int(self).__cmp__(int(other))
+ try:
+ return int(self).__cmp__(int(other))
+ except TypeError: # Try promoting to long
+ return long(self).__cmp__(long(other))
raise TypeError("Cannot compare value with type {}".format(type(other)))
def __str__(self):
pset = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(psetoff)
print PrintPortSetSummary.header
PrintPortSetSummary(kern.GetValueFromAddress(pset, 'struct ipc_pset *'), space)
- elif int(wq_type) == 2:
+ elif int(wq_type) in [2, 1]:
portoff = getfieldoffset('struct ipc_port', 'ip_messages')
port = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(portoff)
print PrintPortSummary.header
return None
showmapvme(task.map, 0, 0, show_pager_info, show_all_shadows, False)
-@lldb_command("showmapvme", "A:B:PRST")
+@lldb_command("showmapvme", "A:B:F:PRST")
def ShowMapVME(cmd_args=None, cmd_options={}):
"""Routine to print out info about the specified vm_map and its vm entries
usage: showmapvme <vm_map> [-A start] [-B end] [-S] [-P]
Use -A <start> flag to start at virtual address <start>
Use -B <end> flag to end at virtual address <end>
+ Use -F <virtaddr> flag to find just the VME containing the given VA
Use -S flag to show VM object shadow chains
Use -P flag to show pager info (mapped file, compressed pages, ...)
Use -R flag to reverse order
start_vaddr = unsigned(int(cmd_options['-A'], 16))
if "-B" in cmd_options:
end_vaddr = unsigned(int(cmd_options['-B'], 16))
+ if "-F" in cmd_options:
+ start_vaddr = unsigned(int(cmd_options['-F'], 16))
+ end_vaddr = start_vaddr
if "-P" in cmd_options:
show_pager_info = True
if "-S" in cmd_options:
if "-A" in cmd_options:
all_tags = True
page_size = unsigned(kern.globals.page_size)
- nsites = unsigned(kern.globals.vm_allocation_tag_highest)
+ nsites = unsigned(kern.globals.vm_allocation_tag_highest) + 1
tagcounts = [0] * nsites
tagpeaks = [0] * nsites
tagmapped = [0] * nsites
total = 0
totalmapped = 0
- print " vm_allocation_tag_highest: {:<7d} ".format(nsites)
+ print " vm_allocation_tag_highest: {:<7d} ".format(nsites - 1)
print " {:<7s} {:>7s} {:>7s} {:>7s} {:<50s}".format("tag.kmod", "peak", "size", "mapped", "name")
for tag in range(nsites):
if all_tags or tagcounts[tag] or tagmapped[tag]:
if entry.is_sub_map == 1:
showmapvme(entry.backing.map, 0, 0, show_pager_info, show_all_shadows)
if entry.is_copy == 1:
- showmapcopyvme(entry.backing.copy, 0, 0, 0, show_pager_info, show_all_shadows, 0)
+ showmapcopyvme(entry.backing.copy, 0, 0, show_pager_info, show_all_shadows, 0)
if entry.is_sub_map == 0 and entry.is_copy == 0:
showvmobject(entry.backing.object, entry.offset, entry.size, show_pager_info, show_all_shadows)
if lp64 :
KDBG_TIMESTAMP_MASK = 0xffffffffffffffff
+ KDBG_CPU_SHIFT = 0
else :
KDBG_TIMESTAMP_MASK = 0x00ffffffffffffff
KDBG_CPU_SHIFT = 56
htab[min_kdbp].kd_prev_timebase += 1
e.timestamp = htab[min_kdbp].kd_prev_timebase & KDBG_TIMESTAMP_MASK
- e.timestamp |= (min_cpu << KDBG_CPU_SHIFT)
+ if not lp64:
+ e.timestamp |= (min_cpu << KDBG_CPU_SHIFT)
else :
htab[min_kdbp].kd_prev_timebase = earliest_time
else:
out_str += " - "
- if (unsigned(ledger.le_warn_level) != ledger_limit_infinity):
- out_str += "{:9d} ".format((unsigned(ledger.le_warn_level) * 100) / unsigned(ledger.le_limit))
+ if (unsigned(ledger.le_warn_percent) < 65535):
+ out_str += "{:9d} ".format(unsigned(ledger.le_warn_percent * 100. / 65536))
else:
out_str += " - "
def GetWaitqStateStr(waitq):
wq_types = {
0: 'INV',
- 1: '???',
+ 1: ' TS',
2: ' Q',
3: 'SET'
}
else:
print "{0: <#020x}".format(i)
-def iotrace_parse_Copt(Copt):
+def trace_parse_Copt(Copt):
"""Parses the -C option argument and returns a list of CPUs
"""
cpusOpt = Copt
return chosen_cpus
-@lldb_command('iotrace', 'C:N:S:RB')
-def IOTrace_cmd(cmd_args=[], cmd_options={}):
- """ Prints the iotrace ring buffers for all CPUs by default.
- Arguments:
- -B : Print backtraces for each ring entry
- -C <cpuSpec#>[,...,<cpuSpec#N>] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a
- single CPU number or a range separated by a dash (e.g. "0-3"))
- -N <count> : Limit output to the first <count> entries (across all chosen CPUs)
- -R : Display results in reverse-sorted order (oldest first; default is newest-first)
- -S <sort_key_field_name> : Sort output by specified iotrace_entry_t field name (instead of by timestamp)
+IDX_CPU = 0
+IDX_RINGPOS = 1
+IDX_RINGENTRY = 2
+def Trace_cmd(cmd_args=[], cmd_options={}, headerString=lambda:"", entryString=lambda x:"", ring=[], entries_per_cpu=0, max_backtraces=0):
+ """Generic trace dumper helper function
"""
- IDX_CPU = 0
- IDX_RINGPOS = 1
- IDX_RINGENTRY = 2
- MAX_IOTRACE_BACKTRACES = 16
-
- if kern.arch != "x86_64":
- print "Sorry, iotrace is an x86-only command."
- return
if '-S' in cmd_options:
field_arg = cmd_options['-S']
try:
- getattr(kern.globals.iotrace_ring[0][0], field_arg)
+ getattr(ring[0][0], field_arg)
sort_key_field_name = field_arg
except AttributeError:
raise ArgumentError("Invalid sort key field name `%s'" % field_arg)
sort_key_field_name = 'start_time_abs'
if '-C' in cmd_options:
- chosen_cpus = iotrace_parse_Copt(cmd_options['-C'])
+ chosen_cpus = trace_parse_Copt(cmd_options['-C'])
else:
chosen_cpus = [x for x in range(kern.globals.real_ncpus)]
# the original ring index, and the iotrace entry.
entries = []
for x in chosen_cpus:
- ring_slice = [(x, y, kern.globals.iotrace_ring[x][y]) for y in range(kern.globals.iotrace_entries_per_cpu)]
+ ring_slice = [(x, y, ring[x][y]) for y in range(entries_per_cpu)]
entries.extend(ring_slice)
total_entries = len(entries)
else:
entries_to_display = total_entries
- print "%-19s %-8s %-10s %-20s SZ %-18s %-17s DATA" % (
- "START TIME",
- "DURATION",
- "CPU#[RIDX]",
- " TYPE",
- " VIRT ADDR",
- " PHYS ADDR")
+ print headerString()
for x in xrange(entries_to_display):
- print "%-20u(%6u) %6s[%02d] %-20s %d 0x%016x 0x%016x 0x%x" % (
- entries[x][IDX_RINGENTRY].start_time_abs,
- entries[x][IDX_RINGENTRY].duration,
- "CPU%d" % entries[x][IDX_CPU],
- entries[x][IDX_RINGPOS],
- str(entries[x][IDX_RINGENTRY].iotype).split("=")[1].strip(),
- entries[x][IDX_RINGENTRY].size,
- entries[x][IDX_RINGENTRY].vaddr,
- entries[x][IDX_RINGENTRY].paddr,
- entries[x][IDX_RINGENTRY].val)
+ print entryString(entries[x])
+
if backtraces:
- for btidx in range(MAX_IOTRACE_BACKTRACES):
+ for btidx in range(max_backtraces):
nextbt = entries[x][IDX_RINGENTRY].backtrace[btidx]
if nextbt == 0:
break
print "\t" + GetSourceInformationForAddress(nextbt)
+
+
+@lldb_command('iotrace', 'C:N:S:RB')
+def IOTrace_cmd(cmd_args=[], cmd_options={}):
+ """ Prints the iotrace ring buffers for all CPUs by default.
+ Arguments:
+ -B : Print backtraces for each ring entry
+ -C <cpuSpec#>[,...,<cpuSpec#N>] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a
+ single CPU number or a range separated by a dash (e.g. "0-3"))
+ -N <count> : Limit output to the first <count> entries (across all chosen CPUs)
+ -R : Display results in reverse-sorted order (oldest first; default is newest-first)
+ -S <sort_key_field_name> : Sort output by specified iotrace_entry_t field name (instead of by timestamp)
+ """
+ MAX_IOTRACE_BACKTRACES = 16
+
+ if kern.arch != "x86_64":
+ print "Sorry, iotrace is an x86-only command."
+ return
+
+ hdrString = lambda : "%-19s %-8s %-10s %-20s SZ %-18s %-17s DATA" % (
+ "START TIME",
+ "DURATION",
+ "CPU#[RIDX]",
+ " TYPE",
+ " VIRT ADDR",
+ " PHYS ADDR")
+
+ entryString = lambda x : "%-20u(%6u) %6s[%02d] %-20s %-2d 0x%016x 0x%016x 0x%x" % (
+ x[IDX_RINGENTRY].start_time_abs,
+ x[IDX_RINGENTRY].duration,
+ "CPU%d" % x[IDX_CPU],
+ x[IDX_RINGPOS],
+ str(x[IDX_RINGENTRY].iotype).split("=")[1].strip(),
+ x[IDX_RINGENTRY].size,
+ x[IDX_RINGENTRY].vaddr,
+ x[IDX_RINGENTRY].paddr,
+ x[IDX_RINGENTRY].val)
+
+ Trace_cmd(cmd_args, cmd_options, hdrString, entryString, kern.globals.iotrace_ring, kern.globals.iotrace_entries_per_cpu, MAX_IOTRACE_BACKTRACES)
+
+
+@lldb_command('ttrace', 'C:N:S:RB')
+def TrapTrace_cmd(cmd_args=[], cmd_options={}):
+ """ Prints the iotrace ring buffers for all CPUs by default.
+ Arguments:
+ -B : Print backtraces for each ring entry
+ -C <cpuSpec#>[,...,<cpuSpec#N>] : Limit trace entries to those generated by the specified CPUs (each cpuSpec can be a
+ single CPU number or a range separated by a dash (e.g. "0-3"))
+ -N <count> : Limit output to the first <count> entries (across all chosen CPUs)
+ -R : Display results in reverse-sorted order (oldest first; default is newest-first)
+ -S <sort_key_field_name> : Sort output by specified traptrace_entry_t field name (instead of by timestamp)
+ """
+ MAX_TRAPTRACE_BACKTRACES = 8
+
+ if kern.arch != "x86_64":
+ print "Sorry, ttrace is an x86-only command."
+ return
+
+ hdrString = lambda : "%-30s CPU#[RIDX] VECT INTERRUPTED_THREAD PREMLV INTRLV INTERRUPTED_PC" % (
+ "START TIME (DURATION [ns])")
+ entryString = lambda x : "%-20u(%6s) %8s[%02d] 0x%02x 0x%016x %6d %6d %s" % (
+ x[IDX_RINGENTRY].start_time_abs,
+ str(x[IDX_RINGENTRY].duration) if hex(x[IDX_RINGENTRY].duration) != "0xffffffffffffffff" else 'inprog',
+ "CPU%d" % x[IDX_CPU],
+ x[IDX_RINGPOS],
+ int(x[IDX_RINGENTRY].vector),
+ x[IDX_RINGENTRY].curthread,
+ x[IDX_RINGENTRY].curpl,
+ x[IDX_RINGENTRY].curil,
+ GetSourceInformationForAddress(x[IDX_RINGENTRY].interrupted_pc))
+
+ Trace_cmd(cmd_args, cmd_options, hdrString, entryString, kern.globals.traptrace_ring,
+ kern.globals.traptrace_entries_per_cpu, MAX_TRAPTRACE_BACKTRACES)
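+# Editorial sketch (not part of the diff): any other per-CPU ring with the
+# same shape could reuse Trace_cmd in the same way. The command name, ring,
+# and field names below are hypothetical; only Trace_cmd, its parameters, and
+# the IDX_* constants come from the code above.
+#
+# @lldb_command('exampletrace', 'C:N:S:RB')
+# def ExampleTrace_cmd(cmd_args=[], cmd_options={}):
+#     hdrString = lambda: "%-20s %-10s DATA" % ("START TIME", "CPU#[RIDX]")
+#     entryString = lambda x: "%-20u %6s[%02d] 0x%x" % (
+#         x[IDX_RINGENTRY].start_time_abs,
+#         "CPU%d" % x[IDX_CPU],
+#         x[IDX_RINGPOS],
+#         x[IDX_RINGENTRY].val)
+#     Trace_cmd(cmd_args, cmd_options, hdrString, entryString,
+#         kern.globals.example_ring, kern.globals.example_entries_per_cpu, 0)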
#define CONSTRAINT_NANOS (20000000ll) /* 20 ms */
#define COMPUTATION_NANOS (10000000ll) /* 10 ms */
+#define RT_CHURN_COMP_NANOS ( 1000000ll) /* 1 ms */
#define TRACEWORTHY_NANOS (10000000ll) /* 10 ms */
#define TRACEWORTHY_NANOS_TEST ( 2000000ll) /* 2 ms */
static uint32_t g_priority = 0;
static uint32_t g_churn_pri = 0;
static uint32_t g_churn_count = 0;
+static uint32_t g_rt_churn_count = 0;
static pthread_t* g_churn_threads = NULL;
+static pthread_t* g_rt_churn_threads = NULL;
/* Threshold for dropping a 'bad run' tracepoint */
static uint64_t g_traceworthy_latency_ns = TRACEWORTHY_NANOS;
/* Test whether realtime threads are scheduled on the separate CPUs */
static boolean_t g_test_rt = FALSE;
+static boolean_t g_rt_churn = FALSE;
+
/* On SMT machines, test whether realtime threads are scheduled on the correct CPUs */
static boolean_t g_test_rt_smt = FALSE;
static semaphore_t g_leadersem;
static semaphore_t g_readysem;
static semaphore_t g_donesem;
+static semaphore_t g_rt_churn_sem;
+static semaphore_t g_rt_churn_start_sem;
/* Global variables (chain) */
static semaphore_t *g_semarr;
}
}
+/*
+ * Set policy
+ */
+static int
+rt_churn_thread_setup(void)
+{
+ kern_return_t kr;
+ thread_time_constraint_policy_data_t pol;
+
+ /* Hard-coded realtime parameters (similar to what Digi uses) */
+ pol.period = 100000;
+ pol.constraint = (uint32_t) nanos_to_abs(CONSTRAINT_NANOS * 2);
+ pol.computation = (uint32_t) nanos_to_abs(RT_CHURN_COMP_NANOS * 2);
+ pol.preemptible = 0; /* Ignored by OS */
+
+ kr = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY,
+ (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
+ mach_assert_zero_t(0, kr);
+
+ return 0;
+}
+
+static void *
+rt_churn_thread(__unused void *arg)
+{
+ rt_churn_thread_setup();
+
+ for (uint32_t i = 0; i < g_iterations; i++) {
+ kern_return_t kr = semaphore_wait_signal(g_rt_churn_start_sem, g_rt_churn_sem);
+ mach_assert_zero_t(0, kr);
+
+ volatile double x = 0.0;
+ volatile double y = 0.0;
+
+ uint64_t endspin = mach_absolute_time() + nanos_to_abs(RT_CHURN_COMP_NANOS);
+ while (mach_absolute_time() < endspin) {
+ y = y + 1.5 + x;
+ x = sqrt(y);
+ }
+ }
+
+ kern_return_t kr = semaphore_signal(g_rt_churn_sem);
+ mach_assert_zero_t(0, kr);
+
+ return NULL;
+}
+
+static void
+wait_for_rt_churn_threads(void)
+{
+ for (uint32_t i = 0; i < g_rt_churn_count; i++) {
+ kern_return_t kr = semaphore_wait(g_rt_churn_sem);
+ mach_assert_zero_t(0, kr);
+ }
+}
+
+static void
+start_rt_churn_threads(void)
+{
+ for (uint32_t i = 0; i < g_rt_churn_count; i++) {
+ kern_return_t kr = semaphore_signal(g_rt_churn_start_sem);
+ mach_assert_zero_t(0, kr);
+ }
+}
+
+static void
+create_rt_churn_threads(void)
+{
+ if (g_rt_churn_count == 0) {
+ /* Leave 1 CPU to ensure that the main thread can make progress */
+ g_rt_churn_count = g_numcpus - 1;
+ }
+
+ errno_t err;
+
+ struct sched_param param = { .sched_priority = (int)g_churn_pri };
+ pthread_attr_t attr;
+
+ /* Array for rt churn threads */
+ g_rt_churn_threads = (pthread_t*) valloc(sizeof(pthread_t) * g_rt_churn_count);
+ assert(g_rt_churn_threads);
+
+ if ((err = pthread_attr_init(&attr))) {
+ errc(EX_OSERR, err, "pthread_attr_init");
+ }
+
+ if ((err = pthread_attr_setschedparam(&attr, &param))) {
+ errc(EX_OSERR, err, "pthread_attr_setschedparam");
+ }
+
+ if ((err = pthread_attr_setschedpolicy(&attr, SCHED_RR))) {
+ errc(EX_OSERR, err, "pthread_attr_setschedpolicy");
+ }
+
+ for (uint32_t i = 0; i < g_rt_churn_count; i++) {
+ pthread_t new_thread;
+
+ if ((err = pthread_create(&new_thread, &attr, rt_churn_thread, NULL))) {
+ errc(EX_OSERR, err, "pthread_create");
+ }
+ g_rt_churn_threads[i] = new_thread;
+ }
+
+ if ((err = pthread_attr_destroy(&attr))) {
+ errc(EX_OSERR, err, "pthread_attr_destroy");
+ }
+
+ /* Wait until all threads have checked in */
+ wait_for_rt_churn_threads();
+}
+
+static void
+join_rt_churn_threads(void)
+{
+ /* Rejoin rt churn threads */
+ for (uint32_t i = 0; i < g_rt_churn_count; i++) {
+ errno_t err = pthread_join(g_rt_churn_threads[i], NULL);
+ if (err) {
+ errc(EX_OSERR, err, "pthread_join %d", i);
+ }
+ }
+}
+
/*
* Figure out what thread policy to use
*/
kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
mach_assert_zero(kr);
+ kr = semaphore_create(mach_task_self(), &g_rt_churn_sem, SYNC_POLICY_FIFO, 0);
+ mach_assert_zero(kr);
+
+ kr = semaphore_create(mach_task_self(), &g_rt_churn_start_sem, SYNC_POLICY_FIFO, 0);
+ mach_assert_zero(kr);
+
atomic_store_explicit(&g_done_threads, 0, memory_order_relaxed);
/* Create the threads */
if (g_churn_pri) {
create_churn_threads();
}
+ if (g_rt_churn) {
+ create_rt_churn_threads();
+ }
/* Let everyone get settled */
kr = semaphore_wait(g_main_sem);
g_one_long_spin_id = (uint32_t)rand() % g_numthreads;
}
+ if (g_rt_churn) {
+ start_rt_churn_threads();
+ usleep(100);
+ }
+
debug_log("%d Main thread reset\n", i);
atomic_store_explicit(&g_done_threads, 0, memory_order_seq_cst);
assert(atomic_load_explicit(&g_done_threads, memory_order_relaxed) == g_numthreads);
+ if (g_rt_churn) {
+ wait_for_rt_churn_threads();
+ }
+
/*
* We report the worst latencies relative to start time
* and relative to the lead worker thread.
}
}
+ if (g_rt_churn) {
+ join_rt_churn_threads();
+ }
+
if (g_churn_pri) {
join_churn_threads();
}
OPT_PRIORITY,
OPT_CHURN_PRI,
OPT_CHURN_COUNT,
+ OPT_RT_CHURN_COUNT,
};
static struct option longopts[] = {
{ "priority", required_argument, NULL, OPT_PRIORITY },
{ "churn-pri", required_argument, NULL, OPT_CHURN_PRI },
{ "churn-count", required_argument, NULL, OPT_CHURN_COUNT },
+ { "rt-churn-count", required_argument, NULL, OPT_RT_CHURN_COUNT },
{ "switched_apptype", no_argument, (int*)&g_seen_apptype, TRUE },
{ "spin-one", no_argument, (int*)&g_do_one_long_spin, TRUE },
{ "spin-all", no_argument, (int*)&g_do_all_spin, TRUE },
{ "test-rt", no_argument, (int*)&g_test_rt, TRUE },
{ "test-rt-smt", no_argument, (int*)&g_test_rt_smt, TRUE },
{ "test-rt-avoid0", no_argument, (int*)&g_test_rt_avoid0, TRUE },
+ { "rt-churn", no_argument, (int*)&g_rt_churn, TRUE },
{ "histogram", no_argument, (int*)&g_histogram, TRUE },
{ "verbose", no_argument, (int*)&g_verbose, TRUE },
{ "help", no_argument, NULL, 'h' },
case OPT_CHURN_COUNT:
g_churn_count = read_dec_arg();
break;
+ case OPT_RT_CHURN_COUNT:
+ g_rt_churn_count = read_dec_arg();
+ break;
case '?':
case 'h':
default:
--- /dev/null
+#!/usr/local/bin/recon
+
+local ktrace = require 'ktrace'
+
+if not arg[1] or arg[1] == '-h' then
+ print[[
+usage: ktruss <syscall-name> [<more-names> ...]
+
+Use Kernel TRace to print User Space Syscalls (ktruss).]]
+ os.exit(arg[1] ~= nil)
+end
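+
+-- Example (hypothetical arguments): running `ktruss read write` registers
+-- start/finish callbacks for the BSC_read and BSC_write events below and
+-- prints one formatted line per traced syscall.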
+
+local sess = ktrace.Session.new()
+
+for i = 1, #arg do
+ sess:add_callback_pair('BSC_' .. arg[i], function (start, finish)
+ print(('%s[%d]: %s(0x%x, 0x%x, 0x%x, 0x%x) -> %d'):format(
+ sess:procname_for_threadid(start.threadid),
+ sess:pid_for_threadid(start.threadid), arg[i], start[1], start[2],
+ start[3], start[4], finish[2]))
+ end)
+end
+
+local ok, err = sess:start()
+if not ok then
+ io.stderr:write('tracing failed: ', err, '\n')
+ os.exit(1)
+end