bsd/dev/arm64/cpu_in_cksum.s standard
+#if defined(KERNEL_INTEGRITY_CTRR)
+bsd/tests/ctrr_test_sysctl.c optional config_xnupost
+#endif /* defined(KERNEL_INTEGRITY_CTRR) */
bsd/dev/arm64/dtrace_isa.c optional config_dtrace
bsd/dev/arm64/dtrace_subr_arm.c optional config_dtrace
extern lck_attr_t *dtrace_lck_attr;
extern lck_grp_t *dtrace_lck_grp;
+#if XNU_MONITOR
+extern void * pmap_stacks_start;
+extern void * pmap_stacks_end;
+#endif
struct frame {
struct frame *backchain;
}
}
+#if XNU_MONITOR
+static inline boolean_t
+dtrace_frame_in_ppl_stack(struct frame * fp)
+{
+ return ((void *)fp >= pmap_stacks_start) &&
+ ((void *)fp < pmap_stacks_end);
+}
+#endif
void
dtrace_getpcstack(pc_t * pcstack, int pcstack_limit, int aframes,
struct frame *nextfp, *minfp, *stacktop;
int depth = 0;
int on_intr;
+#if XNU_MONITOR
+ int on_ppl_stack;
+#endif
int last = 0;
uintptr_t pc;
uintptr_t caller = CPU->cpu_dtrace_caller;
if ((on_intr = CPU_ON_INTR(CPU)) != 0) {
stacktop = (struct frame *) dtrace_get_cpu_int_stack_top();
}
+#if XNU_MONITOR
+ else if ((on_ppl_stack = dtrace_frame_in_ppl_stack(fp))) {
+ stacktop = (struct frame *) pmap_stacks_end;
+ }
+#endif
else {
stacktop = (struct frame *) (dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
}
if (arm_kern_regs) {
nextfp = (struct frame *)(saved_state64(arm_kern_regs)->fp);
+#if XNU_MONITOR
+ on_ppl_stack = dtrace_frame_in_ppl_stack(nextfp);
+
+ if (on_ppl_stack) {
+ minfp = pmap_stacks_start;
+ stacktop = pmap_stacks_end;
+ } else
+#endif
{
vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());
last = 1;
}
} else {
+#if XNU_MONITOR
+ if ((!on_ppl_stack) && dtrace_frame_in_ppl_stack(nextfp)) {
+ /*
+ * We are switching from the kernel stack
+ * to the PPL stack.
+ */
+ on_ppl_stack = 1;
+ minfp = pmap_stacks_start;
+ stacktop = pmap_stacks_end;
+ } else if (on_ppl_stack) {
+ /*
+ * We could be going from the PPL stack
+ * to the kernel stack.
+ */
+ vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());
+
+ minfp = (struct frame *)kstack_base;
+ stacktop = (struct frame *)(kstack_base + kernel_stack_size);
+
+ if (nextfp <= minfp || nextfp >= stacktop) {
+ last = 1;
+ }
+ } else
+#endif
{
/*
* This is the last frame we can process; indicate
0, 0, sysctl_wake_conttime, "I",
"Continuous Time at the last wakeup");
+#if defined(HAS_IPI)
+static int
+cpu_signal_deferred_timer(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+ int new_value = 0;
+ int changed = 0;
+
+ int old_value = (int)ml_cpu_signal_deferred_get_timer();
+
+ int error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed);
+
+ if (error == 0 && changed) {
+ ml_cpu_signal_deferred_adjust_timer((uint64_t)new_value);
+ }
+
+ return error;
+}
+
+SYSCTL_PROC(_machdep, OID_AUTO, deferred_ipi_timeout,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+ 0, 0,
+ cpu_signal_deferred_timer, "I", "Deferred IPI timeout (nanoseconds)");
+
+#endif /* defined(HAS_IPI) */
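A minimal userspace sketch of exercising the machdep.deferred_ipi_timeout control added above (assumes a kernel built with HAS_IPI; the 64000 ns value is purely illustrative, not a recommended setting):

    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
        int timeout = 0;
        size_t len = sizeof(timeout);
        /* Read the current deferred IPI timeout (nanoseconds). */
        if (sysctlbyname("machdep.deferred_ipi_timeout", &timeout, &len, NULL, 0) == 0) {
            printf("deferred IPI timeout: %d ns\n", timeout);
        }
        /* Write a new value; the handler forwards it to ml_cpu_signal_deferred_adjust_timer(). */
        int new_timeout = 64000; /* hypothetical value */
        return sysctlbyname("machdep.deferred_ipi_timeout", NULL, NULL, &new_timeout, sizeof(new_timeout));
    }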
/*
* For source compatibility, here's some machdep.cpu mibs that
#include <security/audit/audit.h>
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif /* CONFIG_MACF */
+
#if CONFIG_CSR
#include <sys/codesign.h>
#include <sys/csr.h>
* coredump_flags Extra options (ignore rlimit, run fsync)
*
* Returns: 0 Success
- * EFAULT Failed
+ * !0 Failure errno
*
* IMPORTANT: This function can only be called on the current process, due
* to assumptions below; see variable declaration section for
int error1 = 0;
char stack_name[MAXCOMLEN + 6];
char *alloced_name = NULL;
- char *name;
+ char *name = NULL;
mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
vm_size_t mapsize;
int i;
((sugid_coredump == 0) && /* Not dumping SUID/SGID binaries */
((kauth_cred_getsvuid(cred) != kauth_cred_getruid(cred)) ||
(kauth_cred_getsvgid(cred) != kauth_cred_getrgid(cred))))) {
-#if CONFIG_AUDIT
- audit_proc_coredump(core_proc, NULL, EFAULT);
-#endif
- return EFAULT;
+ error = EFAULT;
+ goto out2;
+ }
+
+#if CONFIG_MACF
+ error = mac_proc_check_dump_core(core_proc);
+ if (error != 0) {
+ goto out2;
}
+#endif
#if CONFIG_CSR
/* If the process is restricted, CSR isn't configured to allow
if (cs_restricted(core_proc) &&
csr_check(CSR_ALLOW_TASK_FOR_PID) &&
csr_check(CSR_ALLOW_APPLE_INTERNAL)) {
-#if CONFIG_AUDIT
- audit_proc_coredump(core_proc, NULL, EFAULT);
-#endif
- return EFAULT;
+ error = EPERM;
+ goto out2;
}
#endif
if (((coredump_flags & COREDUMP_IGNORE_ULIMIT) == 0) &&
(mapsize >= core_proc->p_rlimit[RLIMIT_CORE].rlim_cur)) {
- return EFAULT;
+ error = EFAULT;
+ goto out2;
}
(void) task_suspend_internal(task);
* - Raise the jetsam threshold ("clear-the-deck")
* - Enabled parallel jetsam on eligible devices
*/
+#if __AMP__
+int fast_jetsam_enabled = 1;
+#else /* __AMP__ */
int fast_jetsam_enabled = 0;
+#endif /* __AMP__ */
/* Routine to find the jetsam state structure for the current jetsam thread */
static inline struct jetsam_thread_state *
(void *)PERFCONTROL_STAT_CYCLES, PERFCONTROL_CALLOUT_STATE_UPDATE,
sysctl_perfcontrol_callout_stat, "I", "");
+#if __AMP__
+extern int sched_amp_idle_steal;
+SYSCTL_INT(_kern, OID_AUTO, sched_amp_idle_steal,
+ CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &sched_amp_idle_steal, 0, "");
+extern int sched_amp_spill_steal;
+SYSCTL_INT(_kern, OID_AUTO, sched_amp_spill_steal,
+ CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &sched_amp_spill_steal, 0, "");
+extern int sched_amp_spill_count;
+SYSCTL_INT(_kern, OID_AUTO, sched_amp_spill_count,
+ CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &sched_amp_spill_count, 0, "");
+extern int sched_amp_spill_deferred_ipi;
+SYSCTL_INT(_kern, OID_AUTO, sched_amp_spill_deferred_ipi,
+ CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &sched_amp_spill_deferred_ipi, 0, "");
+extern int sched_amp_pcores_preempt_immediate_ipi;
+SYSCTL_INT(_kern, OID_AUTO, sched_amp_pcores_preempt_immediate_ipi,
+ CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &sched_amp_pcores_preempt_immediate_ipi, 0, "");
+#endif /* __AMP__ */
#endif /* __arm__ || __arm64__ */
#if __arm64__
}
if (uap->opt & RB_PANIC && uap->msg != USER_ADDR_NULL) {
- if (copyinstr(uap->msg, (void *)message, sizeof(message), (size_t *)&dummy)) {
+ int copy_error = copyinstr(uap->msg, (void *)message, sizeof(message), (size_t *)&dummy);
+ if (copy_error != 0 && copy_error != ENAMETOOLONG) {
strncpy(message, "user space RB_PANIC message copyin failed", sizeof(message) - 1);
+ } else {
+ message[sizeof(message) - 1] = '\0';
}
}
return rv;
}
-#if (MAC_POLICY_OPS_VERSION != 58)
+#if (MAC_POLICY_OPS_VERSION != 59)
# error "struct mac_policy_ops doesn't match definition in mac_policy.h"
#endif
/*
CHECK_SET_HOOK(proc_check_setlcid)
CHECK_SET_HOOK(proc_check_signal)
CHECK_SET_HOOK(proc_check_wait)
+ CHECK_SET_HOOK(proc_check_dump_core)
.mpo_reserved5 = (mpo_reserved_hook_t *)common_hook,
- .mpo_reserved6 = (mpo_reserved_hook_t *)common_hook,
CHECK_SET_HOOK(socket_check_accept)
CHECK_SET_HOOK(socket_check_accepted)
#endif /* CONFIG_MACH_BRIDGE_RECV_TIME */
#if DEVELOPMENT || DEBUG
+#if __AMP__
+#include <pexpert/pexpert.h>
+extern int32_t sysctl_get_bound_cpuid(void);
+extern void sysctl_thread_bind_cpuid(int32_t cpuid);
+static int
+sysctl_kern_sched_thread_bind_cpu SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+
+ if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
+ return ENOENT;
+ }
+
+ int32_t cpuid = sysctl_get_bound_cpuid();
+
+ int32_t new_value;
+ int changed;
+ int error = sysctl_io_number(req, cpuid, sizeof cpuid, &new_value, &changed);
+ if (error) {
+ return error;
+ }
+
+ if (changed) {
+ sysctl_thread_bind_cpuid(new_value);
+ }
+
+ return error;
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cpu, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+ 0, 0, sysctl_kern_sched_thread_bind_cpu, "I", "");
+
+extern char sysctl_get_bound_cluster_type(void);
+extern void sysctl_thread_bind_cluster_type(char cluster_type);
+static int
+sysctl_kern_sched_thread_bind_cluster_type SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ char buff[4];
+
+ if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
+ return ENOENT;
+ }
+
+ int error = SYSCTL_IN(req, buff, 1);
+ if (error) {
+ return error;
+ }
+ char cluster_type = buff[0];
+
+ if (!req->newptr) {
+ goto out;
+ }
+
+ sysctl_thread_bind_cluster_type(cluster_type);
+out:
+ cluster_type = sysctl_get_bound_cluster_type();
+ buff[0] = cluster_type;
+
+ return SYSCTL_OUT(req, buff, 1);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cluster_type, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
+ 0, 0, sysctl_kern_sched_thread_bind_cluster_type, "A", "");
+
+extern char sysctl_get_task_cluster_type(void);
+extern void sysctl_task_set_cluster_type(char cluster_type);
+static int
+sysctl_kern_sched_task_set_cluster_type SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+ char buff[4];
+
+ if (!PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
+ return ENOENT;
+ }
+
+ int error = SYSCTL_IN(req, buff, 1);
+ if (error) {
+ return error;
+ }
+ char cluster_type = buff[0];
+
+ if (!req->newptr) {
+ goto out;
+ }
+
+ sysctl_task_set_cluster_type(cluster_type);
+out:
+ cluster_type = sysctl_get_task_cluster_type();
+ buff[0] = cluster_type;
+
+ return SYSCTL_OUT(req, buff, 1);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, sched_task_set_cluster_type, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
+ 0, 0, sysctl_kern_sched_task_set_cluster_type, "A", "");
+#endif /* __AMP__ */
#endif /* DEVELOPMENT || DEBUG */
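To illustrate the single-character handshake used by the cluster-binding controls above, here is a hedged userspace sketch (requires the enable_skstb boot-arg per the handler; the 'P' value for the performance cluster is an assumption, since the accepted characters are not shown in this diff):

    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* Ask the scheduler to bind the calling thread to a cluster type. */
        char cluster = 'P'; /* assumed to mean the performance cluster */
        if (sysctlbyname("kern.sched_thread_bind_cluster_type",
            NULL, NULL, &cluster, sizeof(cluster)) != 0) {
            perror("sysctlbyname");
            return 1;
        }
        /* Read back the binding; the handler always returns a single character. */
        char bound = 0;
        size_t len = sizeof(bound);
        if (sysctlbyname("kern.sched_thread_bind_cluster_type", &bound, &len, NULL, 0) == 0) {
            printf("bound cluster type: %c\n", bound);
        }
        return 0;
    }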
extern uint32_t task_exc_guard_default;
#include <net/if_llatbl.h>
#include <net/net_api_stats.h>
#include <net/if_ports_used.h>
+#include <net/if_vlan_var.h>
#include <netinet/in.h>
#if INET
#include <netinet/in_var.h>
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
+extern void if_headless_init(void);
+
void
if_headless_init(void)
{
#define IPSEC_FLAGS_KPIPE_ALLOCATED 1
/* data movement refcounting functions */
-static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
-static void ipsec_data_move_end(struct ipsec_pcb *pcb);
static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);
/* Data path states */
bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
}
+#if IPSEC_NEXUS
/*
* Mark the data path as ready.
* If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected.
IPSEC_SET_DATA_PATH_READY(pcb);
lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}
+#endif
/* The interfaces resoures allocated, mark it as running */
ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);
}
}
-static boolean_t
-ipsec_data_move_begin(struct ipsec_pcb *pcb)
-{
- boolean_t ret = 0;
-
- lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
- if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
- pcb->ipsec_pcb_data_move++;
- }
- lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
-
- return ret;
-}
-
-static void
-ipsec_data_move_end(struct ipsec_pcb *pcb)
-{
- lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
- VERIFY(pcb->ipsec_pcb_data_move > 0);
- /*
- * if there's no more thread moving data, wakeup any
- * drainers that's blocked waiting for this.
- */
- if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
- wakeup(&(pcb->ipsec_pcb_data_move));
- }
- lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
-}
static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
struct flow_divert_trie new_trie;
int insert_error = 0;
size_t nodes_mem_size;
- int prefix_count = 0;
+ int prefix_count = -1;
int signing_id_count = 0;
size_t trie_memory_size = 0;
memset(&new_trie, 0, sizeof(new_trie));
/* Get the number of shared prefixes in the new set of signing ID strings */
- flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);
+ error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL);
- if (prefix_count < 0) {
+ if (prefix_count < 0 || error) {
+ FDLOG(LOG_ERR, &nil_pcb, "Invalid prefix count (%d) or an error occurred while reading the prefix count: %d", prefix_count, error);
lck_rw_done(&group->lck);
return;
}
cursor >= 0;
cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
uint32_t sid_size = 0;
- flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
+ error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
+ if (error || sid_size == 0) {
+ FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d: %d", cursor, error);
+ signing_id_count = 0;
+ break;
+ }
new_trie.bytes_count += sid_size;
signing_id_count++;
}
new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size);
new_trie.bytes_free_next = 0;
+ memset(new_trie.bytes, 0, bytes_mem_size);
/* The root is an empty node */
new_trie.root = trie_node_alloc(&new_trie);
cursor >= 0;
cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) {
uint32_t sid_size = 0;
- flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
+ error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size);
+ if (error || sid_size == 0) {
+ FDLOG(LOG_ERR, &nil_pcb, "Failed to get the length of the signing identifier at offset %d while building: %d", cursor, error);
+ insert_error = EINVAL;
+ break;
+ }
if (new_trie.bytes_free_next + sid_size <= new_trie.bytes_count) {
uint16_t new_node_idx;
- flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
+ error = flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL);
+ if (error) {
+ FDLOG(LOG_ERR, &nil_pcb, "Failed to read the signing identifier at offset %d: %d", cursor, error);
+ insert_error = EINVAL;
+ break;
+ }
new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size);
if (new_node_idx == NULL_TRIE_IDX) {
insert_error = EINVAL;
}
if (sbappendaddr(&so->so_rcv, SA(dst), NULL, m_mtu, NULL) == 0) {
- m_freem(m_mtu);
- /* XXX: should count statistics */
- } else {
- sorwakeup(so);
+ return;
}
+ sorwakeup(so);
}
/*
{
error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &np->n_vnode);
}
-notsup:
if (error) {
FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
nfs_node_unlock(np);
{
vnode_t vp = ap->a_vp;
nfsnode_t np = VTONFS(vp);
- vfs_context_t ctx = ap->a_context;
struct nfs_open_file *nofp, *nextnofp;
struct nfs_file_lock *nflp, *nextnflp;
struct nfs_lock_owner *nlop, *nextnlop;
*/
int
nfsm_chain_add_v3sattr_f(
- struct nfsmount *nmp,
+ __unused struct nfsmount *nmp,
struct nfsm_chain *nmc,
struct vnode_attr *vap)
{
}
int
-nfsm_rpchead2(struct nfsmount *nmp, int sotype, int prog, int vers, int proc, int auth_type,
+nfsm_rpchead2(__unused struct nfsmount *nmp, int sotype, int prog, int vers, int proc, int auth_type,
kauth_cred_t cred, struct nfsreq *req, mbuf_t mrest, u_int64_t *xidp, mbuf_t *mreqp)
{
mbuf_t mreq, mb;
*/
int
nfs_parsefattr(
- struct nfsmount *nmp,
+ __unused struct nfsmount *nmp,
struct nfsm_chain *nmc,
int nfsvers,
struct nfs_vattr *nvap)
* Return an NFS volume name from the mntfrom name.
*/
static void
-nfs_get_volname(struct mount *mp, char *volname, size_t len, vfs_context_t ctx)
+nfs_get_volname(struct mount *mp, char *volname, size_t len, __unused vfs_context_t ctx)
{
const char *ptr, *cptr;
const char *mntfrom = mp->mnt_vfsstat.f_mntfromname;
- struct nfsmount *nmp = VFSTONFS(mp);
size_t mflen;
struct nfs_open_file *nofp,
uint32_t accessMode,
uint32_t denyMode,
- vfs_context_t ctx)
+ __unused vfs_context_t ctx)
{
#if CONFIG_NFS4
struct nfs_lock_owner *nlop;
return error;
}
-static int
-nfs_parse_user_access(
- mount_t mp,
- enum vtype type)
-{
- int user_access = R_OK;
- if ((vfs_flags(mp) & MNT_RDONLY) == 0) {
- user_access |= W_OK;
- }
- if (type == VDIR) {
- user_access |= X_OK;
- }
- return user_access;
-}
/*
* NFS getattr call from vfs.
return fixedpri_rv;
}
+
return 0;
}
#endif
kern_return_t kalloc_test(void);
kern_return_t ipi_test(void);
+#if defined(KERNEL_INTEGRITY_CTRR)
+extern kern_return_t ctrr_test(void);
+#endif
#if __ARM_PAN_AVAILABLE__
extern kern_return_t arm64_late_pan_test(void);
#endif
#ifdef __arm64__
XNUPOST_TEST_CONFIG_BASIC(arm64_lock_test),
#endif
+#if defined(KERNEL_INTEGRITY_CTRR)
+ XNUPOST_TEST_CONFIG_BASIC(ctrr_test),
+#endif
#if __ARM_PAN_AVAILABLE__
XNUPOST_TEST_CONFIG_BASIC(arm64_late_pan_test),
#endif
#include <sys/sysctl.h>
+#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
+extern kern_return_t ctrr_test(void);
+
+static int
+sysctl_run_ctrr_test(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+ unsigned int dummy;
+ int error, changed;
+ error = sysctl_io_number(req, 0, sizeof(dummy), &dummy, &changed);
+ if (error || !changed) {
+ return error;
+ }
+ return ctrr_test();
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, run_ctrr_test,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+ 0, 0, sysctl_run_ctrr_test, "I", "");
+#endif /* defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST) */
return error;
}
mp = nd.ni_vp->v_mount;
+ mount_ref(mp, 0);
vnode_put(nd.ni_vp);
nameidone(&nd);
break;
} /* switch */
+ mount_drop(mp, 0);
return error;
}
#else
-19.2.0
+19.3.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
replyMsg.m.msg64.notifyHdr.size = sizeof(IOAsyncCompletionContent)
+ numArgs * sizeof(io_user_reference_t);
replyMsg.m.msg64.notifyHdr.type = kIOAsyncCompletionNotificationType;
- bcopy(reference, replyMsg.m.msg64.notifyHdr.reference, sizeof(OSAsyncReference64));
+ /* Copy reference except for reference[0], which is left as 0 from the earlier bzero */
+ bcopy(&reference[1], &replyMsg.m.msg64.notifyHdr.reference[1], sizeof(OSAsyncReference64) - sizeof(reference[0]));
replyMsg.m.msg64.asyncContent.result = result;
if (numArgs) {
+ numArgs * sizeof(uint32_t);
replyMsg.m.msg32.notifyHdr.type = kIOAsyncCompletionNotificationType;
- for (idx = 0; idx < kOSAsyncRefCount; idx++) {
+ /* Skip reference[0] which is left as 0 from the earlier bzero */
+ for (idx = 1; idx < kOSAsyncRefCount; idx++) {
replyMsg.m.msg32.notifyHdr.reference[idx] = REF32(reference[idx]);
}
boolean_t up_style_idle_exit = 0;
+#if HAS_NEX_PG
+uint32_t nex_pg = 1;
+extern void set_nex_pg(void);
+#endif
+#if HAS_BP_RET
+/* Enable both branch target retention (0x2) and branch direction retention (0x1) across sleep */
+uint32_t bp_ret = 3;
+extern void set_bp_ret(void);
+#endif
#if INTERRUPT_MASKED_DEBUG
boolean_t interrupt_masked_debug = 1;
PE_parse_boot_argn("interrupt_masked_debug_timeout", &interrupt_masked_timeout, sizeof(interrupt_masked_timeout));
#endif
+#if HAS_NEX_PG
+ PE_parse_boot_argn("nexpg", &nex_pg, sizeof(nex_pg));
+ set_nex_pg(); // Apply NEX powergating settings to boot CPU
+#endif
+#if HAS_BP_RET
+ PE_parse_boot_argn("bpret", &bp_ret, sizeof(bp_ret));
+ set_bp_ret(); // Apply branch predictor retention settings to boot CPU
+#endif
PE_parse_boot_argn("immediate_NMI", &force_immediate_debug_halt, sizeof(force_immediate_debug_halt));
mt_wake_per_core();
#endif /* MONOTONIC && defined(__arm64__) */
+#if defined(KERNEL_INTEGRITY_CTRR)
+ if (cpu_data_ptr->cluster_master) {
+ lck_spin_lock(&ctrr_cpu_start_lck);
+ ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] = 1;
+ thread_wakeup(&ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id]);
+ lck_spin_unlock(&ctrr_cpu_start_lck);
+ }
+#endif
slave_main(NULL);
}
#define OS_ATOMIC_USE_LLSC 0
#endif
+#if defined(__ARM_ARCH_8_4__) && defined(__arm64__)
+/* On armv8.4, a 16-byte aligned load/store pair is atomic. */
+#undef os_atomic_load_is_plain
+#define os_atomic_load_is_plain(p) \
+ (sizeof(*(p)) <= 16 && _Alignof(typeof(*(p))) >= sizeof(*(p)))
+#endif
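A purely illustrative sketch of what the armv8.4 definition above admits as a "plain" load; the u128_t name is hypothetical:

    #include <stdint.h>

    /* 16 bytes, 16-byte aligned: a single aligned ldp is atomic on armv8.4 (LSE2),
     * so os_atomic_load_is_plain() evaluates to true for this type. */
    typedef struct {
        uint64_t lo;
        uint64_t hi;
    } __attribute__((aligned(16))) u128_t;

    _Static_assert(sizeof(u128_t) <= 16 && _Alignof(u128_t) >= sizeof(u128_t),
        "u128_t qualifies as a plain load under the definition above");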
/*
* On armv7 & arm64, we do provide fine grained dependency injection, so
cpu_data_ptr->cpu_CLW_active = 0x1UL;
#endif
+#if !XNU_MONITOR
pmap_cpu_data_t * pmap_cpu_data_ptr = &cpu_data_ptr->cpu_pmap_cpu_data;
pmap_cpu_data_ptr->cpu_user_pmap = (struct pmap *) NULL;
for (i = 0; i < (sizeof(pmap_cpu_data_ptr->cpu_asid_high_bits) / sizeof(*pmap_cpu_data_ptr->cpu_asid_high_bits)); i++) {
pmap_cpu_data_ptr->cpu_asid_high_bits[i] = 0;
}
+#endif
cpu_data_ptr->halt_status = CPU_NOT_HALTED;
}
cpu_data_ptr = CpuDataEntries[cpu].cpu_data_vaddr;
cpu_data_ptr->cpu_reset_handler = (vm_offset_t) start_cpu_paddr;
+#if !XNU_MONITOR
cpu_data_ptr->cpu_pmap_cpu_data.cpu_user_pmap = NULL;
+#endif
if (cpu_data_ptr->cpu_processor->startup_thread != THREAD_NULL) {
first_thread = cpu_data_ptr->cpu_processor->startup_thread;
boolean_t idle_enable = FALSE;
uint64_t wake_abstime = 0x0ULL;
+#if defined(HAS_IPI)
+extern unsigned int gFastIPI;
+#endif /* defined(HAS_IPI) */
cpu_data_t *
cpu_datap(int cpu)
if (!(target_proc->cpu_signal & SIGPdisabled)) {
if (defer) {
+#if defined(HAS_IPI)
+ if (gFastIPI) {
+ ml_cpu_signal_deferred(target_proc->cpu_phys_id);
+ } else {
+ PE_cpu_signal_deferred(getCpuDatap()->cpu_id, target_proc->cpu_id);
+ }
+#else
PE_cpu_signal_deferred(getCpuDatap()->cpu_id, target_proc->cpu_id);
+#endif /* defined(HAS_IPI) */
} else {
+#if defined(HAS_IPI)
+ if (gFastIPI) {
+ ml_cpu_signal(target_proc->cpu_phys_id);
+ } else {
+ PE_cpu_signal(getCpuDatap()->cpu_id, target_proc->cpu_id);
+ }
+#else
PE_cpu_signal(getCpuDatap()->cpu_id, target_proc->cpu_id);
+#endif /* defined(HAS_IPI) */
}
}
{
/* TODO: Should we care about the state of a core as far as squashing deferred IPIs goes? */
if (!(target_proc->cpu_signal & SIGPdisabled)) {
+#if defined(HAS_IPI)
+ if (gFastIPI) {
+ ml_cpu_signal_retract(target_proc->cpu_phys_id);
+ } else {
+ PE_cpu_signal_cancel(getCpuDatap()->cpu_id, target_proc->cpu_id);
+ }
+#else
PE_cpu_signal_cancel(getCpuDatap()->cpu_id, target_proc->cpu_id);
+#endif /* defined(HAS_IPI) */
}
}
uint32_t cpu_l3_id;
uint32_t cpu_l3_size;
+#if !XNU_MONITOR
struct pmap_cpu_data cpu_pmap_cpu_data;
+#endif
dbgwrap_thread_state_t halt_state;
enum {
CPU_NOT_HALTED = 0,
extern void arm64_ipi_test(void);
#endif /* defined(CONFIG_XNUPOST) && __arm64__ */
+#if defined(KERNEL_INTEGRITY_CTRR)
+extern void init_ctrr_cpu_start_lock(void);
+extern lck_spin_t ctrr_cpu_start_lck;
+extern bool ctrr_cluster_locked[__ARM_CLUSTER_COUNT__];
+#endif /* defined(KERNEL_INTEGRITY_CTRR) */
#endif /* _ARM_CPU_INTERNAL_H_ */
case CPU_PART_TEMPEST_ARUBA:
cpufamily = CPUFAMILY_ARM_VORTEX_TEMPEST;
break;
+#ifndef RC_HIDE_XNU_LIGHTNING
+ case CPU_PART_LIGHTNING:
+ case CPU_PART_THUNDER:
+ cpufamily = CPUFAMILY_ARM_LIGHTNING_THUNDER;
+ break;
+#endif /* !RC_HIDE_XNU_LIGHTNING */
default:
cpufamily = CPUFAMILY_UNKNOWN;
break;
/* H11G e-Core (ARMv8 architecture) */
#define CPU_PART_TEMPEST_ARUBA 0x11
+#ifndef RC_HIDE_XNU_LIGHTNING
+/* H12 p-Core (ARMv8 architecture) */
+#define CPU_PART_LIGHTNING 0x12
+
+/* H12 e-Core (ARMv8 architecture) */
+#define CPU_PART_THUNDER 0x13
+
+#endif /* !RC_HIDE_XNU_LIGHTNING */
/* Cache type identification */
#if __arm64__
.section __DATA, __const
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
/* reserve space for read only page tables */
.align 14
LEXT(ropagetable_begin)
.space 14*16*1024,0
#else
LEXT(ropagetable_begin)
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
LEXT(ropagetable_end)
uintptr_t arg2, uintptr_t arg3);
#endif /* MONITOR */
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
void rorgn_stash_range(void);
void rorgn_lockdown(void);
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
#if __ARM_KERNEL_PROTECT__
extern void set_vbar_el1(uint64_t);
extern uint64_t interrupt_masked_timeout;
#endif
+#if !HAS_CONTINUOUS_HWCLOCK
extern uint64_t mach_absolutetime_asleep;
+#else
+extern uint64_t wake_abstime;
+static uint64_t wake_conttime = UINT64_MAX;
+#endif
static void
sched_perfcontrol_oncore_default(perfcontrol_state_t new_thread_state __unused, going_on_core_t on __unused)
uint64_t
ml_get_conttime_offset(void)
{
+#if HAS_CONTINUOUS_HWCLOCK
+ return 0;
+#else
return rtclock_base_abstime + mach_absolutetime_asleep;
+#endif
}
uint64_t
ml_get_time_since_reset(void)
{
+#if HAS_CONTINUOUS_HWCLOCK
+ if (wake_conttime == UINT64_MAX) {
+ return UINT64_MAX;
+ } else {
+ return mach_continuous_time() - wake_conttime;
+ }
+#else
/* The timebase resets across S2R, so just return the raw value. */
return ml_get_hwclock();
+#endif
}
void
ml_set_reset_time(__unused uint64_t wake_time)
{
+#if HAS_CONTINUOUS_HWCLOCK
+ wake_conttime = wake_time;
+#endif
}
uint64_t
ml_get_conttime_wake_time(void)
{
+#if HAS_CONTINUOUS_HWCLOCK
+ /*
+ * For now, we will reconstitute the timebase value from
+ * cpu_timebase_init and use it as the wake time.
+ */
+ return wake_abstime - ml_get_abstime_offset();
+#else /* HAS_CONTINUOUS_HWCLOCK */
/* The wake time is simply our continuous time offset. */
return ml_get_conttime_offset();
+#endif /* HAS_CONTINUOUS_HWCLOCK */
}
/*
#endif /* DEVELOPMENT || DEBUG */
+#if XNU_MONITOR
+/*
+ * PPL External References.
+ */
+extern vm_offset_t segPPLDATAB;
+extern unsigned long segSizePPLDATA;
+extern vm_offset_t segPPLTEXTB;
+extern unsigned long segSizePPLTEXT;
+#if __APRR_SUPPORTED__
+extern vm_offset_t segPPLTRAMPB;
+extern unsigned long segSizePPLTRAMP;
+extern void ppl_trampoline_start;
+extern void ppl_trampoline_end;
+#endif
+extern vm_offset_t segPPLDATACONSTB;
+extern unsigned long segSizePPLDATACONST;
+
+
+/*
+ * PPL Global Variables
+ */
+
+#if (DEVELOPMENT || DEBUG)
+/* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
+SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
+#else
+const boolean_t pmap_ppl_disable = FALSE;
+#endif
+
+/* Indicates if the PPL has started applying APRR. */
+boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
+
+/*
+ * The PPL cannot invoke the kernel in order to allocate memory, so we must
+ * maintain a list of free pages that the PPL owns. The kernel can give the PPL
+ * additional pages.
+ */
+decl_simple_lock_data(, pmap_ppl_free_page_lock MARK_AS_PMAP_DATA);
+void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
+uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
+uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;
+
+struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA;
+
+#ifdef CPU_CLUSTER_OFFSETS
+const uint64_t pmap_cluster_offsets[] = CPU_CLUSTER_OFFSETS;
+_Static_assert((sizeof(pmap_cluster_offsets) / sizeof(pmap_cluster_offsets[0])) == __ARM_CLUSTER_COUNT__,
+ "pmap_cluster_offsets[] count does not match __ARM_CLUSTER_COUNT__");
+#endif
+
+extern void *pmap_stacks_start;
+extern void *pmap_stacks_end;
+SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
+SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
+SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
+SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;
+
+/* Allocation data/locks for pmap structures. */
+decl_simple_lock_data(, pmap_free_list_lock MARK_AS_PMAP_DATA);
+SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
+SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
+SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
+pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;
+
+/* Allocation data/locks/structs for task ledger structures. */
+#define PMAP_LEDGER_DATA_BYTES \
+ (((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))
+
+/*
+ * The maximum number of ledgers allowed is the maximum number of tasks
+ * allowed on the system plus some headroom, i.e. ~10% of total tasks = 200.
+ */
+#define MAX_PMAP_LEDGERS (MAX_ASID + 200)
+
+typedef struct pmap_ledger_data {
+ char pld_data[PMAP_LEDGER_DATA_BYTES];
+} pmap_ledger_data_t;
+
+typedef struct pmap_ledger {
+ union {
+ struct pmap_ledger_data ple_data;
+ struct pmap_ledger * next;
+ };
+
+ struct pmap_ledger ** back_ptr;
+} pmap_ledger_t;
+
+SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
+decl_simple_lock_data(, pmap_ledger_lock MARK_AS_PMAP_DATA);
+SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
+SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
+SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
+uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
+pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;
+
+#define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
+#define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)
+
+static inline void
+pmap_check_ledger_fields(ledger_t ledger)
+{
+ if (ledger == NULL) {
+ return;
+ }
+
+ thread_t cur_thread = current_thread();
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
+ ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
+}
+
+#define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)
+
+#else /* XNU_MONITOR */
#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
+#endif /* !XNU_MONITOR */
#if DEVELOPMENT || DEBUG
int panic_on_unsigned_execute = 0;
#define PP_ATTR_REFFAULT 0x1000
#define PP_ATTR_MODFAULT 0x2000
+#if XNU_MONITOR
+/*
+ * Denotes that a page is owned by the PPL. This is modified/checked with the
+ * PVH lock held, to avoid ownership related races. This does not need to be a
+ * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
+ * put the bit.
+ */
+#define PP_ATTR_MONITOR 0x4000
+
+/*
+ * Denotes that a page *cannot* be owned by the PPL. This is required in order
+ * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
+ * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
+ * parameters and in so doing stage a write gadget against the PPL.
+ */
+#define PP_ATTR_NO_MONITOR 0x8000
+
+/*
+ * All of the bits owned by the PPL; kernel requests to set or clear these bits
+ * are illegal.
+ */
+#define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
+#endif
SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
#endif
+#if XNU_MONITOR
+/*
+ * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
+ * and a PV page; in essence, twice as many pages as may be necessary to satisfy
+ * a single pmap_enter request.
+ */
+#define PMAP_MIN_FREE_PPL_PAGES 8
+#endif
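A small sanity sketch of the arithmetic in the comment above (illustrative only): two page-table pages, one PTD page and one PV page per pmap_enter, doubled:

    _Static_assert(PMAP_MIN_FREE_PPL_PAGES == 2 * (2 + 1 + 1),
        "twice the worst-case page count for a single pmap_enter request");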
#define pa_index(pa) \
(atop((pa) - vm_first_phys))
#define pa_clear_reference(x) \
pa_clear_bits(x, PP_ATTR_REFERENCED)
+#if XNU_MONITOR
+#define pa_set_monitor(x) \
+ pa_set_bits((x), PP_ATTR_MONITOR)
+
+#define pa_clear_monitor(x) \
+ pa_clear_bits((x), PP_ATTR_MONITOR)
+
+#define pa_test_monitor(x) \
+ pa_test_bits((x), PP_ATTR_MONITOR)
+
+#define pa_set_no_monitor(x) \
+ pa_set_bits((x), PP_ATTR_NO_MONITOR)
+
+#define pa_clear_no_monitor(x) \
+ pa_clear_bits((x), PP_ATTR_NO_MONITOR)
+
+#define pa_test_no_monitor(x) \
+ pa_test_bits((x), PP_ATTR_NO_MONITOR)
+#endif
#define IS_INTERNAL_PAGE(pai) \
ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define current_pmap() \
(vm_map_pmap(current_thread()->map))
+#if XNU_MONITOR
+/*
+ * PPL-related macros.
+ */
+#define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
+ (((_ptr_) >= (typeof(_ptr_))_array_begin_) && \
+ ((_ptr_) < (typeof(_ptr_))_array_end_) && \
+ !((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))
+
+#define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)
+
+#define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))
+
+#define VALIDATE_USER_PMAP(x) \
+ if (__improbable(!USER_PMAP_IS_VALID(x))) \
+ panic("%s: invalid pmap %p", __func__, (x));
+
+#define VALIDATE_PMAP(x) \
+ if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
+ panic("%s: invalid pmap %p", __func__, (x));
+
+#define VALIDATE_LEDGER_PTR(x) \
+ if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
+ panic("%s: invalid ledger ptr %p", __func__, (x));
+
+#define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))
+
+static uint64_t
+pmap_ledger_validate(void * ledger)
+{
+ uint64_t array_index;
+ pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
+ VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
+ array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);
+
+ if (array_index >= MAX_PMAP_LEDGERS) {
+ panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
+ }
+
+ pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;
+
+ if (__improbable(ledger_ptr != ledger)) {
+ panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
+ }
+
+ return array_index;
+}
+
+#else /* XNU_MONITOR */
#define VALIDATE_USER_PMAP(x)
#define VALIDATE_PMAP(x)
#define VALIDATE_LEDGER(x)
+#endif
#if DEVELOPMENT || DEBUG
static void pmap_trim_self(pmap_t pmap);
static void pmap_trim_subord(pmap_t subord);
+#if __APRR_SUPPORTED__
+static uint64_t pte_to_xprr_perm(pt_entry_t pte);
+static pt_entry_t xprr_perm_to_pte(uint64_t perm);
+#endif /* __APRR_SUPPORTED__*/
+
+#if XNU_MONITOR
+static pmap_paddr_t pmap_alloc_page_for_kern(void);
+static void pmap_alloc_page_for_ppl(void);
+
+
+/*
+ * This macro generates prototypes for the *_internal functions, which
+ * represent the PPL interface. When the PPL is enabled, this will also
+ * generate prototypes for the PPL entrypoints (*_ppl), as well as generating
+ * the entrypoints.
+ */
+#define GEN_ASM_NAME(__function_name) _##__function_name##_ppl
+
+#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
+ static __return_type __function_name##_internal __function_args; \
+ extern __return_type __function_name##_ppl __function_args; \
+ __asm__ (".text \n" \
+ ".align 2 \n" \
+ ".globl " #__assembly_function_name "\n" \
+ #__assembly_function_name ":\n" \
+ "mov x15, " #__function_index "\n" \
+ "b _aprr_ppl_enter\n")
+
+#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
+ PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)
+#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
+ PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
+#else /* XNU_MONITOR */
#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
static __return_type __function_name##_internal __function_args
+#endif /* XNU_MONITOR */
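To make the PPL dispatch glue concrete, the following is roughly what one XNU_MONITOR expansion looks like; pmap_foo and a hypothetical "#define PMAP_FOO_INDEX 42" are placeholders, not symbols from this diff:

    /* PMAP_SUPPORT_PROTOTYPES(void, pmap_foo, (pmap_t pmap), PMAP_FOO_INDEX) expands to: */
    static void pmap_foo_internal(pmap_t pmap);   /* the implementation that runs inside the PPL */
    extern void pmap_foo_ppl(pmap_t pmap);        /* the entry point the kernel calls */
    __asm__ (".text \n"
        ".align 2 \n"
        ".globl _pmap_foo_ppl\n"
        "_pmap_foo_ppl:\n"
        "mov x15, 42\n"                           /* the PPL call index, stringized by the macro */
        "b _aprr_ppl_enter\n");                   /* trampoline that raises to PPL and dispatches */

The x15 index is presumably used on the PPL side to select the matching *_internal handler from ppl_handler_table, which appears later in this diff.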
PMAP_SUPPORT_PROTOTYPES(
kern_return_t,
void,
pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
-#if MACH_ASSERT
+#if MACH_ASSERT || XNU_MONITOR
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_set_process, (pmap_t pmap,
uint64_t size,
unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
+#if XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_cpu_data_init, (unsigned int cpu_number), PMAP_CPU_DATA_INIT_INDEX);
+#endif
PMAP_SUPPORT_PROTOTYPES(
void,
phys_attribute_set, (ppnum_t pn,
unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
+#if XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_mark_page_as_ppl_page, (pmap_paddr_t pa), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX);
+#endif
PMAP_SUPPORT_PROTOTYPES(
void,
void,
pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
+#if XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ uint64_t,
+ pmap_release_ppl_pages_to_kernel, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX);
+#endif
PMAP_SUPPORT_PROTOTYPES(
void,
addr64_t nstart,
uint64_t size), PMAP_TRIM_INDEX);
+#if HAS_APPLE_PAC && XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ void *,
+ pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator), PMAP_SIGN_USER_PTR);
+PMAP_SUPPORT_PROTOTYPES(
+ void *,
+ pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator), PMAP_AUTH_USER_PTR);
+#endif /* HAS_APPLE_PAC && XNU_MONITOR */
+#if XNU_MONITOR
+static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);
+#endif
void pmap_footprint_suspend(vm_map_t map,
boolean_t suspend);
boolean_t suspend),
PMAP_FOOTPRINT_SUSPEND_INDEX);
+#if XNU_MONITOR
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_ledger_alloc_init, (size_t),
+ PMAP_LEDGER_ALLOC_INIT_INDEX);
+
+PMAP_SUPPORT_PROTOTYPES(
+ ledger_t,
+ pmap_ledger_alloc, (void),
+ PMAP_LEDGER_ALLOC_INDEX);
+
+PMAP_SUPPORT_PROTOTYPES(
+ void,
+ pmap_ledger_free, (ledger_t),
+ PMAP_LEDGER_FREE_INDEX);
+#endif
#if CONFIG_PGTRACE
boolean_t pgtrace_enabled = 0;
int pt_fake_zone_index = -1; /* index of pmap fake zone */
+#if XNU_MONITOR
+/*
+ * Table of function pointers used for PPL dispatch.
+ */
+const void * const ppl_handler_table[PMAP_COUNT] = {
+ [ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
+ [ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
+ [MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
+ [MAPPING_REPLENISH_INDEX] = mapping_replenish_internal,
+ [PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
+ [PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
+ [PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
+ [PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
+ [PMAP_CREATE_INDEX] = pmap_create_options_internal,
+ [PMAP_DESTROY_INDEX] = pmap_destroy_internal,
+ [PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
+ [PMAP_EXTRACT_INDEX] = pmap_extract_internal,
+ [PMAP_FIND_PHYS_INDEX] = pmap_find_phys_internal,
+ [PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
+ [PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
+ [PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
+ [PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
+ [PMAP_NEST_INDEX] = pmap_nest_internal,
+ [PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
+ [PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
+ [PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
+ [PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
+ [PMAP_REFERENCE_INDEX] = pmap_reference_internal,
+ [PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
+ [PMAP_RETURN_INDEX] = pmap_return_internal,
+ [PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
+ [PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
+ [PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
+ [PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
+ [PMAP_SWITCH_INDEX] = pmap_switch_internal,
+ [PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
+ [PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
+ [PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
+ [PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
+ [PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
+ [PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
+ [PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
+ [PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
+ [PMAP_TRIM_INDEX] = pmap_trim_internal,
+ [PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
+ [PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
+ [PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
+#if HAS_APPLE_PAC && XNU_MONITOR
+ [PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
+ [PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
+#endif /* HAS_APPLE_PAC && XNU_MONITOR */
+};
+
+static uint64_t
+pmap_get_ppl_cpu_id(void)
+{
+ uint64_t mpidr_el1_value = 0;
+
+ /* We identify the CPU based on the constant bits of MPIDR_EL1. */
+ MRS(mpidr_el1_value, "MPIDR_EL1");
+
+#ifdef CPU_CLUSTER_OFFSETS
+ uint64_t cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
+ assert(cluster_id < (sizeof(pmap_cluster_offsets) / sizeof(pmap_cluster_offsets[0])));
+
+ /* For multi-cluster configurations, AFF0 reflects the core number within the cluster. */
+ mpidr_el1_value = (mpidr_el1_value & MPIDR_AFF0_MASK) + pmap_cluster_offsets[cluster_id];
+#else
+ /*
+ * AFF2 is not constant (it can change for e-core versus p-core on H9),
+ * so mask it out.
+ */
+ mpidr_el1_value &= MPIDR_AFF0_MASK;
+#endif
+
+ if (mpidr_el1_value > MAX_CPUS) {
+ panic("%s: mpidr_el1_value=%#llx > MAX_CPUS=%#x",
+ __FUNCTION__, mpidr_el1_value, MAX_CPUS);
+ }
+
+ return mpidr_el1_value;
+}
+
+
+#endif
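A worked example of the index math above (the cluster layout is hypothetical):

    /* Hypothetical: CPU_CLUSTER_OFFSETS = { 0, 4 }, i.e. two clusters of four cores.
     * A core whose MPIDR_EL1 has AFF1 = 1 and AFF0 = 2 maps to
     *     (mpidr & MPIDR_AFF0_MASK) + pmap_cluster_offsets[1] = 2 + 4 = 6,
     * i.e. pmap_cpu_data_array[6]. Without CPU_CLUSTER_OFFSETS the index is simply AFF0. */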
/*
{
pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
+#if XNU_MONITOR
+ /* Verify cacheline-aligned */
+ assert(((vm_offset_t)pmap_cpu_data & ((1 << L2_CLINE) - 1)) == 0);
+ if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
+ panic("%s: pmap_cpu_data->cpu_number=%u, "
+ "cpu_number=%u",
+ __FUNCTION__, pmap_cpu_data->cpu_number,
+ cpu_number);
+ }
+#endif
pmap_cpu_data->cpu_number = cpu_number;
}
void
pmap_cpu_data_init(void)
{
+#if XNU_MONITOR
+ pmap_cpu_data_init_ppl(cpu_number());
+#else
pmap_cpu_data_init_internal(cpu_number());
+#endif
}
static void
pmap_cpu_data_array_init(void)
{
+#if XNU_MONITOR
+ unsigned int i = 0;
+ pmap_paddr_t ppl_cpu_save_area_cur = 0;
+ pt_entry_t template, *pte_p;
+ vm_offset_t stack_va = (vm_offset_t)pmap_stacks_start + ARM_PGBYTES;
+ assert((pmap_stacks_start != NULL) && (pmap_stacks_end != NULL));
+ pmap_stacks_start_pa = avail_start;
+
+ for (i = 0; i < MAX_CPUS; i++) {
+ for (vm_offset_t cur_va = stack_va; cur_va < (stack_va + PPL_STACK_SIZE); cur_va += ARM_PGBYTES) {
+ assert(cur_va < (vm_offset_t)pmap_stacks_end);
+ pte_p = pmap_pte(kernel_pmap, cur_va);
+ assert(*pte_p == ARM_PTE_EMPTY);
+ template = pa_to_pte(avail_start) | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE |
+ ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | xprr_perm_to_pte(XPRR_PPL_RW_PERM);
+#if __ARM_KERNEL_PROTECT__
+ template |= ARM_PTE_NG;
+#endif /* __ARM_KERNEL_PROTECT__ */
+ WRITE_PTE(pte_p, template);
+ __builtin_arm_isb(ISB_SY);
+ avail_start += ARM_PGBYTES;
+ }
+#if KASAN
+ kasan_map_shadow(stack_va, PPL_STACK_SIZE, false);
+#endif
+ pmap_cpu_data_array[i].cpu_data.cpu_id = i;
+ pmap_cpu_data_array[i].cpu_data.cpu_number = PMAP_INVALID_CPU_NUM;
+ pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
+ pmap_cpu_data_array[i].cpu_data.ppl_stack = (void*)(stack_va + PPL_STACK_SIZE);
+ stack_va += (PPL_STACK_SIZE + ARM_PGBYTES);
+ }
+ sync_tlb_flush();
+ pmap_stacks_end_pa = avail_start;
+
+ ppl_cpu_save_area_start = avail_start;
+ ppl_cpu_save_area_end = ppl_cpu_save_area_start;
+ ppl_cpu_save_area_cur = ppl_cpu_save_area_start;
+
+ for (i = 0; i < MAX_CPUS; i++) {
+ while ((ppl_cpu_save_area_end - ppl_cpu_save_area_cur) < sizeof(arm_context_t)) {
+ avail_start += PAGE_SIZE;
+ ppl_cpu_save_area_end = avail_start;
+ }
+
+ pmap_cpu_data_array[i].cpu_data.save_area = (arm_context_t *)phystokv(ppl_cpu_save_area_cur);
+ ppl_cpu_save_area_cur += sizeof(arm_context_t);
+ }
+#endif
pmap_cpu_data_init();
}
{
pmap_cpu_data_t * pmap_cpu_data = NULL;
+#if XNU_MONITOR
+ uint64_t cpu_id = 0;
+
+ cpu_id = pmap_get_ppl_cpu_id();
+ pmap_cpu_data = &pmap_cpu_data_array[cpu_id].cpu_data;
+
+ if (pmap_cpu_data->cpu_id != cpu_id) {
+ panic("%s: CPU ID mismatch, cpu_id=0x%#llx, pmap_cpu_data->cpu_id=%#llx",
+ __FUNCTION__, cpu_id, pmap_cpu_data->cpu_id);
+ }
+#else
pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
+#endif
return pmap_cpu_data;
}
+#if XNU_MONITOR
+/*
+ * pmap_set_range_xprr_perm takes a range (specified using start and end) that
+ * falls within the physical aperture. All mappings within this range have
+ * their protections changed from those specified by the expected_perm to those
+ * specified by the new_perm.
+ */
+static void
+pmap_set_range_xprr_perm(vm_address_t start,
+ vm_address_t end,
+ unsigned int expected_perm,
+ unsigned int new_perm)
+{
+#if (__ARM_VMSA__ == 7)
+#error This function is not supported on older ARM hardware
+#else
+ pmap_t pmap = NULL;
+
+ vm_address_t va = 0;
+ vm_address_t tte_start = 0;
+ vm_address_t tte_end = 0;
+
+ tt_entry_t *tte_p = NULL;
+ pt_entry_t *pte_p = NULL;
+ pt_entry_t *cpte_p = NULL;
+ pt_entry_t *bpte_p = NULL;
+ pt_entry_t *epte_p = NULL;
+
+ tt_entry_t tte = 0;
+ pt_entry_t cpte = 0;
+ pt_entry_t template = 0;
+
+ pmap = kernel_pmap;
+
+ va = start;
+
+ /*
+ * Validate our arguments; any invalid argument will be grounds for a
+ * panic.
+ */
+ if ((start | end) % ARM_PGBYTES) {
+ panic("%s: start or end not page aligned, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ if (start > end) {
+ panic("%s: start > end, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ if (start < gVirtBase) {
+ panic("%s: start is before physical aperture, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ if (end > static_memory_end) {
+ panic("%s: end is after physical aperture, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ if ((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM)) {
+ panic("%s: invalid XPRR index, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ /*
+ * Walk over the PTEs for the given range, and set the protections on
+ * those PTEs.
+ */
+ while (va < end) {
+ tte_start = va;
+ tte_end = ((va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr));
+
+ if (tte_end > end) {
+ tte_end = end;
+ }
+
+ tte_p = pmap_tte(pmap, va);
+
+ /*
+ * The physical aperture should not have holes.
+ * The physical aperture should be contiguous.
+ * Do not make eye contact with the physical aperture.
+ */
+ if (tte_p == NULL) {
+ panic("%s: physical aperture tte is NULL, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ tte = *tte_p;
+
+ if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
+ /*
+ * Walk over the given L3 page table page and update the
+ * PTEs.
+ */
+ pte_p = (pt_entry_t *)ttetokv(tte);
+ bpte_p = &pte_p[ptenum(va)];
+ epte_p = bpte_p + ((tte_end - va) >> pt_attr_leaf_shift(native_pt_attr));
+
+ for (cpte_p = bpte_p; cpte_p < epte_p;
+ cpte_p += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
+ int pai = (int)pa_index(pte_to_pa(*cpte_p));
+ LOCK_PVH(pai);
+ cpte = *cpte_p;
+
+ /*
+ * Every PTE involved should be valid, should
+ * not have the hint bit set, and should have
+ * the expected APRR index.
+ */
+ if ((cpte & ARM_PTE_TYPE_MASK) ==
+ ARM_PTE_TYPE_FAULT) {
+ panic("%s: physical aperture PTE is invalid, va=%p, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)va,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ UNLOCK_PVH(pai);
+ continue;
+ }
+
+ if (cpte & ARM_PTE_HINT_MASK) {
+ panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ (void *)va, cpte,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ if (pte_to_xprr_perm(cpte) != expected_perm) {
+ panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ pte_to_xprr_perm(cpte), cpte,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ template = cpte;
+ template &= ~ARM_PTE_XPRR_MASK;
+ template |= xprr_perm_to_pte(new_perm);
+
+ WRITE_PTE_STRONG(cpte_p, template);
+ UNLOCK_PVH(pai);
+ }
+ } else {
+ panic("%s: tte=0x%llx is not a table type entry, "
+ "start=%p, end=%p, new_perm=%u, expected_perm=%u",
+ __FUNCTION__,
+ tte,
+ (void *)start, (void *)end, new_perm, expected_perm);
+ }
+
+ va = tte_end;
+ }
+
+ PMAP_UPDATE_TLBS(pmap, start, end, false);
+#endif /* (__ARM_VMSA__ == 7) */
+}
+
+/*
+ * A convenience function for setting protections on a single page.
+ */
+static inline void
+pmap_set_xprr_perm(vm_address_t page_kva,
+ unsigned int expected_perm,
+ unsigned int new_perm)
+{
+ pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
+}
+#endif /* XNU_MONITOR */
/* TODO */
}
}
+#if XNU_MONITOR
+/*
+ * Return a PPL page to the free list.
+ */
+static void
+pmap_give_free_ppl_page(pmap_paddr_t paddr)
+{
+ assert((paddr & ARM_PGMASK) == 0);
+ void ** new_head = (void **)phystokv(paddr);
+ pmap_simple_lock(&pmap_ppl_free_page_lock);
+
+ void * cur_head = pmap_ppl_free_page_list;
+ *new_head = cur_head;
+ pmap_ppl_free_page_list = new_head;
+ pmap_ppl_free_page_count++;
+
+ pmap_simple_unlock(&pmap_ppl_free_page_lock);
+}
+
+/*
+ * Get a PPL page from the free list.
+ */
+static pmap_paddr_t
+pmap_get_free_ppl_page(void)
+{
+ pmap_paddr_t result = 0;
+
+ pmap_simple_lock(&pmap_ppl_free_page_lock);
+
+ if (pmap_ppl_free_page_list != NULL) {
+ void ** new_head = NULL;
+ new_head = *((void**)pmap_ppl_free_page_list);
+ result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
+ pmap_ppl_free_page_list = new_head;
+ pmap_ppl_free_page_count--;
+ } else {
+ result = 0L;
+ }
+
+ pmap_simple_unlock(&pmap_ppl_free_page_lock);
+ assert((result & ARM_PGMASK) == 0);
+
+ return result;
+}
+
+/*
+ * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
+ * as PPL-owned and only allowing the PPL to write to it.
+ */
+MARK_AS_PMAP_TEXT static void
+pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa)
+{
+ vm_offset_t kva = 0;
+ unsigned int pai = 0;
+ pp_attr_t attr;
+
+ /*
+ * Mark each page that we allocate as belonging to the monitor, as we
+ * intend to use it for monitor-y stuff (page tables, table pages, that
+ * sort of thing).
+ */
+ assert(!TEST_PAGE_RATIO_4);
+
+ if (!pa_valid(pa)) {
+ panic("%s: bad address, "
+ "pa=%p",
+ __func__,
+ (void *)pa);
+ }
+
+ pai = (unsigned int)pa_index(pa);
+ LOCK_PVH(pai);
+
+ /* A page that the PPL already owns can't be given to the PPL. */
+ if (pa_test_monitor(pa)) {
+ panic("%s: page already belongs to PPL, "
+ "pa=0x%llx",
+ __FUNCTION__,
+ pa);
+ }
+ /* The page cannot be mapped outside of the physical aperture. */
+ if (!pmap_verify_free((ppnum_t)atop(pa))) {
+ panic("%s: page is not free, "
+ "pa=0x%llx",
+ __FUNCTION__,
+ pa);
+ }
+
+ do {
+ attr = pp_attr_table[pai];
+ if (attr & PP_ATTR_NO_MONITOR) {
+ panic("%s: page excluded from PPL, "
+ "pa=0x%llx",
+ __FUNCTION__,
+ pa);
+ }
+ } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_MONITOR, &pp_attr_table[pai]));
+
+ UNLOCK_PVH(pai);
+
+ kva = phystokv(pa);
+ pmap_set_xprr_perm(kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
+ bzero((void *)(kva & ~PAGE_MASK), PAGE_SIZE);
+
+ pmap_give_free_ppl_page(pa);
+}
+
+static void
+pmap_mark_page_as_ppl_page(pmap_paddr_t pa)
+{
+ pmap_mark_page_as_ppl_page_ppl(pa);
+}
+
+static void
+pmap_mark_page_as_kernel_page(pmap_paddr_t pa)
+{
+ vm_offset_t kva = 0;
+ unsigned int pai = 0;
+
+ pai = (unsigned int)pa_index(pa);
+ LOCK_PVH(pai);
+
+ if (!pa_test_monitor(pa)) {
+ panic("%s: page is not a PPL page, "
+ "pa=%p",
+ __FUNCTION__,
+ (void *)pa);
+ }
+
+ pa_clear_monitor(pa);
+ UNLOCK_PVH(pai);
+
+ kva = phystokv(pa);
+ pmap_set_xprr_perm(kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
+}
+
+MARK_AS_PMAP_TEXT static pmap_paddr_t
+pmap_release_ppl_pages_to_kernel_internal(void)
+{
+ pmap_paddr_t pa = 0;
+
+ if (pmap_ppl_free_page_count <= PMAP_MIN_FREE_PPL_PAGES) {
+ goto done;
+ }
+
+ pa = pmap_get_free_ppl_page();
+
+ if (!pa) {
+ goto done;
+ }
+
+ pmap_mark_page_as_kernel_page(pa);
+
+done:
+ return pa;
+}
+
+static uint64_t
+pmap_release_ppl_pages_to_kernel(void)
+{
+ pmap_paddr_t pa = 0;
+ vm_page_t m = VM_PAGE_NULL;
+ vm_page_t local_freeq = VM_PAGE_NULL;
+ uint64_t pmap_ppl_pages_returned_to_kernel_count = 0;
+
+ while (pmap_ppl_free_page_count > PMAP_MIN_FREE_PPL_PAGES) {
+ pa = pmap_release_ppl_pages_to_kernel_ppl();
+
+ if (!pa) {
+ break;
+ }
+
+ /* If we retrieved a page, add it to the free queue. */
+ vm_object_lock(pmap_object);
+ m = vm_page_lookup(pmap_object, (pa - gPhysBase));
+ assert(m != VM_PAGE_NULL);
+ assert(VM_PAGE_WIRED(m));
+
+ m->vmp_busy = TRUE;
+ m->vmp_snext = local_freeq;
+ local_freeq = m;
+ pmap_ppl_pages_returned_to_kernel_count++;
+ pmap_ppl_pages_returned_to_kernel_count_total++;
+
+ vm_object_unlock(pmap_object);
+ }
+
+ if (local_freeq) {
+ /* We need to hold the object lock for freeing pages. */
+ vm_object_lock(pmap_object);
+ vm_page_free_list(local_freeq, TRUE);
+ vm_object_unlock(pmap_object);
+ }
+
+ return pmap_ppl_pages_returned_to_kernel_count;
+}
+#endif
static kern_return_t
pmap_pages_alloc(
unsigned size,
unsigned option)
{
+#if XNU_MONITOR
+ if (size != PAGE_SIZE) {
+ panic("%s: size != PAGE_SIZE, "
+ "pa=%p, size=%u, option=%u",
+ __FUNCTION__,
+ pa, size, option);
+ }
+
+ if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
+ *pa = pmap_pages_reclaim();
+ assert(*pa);
+ return KERN_SUCCESS;
+ }
+
+ assert(option & PMAP_PAGES_ALLOCATE_NOWAIT);
+
+ *pa = pmap_get_free_ppl_page();
+
+ if (*pa == 0) {
+ return KERN_RESOURCE_SHORTAGE;
+ } else {
+ return KERN_SUCCESS;
+ }
+#else
vm_page_t m = VM_PAGE_NULL, m_prev;
if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
m = NEXT_PAGE(m_prev);
*(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
}
- vm_object_unlock(pmap_object);
-
- OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
- OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
+ vm_object_unlock(pmap_object);
+
+ OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
+ OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
+
+ return KERN_SUCCESS;
+#endif
+}
+
+#if XNU_MONITOR
+static pmap_paddr_t
+pmap_alloc_page_for_kern(void)
+{
+ pmap_paddr_t paddr = 0;
+ vm_page_t m, m_prev;
+
+ while ((m = vm_page_grab()) == VM_PAGE_NULL) {
+ VM_PAGE_WAIT();
+ }
+
+ vm_page_lock_queues();
+ vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
+ vm_page_unlock_queues();
+
+ paddr = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
+
+ if (paddr == 0) {
+ panic("%s: paddr is 0",
+ __FUNCTION__);
+ }
+
+ vm_object_lock(pmap_object);
+
+ while (m != VM_PAGE_NULL) {
+ vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
+ m_prev = m;
+ m = NEXT_PAGE(m_prev);
+ *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
+ }
+
+ vm_object_unlock(pmap_object);
+
+ OSAddAtomic(1, &inuse_pmap_pages_count);
+ OSAddAtomic64(1, &alloc_pmap_pages_count);
+
+ return paddr;
+}
+
+static void
+pmap_alloc_page_for_ppl(void)
+{
+ pmap_mark_page_as_ppl_page(pmap_alloc_page_for_kern());
+}
+
+static pmap_t
+pmap_alloc_pmap(void)
+{
+ pmap_t pmap = PMAP_NULL;
+
+ pmap_simple_lock(&pmap_free_list_lock);
+
+ if (pmap_free_list != PMAP_NULL) {
+ pmap = pmap_free_list;
+ pmap_free_list = *((pmap_t *)pmap);
+
+ if (!PMAP_PTR_IS_VALID(pmap)) {
+ panic("%s: allocated pmap is not valid, pmap=%p",
+ __FUNCTION__, pmap);
+ }
+ }
+
+ pmap_simple_unlock(&pmap_free_list_lock);
+
+ return pmap;
+}
+
+static void
+pmap_free_pmap(pmap_t pmap)
+{
+ if (!PMAP_PTR_IS_VALID(pmap)) {
+ panic("%s: pmap is not valid, "
+ "pmap=%p",
+ __FUNCTION__,
+ pmap);
+ }
- return KERN_SUCCESS;
+ pmap_simple_lock(&pmap_free_list_lock);
+ *((pmap_t *)pmap) = pmap_free_list;
+ pmap_free_list = pmap;
+ pmap_simple_unlock(&pmap_free_list_lock);
}
+static void
+pmap_bootstrap_pmap_free_list(void)
+{
+ pmap_t cur_head = PMAP_NULL;
+ unsigned long i = 0;
+
+ simple_lock_init(&pmap_free_list_lock, 0);
+
+ for (i = 0; i < pmap_array_count; i++) {
+ *((pmap_t *)(&pmap_array[i])) = cur_head;
+ cur_head = &pmap_array[i];
+ }
+
+ pmap_free_list = cur_head;
+}
+#endif
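Editor's note: the allocator above keeps its free list intrusively. The next pointer is stored in the first word of each free pmap (*((pmap_t *)pmap) = pmap_free_list), so no side table is needed, and the whole list is seeded by walking pmap_array once. A minimal stand-alone sketch of the same idiom, with hypothetical names that are not xnu symbols:

#include <stddef.h>

typedef struct object {
	char payload[64];               /* must be at least sizeof(void *) */
} object_t;

static object_t object_array[8];
static object_t *object_free_list;

/* Thread every array element onto the free list; the last element becomes the head. */
static void
object_free_list_bootstrap(void)
{
	object_t *head = NULL;
	for (size_t i = 0; i < 8; i++) {
		*((object_t **)&object_array[i]) = head;    /* link stored inside the element */
		head = &object_array[i];
	}
	object_free_list = head;
}

/* Pop the head, or return NULL when the pool is exhausted. */
static object_t *
object_alloc(void)
{
	object_t *o = object_free_list;
	if (o != NULL) {
		object_free_list = *((object_t **)o);
	}
	return o;
}

/* Push a freed element back onto the list. */
static void
object_free(object_t *o)
{
	*((object_t **)o) = object_free_list;
	object_free_list = o;
}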
static void
pmap_pages_free(
pmap_simple_unlock(&pmap_pages_lock);
+#if XNU_MONITOR
+ (void)size;
+
+ pmap_give_free_ppl_page(pa);
+#else
vm_page_t m;
pmap_paddr_t pa_max;
vm_page_unlock_queues();
vm_object_unlock(pmap_object);
}
+#endif
}
static inline void
pmap_paddr_t pa;
kern_return_t ret;
+#if XNU_MONITOR
+ /*
+ * The PPL has no guarantee that its allocation
+ * will succeed, so steal pages if necessary to
+ * ensure that we can free up a PV allocation.
+ */
+ ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
+
+ if (ret == KERN_RESOURCE_SHORTAGE) {
+ ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
+ }
+#else
ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
+#endif
if (ret != KERN_SUCCESS) {
panic("%s: failed to alloc page, ret=%d, "
PV_ALLOC(pv_entry_t **pv_ep)
{
assert(*pv_ep == PV_ENTRY_NULL);
+#if !XNU_MONITOR
if (pv_kern_free_count < pv_kern_low_water_mark) {
/*
* If the kernel reserved pool is low, let non-kernel mappings wait for a page
*/
return;
}
+#endif
pmap_simple_lock(&pv_free_list_lock);
if ((*pv_ep = pv_free_list) != 0) {
{
kern_return_t kr = KERN_FAILURE;
+#if XNU_MONITOR
+ unsigned int i = 0;
+
+ /*
+ * Allocate the needed PPL pages up front, to minimize the chance that
+ * we will need to call into the PPL multiple times.
+ */
+ for (i = 0; i < PV_ALLOC_INITIAL_TARGET; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
+ pmap_alloc_page_for_ppl();
+ }
+
+ for (i = 0; i < PV_KERN_ALLOC_INITIAL_TARGET; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
+ pmap_alloc_page_for_ppl();
+ }
+
+ while ((kr = mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE) {
+ pmap_alloc_page_for_ppl();
+ }
+#else
kr = mapping_free_prime_internal();
+#endif
if (kr != KERN_SUCCESS) {
panic("%s: failed, kr=%d",
pv_cnt = 0;
pv_eh = pv_et = PV_ENTRY_NULL;
+#if XNU_MONITOR
+ if ((ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT)) != KERN_SUCCESS) {
+ return ret;
+ }
+#else
ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
assert(ret == KERN_SUCCESS);
+#endif
pv_page_count++;
current_thread()->options |= TH_OPT_VMPRIV;
for (;;) {
+#if XNU_MONITOR
+
+ while ((kr = mapping_replenish_ppl(pv_kern_low_water_mark, pv_low_water_mark)) == KERN_RESOURCE_SHORTAGE) {
+ pmap_alloc_page_for_ppl();
+ }
+#else
kr = mapping_replenish_internal(pv_kern_low_water_mark, pv_low_water_mark);
+#endif
if (kr != KERN_SUCCESS) {
panic("%s: failed, kr=%d", __FUNCTION__, kr);
#endif
+#if __APRR_SUPPORTED__
+/*
+ * Indicates whether the given PTE has special restrictions due to the current
+ * APRR settings.
+ */
+static boolean_t
+is_pte_aprr_protected(pt_entry_t pte)
+{
+ uint64_t aprr_el0_value;
+ uint64_t aprr_el1_value;
+ uint64_t aprr_index;
+
+ MRS(aprr_el0_value, APRR_EL0);
+ MRS(aprr_el1_value, APRR_EL1);
+ aprr_index = PTE_TO_APRR_INDEX(pte);
+
+ /* Check to see if this mapping had APRR restrictions. */
+ if ((APRR_EXTRACT_IDX_ATTR(aprr_el0_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL0_RESET, aprr_index)) ||
+ (APRR_EXTRACT_IDX_ATTR(aprr_el1_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL1_RESET, aprr_index))
+ ) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+#endif /* __APRR_SUPPORTED__ */
+
+#if __APRR_SUPPORTED__
+static boolean_t
+is_pte_xprr_protected(pt_entry_t pte)
+{
+#if __APRR_SUPPORTED__
+ return is_pte_aprr_protected(pte);
+#else /* __APRR_SUPPORTED__ */
+#error "XPRR configuration error"
+#endif /* __APRR_SUPPORTED__ */
+}
+#endif /* __APRR_SUPPORTED__*/
+#if __APRR_SUPPORTED__
+static uint64_t
+__unused pte_to_xprr_perm(pt_entry_t pte)
+{
+#if __APRR_SUPPORTED__
+ switch (PTE_TO_APRR_INDEX(pte)) {
+ case APRR_FIRM_RX_INDEX: return XPRR_FIRM_RX_PERM;
+ case APRR_FIRM_RO_INDEX: return XPRR_FIRM_RO_PERM;
+ case APRR_PPL_RW_INDEX: return XPRR_PPL_RW_PERM;
+ case APRR_KERN_RW_INDEX: return XPRR_KERN_RW_PERM;
+ case APRR_FIRM_RW_INDEX: return XPRR_FIRM_RW_PERM;
+ case APRR_KERN0_RW_INDEX: return XPRR_KERN0_RW_PERM;
+ case APRR_USER_JIT_INDEX: return XPRR_USER_JIT_PERM;
+ case APRR_USER_RW_INDEX: return XPRR_USER_RW_PERM;
+ case APRR_PPL_RX_INDEX: return XPRR_PPL_RX_PERM;
+ case APRR_KERN_RX_INDEX: return XPRR_KERN_RX_PERM;
+ case APRR_PPL_RO_INDEX: return XPRR_PPL_RO_PERM;
+ case APRR_KERN_RO_INDEX: return XPRR_KERN_RO_PERM;
+ case APRR_KERN0_RX_INDEX: return XPRR_KERN0_RO_PERM;
+ case APRR_KERN0_RO_INDEX: return XPRR_KERN0_RO_PERM;
+ case APRR_USER_RX_INDEX: return XPRR_USER_RX_PERM;
+ case APRR_USER_RO_INDEX: return XPRR_USER_RO_PERM;
+ default: return XPRR_MAX_PERM;
+ }
+#else
+#error "XPRR configuration error"
+#endif /* __APRR_SUPPORTED__ */
+}
+#if __APRR_SUPPORTED__
+static uint64_t
+xprr_perm_to_aprr_index(uint64_t perm)
+{
+ switch (perm) {
+ case XPRR_FIRM_RX_PERM: return APRR_FIRM_RX_INDEX;
+ case XPRR_FIRM_RO_PERM: return APRR_FIRM_RO_INDEX;
+ case XPRR_PPL_RW_PERM: return APRR_PPL_RW_INDEX;
+ case XPRR_KERN_RW_PERM: return APRR_KERN_RW_INDEX;
+ case XPRR_FIRM_RW_PERM: return APRR_FIRM_RW_INDEX;
+ case XPRR_KERN0_RW_PERM: return APRR_KERN0_RW_INDEX;
+ case XPRR_USER_JIT_PERM: return APRR_USER_JIT_INDEX;
+ case XPRR_USER_RW_PERM: return APRR_USER_RW_INDEX;
+ case XPRR_PPL_RX_PERM: return APRR_PPL_RX_INDEX;
+ case XPRR_KERN_RX_PERM: return APRR_KERN_RX_INDEX;
+ case XPRR_PPL_RO_PERM: return APRR_PPL_RO_INDEX;
+ case XPRR_KERN_RO_PERM: return APRR_KERN_RO_INDEX;
+ case XPRR_KERN0_RX_PERM: return APRR_KERN0_RO_INDEX;
+ case XPRR_KERN0_RO_PERM: return APRR_KERN0_RO_INDEX;
+ case XPRR_USER_RX_PERM: return APRR_USER_RX_INDEX;
+ case XPRR_USER_RO_PERM: return APRR_USER_RO_INDEX;
+ default: return APRR_MAX_INDEX;
+ }
+}
+#endif /* __APRR_SUPPORTED__ */
+
+static pt_entry_t
+__unused xprr_perm_to_pte(uint64_t perm)
+{
+#if __APRR_SUPPORTED__
+ return APRR_INDEX_TO_PTE(xprr_perm_to_aprr_index(perm));
+#else
+#error "XPRR configuration error"
+#endif /* __APRR_SUPPORTED__ */
+}
+#endif /* __APRR_SUPPORTED__*/
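Editor's note, as a concrete round trip through the translation helpers above: a PTE whose APRR index is APRR_PPL_RW_INDEX reads back as XPRR_PPL_RW_PERM from pte_to_xprr_perm(), and xprr_perm_to_pte(XPRR_PPL_RW_PERM) rebuilds a PTE encoding that same index. Note that, as written, the KERN0 RX cases collapse onto the RO values in both tables (APRR_KERN0_RX_INDEX maps to XPRR_KERN0_RO_PERM, and XPRR_KERN0_RX_PERM maps back to APRR_KERN0_RO_INDEX), so the mapping is not invertible for those entries.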
/*
lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
+#if XNU_MONITOR
+
+#if DEVELOPMENT || DEBUG
+ PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
+#endif
+
+ simple_lock_init(&pmap_ppl_free_page_lock, 0);
+
+#if __APRR_SUPPORTED__
+ if (((uintptr_t)(&ppl_trampoline_start)) % PAGE_SIZE) {
+ panic("%s: ppl_trampoline_start is not page aligned, "
+ "vstart=%#lx",
+ __FUNCTION__,
+ vstart);
+ }
+
+ if (((uintptr_t)(&ppl_trampoline_end)) % PAGE_SIZE) {
+ panic("%s: ppl_trampoline_end is not page aligned, "
+ "vstart=%#lx",
+ __FUNCTION__,
+ vstart);
+ }
+#endif /* __APRR_SUPPORTED__ */
+#endif /* XNU_MONITOR */
#if DEVELOPMENT || DEBUG
if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
pmap_load_io_rgns();
ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
+#if XNU_MONITOR
+ pmap_array_begin = (void *)phystokv(avail_start);
+ pmap_array = pmap_array_begin;
+ avail_start += round_page(MAX_ASID * sizeof(struct pmap));
+ pmap_array_end = (void *)phystokv(avail_start);
+
+ pmap_array_count = ((pmap_array_end - pmap_array_begin) / sizeof(struct pmap));
+
+ pmap_bootstrap_pmap_free_list();
+
+ pmap_ledger_ptr_array_begin = (void *)phystokv(avail_start);
+ pmap_ledger_ptr_array = pmap_ledger_ptr_array_begin;
+ avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(void*));
+ pmap_ledger_ptr_array_end = (void *)phystokv(avail_start);
+
+ pmap_ledger_refcnt_begin = (void *)phystokv(avail_start);
+ pmap_ledger_refcnt = pmap_ledger_refcnt_begin;
+ avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(os_refcnt_t));
+ pmap_ledger_refcnt_end = (void *)phystokv(avail_start);
+
+ simple_lock_init(&pmap_ledger_lock, 0);
+#endif
pmap_cpu_data_array_init();
vm_first_phys = gPhysBase;
#endif /* KASAN */
}
+#if XNU_MONITOR
+
+static inline void
+pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
+{
+ pmap_paddr_t cur_pa;
+ for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
+ assert(pa_valid(cur_pa));
+ pa_set_monitor(cur_pa);
+ }
+}
+
+static void
+pa_set_range_xprr_perm(pmap_paddr_t start_pa,
+ pmap_paddr_t end_pa,
+ unsigned int expected_perm,
+ unsigned int new_perm)
+{
+ vm_offset_t start_va = phystokv(start_pa);
+ vm_offset_t end_va = start_va + (end_pa - start_pa);
+
+ pa_set_range_monitor(start_pa, end_pa);
+ pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
+}
+
+void
+pmap_static_allocations_done(void)
+{
+ pmap_paddr_t monitor_start_pa;
+ pmap_paddr_t monitor_end_pa;
+
+ /*
+ * We allocate memory for bootstrap starting at topOfKernelData (which
+ * is at the end of the device tree and ramdisk data, if applicable).
+ * We use avail_start as a pointer to the first address that has not
+ * been reserved for bootstrap, so we know which pages to give to the
+ * virtual memory layer.
+ *
+ * These bootstrap allocations will be used primarily for page tables.
+ * If we wish to secure the page tables, we need to start by marking
+ * these bootstrap allocations as pages that we want to protect.
+ */
+ monitor_start_pa = BootArgs->topOfKernelData;
+ monitor_end_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
+
+ /* The bootstrap page tables are mapped RO at bootstrap. */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_PPL_RO_PERM);
+
+ monitor_start_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
+ monitor_end_pa = avail_start;
+
+ /* The other bootstrap allocations are mapped RW at bootstrap. */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
+
+ /* The RO page tables are mapped RW at bootstrap. */
+ monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
+ monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
+
+ monitor_start_pa = kvtophys(segPPLDATAB);
+ monitor_end_pa = monitor_start_pa + segSizePPLDATA;
+
+ /* PPL data is RW for the PPL, RO for the kernel. */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
+
+ monitor_start_pa = kvtophys(segPPLTEXTB);
+ monitor_end_pa = monitor_start_pa + segSizePPLTEXT;
+
+ /* PPL text is RX for the PPL, RO for the kernel. */
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
+
+#if __APRR_SUPPORTED__
+ monitor_start_pa = kvtophys(segPPLTRAMPB);
+ monitor_end_pa = monitor_start_pa + segSizePPLTRAMP;
+
+ /*
+ * The PPLTRAMP pages will be a mix of PPL RX/kernel RO and
+ * PPL RX/kernel RX. However, all of these pages belong to the PPL.
+ */
+ pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
+#endif
+
+ /*
+ * To support DTrace, the save areas for the PPL must be writable,
+ * because DTrace updates saved register state.
+ */
+ if (pmap_ppl_disable) {
+ vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
+ vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);
+
+ pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
+ }
+
+#if __APRR_SUPPORTED__
+ /* The trampoline must also be specially protected. */
+ pmap_set_range_xprr_perm((vm_offset_t)&ppl_trampoline_start, (vm_offset_t)&ppl_trampoline_end, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
+#endif
+
+ if (segSizePPLDATACONST > 0) {
+ monitor_start_pa = kvtophys(segPPLDATACONSTB);
+ monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
+
+ pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_PPL_RO_PERM);
+ }
+
+ /*
+ * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
+ * precaution. The real RW mappings are at a different location with guard pages.
+ */
+ pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_PPL_RO_PERM);
+}
+
+
+void
+pmap_lockdown_ppl(void)
+{
+ /* Mark the PPL as being locked down. */
+
+#if __APRR_SUPPORTED__
+ pmap_ppl_locked_down = TRUE;
+ /* Force a trap into the PPL to update APRR_EL1. */
+ pmap_return(FALSE, FALSE);
+#else
+#error "XPRR configuration error"
+#endif /* __APRR_SUPPORTED__ */
+
+}
+#endif /* XNU_MONITOR */
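For concreteness, these are the ranges pmap_static_allocations_done() hands over, restated from the calls above (an editor's summary; the boundary symbols come from the code, while the exact sizes are device- and configuration-dependent):

    [topOfKernelData, topOfKernelData + BOOTSTRAP_TABLE_SIZE)    bootstrap page tables        kernel RO -> PPL RO
    [topOfKernelData + BOOTSTRAP_TABLE_SIZE, avail_start)        remaining bootstrap data     kernel RW -> PPL RW
    [ropagetable_begin, ropagetable_end)                         RO page tables               kernel RW -> PPL RW
    [segPPLDATAB, segPPLDATAB + segSizePPLDATA)                  PPL data                     kernel RW -> PPL RW
    [segPPLTEXTB, segPPLTEXTB + segSizePPLTEXT)                  PPL text                     kernel RX -> PPL RX
    [segPPLDATACONSTB, segPPLDATACONSTB + segSizePPLDATACONST)   PPL const data               kernel RO -> PPL RO
    [pmap_stacks_start_pa, pmap_stacks_end_pa)                   PPL stack physical aperture  PPL RW -> PPL RO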
void
pmap_virtual_space(
PAGE_SIZE, "pmap");
}
+#if XNU_MONITOR
+MARK_AS_PMAP_TEXT static void
+pmap_ledger_alloc_init_internal(size_t size)
+{
+ pmap_simple_lock(&pmap_ledger_lock);
+
+ if (pmap_ledger_alloc_initialized) {
+ panic("%s: already initialized, "
+ "size=%lu",
+ __func__,
+ size);
+ }
+
+ if (size != sizeof(pmap_ledger_data_t)) {
+ panic("%s: size mismatch, expected %lu, "
+ "size=%lu",
+ __func__, PMAP_LEDGER_DATA_BYTES,
+ size);
+ }
+
+ pmap_ledger_alloc_initialized = true;
+
+ pmap_simple_unlock(&pmap_ledger_lock);
+}
+
+MARK_AS_PMAP_TEXT static ledger_t
+pmap_ledger_alloc_internal(void)
+{
+ pmap_paddr_t paddr;
+ uint64_t vaddr, vstart, vend;
+ uint64_t index;
+
+ ledger_t new_ledger;
+ uint64_t array_index;
+
+ pmap_simple_lock(&pmap_ledger_lock);
+ if (pmap_ledger_free_list == NULL) {
+ paddr = pmap_get_free_ppl_page();
+
+ if (paddr == 0) {
+ pmap_simple_unlock(&pmap_ledger_lock);
+ return NULL;
+ }
+
+ vstart = phystokv(paddr);
+ vend = vstart + PAGE_SIZE;
+
+ for (vaddr = vstart; (vaddr < vend) && ((vaddr + sizeof(pmap_ledger_t)) <= vend); vaddr += sizeof(pmap_ledger_t)) {
+ pmap_ledger_t *free_ledger;
+
+ index = pmap_ledger_ptr_array_free_index++;
+
+ if (index >= MAX_PMAP_LEDGERS) {
+ panic("%s: pmap_ledger_ptr_array is full, index=%llu",
+ __func__, index);
+ }
+
+ free_ledger = (pmap_ledger_t*)vaddr;
+
+ pmap_ledger_ptr_array[index] = free_ledger;
+ free_ledger->back_ptr = &pmap_ledger_ptr_array[index];
+
+ free_ledger->next = pmap_ledger_free_list;
+ pmap_ledger_free_list = free_ledger;
+ }
+
+ pa_set_range_xprr_perm(paddr, paddr + PAGE_SIZE, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
+ }
+
+ new_ledger = (ledger_t)pmap_ledger_free_list;
+ pmap_ledger_free_list = pmap_ledger_free_list->next;
+
+ array_index = pmap_ledger_validate(new_ledger);
+ os_ref_init(&pmap_ledger_refcnt[array_index], NULL);
+
+ pmap_simple_unlock(&pmap_ledger_lock);
+
+ return new_ledger;
+}
+
+MARK_AS_PMAP_TEXT static void
+pmap_ledger_free_internal(ledger_t ledger)
+{
+ pmap_ledger_t* free_ledger;
+
+ free_ledger = (pmap_ledger_t*)ledger;
+
+ pmap_simple_lock(&pmap_ledger_lock);
+ uint64_t array_index = pmap_ledger_validate(ledger);
+
+ if (os_ref_release(&pmap_ledger_refcnt[array_index]) != 0) {
+ panic("%s: ledger still referenced, "
+ "ledger=%p",
+ __func__,
+ ledger);
+ }
+
+ free_ledger->next = pmap_ledger_free_list;
+ pmap_ledger_free_list = free_ledger;
+ pmap_simple_unlock(&pmap_ledger_lock);
+}
+
+
+static void
+pmap_ledger_retain(ledger_t ledger)
+{
+ pmap_simple_lock(&pmap_ledger_lock);
+ uint64_t array_index = pmap_ledger_validate(ledger);
+ os_ref_retain(&pmap_ledger_refcnt[array_index]);
+ pmap_simple_unlock(&pmap_ledger_lock);
+}
+
+static void
+pmap_ledger_release(ledger_t ledger)
+{
+ pmap_simple_lock(&pmap_ledger_lock);
+ uint64_t array_index = pmap_ledger_validate(ledger);
+ os_ref_release_live(&pmap_ledger_refcnt[array_index]);
+ pmap_simple_unlock(&pmap_ledger_lock);
+}
+
+void
+pmap_ledger_alloc_init(size_t size)
+{
+ pmap_ledger_alloc_init_ppl(size);
+}
+
+ledger_t
+pmap_ledger_alloc(void)
+{
+ ledger_t retval = NULL;
+
+ while ((retval = pmap_ledger_alloc_ppl()) == NULL) {
+ pmap_alloc_page_for_ppl();
+ }
+
+ return retval;
+}
+
+void
+pmap_ledger_free(ledger_t ledger)
+{
+ pmap_ledger_free_ppl(ledger);
+}
+#else /* XNU_MONITOR */
__dead2
void
pmap_ledger_alloc_init(size_t size)
"ledger=%p",
__func__, ledger);
}
+#endif /* XNU_MONITOR */
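Several call sites in this patch (pmap_ledger_alloc() above, and pmap_create_options(), pmap_enter_options(), pmap_insert_sharedpage() and pmap_nest() below) share one convention: the PPL never waits for memory, so the kernel calls it with a no-wait option and, on KERN_RESOURCE_SHORTAGE, donates a fresh page via pmap_alloc_page_for_ppl() and retries. A stand-alone sketch of that shape, using hypothetical names rather than the real entry points:

typedef int sketch_kr_t;
#define SKETCH_SUCCESS   0
#define SKETCH_SHORTAGE  1

/* Stand-ins for a *_ppl() entry point and for pmap_alloc_page_for_ppl(). */
extern sketch_kr_t sketch_ppl_call_nowait(void);
extern void        sketch_donate_page_to_ppl(void);

static sketch_kr_t
sketch_call_ppl_with_retry(void)
{
	sketch_kr_t kr;

	/* The monitor cannot block for memory; the kernel feeds it pages instead. */
	while ((kr = sketch_ppl_call_nowait()) == SKETCH_SHORTAGE) {
		sketch_donate_page_to_ppl();
	}
	return kr;
}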
/*
* Create and return a physical map.
return PMAP_NULL;
}
+#if XNU_MONITOR
+ if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
+ return PMAP_NULL;
+ }
+#else
/*
* Allocate a pmap struct from the pmap_zone. Then allocate
* the translation table of the right size for the pmap.
if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
return PMAP_NULL;
}
+#endif
if (flags & PMAP_CREATE_64BIT) {
p->min = MACH_VM_MIN_ADDRESS;
}
+#if XNU_MONITOR
+ if (ledger) {
+ pmap_ledger_validate(ledger);
+ pmap_ledger_retain(ledger);
+ }
+#endif /* XNU_MONITOR */
p->ledger = ledger;
p->tte_index_max = tte_index_max;
#endif
+#if XNU_MONITOR
+ p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, PMAP_TT_ALLOCATE_NOWAIT);
+#else
p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
+#endif
if (!(p->tte)) {
goto tt1_alloc_fail;
}
tt1_alloc_fail:
pmap_get_pt_ops(p)->free_id(p);
id_alloc_fail:
+#if XNU_MONITOR
+ pmap_free_pmap(p);
+
+ if (ledger) {
+ pmap_ledger_release(ledger);
+ }
+#else
zfree(pmap_zone, p);
+#endif
return PMAP_NULL;
}
ledger_reference(ledger);
+#if XNU_MONITOR
+ /*
+ * TODO: It should be valid for pmap_create_options_internal to fail; we could
+ * be out of ASIDs.
+ */
+ while ((pmap = pmap_create_options_ppl(ledger, size, flags)) == PMAP_NULL) {
+ pmap_alloc_page_for_ppl();
+ }
+#else
pmap = pmap_create_options_internal(ledger, size, flags);
+#endif
if (pmap == PMAP_NULL) {
ledger_dereference(ledger);
return pmap;
}
-#if MACH_ASSERT
+#if XNU_MONITOR
+/*
+ * This symbol remains in place when the PPL is enabled so that the dispatch
+ * table does not change from development to release configurations.
+ */
+#endif
+#if MACH_ASSERT || XNU_MONITOR
MARK_AS_PMAP_TEXT static void
pmap_set_process_internal(
__unused pmap_t pmap,
}
#endif /* MACH_ASSERT */
}
-#endif /* MACH_ASSERT*/
+#endif /* MACH_ASSERT || XNU_MONITOR */
#if MACH_ASSERT
void
int pid,
char *procname)
{
+#if XNU_MONITOR
+ pmap_set_process_ppl(pmap, pid, procname);
+#else
pmap_set_process_internal(pmap, pid, procname);
+#endif
}
#endif /* MACH_ASSERT */
pmap_check_ledgers(pmap);
if (pmap->nested_region_asid_bitmap) {
+#if XNU_MONITOR
+ pmap_pages_free(kvtophys((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
+#else
kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
+#endif
}
+#if XNU_MONITOR
+ if (pmap->ledger) {
+ pmap_ledger_release(pmap->ledger);
+ }
+
+ pmap_free_pmap(pmap);
+#else
zfree(pmap_zone, pmap);
+#endif
}
void
ledger = pmap->ledger;
+#if XNU_MONITOR
+ pmap_destroy_ppl(pmap);
+
+ pmap_check_ledger_fields(ledger);
+#else
pmap_destroy_internal(pmap);
+#endif
ledger_dereference(ledger);
pmap_reference(
pmap_t pmap)
{
+#if XNU_MONITOR
+ pmap_reference_ppl(pmap);
+#else
pmap_reference_internal(pmap);
+#endif
}
static tt_entry_t *
return (tt_entry_t *)0;
}
+#if XNU_MONITOR
+ assert(pa);
+#endif
if (size < PAGE_SIZE) {
va = phystokv(pa) + size;
*ttp = (tt_entry_t *)phystokv(pa);
}
+#if XNU_MONITOR
+ assert(*ttp);
+#endif
return KERN_SUCCESS;
}
pv_h = pai_to_pvh(pai);
vm_offset_t pvh_flags = pvh_get_flags(pv_h);
+#if XNU_MONITOR
+ if (pvh_flags & PVH_FLAG_LOCKDOWN) {
+ panic("%d is locked down (%#lx), cannot remove", pai, pvh_flags);
+ }
+#endif
if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
//assert(!ARM_PTE_IS_COMPRESSED(spte));
pa = pte_to_pa(spte);
if (!pa_valid(pa)) {
+#if XNU_MONITOR || HAS_MILD_DSB
+ unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
+#endif
+#if XNU_MONITOR
+ if (!pmap_ppl_disable && (cacheattr & PP_ATTR_MONITOR)) {
+ panic("%s: attempt to remove mapping of PPL-protected I/O address 0x%llx", __func__, (uint64_t)pa);
+ }
+#endif
break;
}
pai = (int)pa_index(pa);
l = end;
}
+#if XNU_MONITOR
+ remove_count += pmap_remove_options_ppl(pmap, va, l, options);
+
+ pmap_ledger_check_balance(pmap);
+#else
remove_count += pmap_remove_options_internal(pmap, va, l, options);
+#endif
va = l;
}
pmap_t pmap)
{
PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
+#if XNU_MONITOR
+ pmap_switch_ppl(pmap);
+#else
pmap_switch_internal(pmap);
+#endif
PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
}
pv_h = pai_to_pvh(pai);
pvh_flags = pvh_get_flags(pv_h);
+#if XNU_MONITOR
+ if (remove && (pvh_flags & PVH_FLAG_LOCKDOWN)) {
+ panic("%d is locked down (%#llx), cannot remove", pai, pvh_get_flags(pv_h));
+ }
+#endif
pte_p = PT_ENTRY_NULL;
pve_p = PV_ENTRY_NULL;
#ifdef PVH_FLAG_IOMMU
if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
+#if XNU_MONITOR
+ if (pvh_flags & PVH_FLAG_LOCKDOWN) {
+ panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
+ ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
+ }
+#endif
if (remove) {
if (options & PMAP_OPTIONS_COMPRESSOR) {
panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
tmplate |= pt_attr_leaf_xn(pt_attr);
}
+#if __APRR_SUPPORTED__
+ if (__improbable(is_pte_xprr_protected(spte))) {
+ panic("pmap_page_protect: modifying an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
+ pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)spte, (uint64_t)tmplate, (uint64_t)va, ppnum);
+ }
+
+ if (__improbable(is_pte_xprr_protected(tmplate))) {
+ panic("pmap_page_protect: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
+ pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)spte, (uint64_t)tmplate, (uint64_t)va, ppnum);
+ }
+#endif /* __APRR_SUPPORTED__*/
if (*pte_p != ARM_PTE_TYPE_FAULT &&
!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
+#if XNU_MONITOR
+ pmap_page_protect_options_ppl(ppnum, prot, options);
+#else
pmap_page_protect_options_internal(ppnum, prot, options);
+#endif
PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
/* We do not expect to write fast fault the entry. */
pte_set_was_writeable(tmplate, false);
+#if __APRR_SUPPORTED__
+ if (__improbable(is_pte_xprr_protected(spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM))) {
+ /* Only test for PPL protection here, User-JIT mappings may be mutated by this function. */
+ panic("%s: modifying a PPL mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
+ __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
+ }
+
+ if (__improbable(is_pte_xprr_protected(tmplate))) {
+ panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
+ __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
+ }
+#endif /* __APRR_SUPPORTED__*/
WRITE_PTE_FAST(pte_p, tmplate);
if (managed) {
l = e;
}
+#if XNU_MONITOR
+ pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
+#else
pmap_protect_options_internal(pmap, beg, l, prot, options, args);
+#endif
beg = l;
}
vm_offset_t pvh_flags = pvh_get_flags(pv_h);
+#if XNU_MONITOR
+ if (pvh_flags & PVH_FLAG_LOCKDOWN) {
+ panic("%d is locked down (%#lx), cannot enter", pai, pvh_flags);
+ }
+#endif
#ifdef PVH_FLAG_CPU
/* An IOMMU mapping may already be present for a page that hasn't yet
pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
+#if XNU_MONITOR
+ /* The regular old kernel is not allowed to remap PPL pages. */
+ if (pa_test_monitor(pa)) {
+ panic("%s: page belongs to PPL, "
+ "pmap=%p, v=0x%llx, pn=%u, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
+ __FUNCTION__,
+ pmap, v, pn, prot, fault_type, flags, wired, options);
+ }
+
+ if (pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN) {
+ panic("%s: page locked down, "
+ "pmap=%p, v=0x%llx, pn=%u, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
+ __FUNCTION__,
+ pmap, v, pn, prot, fault_type, flags, wired, options);
+ }
+#endif
if (pte == *pte_p) {
pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
+#if XNU_MONITOR
+ if (!pmap_ppl_disable && (wimg_bits & PP_ATTR_MONITOR)) {
+ uint64_t xprr_perm = pte_to_xprr_perm(pte);
+ pte &= ~ARM_PTE_XPRR_MASK;
+ switch (xprr_perm) {
+ case XPRR_KERN_RO_PERM:
+ pte |= xprr_perm_to_pte(XPRR_PPL_RO_PERM);
+ break;
+ case XPRR_KERN_RW_PERM:
+ pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
+ break;
+ default:
+ panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
+ }
+ }
+#endif
pmap_enter_pte(pmap, pte_p, pte, v);
}
PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
+#if XNU_MONITOR
+ if (options & PMAP_OPTIONS_NOWAIT) {
+ /* If NOWAIT was requested, just return the result. */
+ kr = pmap_enter_options_ppl(pmap, v, pn, prot, fault_type, flags, wired, options);
+ } else {
+ /*
+ * If NOWAIT was not requested, loop until the enter does not
+ * fail due to lack of resources.
+ */
+ while ((kr = pmap_enter_options_ppl(pmap, v, pn, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT)) == KERN_RESOURCE_SHORTAGE) {
+ pv_water_mark_check();
+ pmap_alloc_page_for_ppl();
+ }
+ }
+
+ pmap_ledger_check_balance(pmap);
+#else
kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
+#endif
pv_water_mark_check();
PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
vm_map_address_t v,
boolean_t wired)
{
+#if XNU_MONITOR
+ pmap_change_wiring_ppl(pmap, v, wired);
+
+ pmap_ledger_check_balance(pmap);
+#else
pmap_change_wiring_internal(pmap, v, wired);
+#endif
}
MARK_AS_PMAP_TEXT static ppnum_t
}
if (not_in_kdp) {
+#if XNU_MONITOR
+ return pmap_find_phys_ppl(pmap, va);
+#else
return pmap_find_phys_internal(pmap, va);
+#endif
} else {
return pmap_vtophys(pmap, va);
}
return pa;
}
+#if XNU_MONITOR
+ return pmap_extract_ppl(pmap, va);
+#else
return pmap_extract_internal(pmap, va);
+#endif
}
/*
if (options & PMAP_OPTIONS_NOWAIT) {
return KERN_RESOURCE_SHORTAGE;
}
+#if XNU_MONITOR
+ panic("%s: failed to allocate tt, "
+ "pmap=%p, v=%p, options=0x%x, level=%u",
+ __FUNCTION__,
+ pmap, (void *)v, options, level);
+#else
VM_PAGE_WAIT();
+#endif
}
PMAP_LOCK(pmap);
if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
pmap_gc(
void)
{
+#if XNU_MONITOR
+ /*
+ * We cannot invoke the scheduler from the PPL, so for now we elide the
+ * GC logic if the PPL is enabled.
+ */
+#endif
+#if !XNU_MONITOR
pmap_t pmap, pmap_next;
boolean_t gc_wait;
}
pmap_simple_unlock(&pmaps_lock);
}
+#endif
}
/*
uint64_t
pmap_release_pages_fast(void)
{
+#if XNU_MONITOR
+ return pmap_release_ppl_pages_to_kernel();
+#else /* XNU_MONITOR */
return 0;
+#endif
}
/*
pmap_paddr_t pa = ptoa(pn);
vm_prot_t allow_mode = VM_PROT_ALL;
+#if XNU_MONITOR
+ if (bits & PP_ATTR_PPL_OWNED_BITS) {
+ panic("%s: illegal request, "
+ "pn=%u, bits=%#x, options=%#x, arg=%p",
+ __FUNCTION__,
+ pn, bits, options, arg);
+ }
+#endif
if ((bits & PP_ATTR_MODIFIED) &&
(options & PMAP_OPTIONS_NOFLUSH) &&
*/
PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
+#if XNU_MONITOR
+ phys_attribute_clear_ppl(pn, bits, options, arg);
+#else
phys_attribute_clear_internal(pn, bits, options, arg);
+#endif
PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}
pmap_paddr_t pa = ptoa(pn);
assert(pn != vm_page_fictitious_addr);
+#if XNU_MONITOR
+ if (bits & PP_ATTR_PPL_OWNED_BITS) {
+ panic("%s: illegal request, "
+ "pn=%u, bits=%#x",
+ __FUNCTION__,
+ pn, bits);
+ }
+#endif
pa_set_bits(pa, bits);
ppnum_t pn,
unsigned int bits)
{
+#if XNU_MONITOR
+ phys_attribute_set_ppl(pn, bits);
+#else
phys_attribute_set_internal(pn, bits);
+#endif
}
#endif
}
+#if XNU_MONITOR
+boolean_t
+pmap_is_monitor(ppnum_t pn)
+{
+ assert(pa_valid(ptoa(pn)));
+ return phys_attribute_test(pn, PP_ATTR_MONITOR);
+}
+#endif
void
pmap_lock_phys_page(ppnum_t pn)
{
+#if !XNU_MONITOR
int pai;
pmap_paddr_t phys = ptoa(pn);
pai = (int)pa_index(phys);
LOCK_PVH(pai);
} else
+#else
+ (void)pn;
+#endif
{ simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
}
void
pmap_unlock_phys_page(ppnum_t pn)
{
+#if !XNU_MONITOR
int pai;
pmap_paddr_t phys = ptoa(pn);
pai = (int)pa_index(phys);
UNLOCK_PVH(pai);
} else
+#else
+ (void)pn;
+#endif
{ simple_unlock(&phys_backup_lock);}
}
pmap_t pmap)
{
PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
+#if XNU_MONITOR
+ pmap_switch_user_ttb_ppl(pmap);
+#else
pmap_switch_user_ttb_internal(pmap);
+#endif
PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
}
void
pmap_clear_user_ttb(void)
{
+#if XNU_MONITOR
+ pmap_clear_user_ttb_ppl();
+#else
pmap_clear_user_ttb_internal();
+#endif
}
/*
}
}
+#if MACH_ASSERT && XNU_MONITOR
+ if (is_pte_xprr_protected(spte)) {
+ if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
+ panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
+ "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
+ __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
+ ppnum, options, allow_mode);
+ }
+ }
+#endif /* MACH_ASSERT && XNU_MONITOR */
if (update_pte) {
if (*pte_p != ARM_PTE_TYPE_FAULT &&
return FALSE; /* Not a managed page. */
}
+#if XNU_MONITOR
+ return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
+#else
return arm_force_fast_fault_internal(ppnum, allow_mode, options);
+#endif
}
/*
}
}
+#if MACH_ASSERT && XNU_MONITOR
+ if (is_pte_xprr_protected(spte)) {
+ if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
+ panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
+ "ppnum=0x%x, fault_type=0x%x",
+ __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
+ ppnum, fault_type);
+ }
+ }
+#endif /* MACH_ASSERT && XNU_MONITOR */
if (spte != tmplate) {
if (spte != ARM_PTE_TYPE_FAULT) {
if (!pa_valid(pa)) {
PMAP_UNLOCK(pmap);
+#if XNU_MONITOR
+ if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
+ return KERN_PROTECTION_FAILURE;
+ } else
+#endif
return result;
}
pai = (int)pa_index(pa);
LOCK_PVH(pai);
+#if __APRR_SUPPORTED__
+ if (*ptep == spte) {
+ /*
+ * Double-check the spte value, as we care
+ * about the AF bit.
+ */
+ break;
+ }
+ UNLOCK_PVH(pai);
+#else /* !__APRR_SUPPORTED__ */
break;
+#endif /* !__APRR_SUPPORTED__ */
}
} else {
PMAP_UNLOCK(pmap);
return result;
}
+#if __APRR_SUPPORTED__
+ /* Check to see if this mapping had APRR restrictions. */
+ if (is_pte_xprr_protected(spte)) {
+ /*
+ * We have faulted on an XPRR managed mapping; decide if the access should be
+ * reattempted or if it should cause an exception. Now that all JIT entitled
+ * task threads always have MPRR enabled we're only here because of
+ * an AF fault or an actual permission fault. AF faults will have result
+ * changed to KERN_SUCCESS below upon arm_clear_fast_fault return.
+ */
+ if (was_af_fault && (spte & ARM_PTE_AF)) {
+ result = KERN_SUCCESS;
+ goto out;
+ } else {
+ result = KERN_PROTECTION_FAILURE;
+ }
+ }
+#endif /* __APRR_SUPPORTED__*/
if ((IS_REFFAULT_PAGE(pai)) ||
((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
}
}
+#if __APRR_SUPPORTED__
+out:
+#endif /* __APRR_SUPPORTED__*/
UNLOCK_PVH(pai);
PMAP_UNLOCK(pmap);
return result;
}
#endif
+#if XNU_MONITOR
+ result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
+#else
result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
+#endif
#if (__ARM_VMSA__ == 7)
done:
vm_offset_t cpu_copywindow_vaddr = 0;
bool need_strong_sync = false;
+#if XNU_MONITOR || HAS_MILD_DSB
+ unsigned int cacheattr = (!pa_valid(ptoa(pn)) ? pmap_cache_attributes(pn) : 0);
+ need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
+#endif
+
+#if XNU_MONITOR
+#ifdef __ARM_COHERENT_IO__
+ if (pa_valid(ptoa(pn)) && !pmap_ppl_disable) {
+ panic("%s: attempted to map a managed page, "
+ "pn=%u, prot=0x%x, wimg_bits=0x%x",
+ __FUNCTION__,
+ pn, prot, wimg_bits);
+ }
+ if (!pmap_ppl_disable && (cacheattr & PP_ATTR_MONITOR)) {
+ panic("%s: attempt to map PPL-protected I/O address 0x%llx", __func__, (uint64_t)ptoa(pn));
+ }
+#else /* __ARM_COHERENT_IO__ */
+#error CPU copy windows are not properly supported with both the PPL and incoherent IO
+#endif /* __ARM_COHERENT_IO__ */
+#endif /* XNU_MONITOR */
cpu_num = pmap_cpu_data->cpu_number;
for (i = 0; i < CPUWINDOWS_MAX; i++) {
vm_prot_t prot,
unsigned int wimg_bits)
{
+#if XNU_MONITOR
+ return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
+#else
return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
+#endif
}
MARK_AS_PMAP_TEXT static void
pmap_unmap_cpu_windows_copy(
unsigned int index)
{
+#if XNU_MONITOR
+ return pmap_unmap_cpu_windows_copy_ppl(index);
+#else
return pmap_unmap_cpu_windows_copy_internal(index);
+#endif
}
/*
pmap_set_nested(
pmap_t pmap)
{
+#if XNU_MONITOR
+ pmap_set_nested_ppl(pmap);
+#else
pmap_set_nested_internal(pmap);
+#endif
}
/*
addr64_t nstart,
uint64_t size)
{
+#if XNU_MONITOR
+ pmap_trim_ppl(grand, subord, vstart, nstart, size);
+
+ pmap_ledger_check_balance(grand);
+ pmap_ledger_check_balance(subord);
+#else
pmap_trim_internal(grand, subord, vstart, nstart, size);
+#endif
}
+#if HAS_APPLE_PAC && XNU_MONITOR
+static void *
+pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator)
+{
+ void *res = NULL;
+ boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
+
+ ml_set_kernelkey_enabled(FALSE);
+ switch (key) {
+ case ptrauth_key_asia:
+ res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
+ break;
+ case ptrauth_key_asda:
+ res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
+ break;
+ default:
+ panic("attempt to sign user pointer without process independent key");
+ }
+ ml_set_kernelkey_enabled(TRUE);
+
+ ml_set_interrupts_enabled(current_intr_state);
+
+ return res;
+}
+
+void *
+pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator)
+{
+ return pmap_sign_user_ptr_internal(value, key, discriminator);
+}
+
+static void *
+pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator)
+{
+ if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
+ panic("attempt to auth user pointer without process independent key");
+ }
+
+ void *res = NULL;
+ boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
+
+ ml_set_kernelkey_enabled(FALSE);
+ res = ml_auth_ptr_unchecked(value, key, discriminator);
+ ml_set_kernelkey_enabled(TRUE);
+
+ ml_set_interrupts_enabled(current_intr_state);
+
+ return res;
+}
+
+void *
+pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator)
+{
+ return pmap_auth_user_ptr_internal(value, key, discriminator);
+}
+#endif /* HAS_APPLE_PAC && XNU_MONITOR */
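A hedged usage sketch for the pair above (editor's illustration only; the function name, the pointer, and the discriminator below are made up, and the real callers live elsewhere in xnu): a pointer headed for user-visible state is signed with a process-independent key while the kernel key is disabled, and re-authenticated the same way when it is consumed.

/* Hypothetical caller; 'user_pc' and the discriminator are illustrative. */
static void *
sketch_protect_user_pc(void *user_pc)
{
	const uint64_t discriminator = 0x1234;
	void *signed_pc = pmap_sign_user_ptr(user_pc, ptrauth_key_asia, discriminator);
	/* ... signed_pc is stored in user-visible state; on the way back in ... */
	return pmap_auth_user_ptr(signed_pc, ptrauth_key_asia, discriminator);
}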
/*
* kern_return_t pmap_nest(grand, subord, vstart, size)
__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
assert(pmap_get_pt_attr(subord) == pt_attr);
+#if XNU_MONITOR
+ expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
+#endif
if (((size | vstart | nstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL) {
panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
if (subord->nested_region_asid_bitmap == NULL) {
nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
+#if XNU_MONITOR
+ pmap_paddr_t pa = 0;
+
+ if ((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE) {
+ panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
+ "grand=%p, subord=%p, vstart=0x%llx, nstart=0x%llx, size=%llx",
+ __FUNCTION__,
+ nested_region_asid_bitmap_size,
+ grand, subord, vstart, nstart, size);
+ }
+
+ kr = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
+
+ if (kr != KERN_SUCCESS) {
+ return kr;
+ }
+
+ assert(pa);
+
+ nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
+#else
nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size * sizeof(unsigned int));
+#endif
bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
PMAP_LOCK(subord);
}
PMAP_UNLOCK(subord);
if (nested_region_asid_bitmap != NULL) {
+#if XNU_MONITOR
+ pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
+#else
kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
+#endif
}
}
if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
/* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
+#if XNU_MONITOR
+ pmap_paddr_t pa = 0;
+
+ if ((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE) {
+ panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
+ "grand=%p, subord=%p, vstart=0x%llx, nstart=0x%llx, size=%llx",
+ __FUNCTION__,
+ new_nested_region_asid_bitmap_size,
+ grand, subord, vstart, nstart, size);
+ }
+
+ kr = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
+
+ if (kr != KERN_SUCCESS) {
+ return kr;
+ }
+
+ assert(pa);
+
+ new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
+#else
new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size * sizeof(unsigned int));
+#endif
PMAP_LOCK(subord);
if (subord->nested_region_size < new_size) {
bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));
}
PMAP_UNLOCK(subord);
if (nested_region_asid_bitmap != NULL)
+#if XNU_MONITOR
+ {pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);}
+#else
{ kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));}
+#endif
if (new_nested_region_asid_bitmap != NULL)
+#if XNU_MONITOR
+ {pmap_pages_free(kvtophys((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);}
+#else
{ kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));}
+#endif
}
PMAP_LOCK(subord);
VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
VM_KERNEL_ADDRHIDE(vstart));
+#if XNU_MONITOR
+ while ((kr = pmap_nest_ppl(grand, subord, vstart, nstart, size)) == KERN_RESOURCE_SHORTAGE) {
+ pmap_alloc_page_for_ppl();
+ }
+
+ pmap_ledger_check_balance(grand);
+ pmap_ledger_check_balance(subord);
+#else
kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
+#endif
PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
+#if XNU_MONITOR
+ kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
+#else
kr = pmap_unnest_options_internal(grand, vaddr, size, option);
+#endif
PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
LOCK_PVH(pai);
+#if XNU_MONITOR
+ if (__improbable(pa_test_monitor(paddr))) {
+ panic("%s invoked on PPL page 0x%08x", __func__, pn);
+ }
+#endif
pmap_update_cache_attributes_locked(pn, new_cacheattr);
#if __ARM_PTE_PHYSMAP__
unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
if (cacheattr != VM_WIMG_DEFAULT) {
+#if XNU_MONITOR
+ pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
+#else
pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
+#endif
}
#endif
return (void*)phystokv(ptoa(pn));
#if __ARM_PTE_PHYSMAP__
unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
if (cacheattr != VM_WIMG_DEFAULT) {
+#if XNU_MONITOR
+ pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
+#else
pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
+#endif
}
#endif
}
if (doit) {
LOCK_PVH(pai);
+#if XNU_MONITOR
+ if (pa_test_monitor(paddr)) {
+ panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
+ }
+#endif
}
do {
boolean_t doit,
unsigned int *res)
{
+#if XNU_MONITOR
+ return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
+#else
return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
+#endif
}
MARK_AS_PMAP_TEXT static void
LOCK_PVH(pai);
+#if XNU_MONITOR
+ if (external && pa_test_monitor(paddr)) {
+ panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
+ } else if (!external && !pa_test_monitor(paddr)) {
+ panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
+ }
+#endif
do {
pp_attr_current = pp_attr_table[pai];
ppnum_t pn,
unsigned int cacheattr)
{
+#if XNU_MONITOR
+ pmap_set_cache_attributes_ppl(pn, cacheattr);
+#else
pmap_set_cache_attributes_internal(pn, cacheattr);
+#endif
}
MARK_AS_PMAP_TEXT void
tmplate = *pte_p;
tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
+#if XNU_MONITOR
+ tmplate |= (wimg_to_pte(attributes) & ~ARM_PTE_XPRR_MASK);
+#else
tmplate |= wimg_to_pte(attributes);
+#endif
#if (__ARM_VMSA__ > 7)
if (tmplate & ARM_PTE_HINT_MASK) {
panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
kern_return_t kr;
pmap_paddr_t pa = 0;
+#if XNU_MONITOR
+ pa = pmap_alloc_page_for_kern();
+ assert(pa);
+#else
(void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
+#endif
memset((char *) phystokv(pa), 0, PAGE_SIZE);
int options = 0;
VALIDATE_PMAP(pmap);
+#if XNU_MONITOR
+ options |= PMAP_OPTIONS_NOWAIT;
+#endif /* XNU_MONITOR */
#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);
if (kr != KERN_SUCCESS) {
+#if XNU_MONITOR
+ if (kr == KERN_RESOURCE_SHORTAGE) {
+ return kr;
+ } else
+#endif
{
panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
}
pmap_insert_sharedpage(
pmap_t pmap)
{
+#if XNU_MONITOR
+ kern_return_t kr = KERN_FAILURE;
+
+ while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
+ pmap_alloc_page_for_ppl();
+ }
+
+ pmap_ledger_check_balance(pmap);
+
+ if (kr != KERN_SUCCESS) {
+ panic("%s: failed to insert the shared page, kr=%d, "
+ "pmap=%p",
+ __FUNCTION__, kr,
+ pmap);
+ }
+#else
pmap_insert_sharedpage_internal(pmap);
+#endif
}
static boolean_t
vm_map_offset_t va_start,
vm_map_offset_t va_end)
{
+#if XNU_MONITOR
+ return pmap_is_empty_ppl(pmap, va_start, va_end);
+#else
return pmap_is_empty_internal(pmap, va_start, va_end);
+#endif
}
vm_map_offset_t
return;
}
+#if XNU_MONITOR
+
+/*
+ * Enforce that the address range described by kva and nbytes is not currently
+ * PPL-owned, and won't become PPL-owned while pinned. This is to prevent
+ * unintentionally writing to PPL-owned memory.
+ */
+static void
+pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
+{
+ vm_offset_t end;
+ if (os_add_overflow(kva, nbytes, &end)) {
+ panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
+ }
+ for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
+ pmap_paddr_t pa = kvtophys(ckva);
+ if (!pa_valid(pa)) {
+ panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
+ }
+ pp_attr_t attr;
+ unsigned int pai = (unsigned int)pa_index(pa);
+ if (ckva == phystokv(pa)) {
+ panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
+ }
+ do {
+ attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
+ if (attr & PP_ATTR_MONITOR) {
+ panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
+ }
+ } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
+ }
+}
+
+static void
+pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
+{
+ vm_offset_t end;
+ if (os_add_overflow(kva, nbytes, &end)) {
+ panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
+ }
+ for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
+ pmap_paddr_t pa = kvtophys(ckva);
+ if (!pa_valid(pa)) {
+ panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
+ }
+ if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
+ panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
+ }
+ assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
+ pa_clear_no_monitor(pa);
+ }
+}
+
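The pin/unpin loops above visit each physical page overlapped by [kva, kva + nbytes) by rounding past the current page on every step. A small stand-alone demonstration of that iteration idiom (editor's sketch; the 16 KiB page size and the addresses are assumptions):

#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE  16384ULL   /* 16 KiB pages assumed for illustration */
#define sketch_round_page(x)  (((x) + SKETCH_PAGE_SIZE - 1) & ~(SKETCH_PAGE_SIZE - 1))

int
main(void)
{
	uint64_t kva = 0x4008ULL;        /* unaligned start */
	uint64_t nbytes = 0x9000ULL;     /* spans three 16 KiB pages */
	uint64_t end;

	if (__builtin_add_overflow(kva, nbytes, &end)) {
		return 1;                /* the kernel version panics here instead */
	}
	for (uint64_t ckva = kva; ckva < end; ckva = sketch_round_page(ckva + 1)) {
		/* prints the base of each page touched: 0x4000, 0x8000, 0xC000 */
		printf("visit page 0x%llx\n", (unsigned long long)(ckva & ~(SKETCH_PAGE_SIZE - 1)));
	}
	return 0;
}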
+/*
+ * Lock down a page, making all mappings read-only, and preventing
+ * further mappings or removal of this particular kva's mapping.
+ * Effectively, it makes the page at kva immutable.
+ */
+MARK_AS_PMAP_TEXT static void
+pmap_ppl_lockdown_page(vm_address_t kva)
+{
+ pmap_paddr_t pa = kvtophys(kva);
+ unsigned int pai = (unsigned int)pa_index(pa);
+ LOCK_PVH(pai);
+ pv_entry_t **pv_h = pai_to_pvh(pai);
+
+ if (pa_test_monitor(pa)) {
+ panic("%#lx: page %llx belongs to PPL", kva, pa);
+ }
+
+ if (pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC)) {
+ panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
+ }
+
+ pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
+
+ if (pte_p == PT_ENTRY_NULL) {
+ panic("%#lx: NULL pte", kva);
+ }
+
+ pt_entry_t tmplate = *pte_p;
+ if ((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA)) {
+ panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
+ }
+
+ pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);
+
+ pmap_set_ptov_ap(pai, AP_RONA, FALSE);
+
+ UNLOCK_PVH(pai);
+
+ pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0);
+}
+
+/*
+ * Release a page from being locked down to the PPL, making it writable
+ * to the kernel once again.
+ */
+MARK_AS_PMAP_TEXT static void
+pmap_ppl_unlockdown_page(vm_address_t kva)
+{
+ pmap_paddr_t pa = kvtophys(kva);
+ unsigned int pai = (unsigned int)pa_index(pa);
+ LOCK_PVH(pai);
+ pv_entry_t **pv_h = pai_to_pvh(pai);
+
+ vm_offset_t pvh_flags = pvh_get_flags(pv_h);
+
+ if (!(pvh_flags & PVH_FLAG_LOCKDOWN)) {
+ panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
+ }
+
+ pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
+ pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
+ UNLOCK_PVH(pai);
+}
+
+#else /* XNU_MONITOR */
static void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}
+#endif /* !XNU_MONITOR */
#define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
if (l > end) {
l = end;
}
+#if XNU_MONITOR
+ resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
+#else
resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
+#endif
if (resident_bytes == PMAP_RESIDENT_INVALID) {
break;
}
pmap_set_jit_entitled(
pmap_t pmap)
{
+#if XNU_MONITOR
+ pmap_set_jit_entitled_ppl(pmap);
+#else
pmap_set_jit_entitled_internal(pmap);
+#endif
}
MARK_AS_PMAP_TEXT static kern_return_t
vm_map_offset_t va,
int *disp_p)
{
+#if XNU_MONITOR
+ return pmap_query_page_info_ppl(pmap, va, disp_p);
+#else
return pmap_query_page_info_internal(pmap, va, disp_p);
+#endif
}
MARK_AS_PMAP_TEXT kern_return_t
kern_return_t
pmap_return(boolean_t do_panic, boolean_t do_recurse)
{
+#if XNU_MONITOR
+ return pmap_return_ppl(do_panic, do_recurse);
+#else
return pmap_return_internal(do_panic, do_recurse);
+#endif
}
vm_map_t map,
boolean_t suspend)
{
+#if XNU_MONITOR
+ pmap_footprint_suspend_ppl(map, suspend);
+#else
pmap_footprint_suspend_internal(map, suspend);
+#endif
}
#if defined(__arm64__) && (DEVELOPMENT || DEBUG)
#define CPUWINDOWS_MAX 4
struct pmap_cpu_data {
+#if XNU_MONITOR
+ uint64_t cpu_id;
+ void * ppl_kern_saved_sp;
+ void * ppl_stack;
+ arm_context_t * save_area;
+ unsigned int ppl_state;
+#endif
#if defined(__arm64__)
pmap_t cpu_nested_pmap;
#else
extern uint64_t get_tcr(void);
extern void set_tcr(uint64_t);
extern uint64_t pmap_get_arm64_prot(pmap_t, vm_offset_t);
+#if defined(HAS_VMSA_LOCK)
+extern void vmsa_lock(void);
+#endif
#else
extern uint32_t get_mmu_control(void);
extern void set_mmu_control(uint32_t);
#if defined(__arm64__)
extern vm_offset_t pmap_extract(pmap_t pmap, vm_map_offset_t va);
#endif
+#if HAS_APPLE_PAC && XNU_MONITOR
+extern void * pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t data);
+extern void * pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t data);
+#endif /* HAS_APPLE_PAC && XNU_MONITOR */
/*
* Interfaces implemented as macros.
#define PMAP_LEDGER_ALLOC_INDEX 66
#define PMAP_LEDGER_FREE_INDEX 67
+#if HAS_APPLE_PAC && XNU_MONITOR
+#define PMAP_SIGN_USER_PTR 68
+#define PMAP_AUTH_USER_PTR 69
+#endif /* HAS_APPLE_PAC && XNU_MONITOR */
#define PMAP_COUNT 71
/* Get the pmap per-CPU data for the current CPU. */
extern pmap_cpu_data_t * pmap_get_cpu_data(void);
+#if XNU_MONITOR
+extern boolean_t pmap_ppl_locked_down;
+
+/*
+ * Denotes the bounds of the PPL stacks. These are visible so that other code
+ * can check if addresses are part of the PPL stacks.
+ */
+extern void * pmap_stacks_start;
+extern void * pmap_stacks_end;
+
+/* Asks if a page belongs to the monitor. */
+extern boolean_t pmap_is_monitor(ppnum_t pn);
+
+/*
+ * Indicates that we are done with our static bootstrap
+ * allocations, so the monitor may now mark the pages
+ * that it owns.
+ */
+extern void pmap_static_allocations_done(void);
+
+/*
+ * Indicates that we are done mutating sensitive state in the system, and that
+ * the PPL may now restrict write access to PPL-owned mappings.
+ */
+extern void pmap_lockdown_ppl(void);
+
+
+#ifdef KASAN
+#define PPL_STACK_SIZE (PAGE_SIZE << 2)
+#else
+#define PPL_STACK_SIZE PAGE_SIZE
+#endif
+
+/* One stack for each CPU, plus a guard page below each stack and above the last stack */
+#define PPL_STACK_REGION_SIZE ((MAX_CPUS * (PPL_STACK_SIZE + ARM_PGBYTES)) + ARM_PGBYTES)
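As a worked example of the sizing above (editor's note; the page size and CPU count are assumptions): with 16 KiB pages, no KASAN, and MAX_CPUS == 6, PPL_STACK_SIZE is 16 KiB, so PPL_STACK_REGION_SIZE = 6 * (16 KiB + 16 KiB) + 16 KiB = 208 KiB, i.e. one stack plus one guard page per CPU, plus the final guard page above the last stack.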
+
+#define PPL_DATA_SEGMENT_SECTION_NAME "__PPLDATA,__data"
+#define PPL_TEXT_SEGMENT_SECTION_NAME "__PPLTEXT,__text,regular,pure_instructions"
+#define PPL_DATACONST_SEGMENT_SECTION_NAME "__PPLDATA,__const"
+
+#define MARK_AS_PMAP_DATA \
+ __PLACE_IN_SECTION(PPL_DATA_SEGMENT_SECTION_NAME)
+#define MARK_AS_PMAP_TEXT \
+ __attribute__((used, section(PPL_TEXT_SEGMENT_SECTION_NAME), noinline))
+#define MARK_AS_PMAP_RODATA \
+ __PLACE_IN_SECTION(PPL_DATACONST_SEGMENT_SECTION_NAME)
+
+#else /* XNU_MONITOR */
#define MARK_AS_PMAP_TEXT
#define MARK_AS_PMAP_DATA
#define MARK_AS_PMAP_RODATA
+#endif /* !XNU_MONITOR */
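For reference, a minimal usage sketch of these attribute macros (editor's illustration; the symbols below are hypothetical): data the monitor owns is tagged MARK_AS_PMAP_DATA and PPL entry points MARK_AS_PMAP_TEXT, which places them in the __PPLDATA and __PPLTEXT sections that pmap_static_allocations_done() later transfers to the PPL.

MARK_AS_PMAP_DATA static unsigned int ppl_example_counter;

MARK_AS_PMAP_TEXT static void
ppl_example_routine(void)
{
	ppl_example_counter++;
}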
extern kern_return_t pmap_return(boolean_t do_panic, boolean_t do_recurse);
extern lck_grp_t pmap_lck_grp;
+#if XNU_MONITOR
+extern void CleanPoC_DcacheRegion_Force_nopreempt(vm_offset_t va, unsigned length);
+#define pmap_force_dcache_clean(va, sz) CleanPoC_DcacheRegion_Force_nopreempt(va, sz)
+#define pmap_simple_lock(l) simple_lock_nopreempt(l, &pmap_lck_grp)
+#define pmap_simple_unlock(l) simple_unlock_nopreempt(l)
+#define pmap_simple_lock_try(l) simple_lock_try_nopreempt(l, &pmap_lck_grp)
+#define pmap_lock_bit(l, i) hw_lock_bit_nopreempt(l, i, &pmap_lck_grp)
+#define pmap_unlock_bit(l, i) hw_unlock_bit_nopreempt(l, i)
+#else
#define pmap_force_dcache_clean(va, sz) CleanPoC_DcacheRegion_Force(va, sz)
#define pmap_simple_lock(l) simple_lock(l, &pmap_lck_grp)
#define pmap_simple_unlock(l) simple_unlock(l)
#define pmap_simple_lock_try(l) simple_lock_try(l, &pmap_lck_grp)
#define pmap_lock_bit(l, i) hw_lock_bit(l, i, &pmap_lck_grp)
#define pmap_unlock_bit(l, i) hw_unlock_bit(l, i)
+#endif
#endif /* #ifndef ASSEMBLER */
#define __PLATFORM_WKDM_ALIGNMENT_BOUNDARY__ (64)
#define __ARM_CLUSTER_COUNT__ 2
+#elif defined (APPLEVORTEX)
+#define __ARM_ARCH__ 8
+#define __ARM_VMSA__ 8
+#define __ARM_SMP__ 1
+#define __ARM_VFP__ 4
+#define __ARM_COHERENT_CACHE__ 1
+#define __ARM_COHERENT_IO__ 1
+#define __ARM_IC_NOALIAS_ICACHE__ 1
+#define __ARM_DEBUG__ 7
+#define __ARM_ENABLE_SWAP__ 1
+#define __ARM_V8_CRYPTO_EXTENSIONS__ 1
+#define __ARM_16K_PG__ 1
+#define __ARM64_PMAP_SUBPAGE_L1__ 1
+#define __ARM_GLOBAL_SLEEP_BIT__ 1
+#define __ARM_PAN_AVAILABLE__ 1
+#define __ARM_WKDM_ISA_AVAILABLE__ 1
+#define __PLATFORM_WKDM_ALIGNMENT_MASK__ (0x3FULL)
+#define __PLATFORM_WKDM_ALIGNMENT_BOUNDARY__ (64)
+#define __ARM_CLUSTER_COUNT__ 2
+
+#elif defined (APPLELIGHTNING)
+#define __ARM_ARCH__ 8
+#define __ARM_VMSA__ 8
+#define __ARM_SMP__ 1
+#define __ARM_AMP__ 1
+#define __ARM_VFP__ 4
+#define __ARM_COHERENT_CACHE__ 1
+#define __ARM_COHERENT_IO__ 1
+#define __ARM_IC_NOALIAS_ICACHE__ 1
+#define __ARM_DEBUG__ 7
+#define __ARM_ENABLE_SWAP__ 1
+#define __ARM_V8_CRYPTO_EXTENSIONS__ 1
+#define __ARM_16K_PG__ 1
+#define __ARM64_PMAP_SUBPAGE_L1__ 1
+#define __ARM_GLOBAL_SLEEP_BIT__ 1
+#define __ARM_PAN_AVAILABLE__ 1
+#define __ARM_WKDM_ISA_AVAILABLE__ 1
+#define __PLATFORM_WKDM_ALIGNMENT_MASK__ (0x3FULL)
+#define __PLATFORM_WKDM_ALIGNMENT_BOUNDARY__ (64)
+#define __ARM_CLUSTER_COUNT__ 2
+#define __APCFG_SUPPORTED__ 1
+#define __ARM_RANGE_TLBI__ 1
+
#elif defined (BCM2837)
#define __ARM_ARCH__ 8
#define __ARM_VMSA__ 8
#define L2_SWAY (L2_CSIZE - L2_NWAY) /* set size 1<<L2_SWAY */
#define L2_NSET (L2_SWAY - L2_CLINE) /* lines per way 1<<L2_NSET */
+#elif defined (APPLEVORTEX)
+
+/* I-Cache, 128KB 8-way for Vortex, 48KB 6-way for Tempest. */
+#define MMU_I_CLINE 6 /* cache line size as 1<<MMU_I_CLINE (64) */
+
+/* D-Cache, 128KB 8-way for Vortex, 32KB 4-way for Tempest. */
+#define MMU_CSIZE 17 /* cache size as 1<<MMU_CSIZE (128K) */
+#define MMU_CLINE 6 /* cache line size is 1<<MMU_CLINE (64) */
+#define MMU_NWAY 3 /* set associativity 1<<MMU_NWAY (8) */
+#define MMU_I7SET 6 /* cp15 c7 set incrementer 1<<MMU_I7SET */
+#define MMU_I7WAY 30 /* cp15 c7 way incrementer 1<<MMU_I7WAY */
+#define MMU_I9WAY 30 /* cp15 c9 way incrementer 1<<MMU_I9WAY */
+
+#define MMU_SWAY (MMU_CSIZE - MMU_NWAY) /* set size 1<<MMU_SWAY */
+#define MMU_NSET (MMU_SWAY - MMU_CLINE) /* lines per way 1<<MMU_NSET */
+
+/* L2-Cache */
+#define __ARM_L2CACHE__ 1
+
+/*
+ * LLC (Vortex L2): 8MB, 128-byte lines, 16-way.
+ * LLC (Tempest L2): 2MB, 128-byte lines, 16-way.
+ */
+#define L2_CSIZE __ARM_L2CACHE_SIZE_LOG__ /* cache size as 1<<L2_CSIZE */
+#define L2_CLINE 7 /* cache line size as 1<<L2_CLINE (128) */
+#define L2_NWAY 4 /* set associativity as 1<<L2_NWAY (16) */
+#define L2_I7SET 6 /* TODO: cp15 c7 set incrementer 1<<L2_I7SET */
+#define L2_I7WAY 28 /* TODO: cp15 c7 way incrementer 1<<L2_I7WAY */
+#define L2_I9WAY 28 /* TODO: cp15 c9 way incrementer 1<<L2_I9WAY */
+
+#define L2_SWAY (L2_CSIZE - L2_NWAY) /* set size 1<<L2_SWAY */
+#define L2_NSET (L2_SWAY - L2_CLINE) /* lines per way 1<<L2_NSET */
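Worked example for the derived values above (editor's note; assumes __ARM_L2CACHE_SIZE_LOG__ == 23 for the 8 MB configuration): L2_SWAY = 23 - 4 = 19, so each way spans 512 KiB, and L2_NSET = 19 - 7 = 12, i.e. 4096 lines per way; 4096 lines of 128 bytes across 16 ways gives back the 8 MB total. The APPLELIGHTNING block below follows the same arithmetic.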
+
+#elif defined (APPLELIGHTNING)
+
+/* I-Cache, 192KB for Lightning, 96KB for Thunder, 6-way. */
+#define MMU_I_CLINE 6 /* cache line size as 1<<MMU_I_CLINE (64) */
+
+/* D-Cache, 128KB for Lightning, 8-way. 48KB for Thunder, 6-way. */
+#define MMU_CSIZE 17 /* cache size as 1<<MMU_CSIZE (128K) */
+#define MMU_CLINE 6 /* cache line size is 1<<MMU_CLINE (64) */
+#define MMU_NWAY 3 /* set associativity 1<<MMU_NWAY (8) */
+#define MMU_I7SET 6 /* cp15 c7 set incrementer 1<<MMU_I7SET */
+#define MMU_I7WAY 30 /* cp15 c7 way incrementer 1<<MMU_I7WAY */
+#define MMU_I9WAY 30 /* cp15 c9 way incrementer 1<<MMU_I9WAY */
+
+#define MMU_SWAY (MMU_CSIZE - MMU_NWAY) /* set size 1<<MMU_SWAY */
+#define MMU_NSET (MMU_SWAY - MMU_CLINE) /* lines per way 1<<MMU_NSET */
+
+/* L2-Cache */
+#define __ARM_L2CACHE__ 1
+
+/*
+ * LLC (Lightning L2): 8MB, 128-byte lines, 16-way.
+ * LLC (Thunder L2): 4MB, 128-byte lines, 16-way.
+ */
+#define L2_CSIZE __ARM_L2CACHE_SIZE_LOG__ /* cache size as 1<<L2_CSIZE */
+#define L2_CLINE 7 /* cache line size as 1<<L2_CLINE (128) */
+#define L2_NWAY 4 /* set associativity as 1<<L2_NWAY (16) */
+#define L2_I7SET 6 /* TODO: cp15 c7 set incrementer 1<<L2_I7SET */
+#define L2_I7WAY 28 /* TODO: cp15 c7 way incrementer 1<<L2_I7WAY */
+#define L2_I9WAY 28 /* TODO: cp15 c9 way incrementer 1<<L2_I9WAY */
+
+#define L2_SWAY (L2_CSIZE - L2_NWAY) /* set size 1<<L2_SWAY */
+#define L2_NSET (L2_SWAY - L2_CLINE) /* lines per way 1<<L2_NSET */
+
#elif defined (BCM2837) /* Raspberry Pi 3 */
/* I-Cache. We don't have detailed spec so we just follow the ARM technical reference. */
static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
#endif /* __ARM_KERNEL_PROTECT__ */
+#if __APRR_SUPPORTED__ && XNU_MONITOR
+/*
+ * If APRR is supported, setting XN on L1/L2 table entries will shift the effective
+ * APRR index of L3 PTEs covering PPL-protected pages in the kernel dynamic region
+ * from PPL R/W to kernel R/W. That will effectively remove PPL write protection
+ * from those pages. Avoid setting XN at the table level for MONITOR-enabled builds
+ * that are backed by APRR.
+ */
+#define ARM_DYNAMIC_TABLE_XN ARM_TTE_TABLE_PXN
+#else
#define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)
+#endif
#if KASAN
extern vm_offset_t shadow_pbase;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;
+#if XNU_MONITOR
+SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTEXTB;
+SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTEXT;
+
+SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTRAMPB;
+SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTRAMP;
+
+SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATACONSTB;
+SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATACONST;
+SECURITY_READ_ONLY_LATE(void *) pmap_stacks_start = NULL;
+SECURITY_READ_ONLY_LATE(void *) pmap_stacks_end = NULL;
+#endif
SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;
SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;
+#if XNU_MONITOR
+SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATAB;
+SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATA;
+#endif
SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB;
SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA;
vm_offset_t alloc_ptpage(boolean_t map_static) {
vm_offset_t vaddr;
-#if !(defined(KERNEL_INTEGRITY_KTRR))
+#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
map_static = FALSE;
#endif
#endif /* DEBUG */
-#if __ARM_KERNEL_PROTECT__
+#if __ARM_KERNEL_PROTECT__ || XNU_MONITOR
/*
* arm_vm_map:
* root_ttp: The kernel virtual address for the root of the target page tables
*ptep = pte;
}
-#endif // __ARM_KERNEL_PROTECT
+#endif // __ARM_KERNEL_PROTECT || XNU_MONITOR
#if __ARM_KERNEL_PROTECT__
}
#endif /* __ARM_KERNEL_PROTECT__ */
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
extern void bootstrap_instructions;
/*
ARM_PTE_AP(AP_RONA) |
ARM_PTE_NX;
}
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
tt_entry_t *arm_kva_to_tte(vm_offset_t);
return tte2;
}
+#if XNU_MONITOR
+
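+/*
+ * Return a pointer to the L3 PTE mapping the given kernel virtual address.
+ * This assumes the address is covered by an L3 table rather than a block mapping.
+ */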
+static inline pt_entry_t *
+arm_kva_to_pte(vm_offset_t va)
+{
+ tt_entry_t *tte2 = arm_kva_to_tte(va);
+ return L3_TABLE_VA(tte2) + L3_TABLE_INDEX(va);
+}
+
+#endif
#define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
#define ARM64_GRANULE_ALLOW_HINT (1 << 1)
* NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
* Make RNX in prot_finalize
*/
+#if XNU_MONITOR
+ /* The ropagetable region will ultimately be owned by the PPL. Set permissions
+ * on it separately to avoid applying mismatched block settings between this function,
+ * pmap_static_allocations_done(), and arm_vm_prot_finalize(). */
+ vm_offset_t segDATACONSTE = segDATACONSTB + segSizeDATACONST;
+
+ arm_vm_page_granular_RWNX(segDATACONSTB, (vm_offset_t)&ropagetable_begin - segDATACONSTB, ARM64_GRANULE_ALLOW_BLOCK);
+ arm_vm_page_granular_RWNX((vm_offset_t)&ropagetable_begin,
+ (vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin, ARM64_GRANULE_ALLOW_BLOCK);
+ arm_vm_page_granular_RWNX((vm_offset_t)&ropagetable_end,
+ segDATACONSTE - (vm_offset_t)&ropagetable_end, ARM64_GRANULE_ALLOW_BLOCK);
+#else
arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
+#endif
arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
+#if XNU_MONITOR
+ arm_vm_page_granular_ROX(segPPLTEXTB, segSizePPLTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
+ arm_vm_page_granular_ROX(segPPLTRAMPB, segSizePPLTRAMP, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
+ arm_vm_page_granular_RNX(segPPLDATACONSTB, segSizePPLDATACONST, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
+#endif
/* DATA segment will remain RWNX */
arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
+#if XNU_MONITOR
+ arm_vm_page_granular_RWNX(segPPLDATAB, segSizePPLDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
+#endif
arm_vm_page_granular_RWNX(segBOOTDATAB, segSizeBOOTDATA, 0);
arm_vm_page_granular_RNX((vm_offset_t)&intstack_low_guard, PAGE_MAX_SIZE, 0);
++ptov_index;
}
+#if XNU_MONITOR
+
+SECURITY_READ_ONLY_LATE(static boolean_t) keep_linkedit = FALSE;
+
+static void
+arm_vm_physmap_init(boot_args *args, vm_map_address_t physmap_base, vm_map_address_t dynamic_memory_begin __unused)
+{
+ ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
+ bzero(temp_ptov_table, sizeof(temp_ptov_table));
+
+ // This is memory that will either be handed back to the VM layer via ml_static_mfree(),
+ // or will be available for general-purpose use. Physical aperture mappings for this memory
+ // must be at page granularity, so that PPL ownership or cache attribute changes can be reflected
+ // in the physical aperture mappings.
+
+
+ // Slid region between gPhysBase and beginning of protected text
+ arm_vm_physmap_slide(temp_ptov_table, physmap_base, gVirtBase, segLOWEST - gVirtBase, AP_RWNA, 0);
+
+ // kext bootstrap segment
+ arm_vm_physmap_slide(temp_ptov_table, physmap_base, segKLDB, segSizeKLD, AP_RONA, 0);
+
+ // Early-boot data
+ arm_vm_physmap_slide(temp_ptov_table, physmap_base, segBOOTDATAB, segSizeBOOTDATA, AP_RONA, 0);
+
+#if KASAN_DYNAMIC_BLACKLIST
+ /* KASAN's dynamic blacklist needs to query the LINKEDIT segment at runtime. As such, the
+ * kext bootstrap code will not jettison LINKEDIT on kasan kernels, so don't bother to relocate it. */
+ keep_linkedit = TRUE;
+#else
+ PE_parse_boot_argn("keepsyms", &keep_linkedit, sizeof(keep_linkedit));
+#endif
+ if (!keep_linkedit) {
+ // Kernel LINKEDIT
+ arm_vm_physmap_slide(temp_ptov_table, physmap_base, segLINKB, segSizeLINK, AP_RWNA, 0);
+
+ // Prelinked kernel LINKEDIT
+ arm_vm_physmap_slide(temp_ptov_table, physmap_base, segPLKLINKEDITB, segSizePLKLINKEDIT, AP_RWNA, 0);
+ }
+
+ // Prelinked kernel plists
+ arm_vm_physmap_slide(temp_ptov_table, physmap_base, segPRELINKINFOB, segSizePRELINKINFO, AP_RWNA, 0);
+
+ // Device tree, ramdisk, boot args
+ arm_vm_physmap_slide(temp_ptov_table, physmap_base, end_kern, (args->topOfKernelData - gPhysBase + gVirtBase) - end_kern, AP_RWNA, 0);
+ PE_slide_devicetree(temp_ptov_table[ptov_index - 1].va - end_kern);
+
+ // Remainder of physical memory
+ arm_vm_physmap_slide(temp_ptov_table, physmap_base, (args->topOfKernelData + BOOTSTRAP_TABLE_SIZE - gPhysBase + gVirtBase),
+ real_avail_end - (args->topOfKernelData + BOOTSTRAP_TABLE_SIZE), AP_RWNA, 0);
+
+ assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= dynamic_memory_begin);
+
+ // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
+ // segments should be placed earliest in the table to optimize lookup performance.
+ qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
+
+ memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
+}
+
+#else
static void
arm_vm_physmap_init(boot_args *args, vm_map_address_t physmap_base, vm_map_address_t dynamic_memory_begin __unused)
memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
}
+#endif // XNU_MONITOR
void
arm_vm_prot_finalize(boot_args * args __unused)
arm_vm_populate_kernel_el0_mappings();
#endif /* __ARM_KERNEL_PROTECT__ */
+#if XNU_MONITOR
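+	/* Clear the original kext bootstrap (KLD) mappings; this range was relocated
+	 * into the physical aperture by arm_vm_physmap_init(). */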
+ for (vm_offset_t va = segKLDB; va < (segKLDB + segSizeKLD); va += ARM_PGBYTES) {
+ pt_entry_t *pte = arm_kva_to_pte(va);
+ *pte = ARM_PTE_EMPTY;
+ }
+ /* Clear the original stack mappings; these pages should be mapped through ptov_table. */
+ for (vm_offset_t va = segBOOTDATAB; va < (segBOOTDATAB + segSizeBOOTDATA); va += ARM_PGBYTES) {
+ pt_entry_t *pte = arm_kva_to_pte(va);
+ *pte = ARM_PTE_EMPTY;
+ }
+ /* Clear the original PRELINKINFO mapping. This segment should be jettisoned during I/O Kit
+ * initialization before we reach this point. */
+ for (vm_offset_t va = segPRELINKINFOB; va < (segPRELINKINFOB + segSizePRELINKINFO); va += ARM_PGBYTES) {
+ pt_entry_t *pte = arm_kva_to_pte(va);
+ *pte = ARM_PTE_EMPTY;
+ }
+ if (!keep_linkedit) {
+ for (vm_offset_t va = segLINKB; va < (segLINKB + segSizeLINK); va += ARM_PGBYTES) {
+ pt_entry_t *pte = arm_kva_to_pte(va);
+ *pte = ARM_PTE_EMPTY;
+ }
+ for (vm_offset_t va = segPLKLINKEDITB; va < (segPLKLINKEDITB + segSizePLKLINKEDIT); va += ARM_PGBYTES) {
+ pt_entry_t *pte = arm_kva_to_pte(va);
+ *pte = ARM_PTE_EMPTY;
+ }
+ }
+#endif /* XNU_MONITOR */
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
/*
* __LAST,__pinst should no longer be executable.
*/
*/
#endif
+#if XNU_MONITOR
+ vm_offset_t segDATACONSTE = segDATACONSTB + segSizeDATACONST;
+
+ /*
+ * For the moment, the RO pagetable allocation is part of the
+ * constant data segment, but it is technically owned by the
+ * PPL. Hence, we should not reprotect it.
+ */
+ arm_vm_page_granular_RNX(segDATACONSTB, (vm_offset_t)&ropagetable_begin - segDATACONSTB, ARM64_GRANULE_ALLOW_BLOCK);
+ arm_vm_page_granular_RNX((vm_offset_t)&ropagetable_end,
+ segDATACONSTE - (vm_offset_t)&ropagetable_end, ARM64_GRANULE_ALLOW_BLOCK);
+#else
arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
+#endif
__builtin_arm_dsb(DSB_ISH);
flush_mmu_tlb();
physmap_base += physmap_slide;
+#if XNU_MONITOR
+ physmap_base = ROUND_TWIG(physmap_base);
+ static_memory_end = physmap_base + mem_size;
+#else
static_memory_end = physmap_base + mem_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE); // worst possible case for block alignment
+#endif
#if KASAN
/* add the KASAN stolen memory to the physmap */
dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
#else
dynamic_memory_begin = static_memory_end;
+#endif
+#if XNU_MONITOR
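+	/* Reserve a virtual address range for the PPL stacks; the page tables backing
+	 * this range are created further down, just before pmap_bootstrap(). */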
+ pmap_stacks_start = (void*)dynamic_memory_begin;
+ dynamic_memory_begin += PPL_STACK_REGION_SIZE;
+ pmap_stacks_end = (void*)dynamic_memory_begin;
#endif
if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS)
panic("Unsupported memory configuration %lx\n", mem_size);
*/
avail_start = boot_ttep + BOOTSTRAP_TABLE_SIZE;
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
arm_replace_identity_map(args);
#endif
segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
+#if XNU_MONITOR
+ segPPLTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTEXT", &segSizePPLTEXT);
+ segPPLTRAMPB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTRAMP", &segSizePPLTRAMP);
+ segPPLDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA_CONST", &segSizePPLDATACONST);
+#endif
segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
+#if XNU_MONITOR
+ segPPLDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA", &segSizePPLDATA);
+#endif
segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
flush_mmu_tlb();
+#if defined(HAS_VMSA_LOCK)
+ vmsa_lock();
+#endif
kva_active = TRUE;
// global table pointers may need to be different due to physical aperture remapping
cpu_tte = (tt_entry_t*)(phystokv(cpu_ttep));
vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);
+#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
+ // reserve a 32MB region without permission overrides to use later for a CTRR unit test
+ {
+ extern vm_offset_t ctrr_test_page;
+ tt_entry_t *new_tte;
+
+ ctrr_test_page = dynamic_memory_begin;
+ dynamic_memory_begin += ARM_TT_L2_SIZE;
+ cpu_l1_tte = cpu_tte + ((ctrr_test_page & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
+ assert((*cpu_l1_tte) & ARM_TTE_VALID);
+ cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((ctrr_test_page & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
+ assert((*cpu_l2_tte) == ARM_TTE_EMPTY);
+ new_tte = (tt_entry_t *)alloc_ptpage(FALSE);
+ bzero(new_tte, ARM_PGBYTES);
+ *cpu_l2_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
+ }
+#endif /* defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST) */
+#if XNU_MONITOR
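+	/* Pre-create page table entries (left empty for now) covering the PPL stack
+	 * region, so the backing tables exist before pmap_bootstrap() runs. */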
+ for (vm_offset_t cur = (vm_offset_t)pmap_stacks_start; cur < (vm_offset_t)pmap_stacks_end; cur += ARM_PGBYTES) {
+ arm_vm_map(cpu_tte, cur, ARM_PTE_EMPTY);
+ }
+#endif
pmap_bootstrap(dynamic_memory_begin);
disable_preemption();
*/
avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;
+#if XNU_MONITOR
+ pmap_static_allocations_done();
+#endif
first_avail = avail_start;
patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData);
enable_preemption();
extern void typhoon_return_from_wfi(void);
#endif
+#if HAS_RETENTION_STATE
+extern void arm64_retention_wfi(void);
+#endif
vm_address_t start_cpu_paddr;
typhoon_prepare_for_wfi();
#endif
__builtin_arm_dsb(DSB_SY);
+#if HAS_RETENTION_STATE
+ arm64_retention_wfi();
+#else
__builtin_arm_wfi();
+#endif
#if defined(APPLETYPHOON)
// <rdar://problem/15827409> CPU1 Stuck in WFIWT Because of MMU Prefetch
cpu_data_ptr->coresight_base[i] = 0;
}
+#if !XNU_MONITOR
pmap_cpu_data_t * pmap_cpu_data_ptr = &cpu_data_ptr->cpu_pmap_cpu_data;
pmap_cpu_data_ptr->cpu_nested_pmap = (struct pmap *) NULL;
for (i = 0; i < (sizeof(pmap_cpu_data_ptr->cpu_asid_high_bits) / sizeof(*pmap_cpu_data_ptr->cpu_asid_high_bits)); i++) {
pmap_cpu_data_ptr->cpu_asid_high_bits[i] = 0;
}
+#endif
cpu_data_ptr->halt_status = CPU_NOT_HALTED;
#if __ARM_KERNEL_PROTECT__
cpu_data_ptr->cpu_exc_vectors = (vm_offset_t)&exc_vectors_table;
return KERN_SUCCESS;
}
+#if defined(KERNEL_INTEGRITY_CTRR)
+
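+/* Per-cluster flag indicating that CTRR lockdown has completed; secondary CPUs
+ * block on this in cpu_start() until their cluster master finishes lockdown. */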
+lck_spin_t ctrr_cpu_start_lck;
+bool ctrr_cluster_locked[__ARM_CLUSTER_COUNT__];
+
+void
+init_ctrr_cpu_start_lock(void)
+{
+ lck_grp_t *ctrr_cpu_start_lock_grp = lck_grp_alloc_init("ctrr_cpu_start_lock", 0);
+ assert(ctrr_cpu_start_lock_grp);
+ lck_spin_init(&ctrr_cpu_start_lck, ctrr_cpu_start_lock_grp, NULL);
+}
+
+#endif
kern_return_t
cpu_start(int cpu)
cpu_data_ptr->cpu_reset_handler = (vm_offset_t) start_cpu_paddr;
+#if !XNU_MONITOR
cpu_data_ptr->cpu_pmap_cpu_data.cpu_nested_pmap = NULL;
+#endif
if (cpu_data_ptr->cpu_processor->startup_thread != THREAD_NULL) {
first_thread = cpu_data_ptr->cpu_processor->startup_thread;
flush_dcache((vm_offset_t)&CpuDataEntries[cpu], sizeof(cpu_data_entry_t), FALSE);
flush_dcache((vm_offset_t)cpu_data_ptr, sizeof(cpu_data_t), FALSE);
+#if defined(KERNEL_INTEGRITY_CTRR)
+	/* The first time a CPU starts, if it is not the cluster master and its cluster
+	 * is not already locked, block until the cluster becomes locked. */
+ if (cpu_data_ptr->cpu_processor->active_thread == THREAD_NULL
+ && !cpu_data_ptr->cluster_master) {
+ lck_spin_lock(&ctrr_cpu_start_lck);
+ if (ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] == 0) {
+ assert_wait(&ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id], THREAD_UNINT);
+ lck_spin_unlock(&ctrr_cpu_start_lck);
+ thread_block(THREAD_CONTINUE_NULL);
+ assert(ctrr_cluster_locked[cpu_data_ptr->cpu_cluster_id] == 1);
+ } else {
+ lck_spin_unlock(&ctrr_cpu_start_lck);
+ }
+ }
+#endif
(void) PE_cpu_start(cpu_data_ptr->cpu_id, (vm_offset_t)NULL, (vm_offset_t)NULL);
}
#include <pexpert/arm64/board_config.h>
#endif
+#if XNU_MONITOR
+/* Exit path defines; for controlling PPL -> kernel transitions. */
+#define PPL_EXIT_DISPATCH 0 /* This is a clean exit after a PPL request. */
+#define PPL_EXIT_PANIC_CALL 1 /* The PPL has called panic. */
+#define PPL_EXIT_BAD_CALL 2 /* The PPL request failed. */
+#define PPL_EXIT_EXCEPTION 3 /* The PPL took an exception. */
+
+#define KERNEL_MODE_ELR ELR_GL11
+#define KERNEL_MODE_FAR FAR_GL11
+#define KERNEL_MODE_ESR ESR_GL11
+#define KERNEL_MODE_SPSR SPSR_GL11
+#define KERNEL_MODE_ASPSR ASPSR_GL11
+#define KERNEL_MODE_VBAR VBAR_GL11
+#define KERNEL_MODE_TPIDR TPIDR_GL11
+
+#define GUARDED_MODE_ELR ELR_EL1
+#define GUARDED_MODE_FAR FAR_EL1
+#define GUARDED_MODE_ESR ESR_EL1
+#define GUARDED_MODE_SPSR SPSR_EL1
+#define GUARDED_MODE_ASPSR ASPSR_EL1
+#define GUARDED_MODE_VBAR VBAR_EL1
+#define GUARDED_MODE_TPIDR TPIDR_EL1
+
+/*
+ * GET_PMAP_CPU_DATA
+ *
+ * Retrieves the PPL per-CPU data for the current CPU.
+ * arg0 - Register in which the address of the PPL per-CPU data is returned
+ * arg1 - Scratch register
+ * arg2 - Scratch register
+ *
+ */
+.macro GET_PMAP_CPU_DATA
+/* Get the CPU ID. */
+mrs $0, MPIDR_EL1
+#ifdef CPU_CLUSTER_OFFSETS
+ubfx $1, $0, MPIDR_AFF1_SHIFT, MPIDR_AFF1_WIDTH
+cmp $1, __ARM_CLUSTER_COUNT__
+b.hs .
+adrp $2, EXT(pmap_cluster_offsets)@page
+add $2, $2, EXT(pmap_cluster_offsets)@pageoff
+ldr $1, [$2, $1, lsl #3]
+and $0, $0, MPIDR_AFF0_MASK
+add $0, $0, $1
+#else
+and $0, $0, MPIDR_AFF0_MASK
+#endif
+
+/* Get the PPL CPU data array. */
+adrp $1, EXT(pmap_cpu_data_array)@page
+add $1, $1, EXT(pmap_cpu_data_array)@pageoff
+
+/*
+ * Sanity check the CPU ID (this is not a panic because this pertains to
+ * the hardware configuration; this should only fail if our
+ * understanding of the hardware is incorrect).
+ */
+cmp $0, MAX_CPUS
+b.hs .
+
+mov $2, PMAP_CPU_DATA_ARRAY_ENTRY_SIZE
+/* Get the PPL per-CPU data. */
+madd $0, $0, $2, $1
+.endmacro
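+/* Example use (as in the PPL trampoline below): GET_PMAP_CPU_DATA x12, x13, x14
+ * leaves the current CPU's pmap_cpu_data pointer in x12, clobbering x13/x14. */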
+#endif /* XNU_MONITOR */
/*
* INIT_SAVED_STATE_FLAVORS
DECLARE("SR_RESTORE_TCR_EL1", offsetof(struct sysreg_restore, tcr_el1));
+#if XNU_MONITOR
+ DECLARE("PMAP_CPU_DATA_PPL_STATE", offsetof(struct pmap_cpu_data, ppl_state));
+ DECLARE("PMAP_CPU_DATA_ARRAY_ENTRY_SIZE", sizeof(struct pmap_cpu_data_array_entry));
+ DECLARE("PMAP_CPU_DATA_PPL_STACK", offsetof(struct pmap_cpu_data, ppl_stack));
+ DECLARE("PMAP_CPU_DATA_KERN_SAVED_SP", offsetof(struct pmap_cpu_data, ppl_kern_saved_sp));
+ DECLARE("PMAP_CPU_DATA_SAVE_AREA", offsetof(struct pmap_cpu_data, save_area));
+ DECLARE("PMAP_COUNT", PMAP_COUNT);
+#endif /* XNU_MONITOR */
#if defined(HAS_APPLE_PAC)
#include <arm/pmap.h>
#endif
+#if XNU_MONITOR
+/*
+ * CHECK_EXCEPTION_RETURN_DISPATCH_PPL
+ *
+ * Checks if an exception was taken from the PPL, and if so, trampolines back
+ * into the PPL.
+ * x26 - 0 if the exception was taken while in the kernel, 1 if the
+ * exception was taken while in the PPL.
+ */
+.macro CHECK_EXCEPTION_RETURN_DISPATCH_PPL
+ cmp x26, xzr
+ b.eq 1f
+
+ /* Return to the PPL. */
+ mov x15, #0
+ mov w10, #PPL_STATE_EXCEPTION
+#if __APRR_SUPPORTED__
+ b Ldisable_aif_and_enter_ppl
+#else
+#error "XPRR configuration error"
+#endif /* __APRR_SUPPORTED__ */
+1:
+.endmacro
+
+#if __APRR_SUPPORTED__
+/*
+ * EL1_SP0_VECTOR_PPL_CHECK
+ *
+ * Check to see if the exception was taken by the kernel or the PPL. Falls
+ * through if kernel, hands off to the given label if PPL. Expects to run on
+ * SP1.
+ * arg0 - Label to go to if this was a PPL exception.
+ */
+.macro EL1_SP0_VECTOR_PPL_CHECK
+ sub sp, sp, ARM_CONTEXT_SIZE
+ stp x0, x1, [sp, SS64_X0]
+ mrs x0, APRR_EL1
+ MOV64 x1, APRR_EL1_DEFAULT
+ cmp x0, x1
+ b.ne $0
+ ldp x0, x1, [sp, SS64_X0]
+ add sp, sp, ARM_CONTEXT_SIZE
+.endmacro
+
+#define STAY_ON_SP1 0
+#define SWITCH_TO_SP0 1
+
+#define INVOKE_PREFLIGHT 0
+#define NO_INVOKE_PREFLIGHT 1
+
+/*
+ * EL1_SP0_VECTOR_NOT_IN_KERNEL_MODE
+ *
+ * Verify whether an exception came from the PPL or from the kernel. If it came
+ * from the PPL, save off the PPL state and transition out of the PPL.
+ * arg0 - Label to go to if this was a kernel exception
+ * arg1 - Label to go to (after leaving the PPL) if this was a PPL exception
+ * arg2 - Indicates if this should switch back to SP0
+ * x0 - xPRR_EL1_BR1 read by EL1_SP0_VECTOR_PPL_CHECK
+ */
+.macro EL1_SP0_VECTOR_NOT_IN_KERNEL_MODE
+ /* Spill some more registers. */
+ stp x2, x3, [sp, SS64_X2]
+
+ /*
+ * Check if the PPL is locked down; if not, we can treat this as a
+	 * kernel exception.
+ */
+ adrp x1, EXT(pmap_ppl_locked_down)@page
+ ldr w1, [x1, #EXT(pmap_ppl_locked_down)@pageoff]
+ cbz x1, 2f
+
+ /* Ensure that APRR_EL1 is actually in PPL mode. */
+ MOV64 x1, APRR_EL1_PPL
+ cmp x0, x1
+ b.ne .
+
+ /*
+ * Check if the CPU is in the PPL; if not we can treat this as a
+ * kernel exception.
+ */
+ GET_PMAP_CPU_DATA x3, x1, x2
+ ldr w1, [x3, PMAP_CPU_DATA_PPL_STATE]
+ cmp x1, #PPL_STATE_KERNEL
+ b.eq 2f
+
+ /* Ensure that the CPU is in the expected PPL state. */
+ cmp x1, #PPL_STATE_DISPATCH
+ b.ne .
+
+ /* Mark the CPU as dealing with an exception. */
+ mov x1, #PPL_STATE_EXCEPTION
+ str w1, [x3, PMAP_CPU_DATA_PPL_STATE]
+
+ /* Load the bounds of the PPL trampoline. */
+ adrp x0, EXT(ppl_no_exception_start)@page
+ add x0, x0, EXT(ppl_no_exception_start)@pageoff
+ adrp x1, EXT(ppl_no_exception_end)@page
+ add x1, x1, EXT(ppl_no_exception_end)@pageoff
+
+ /*
+ * Ensure that the exception did not occur in the trampoline. If it
+ * did, we are either being attacked or our state machine is
+ * horrifically broken.
+ */
+ mrs x2, ELR_EL1
+ cmp x2, x0
+ b.lo 1f
+ cmp x2, x1
+ b.hi 1f
+
+ /* We might be under attack; spin. */
+ b .
+
+1:
+ /* Get the PPL save area. */
+ mov x1, x3
+ ldr x0, [x3, PMAP_CPU_DATA_SAVE_AREA]
+
+ /* Save our x0, x1 state. */
+ ldp x2, x3, [sp, SS64_X0]
+ stp x2, x3, [x0, SS64_X0]
+
+ /* Restore SP1 to its original state. */
+ mov x3, sp
+ add sp, sp, ARM_CONTEXT_SIZE
+
+ .if $2 == SWITCH_TO_SP0
+ /* Switch back to SP0. */
+ msr SPSel, #0
+ mov x2, sp
+ .else
+ /* Load the SP0 value. */
+ mrs x2, SP_EL0
+ .endif
+
+ /* Save off the stack pointer. */
+ str x2, [x0, SS64_SP]
+
+ INIT_SAVED_STATE_FLAVORS x0, w1, w2
+
+ /* Save the context that was interrupted. */
+ ldp x2, x3, [x3, SS64_X2]
+ stp fp, lr, [x0, SS64_FP]
+ SPILL_REGISTERS KERNEL_MODE
+
+ /*
+ * Stash the function we wish to be invoked to deal with the exception;
+ * usually this is some preflight function for the fleh_* handler.
+ */
+ adrp x25, $1@page
+ add x25, x25, $1@pageoff
+
+ /*
+ * Indicate that this is a PPL exception, and that we should return to
+ * the PPL.
+ */
+ mov x26, #1
+
+ /* Transition back to kernel mode. */
+ mov x15, #PPL_EXIT_EXCEPTION
+ b ppl_return_to_kernel_mode
+2:
+ /* Restore SP1 state. */
+ ldp x2, x3, [sp, SS64_X2]
+ ldp x0, x1, [sp, SS64_X0]
+ add sp, sp, ARM_CONTEXT_SIZE
+
+ /* Go to the specified label (usually the original exception vector). */
+ b $0
+.endmacro
+#endif /* __APRR_SUPPORTED__ */
+
+#endif /* XNU_MONITOR */
#define CBF_DISABLE 0
#define CBF_ENABLE 1
.endmacro
el1_sp0_synchronous_vector_long:
+#if XNU_MONITOR && __APRR_SUPPORTED__
+ /*
+ * We do not have enough space for new instructions in this vector, so
+ * jump to outside code to check if this exception was taken in the PPL.
+ */
+ b el1_sp0_synchronous_vector_ppl_check
+Lel1_sp0_synchronous_vector_kernel:
+#endif
sub sp, sp, ARM_CONTEXT_SIZE // Make space on the exception stack
stp x0, x1, [sp, SS64_X0] // Save x0, x1 to the stack
mrs x1, ESR_EL1 // Get the exception syndrome
b fleh_dispatch64
el1_sp0_irq_vector_long:
+#if XNU_MONITOR && __APRR_SUPPORTED__
+ EL1_SP0_VECTOR_PPL_CHECK el1_sp0_irq_vector_not_in_kernel_mode
+Lel1_sp0_irq_vector_kernel:
+#endif
EL1_SP0_VECTOR
mrs x1, TPIDR_EL1
ldr x1, [x1, ACT_CPUDATAP]
el1_sp0_fiq_vector_long:
// ARM64_TODO write optimized decrementer
+#if XNU_MONITOR && __APRR_SUPPORTED__
+ EL1_SP0_VECTOR_PPL_CHECK el1_sp0_fiq_vector_not_in_kernel_mode
+Lel1_sp0_fiq_vector_kernel:
+#endif
EL1_SP0_VECTOR
mrs x1, TPIDR_EL1
ldr x1, [x1, ACT_CPUDATAP]
b fleh_dispatch64
el1_sp0_serror_vector_long:
+#if XNU_MONITOR && __APRR_SUPPORTED__
+ EL1_SP0_VECTOR_PPL_CHECK el1_sp0_serror_vector_not_in_kernel_mode
+Lel1_sp0_serror_vector_kernel:
+#endif
EL1_SP0_VECTOR
adrp x1, EXT(fleh_serror)@page // Load address for fleh
add x1, x1, EXT(fleh_serror)@pageoff
add x1, x1, EXT(fleh_serror)@pageoff
b fleh_dispatch64
+#if XNU_MONITOR && __APRR_SUPPORTED__
+el1_sp0_synchronous_vector_ppl_check:
+ EL1_SP0_VECTOR_PPL_CHECK el1_sp0_synchronous_vector_not_in_kernel_mode
+
+ /* Jump back to the primary exception vector if we fell through. */
+ b Lel1_sp0_synchronous_vector_kernel
+#endif
/*
* check_exception_stack
b.ne Lel1_sp1_synchronous_vector_continue
msr ELR_EL1, lr // Return to caller
eret
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
/* 64-bit first level exception handler dispatcher.
* Completes register context saving and branches to FLEH.
mov x23, #0
mov x24, #0
mov x25, #0
+#if !XNU_MONITOR
mov x26, #0
+#endif
mov x27, #0
mov x28, #0
/* fp/lr already cleared by EL0_64_VECTOR */
mov x21, x0 // Copy arm_context_t pointer to x21
mov x22, x1 // Copy handler routine to x22
+#if XNU_MONITOR
+ /* Zero x26 to indicate that this should not return to the PPL. */
+ mov x26, #0
+#endif
#if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME
tst x23, PSR64_MODE_EL_MASK // If any EL MODE bits are set, we're coming from
bl EXT(sleh_synchronous)
POP_FRAME
+#if XNU_MONITOR
+ CHECK_EXCEPTION_RETURN_DISPATCH_PPL
+#endif
b exception_return_dispatch
POP_FRAME
END_INTERRUPT_HANDLER
+#if XNU_MONITOR
+ CHECK_EXCEPTION_RETURN_DISPATCH_PPL
+#endif
b exception_return_dispatch
POP_FRAME
END_INTERRUPT_HANDLER
+#if XNU_MONITOR
+ CHECK_EXCEPTION_RETURN_DISPATCH_PPL
+#endif
b exception_return_dispatch
bl EXT(sleh_serror)
POP_FRAME
+#if XNU_MONITOR
+ CHECK_EXCEPTION_RETURN_DISPATCH_PPL
+#endif
b exception_return_dispatch
user_set_debug_state_and_return:
+#if defined(APPLELIGHTNING)
+/* rdar://53177964 ([Cebu Errata SW WA][v8Debug] MDR NEX L3 clock turns OFF during restoreCheckpoint due to SWStep getting masked) */
+
+ ARM64_IS_PCORE x12 // if we're not a pCORE, also do nothing
+ cbz x12, 1f
+
+ mrs x12, ARM64_REG_HID1 // if any debug session ever existed, set forceNexL3ClkOn
+ orr x12, x12, ARM64_REG_HID1_forceNexL3ClkOn
+ msr ARM64_REG_HID1, x12
+1:
+
+#endif
ldr x4, [x3, ACT_CPUDATAP] // Get current CPU data pointer
isb // Synchronize context
LEXT(ExceptionVectorsEnd)
#endif /* __ARM_KERNEL_PROTECT__ */
+#if XNU_MONITOR
+#if __APRR_SUPPORTED__
+ .text
+ .align 2
+el1_sp0_synchronous_vector_not_in_kernel_mode:
+ EL1_SP0_VECTOR_NOT_IN_KERNEL_MODE Lel1_sp0_synchronous_vector_kernel, fleh_synchronous_from_ppl, STAY_ON_SP1
+
+ .text
+ .align 2
+el1_sp0_fiq_vector_not_in_kernel_mode:
+ EL1_SP0_VECTOR_NOT_IN_KERNEL_MODE Lel1_sp0_fiq_vector_kernel, fleh_fiq_from_ppl, SWITCH_TO_SP0
+
+ .text
+ .align 2
+el1_sp0_irq_vector_not_in_kernel_mode:
+ EL1_SP0_VECTOR_NOT_IN_KERNEL_MODE Lel1_sp0_irq_vector_kernel, fleh_irq_from_ppl, SWITCH_TO_SP0
+
+ .text
+ .align 2
+el1_sp0_serror_vector_not_in_kernel_mode:
+ EL1_SP0_VECTOR_NOT_IN_KERNEL_MODE Lel1_sp0_serror_vector_kernel, fleh_serror_from_ppl, SWITCH_TO_SP0
+#endif /* __APRR_SUPPORTED__ */
+
+/*
+ * Functions to preflight the fleh handlers when the PPL has taken an exception;
+ * mostly concerned with setting up state for the normal fleh code.
+ */
+fleh_synchronous_from_ppl:
+ /* Save x0. */
+ mov x15, x0
+
+ /* Grab the ESR. */
+ mrs x1, ESR_EL1 // Get the exception syndrome
+
+ /* If the stack pointer is corrupt, it will manifest either as a data abort
+ * (syndrome 0x25) or a misaligned pointer (syndrome 0x26). We can check
+ * these quickly by testing bit 5 of the exception class.
+ */
+ tbz x1, #(5 + ESR_EC_SHIFT), Lvalid_ppl_stack
+ mrs x0, SP_EL0 // Get SP_EL0
+
+ /* Perform high level checks for stack corruption. */
+ and x1, x1, #ESR_EC_MASK // Mask the exception class
+ mov x2, #(ESR_EC_SP_ALIGN << ESR_EC_SHIFT)
+ cmp x1, x2 // If we have a stack alignment exception
+ b.eq Lcorrupt_ppl_stack // ...the stack is definitely corrupted
+ mov x2, #(ESR_EC_DABORT_EL1 << ESR_EC_SHIFT)
+ cmp x1, x2 // If we have a data abort, we need to
+ b.ne Lvalid_ppl_stack // ...validate the stack pointer
+
+Ltest_pstack:
+ /* Bounds check the PPL stack. */
+ adrp x10, EXT(pmap_stacks_start)@page
+ ldr x10, [x10, #EXT(pmap_stacks_start)@pageoff]
+ adrp x11, EXT(pmap_stacks_end)@page
+ ldr x11, [x11, #EXT(pmap_stacks_end)@pageoff]
+ cmp x0, x10
+ b.lo Lcorrupt_ppl_stack
+ cmp x0, x11
+ b.hi Lcorrupt_ppl_stack
+
+Lvalid_ppl_stack:
+ /* Restore x0. */
+ mov x0, x15
+
+ /* Switch back to the kernel stack. */
+ msr SPSel, #0
+ GET_PMAP_CPU_DATA x5, x6, x7
+ ldr x6, [x5, PMAP_CPU_DATA_KERN_SAVED_SP]
+ mov sp, x6
+
+ /* Hand off to the synch handler. */
+ b EXT(fleh_synchronous)
+
+Lcorrupt_ppl_stack:
+ /* Restore x0. */
+ mov x0, x15
+
+ /* Hand off to the invalid stack handler. */
+ b fleh_invalid_stack
+
+fleh_fiq_from_ppl:
+ mrs x1, TPIDR_EL1
+ ldr x1, [x1, ACT_CPUDATAP]
+ ldr x1, [x1, CPU_ISTACKPTR]
+ mov sp, x1
+ b EXT(fleh_fiq)
+
+fleh_irq_from_ppl:
+ mrs x1, TPIDR_EL1
+ ldr x1, [x1, ACT_CPUDATAP]
+ ldr x1, [x1, CPU_ISTACKPTR]
+ mov sp, x1
+ b EXT(fleh_irq)
+
+fleh_serror_from_ppl:
+ GET_PMAP_CPU_DATA x5, x6, x7
+ ldr x6, [x5, PMAP_CPU_DATA_KERN_SAVED_SP]
+ mov sp, x6
+ b EXT(fleh_serror)
+
+/*
+ * REENABLE_DAIF
+ *
+ * Restores the DAIF bits to their original state (well, the AIF bits at least).
+ * arg0 - DAIF bits (read from the DAIF interface) to restore
+ */
+.macro REENABLE_DAIF
+ /* AIF enable. */
+ tst $0, #(DAIF_IRQF | DAIF_FIQF | DAIF_ASYNCF)
+ b.eq 3f
+
+ /* IF enable. */
+ tst $0, #(DAIF_IRQF | DAIF_FIQF)
+ b.eq 2f
+
+ /* A enable. */
+ tst $0, #(DAIF_ASYNCF)
+ b.eq 1f
+
+ /* Enable nothing. */
+ b 4f
+
+ /* A enable. */
+1:
+ msr DAIFClr, #(DAIFSC_ASYNCF)
+ b 4f
+
+ /* IF enable. */
+2:
+ msr DAIFClr, #(DAIFSC_IRQF | DAIFSC_FIQF)
+ b 4f
+
+ /* AIF enable. */
+3:
+ msr DAIFClr, #(DAIFSC_IRQF | DAIFSC_FIQF | DAIFSC_ASYNCF)
+
+ /* Done! */
+4:
+.endmacro
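+/* Example use (as in ppl_dispatch below): capture the current state with
+ * `mrs x20, DAIF`, then REENABLE_DAIF x20 to re-enable whichever of the AIF
+ * bits were previously enabled. */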
+
+
+#if XNU_MONITOR && __APRR_SUPPORTED__
+/*
+ * aprr_ppl_enter
+ *
+ * Invokes the PPL
+ * x15 - The index of the requested PPL function.
+ */
+ .text
+ .align 2
+ .globl EXT(aprr_ppl_enter)
+LEXT(aprr_ppl_enter)
+ /* Push a frame. */
+ ARM64_STACK_PROLOG
+ stp x20, x21, [sp, #-0x20]!
+ stp x29, x30, [sp, #0x10]
+ add x29, sp, #0x10
+
+ /* Increase the preemption count. */
+ mrs x10, TPIDR_EL1
+ ldr w12, [x10, ACT_PREEMPT_CNT]
+ add w12, w12, #1
+ str w12, [x10, ACT_PREEMPT_CNT]
+
+ /* Is the PPL currently locked down? */
+ adrp x13, EXT(pmap_ppl_locked_down)@page
+ add x13, x13, EXT(pmap_ppl_locked_down)@pageoff
+ ldr w14, [x13]
+ cmp w14, wzr
+
+ /* If not, just perform the call in the current context. */
+ b.eq EXT(ppl_bootstrap_dispatch)
+
+ mov w10, #PPL_STATE_KERNEL
+ b Ldisable_aif_and_enter_ppl
+
+ /* We align this to land the next few instructions on their own page. */
+ .section __PPLTRAMP,__text,regular,pure_instructions
+ .align 14
+ .space (16*1024)-(4*8) // 8 insns
+
+ /*
+ * This label is used by exception handlers that are trying to return
+ * to the PPL.
+ */
+Ldisable_aif_and_enter_ppl:
+ /* We must trampoline to the PPL context; disable AIF. */
+ mrs x20, DAIF
+ msr DAIFSet, #(DAIFSC_ASYNCF | DAIFSC_IRQF | DAIFSC_FIQF)
+
+ .globl EXT(ppl_no_exception_start)
+LEXT(ppl_no_exception_start)
+ /* Switch APRR_EL1 to PPL mode. */
+ MOV64 x14, APRR_EL1_PPL
+ msr APRR_EL1, x14
+
+ /* This ISB should be the last instruction on a page. */
+ // TODO: can we static assert this?
+ isb
+#endif /* XNU_MONITOR && __APRR_SUPPORTED__ */
+
+
+ // x15: ppl call number
+ // w10: ppl_state
+ // x20: gxf_enter caller's DAIF
+ .globl EXT(ppl_trampoline_start)
+LEXT(ppl_trampoline_start)
+
+#if __APRR_SUPPORTED__
+ /* Squash AIF AGAIN, because someone may have attacked us. */
+ msr DAIFSet, #(DAIFSC_ASYNCF | DAIFSC_IRQF | DAIFSC_FIQF)
+#endif /* __APRR_SUPPORTED__ */
+
+#if __APRR_SUPPORTED__
+ /* Verify the state of APRR_EL1. */
+ MOV64 x14, APRR_EL1_PPL
+ mrs x21, APRR_EL1
+#else /* __APRR_SUPPORTED__ */
+#error "XPRR configuration error"
+#endif /* __APRR_SUPPORTED__ */
+ cmp x14, x21
+ b.ne Lppl_fail_dispatch
+
+ /* Verify the request ID. */
+ cmp x15, PMAP_COUNT
+ b.hs Lppl_fail_dispatch
+
+ /* Get the PPL CPU data structure. */
+ GET_PMAP_CPU_DATA x12, x13, x14
+
+ /* Mark this CPU as being in the PPL. */
+ ldr w9, [x12, PMAP_CPU_DATA_PPL_STATE]
+
+ cmp w9, #PPL_STATE_KERNEL
+ b.eq Lppl_mark_cpu_as_dispatching
+
+ /* Check to see if we are trying to trap from within the PPL. */
+ cmp w9, #PPL_STATE_DISPATCH
+ b.eq Lppl_fail_dispatch_ppl
+
+
+ /* Ensure that we are returning from an exception. */
+ cmp w9, #PPL_STATE_EXCEPTION
+ b.ne Lppl_fail_dispatch
+
+	// w10 is set to PPL_STATE_EXCEPTION by CHECK_EXCEPTION_RETURN_DISPATCH_PPL
+ cmp w10, #PPL_STATE_EXCEPTION
+ b.ne Lppl_fail_dispatch
+
+ /* This is an exception return; set the CPU to the dispatching state. */
+ mov w9, #PPL_STATE_DISPATCH
+ str w9, [x12, PMAP_CPU_DATA_PPL_STATE]
+
+ /* Find the save area, and return to the saved PPL context. */
+ ldr x0, [x12, PMAP_CPU_DATA_SAVE_AREA]
+ mov sp, x0
+#if __APRR_SUPPORTED__
+ b Lexception_return_restore_registers
+#else
+ b EXT(return_to_ppl)
+#endif /* __APRR_SUPPORTED__ */
+
+Lppl_mark_cpu_as_dispatching:
+ cmp w10, #PPL_STATE_KERNEL
+ b.ne Lppl_fail_dispatch
+
+ /* Mark the CPU as dispatching. */
+ mov w13, #PPL_STATE_DISPATCH
+ str w13, [x12, PMAP_CPU_DATA_PPL_STATE]
+
+ /* Get the handler for the request */
+ adrp x9, EXT(ppl_handler_table)@page
+ add x9, x9, EXT(ppl_handler_table)@pageoff
+ ldr x10, [x9, x15, lsl #3]
+
+ /* Switch to the regular PPL stack. */
+ // TODO: switch to PPL_STACK earlier in gxf_ppl_entry_handler
+ ldr x9, [x12, PMAP_CPU_DATA_PPL_STACK]
+
+ // SP0 is thread stack here
+ mov x21, sp
+ // SP0 is now PPL stack
+ mov sp, x9
+
+
+ /* Save the old stack pointer off in case we need it. */
+ str x21, [x12, PMAP_CPU_DATA_KERN_SAVED_SP]
+
+ /* Branch to the code that will invoke the PPL request. */
+ b EXT(ppl_dispatch)
+
+Lppl_fail_dispatch_ppl:
+ /* Switch back to the kernel stack. */
+ ldr x10, [x12, PMAP_CPU_DATA_KERN_SAVED_SP]
+ mov sp, x10
+
+Lppl_fail_dispatch:
+ /* Indicate that we failed. */
+ mov x15, #PPL_EXIT_BAD_CALL
+
+ /* Move the DAIF bits into the expected register. */
+ mov x10, x20
+
+ /* Return to kernel mode. */
+ b ppl_return_to_kernel_mode
+
+Lppl_dispatch_exit:
+ /* Indicate that we are cleanly exiting the PPL. */
+ mov x15, #PPL_EXIT_DISPATCH
+
+ /* Switch back to the original (kernel thread) stack. */
+ mov sp, x21
+
+ /* Move the saved DAIF bits. */
+ mov x10, x20
+
+ /* Clear the old stack pointer. */
+ str xzr, [x12, PMAP_CPU_DATA_KERN_SAVED_SP]
+
+ /*
+ * Mark the CPU as no longer being in the PPL. We spin if our state
+ * machine is broken.
+ */
+ ldr w9, [x12, PMAP_CPU_DATA_PPL_STATE]
+ cmp w9, #PPL_STATE_DISPATCH
+ b.ne .
+ mov w9, #PPL_STATE_KERNEL
+ str w9, [x12, PMAP_CPU_DATA_PPL_STATE]
+
+ /* Return to the kernel. */
+ b ppl_return_to_kernel_mode
+
+#if __APRR_SUPPORTED__
+ /* We align this to land the next few instructions on their own page. */
+ .align 14
+ .space (16*1024)-(4*5) // 5 insns
+
+ppl_return_to_kernel_mode:
+ /* Switch APRR_EL1 back to the kernel mode. */
+ // must be 5 instructions
+ MOV64 x14, APRR_EL1_DEFAULT
+ msr APRR_EL1, x14
+
+ .globl EXT(ppl_trampoline_end)
+LEXT(ppl_trampoline_end)
+
+ /* This should be the first instruction on a page. */
+ isb
+
+ .globl EXT(ppl_no_exception_end)
+LEXT(ppl_no_exception_end)
+ b ppl_exit
+#endif /* __APRR_SUPPORTED__ */
+
+
+ .text
+ppl_exit:
+ /*
+ * If we are dealing with an exception, hand off to the first level
+ * exception handler.
+ */
+ cmp x15, #PPL_EXIT_EXCEPTION
+ b.eq Ljump_to_fleh_handler
+
+ /* Restore the original AIF state. */
+ REENABLE_DAIF x10
+
+ /* If this was a panic call from the PPL, reinvoke panic. */
+ cmp x15, #PPL_EXIT_PANIC_CALL
+ b.eq Ljump_to_panic_trap_to_debugger
+
+ /* Load the preemption count. */
+ mrs x10, TPIDR_EL1
+ ldr w12, [x10, ACT_PREEMPT_CNT]
+
+ /* Detect underflow */
+ cbnz w12, Lno_preempt_underflow
+ b preempt_underflow
+Lno_preempt_underflow:
+
+ /* Lower the preemption count. */
+ sub w12, w12, #1
+ str w12, [x10, ACT_PREEMPT_CNT]
+
+	/* Skip ASTs if the preemption count is not zero. */
+ cbnz x12, Lppl_skip_ast_taken
+
+ /* Skip the AST check if interrupts are disabled. */
+ mrs x1, DAIF
+ tst x1, #DAIF_IRQF
+ b.ne Lppl_skip_ast_taken
+
+ /* Disable interrupts. */
+ msr DAIFSet, #(DAIFSC_IRQF | DAIFSC_FIQF)
+
+	/* If there is no urgent AST, skip the AST. */
+ ldr x12, [x10, ACT_CPUDATAP]
+ ldr x14, [x12, CPU_PENDING_AST]
+ tst x14, AST_URGENT
+ b.eq Lppl_defer_ast_taken
+
+ /* Stash our return value and return reason. */
+ mov x20, x0
+ mov x21, x15
+
+ /* Handle the AST. */
+ bl EXT(ast_taken_kernel)
+
+ /* Restore the return value and the return reason. */
+ mov x15, x21
+ mov x0, x20
+
+Lppl_defer_ast_taken:
+ /* Reenable interrupts. */
+ msr DAIFClr, #(DAIFSC_IRQF | DAIFSC_FIQF)
+
+Lppl_skip_ast_taken:
+ /* Pop the stack frame. */
+ ldp x29, x30, [sp, #0x10]
+ ldp x20, x21, [sp], #0x20
+
+ /* Check to see if this was a bad request. */
+ cmp x15, #PPL_EXIT_BAD_CALL
+ b.eq Lppl_bad_call
+
+ /* Return. */
+ ARM64_STACK_EPILOG
+
+ .align 2
+Ljump_to_fleh_handler:
+ br x25
+
+ .align 2
+Ljump_to_panic_trap_to_debugger:
+ b EXT(panic_trap_to_debugger)
+
+Lppl_bad_call:
+ /* Panic. */
+ adrp x0, Lppl_bad_call_panic_str@page
+ add x0, x0, Lppl_bad_call_panic_str@pageoff
+ b EXT(panic)
+
+ .text
+ .align 2
+ .globl EXT(ppl_dispatch)
+LEXT(ppl_dispatch)
+ /*
+ * Save a couple of important registers (implementation detail; x12 has
+ * the PPL per-CPU data address; x13 is not actually interesting).
+ */
+ stp x12, x13, [sp, #-0x10]!
+
+ /* Restore the original AIF state. */
+ REENABLE_DAIF x20
+
+ /*
+ * Note that if the method is NULL, we'll blow up with a prefetch abort,
+ * but the exception vectors will deal with this properly.
+ */
+
+ /* Invoke the PPL method. */
+#ifdef HAS_APPLE_PAC
+ blraaz x10
+#else
+ blr x10
+#endif
+
+ /* Disable AIF. */
+ msr DAIFSet, #(DAIFSC_ASYNCF | DAIFSC_IRQF | DAIFSC_FIQF)
+
+ /* Restore those important registers. */
+ ldp x12, x13, [sp], #0x10
+
+ /* Mark this as a regular return, and hand off to the return path. */
+ b Lppl_dispatch_exit
+
+ .text
+ .align 2
+ .globl EXT(ppl_bootstrap_dispatch)
+LEXT(ppl_bootstrap_dispatch)
+ /* Verify the PPL request. */
+ cmp x15, PMAP_COUNT
+ b.hs Lppl_fail_bootstrap_dispatch
+
+ /* Get the requested PPL routine. */
+ adrp x9, EXT(ppl_handler_table)@page
+ add x9, x9, EXT(ppl_handler_table)@pageoff
+ ldr x10, [x9, x15, lsl #3]
+
+ /* Invoke the requested PPL routine. */
+#ifdef HAS_APPLE_PAC
+ blraaz x10
+#else
+ blr x10
+#endif
+ /* Stash off the return value */
+ mov x20, x0
+ /* Drop the preemption count */
+ bl EXT(_enable_preemption)
+ mov x0, x20
+
+ /* Pop the stack frame. */
+ ldp x29, x30, [sp, #0x10]
+ ldp x20, x21, [sp], #0x20
+#if __has_feature(ptrauth_returns)
+ retab
+#else
+ ret
+#endif
+
+Lppl_fail_bootstrap_dispatch:
+ /* Pop our stack frame and panic. */
+ ldp x29, x30, [sp, #0x10]
+ ldp x20, x21, [sp], #0x20
+#if __has_feature(ptrauth_returns)
+ autibsp
+#endif
+ adrp x0, Lppl_bad_call_panic_str@page
+ add x0, x0, Lppl_bad_call_panic_str@pageoff
+ b EXT(panic)
+
+ .text
+ .align 2
+ .globl EXT(ml_panic_trap_to_debugger)
+LEXT(ml_panic_trap_to_debugger)
+#if 0
+ // TODO: why would we ever want to turn interrupts back on after going down panic path?
+ /* Grab the current AIF state, and disable AIF. */
+ mrs x10, DAIF
+#endif
+ msr DAIFSet, #(DAIFSC_ASYNCF | DAIFSC_IRQF | DAIFSC_FIQF)
+
+ // we want interrupts to stay masked after exiting PPL when calling into panic to halt system
+	// x10 is used in ppl_return_to_kernel_mode to restore the desired DAIF state after GEXIT
+ mrs x10, DAIF
+
+ /* Indicate (for the PPL->kernel transition) that we are panicking. */
+ mov x15, #PPL_EXIT_PANIC_CALL
+
+ /* Get the PPL per-CPU data. */
+ GET_PMAP_CPU_DATA x11, x12, x13
+
+ /* Restore the old stack pointer as we can't push onto PPL stack after we exit PPL */
+ ldr x12, [x11, PMAP_CPU_DATA_KERN_SAVED_SP]
+ mov sp, x12
+
+ /*
+	 * Mark this CPU as panicking from within the PPL. Halt and catch fire
+	 * if our state machine appears to be broken.
+ */
+ ldr w12, [x11, PMAP_CPU_DATA_PPL_STATE]
+ cmp w12, #PPL_STATE_DISPATCH
+ b.ne .
+ mov w13, #PPL_STATE_PANIC
+ str w13, [x11, PMAP_CPU_DATA_PPL_STATE]
+
+ /* Now we are ready to exit the PPL. */
+ b ppl_return_to_kernel_mode
+
+ .data
+Lppl_bad_call_panic_str:
+ .asciz "ppl_dispatch: failed due to bad arguments/state"
+#else /* XNU_MONITOR */
.text
.align 2
.globl EXT(ml_panic_trap_to_debugger)
LEXT(ml_panic_trap_to_debugger)
ret
+#endif /* XNU_MONITOR */
/* ARM64_TODO Is globals_asm.h needed? */
//#include "globals_asm.h"
#include <IOKit/IOPlatformExpert.h>
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#include <libkern/kernel_mach_header.h>
#endif
#include <kern/kpc.h>
#endif
+#if HAS_CLUSTER
+static uint8_t cluster_initialized = 0;
+#endif
static int max_cpus_initialized = 0;
extern vm_offset_t segLASTB;
extern unsigned long segSizeLAST;
+#if defined(HAS_IPI)
+unsigned int gFastIPI = 1;
+#define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
+static uint64_t deferred_ipi_timer_ns = kDeferredIPITimerDefault;
+#endif /* defined(HAS_IPI) */
void machine_conf(void);
void ml_lockdown_run_handler(void);
uint32_t get_arm_cpu_version(void);
+#if defined(HAS_IPI)
+static inline void
+ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
+{
+#if HAS_CLUSTER
+ uint64_t local_mpidr;
+ /* NOTE: this logic expects that we are called in a non-preemptible
+ * context, or at least one in which the calling thread is bound
+ * to a single CPU. Otherwise we may migrate between choosing which
+ * IPI mechanism to use and issuing the IPI. */
+ MRS(local_mpidr, "MPIDR_EL1");
+ if ((local_mpidr & MPIDR_AFF1_MASK) == (cpu_mpidr & MPIDR_AFF1_MASK)) {
+ uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
+ MSR(ARM64_REG_IPI_RR_LOCAL, x);
+ } else {
+ #define IPI_RR_TARGET_CLUSTER_SHIFT 16
+ uint64_t x = type | ((cpu_mpidr & MPIDR_AFF1_MASK) << (IPI_RR_TARGET_CLUSTER_SHIFT - MPIDR_AFF1_SHIFT)) | (cpu_mpidr & MPIDR_AFF0_MASK);
+ MSR(ARM64_REG_IPI_RR_GLOBAL, x);
+ }
+#else
+ uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
+ MSR(ARM64_REG_IPI_RR, x);
+#endif
+}
+#endif
+#if !defined(HAS_IPI)
__dead2
+#endif
void
ml_cpu_signal(unsigned int cpu_mpidr __unused)
{
+#if defined(HAS_IPI)
+ ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
+#else
panic("Platform does not support ACC Fast IPI");
+#endif
}
+#if !defined(HAS_IPI)
__dead2
+#endif
void
ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
{
+#if defined(HAS_IPI)
+ /* adjust IPI_CR timer countdown value for deferred IPI
+ * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
+ * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
+ *
+ * global register, should only require a single write to update all
+ * CPU cores: from Skye ACC user spec section 5.7.3.3
+ *
+ * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
+ * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
+ */
+ uint64_t abstime;
+
+ nanoseconds_to_absolutetime(nanosecs, &abstime);
+
+ abstime = MIN(abstime, 0xFFFF);
+
+ /* update deferred_ipi_timer_ns with the new clamped value */
+ absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
+
+ MSR(ARM64_REG_IPI_CR, abstime);
+#else
(void)nanosecs;
panic("Platform does not support ACC Fast IPI");
+#endif
}
uint64_t
ml_cpu_signal_deferred_get_timer()
{
+#if defined(HAS_IPI)
+ return deferred_ipi_timer_ns;
+#else
return 0;
+#endif
}
+#if !defined(HAS_IPI)
__dead2
+#endif
void
ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
{
+#if defined(HAS_IPI)
+ ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
+#else
panic("Platform does not support ACC Fast IPI deferral");
+#endif
}
+#if !defined(HAS_IPI)
__dead2
+#endif
void
ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
{
+#if defined(HAS_IPI)
+ ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
+#else
panic("Platform does not support ACC Fast IPI retraction");
+#endif
}
void
boolean_t
user_cont_hwclock_allowed(void)
{
+#if HAS_CONTINUOUS_HWCLOCK
+ return TRUE;
+#else
return FALSE;
+#endif
}
return TRUE;
}
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
uint64_t rorgn_begin __attribute__((section("__DATA, __const"))) = 0;
uint64_t rorgn_end __attribute__((section("__DATA, __const"))) = 0;
rc = DTGetProperty(entryP, "reg", (void **)®_prop, &prop_size);
assert(rc == kSuccess);
amcc_base = ml_io_map(soc_base + *reg_prop, *(reg_prop + 1));
+#elif defined(KERNEL_INTEGRITY_CTRR)
+ /* TODO: t8020 mcc entry not in device tree yet; we'll do it LIVE */
+#define TEMP_AMCC_BASE_PA 0x200000000ULL
+#define TEMP_AMCC_SZ 0x100000
+ amcc_base = ml_io_map(TEMP_AMCC_BASE_PA, TEMP_AMCC_SZ);
#else
#error "KERNEL_INTEGRITY config error"
#endif
assert(rRORGNENDADDR > rRORGNBASEADDR);
rorgn_begin = (rRORGNBASEADDR << AMCC_PGSHIFT) + dram_base;
rorgn_end = (rRORGNENDADDR << AMCC_PGSHIFT) + dram_base;
+#elif defined(KERNEL_INTEGRITY_CTRR)
+ rorgn_begin = rCTRR_AMCC_PLANE_REG(0, CTRR_A_BASEADDR);
+ rorgn_end = rCTRR_AMCC_PLANE_REG(0, CTRR_A_ENDADDR);
+ assert(rorgn_end > rorgn_begin);
+
+ for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
+ uint32_t begin = rCTRR_AMCC_PLANE_REG(i, CTRR_A_BASEADDR);
+ uint32_t end = rCTRR_AMCC_PLANE_REG(i, CTRR_A_ENDADDR);
+ if (!(begin == rorgn_begin && end == rorgn_end)) {
+#if DEVELOPMENT || DEBUG
+ panic("iboot programmed CTRR bounds are inconsistent");
+#else
+ panic("Inconsistent memory configuration");
+#endif
+ }
+ }
+
+	// convert from page numbers relative to the DRAM base to physical addresses
+ rorgn_begin = (rorgn_begin << AMCC_PGSHIFT) + dram_base;
+ rorgn_end = (rorgn_end << AMCC_PGSHIFT) + dram_base;
+
#else
#error KERNEL_INTEGRITY config error
#endif /* defined (KERNEL_INTEGRITY_KTRR) */
#if defined(KERNEL_INTEGRITY_KTRR)
rorgn_lock = rRORGNLOCK;
ktrr_lock = __builtin_arm_rsr64(ARM64_REG_KTRR_LOCK_EL1);
+#elif defined(KERNEL_INTEGRITY_CTRR)
+ for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
+ rorgn_lock |= rCTRR_AMCC_PLANE_REG(i, CTRR_A_LOCK);
+ }
+ ktrr_lock = __builtin_arm_rsr64(ARM64_REG_CTRR_LOCK_EL1);
#else
#error KERNEL_INTEGRITY config error
#endif /* defined(KERNEL_INTEGRITY_KTRR) */
#if defined(KERNEL_INTEGRITY_KTRR)
rRORGNLOCK = 1;
__builtin_arm_isb(ISB_SY);
+#elif defined(KERNEL_INTEGRITY_CTRR)
+	/* lock down planes in reverse order, as plane 0 should be locked last */
+ for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
+ rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_ENABLE) = 1;
+ rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_LOCK) = 1;
+ __builtin_arm_isb(ISB_SY);
+ }
#else
#error KERNEL_INTEGRITY config error
#endif
__builtin_arm_isb(ISB_SY);
flush_mmu_tlb();
+#elif defined (KERNEL_INTEGRITY_CTRR)
+	/* This will lock the entire bootstrap cluster. Non-bootstrap clusters
+	 * will be locked by their respective cluster masters in start.s. */
+
+ __builtin_arm_wsr64(ARM64_REG_CTRR_A_LWR_EL1, begin);
+ __builtin_arm_wsr64(ARM64_REG_CTRR_A_UPR_EL1, end);
+
+#if !defined(APPLEVORTEX)
+ /* H12 changed sequence, must invalidate TLB immediately after setting CTRR bounds */
+ __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
+ flush_mmu_tlb();
+#endif /* !defined(APPLEVORTEX) */
+
+ __builtin_arm_wsr64(ARM64_REG_CTRR_CTL_EL1, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
+ __builtin_arm_wsr64(ARM64_REG_CTRR_LOCK_EL1, 1ULL);
+
+ uint64_t current_el = __builtin_arm_rsr64("CurrentEL");
+ if (current_el == PSR64_MODE_EL2) {
+ // CTRR v2 has explicit registers for cluster config. they can only be written in EL2
+
+ __builtin_arm_wsr64(ACC_CTRR_A_LWR_EL2, begin);
+ __builtin_arm_wsr64(ACC_CTRR_A_UPR_EL2, end);
+ __builtin_arm_wsr64(ACC_CTRR_CTL_EL2, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
+ __builtin_arm_wsr64(ACC_CTRR_LOCK_EL2, 1ULL);
+ }
+
+ __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
+#if defined(APPLEVORTEX)
+ flush_mmu_tlb();
+#endif /* defined(APPLEVORTEX) */
+
#else /* defined(KERNEL_INTEGRITY_KTRR) */
#error KERNEL_INTEGRITY config error
#endif /* defined(KERNEL_INTEGRITY_KTRR) */
{
#if defined(KERNEL_INTEGRITY_KTRR)
assert((rMCCGEN & 1) == 0); /* assert M$ disabled or LLC clean will be unreliable */
+#elif defined(KERNEL_INTEGRITY_CTRR) && (defined(ARM64_BOARD_CONFIG_T8006))
+ /*
+ * T8006 differentiates between data and tag ways being powered up, so
+ * make sure to check that both are zero on its single memory plane.
+ */
+ assert((rCTRR_AMCC_PLANE_REG(0, CTRR_AMCC_PWRONWAYCNTSTATUS) &
+ (AMCC_CURTAGWAYCNT_MASK | AMCC_CURDATWAYCNT_MASK)) == 0);
+#elif defined (KERNEL_INTEGRITY_CTRR)
+ for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
+ assert(rCTRR_AMCC_PLANE_REG(i, CTRR_AMCC_WAYONCNT) == 0);
+ }
#else
#error KERNEL_INTEGRITY config error
#endif
assert(rorgn_begin == ktrr_begin && rorgn_end == (ktrr_end + last_segsz));
/* assert that __LAST segment containing privileged insns is only a single page */
assert(last_segsz == PAGE_SIZE);
+#elif defined(KERNEL_INTEGRITY_CTRR)
+ ktrr_end = (ktrr_end + last_segsz - 1) & ~AMCC_PGMASK;
+ /* __LAST is part of MMU CTRR region. Can't use the KTRR style method of making
+ * __pinst no execute because PXN applies with MMU off in CTRR. */
+ assert(rorgn_begin == ktrr_begin && rorgn_end == ktrr_end);
#endif
out:
#endif
+#if defined(KERNEL_INTEGRITY_CTRR)
+ {
+ /* wake any threads blocked on cluster master lockdown */
+ cpu_data_t *cdp;
+ uint64_t mpidr_el1_value;
+
+ cdp = getCpuDatap();
+ MRS(mpidr_el1_value, "MPIDR_EL1");
+ cdp->cpu_cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
+ assert(cdp->cpu_cluster_id < __ARM_CLUSTER_COUNT__);
+ ctrr_cluster_locked[cdp->cpu_cluster_id] = 1;
+ thread_wakeup(&ctrr_cluster_locked[cdp->cpu_cluster_id]);
+ }
+#endif
/* now we can run lockdown handler */
ml_lockdown_run_handler();
}
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
void
machine_startup(__unused boot_args * args)
{
int boot_arg;
+#if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
+ if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
+ gFastIPI = 1;
+ }
+
+ PE_parse_boot_argn("fastipitimeout", &deferred_ipi_timer_ns, sizeof(deferred_ipi_timer_ns));
+#endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
#if CONFIG_NONFATAL_ASSERTS
PE_parse_boot_argn("assert", &mach_assert, sizeof(mach_assert));
{
#if CONFIG_KERNEL_INTEGRITY
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
rorgn_stash_range();
#endif
#endif
#endif /* KERNEL_INTEGRITY_WT */
+#if XNU_MONITOR
+ pmap_lockdown_ppl();
+#endif
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
/* KTRR
*
* Lock physical KTRR region. KTRR region is read-only. Memory outside
*/
rorgn_lockdown();
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
#endif /* CONFIG_KERNEL_INTEGRITY */
void
ml_init_interrupt(void)
{
+#if defined(HAS_IPI)
+ /*
+	 * ml_init_interrupt will get called once for each CPU, but this is redundant
+	 * because there is only one global copy of the register for Skye. Do it only
+	 * on the bootstrap CPU.
+ */
+ if (getCpuDatap()->cluster_master) {
+ ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
+ }
+#endif
}
/*
lck_mtx_init(&lockdown_handler_lck, lockdown_handler_grp, NULL);
+#if defined(KERNEL_INTEGRITY_CTRR)
+ init_ctrr_cpu_start_lock();
+#endif
}
kern_return_t
lockdown_handler = f;
lockdown_this = this;
-#if !(defined(KERNEL_INTEGRITY_KTRR))
+#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
lockdown_done = 1;
lockdown_handler(this);
#else
this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
+#if HAS_CLUSTER
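+	/* The first CPU to come up in each cluster wins the test-and-set and becomes
+	 * that cluster's master. */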
+ this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
+#else /* HAS_CLUSTER */
this_cpu_datap->cluster_master = is_boot_cpu;
+#endif /* HAS_CLUSTER */
pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
assert(pset != NULL);
pt_entry_t *pte_p;
pt_entry_t ptmp;
+#if XNU_MONITOR
+ assert(!TEST_PAGE_RATIO_4);
+ assert(!pmap_is_monitor(ppn));
+#endif
tte2 = arm_kva_to_tte(vaddr_cur);
ml_wants_panic_trap_to_debugger(void)
{
boolean_t result = FALSE;
+#if XNU_MONITOR
+ /*
+	 * This looks racy, but if we are in the PPL, preemption will be
+ * disabled.
+ */
+ result = ((pmap_get_cpu_data()->ppl_state == PPL_STATE_DISPATCH) && pmap_ppl_locked_down);
+#endif
return result;
}
#endif /* defined(HAS_APPLE_PAC) */
+#if HAS_BP_RET
+/*
+ * void set_bp_ret(void)
+ * Helper function to enable branch predictor state retention
+ * across ACC sleep
+ */
+
+ .align 2
+ .globl EXT(set_bp_ret)
+LEXT(set_bp_ret)
+ // Load bpret boot-arg
+ adrp x14, EXT(bp_ret)@page
+ add x14, x14, EXT(bp_ret)@pageoff
+ ldr w14, [x14]
+
+ mrs x13, ARM64_REG_ACC_CFG
+ and x13, x13, (~(ARM64_REG_ACC_CFG_bpSlp_mask << ARM64_REG_ACC_CFG_bpSlp_shift))
+ and x14, x14, #(ARM64_REG_ACC_CFG_bpSlp_mask)
+ orr x13, x13, x14, lsl #(ARM64_REG_ACC_CFG_bpSlp_shift)
+ msr ARM64_REG_ACC_CFG, x13
+
+ ret
+#endif // HAS_BP_RET
+
+#if HAS_NEX_PG
+ .align 2
+ .globl EXT(set_nex_pg)
+LEXT(set_nex_pg)
+ mrs x14, MPIDR_EL1
+ // Skip if this isn't a p-core; NEX powergating isn't available for e-cores
+ and x14, x14, #(MPIDR_PNE)
+ cbz x14, Lnex_pg_done
+
+ // Set the SEG-recommended value of 12 additional reset cycles
+ mrs x14, ARM64_REG_HID13
+ and x14, x14, (~ARM64_REG_HID13_RstCyc_mask)
+ orr x14, x14, ARM64_REG_HID13_RstCyc_val
+ msr ARM64_REG_HID13, x14
+
+ // Load nexpg boot-arg
+ adrp x14, EXT(nex_pg)@page
+ add x14, x14, EXT(nex_pg)@pageoff
+ ldr w14, [x14]
+
+ mrs x13, ARM64_REG_HID14
+ and x13, x13, (~ARM64_REG_HID14_NexPwgEn)
+ cbz w14, Lset_nex_pg
+ orr x13, x13, ARM64_REG_HID14_NexPwgEn
+Lset_nex_pg:
+ msr ARM64_REG_HID14, x13
+
+Lnex_pg_done:
+ ret
+
+#endif // HAS_NEX_PG
/* uint32_t get_fpscr(void):
* Returns (FPSR | FPCR).
bl EXT(pinst_set_ttbr1)
mov lr, x1
#else
+#if defined(HAS_VMSA_LOCK)
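+	// assert TTBR1 unlocked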
+ mrs x1, ARM64_REG_VMSA_LOCK_EL1
+ and x1, x1, #(VMSA_LOCK_TTBR1_EL1)
+ cbnz x1, L_set_locked_reg_panic
+#endif /* defined(HAS_VMSA_LOCK) */
msr TTBR1_EL1, x0
#endif /* defined(KERNEL_INTEGRITY_KTRR) */
isb sy
ret
+#if XNU_MONITOR
+ .section __PPLTEXT,__text,regular,pure_instructions
+#else
.text
+#endif
.align 2
.globl EXT(set_mmu_ttb)
LEXT(set_mmu_ttb)
#endif
#endif /* __ARM_KERNEL_PROTECT__ */
+#if defined(HAS_VMSA_LOCK)
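+/*
+ * void vmsa_lock(void)
+ * Locks the VMSA control registers (SCTLR.M, TTBR1_EL1, TCR_EL1, VBAR_EL1)
+ * against further modification.
+ */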
+ .text
+ .align 2
+ .globl EXT(vmsa_lock)
+LEXT(vmsa_lock)
+ isb sy
+ mov x1, #(VMSA_LOCK_SCTLR_M_BIT)
+ mov x0, #(VMSA_LOCK_TTBR1_EL1 | VMSA_LOCK_TCR_EL1 | VMSA_LOCK_VBAR_EL1)
+ orr x0, x0, x1
+ msr ARM64_REG_VMSA_LOCK_EL1, x0
+ isb sy
+ ret
+#endif /* defined(HAS_VMSA_LOCK) */
/*
* set translation control register
bl EXT(pinst_set_tcr)
mov lr, x1
#else
+#if defined(HAS_VMSA_LOCK)
+ // assert TCR unlocked
+ mrs x1, ARM64_REG_VMSA_LOCK_EL1
+ and x1, x1, #(VMSA_LOCK_TCR_EL1)
+ cbnz x1, L_set_locked_reg_panic
+#endif /* defined(HAS_VMSA_LOCK) */
msr TCR_EL1, x0
#endif /* defined(KERNEL_INTRITY_KTRR) */
isb sy
L_set_locked_reg_panic_str:
.asciz "attempt to set locked register: (%llx)\n"
#else
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
mov x1, lr
bl EXT(pinst_set_tcr)
mov lr, x1
orr x1, x1, #( ARM64_REG_ACC_OVRD_ok2TrDnLnk_deepsleep)
and x1, x1, #(~(ARM64_REG_ACC_OVRD_ok2PwrDnCPM_mask))
orr x1, x1, #( ARM64_REG_ACC_OVRD_ok2PwrDnCPM_deepsleep)
+#if HAS_RETENTION_STATE
+ orr x1, x1, #(ARM64_REG_ACC_OVRD_disPioOnWfiCpu)
+#endif
msr ARM64_REG_ACC_OVRD, x1
// Set "OK to power down" (<rdar://problem/12390433>)
mrs x0, ARM64_REG_CYC_OVRD
orr x0, x0, #(ARM64_REG_CYC_OVRD_ok2pwrdn_force_down)
+#if HAS_RETENTION_STATE
+ orr x0, x0, #(ARM64_REG_CYC_OVRD_disWfiRetn)
+#endif
msr ARM64_REG_CYC_OVRD, x0
-#if defined(APPLEMONSOON)
+#if defined(APPLEMONSOON) || defined(APPLEVORTEX)
ARM64_IS_PCORE x0
cbz x0, Lwfi_inst // skip if not p-core
* and re-enabling GUPS, which forces the prefetch queue to
* drain. This should be done as close to wfi as possible, i.e.
* at the very end of arm64_prepare_for_sleep(). */
+#if defined(APPLEVORTEX)
+ /* <rdar://problem/32821461>: Cyprus A0/A1 parts have a similar
+ * bug in the HSP prefetcher that can be worked around through
+ * the same method mentioned above for Skye. */
+ SKIP_IF_CPU_VERSION_GREATER_OR_EQUAL x0, VORTEX_CPU_VERSION_B0, Lwfi_inst
+#endif
mrs x0, ARM64_REG_HID10
orr x0, x0, #(ARM64_REG_HID10_DisHwpGups)
msr ARM64_REG_HID10, x0
ARM64_STACK_EPILOG
+#if HAS_RETENTION_STATE
+ .text
+ .align 2
+ .globl EXT(arm64_retention_wfi)
+LEXT(arm64_retention_wfi)
+ wfi
+ cbz lr, Lwfi_retention // If lr is 0, we entered retention state and lost all GPRs except sp and pc
+ ret // Otherwise just return to cpu_idle()
+Lwfi_retention:
+ mov x0, #1
+ bl EXT(ClearIdlePop)
+ mov x0, #0
+ bl EXT(cpu_idle_exit) // cpu_idle_exit(from_reset = FALSE)
+ b . // cpu_idle_exit() should never return
+#endif
#if defined(APPLETYPHOON)
mrs x4, DAIF // Load current DAIF; use x4 as pinst may trash x1-x3
msr DAIFSet, #(DAIFSC_IRQF | DAIFSC_FIQF | DAIFSC_ASYNCF) // Disable IRQ/FIQ/serror
// Set SP_EL1 to exception stack
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
mov x1, lr
bl EXT(pinst_spsel_1)
mov lr, x1
#include <stdint.h>
+#if HAS_UNCORE_CTRS
+#define MT_NDEVS 2
+#else /* HAS_UNCORE_CTRS */
#define MT_NDEVS 1
+#endif /* !HAS_UNCORE_CTRS */
#define MT_CORE_CYCLES 0
#define MT_CORE_INSTRS 1
#define PMCR0_PMAI (UINT64_C(1) << 11)
#define PMCR0_PMI(REG) ((REG) & PMCR0_PMAI)
+#if HAS_UNCORE_CTRS
+
+#define UPMSR "s3_7_c15_c6_4"
+#define UPMSR_PMI(REG) ((REG) & 0x1)
+
+#endif /* HAS_UNCORE_CTRS */
static inline bool
mt_pmi_pending(uint64_t * restrict pmcr0_out,
}
*pmcr0_out = pmcr0;
+#if HAS_UNCORE_CTRS
+ extern bool mt_uncore_enabled;
+ if (mt_uncore_enabled) {
+ uint64_t upmsr = __builtin_arm_rsr64(UPMSR);
+ if (UPMSR_PMI(upmsr)) {
+ pmi = true;
+ }
+ *upmsr_out = upmsr;
+ }
+#else /* HAS_UNCORE_CTRS */
#pragma unused(upmsr_out)
+#endif /* !HAS_UNCORE_CTRS */
return pmi;
}
#pragma mark uncore performance monitor
+#if HAS_UNCORE_CTRS
+
+static bool mt_uncore_initted = false;
+
+/*
+ * Uncore Performance Monitor
+ *
+ * Uncore performance monitors provide event-counting for the last-level caches
+ * (LLCs). Each LLC has its own uncore performance monitor, which can only be
+ * accessed by cores that use that LLC. Like the core performance monitoring
+ * unit, uncore counters are configured globally. If there is more than one
+ * LLC on the system, PIO reads must be used to satisfy uncore requests (using
+ * the `_r` remote variants of the access functions). Otherwise, local MSRs
+ * suffice (using the `_l` local variants of the access functions).
+ */
+
+#if UNCORE_PER_CLUSTER
+static vm_size_t cpm_impl_size = 0;
+static uintptr_t cpm_impl[__ARM_CLUSTER_COUNT__] = {};
+static uintptr_t cpm_impl_phys[__ARM_CLUSTER_COUNT__] = {};
+#endif /* UNCORE_PER_CLUSTER */
+
+#if UNCORE_VERSION >= 2
+/*
+ * V2 uncore monitors feature a CTI mechanism -- the second bit of UPMSR is
+ * used to track if a CTI has been triggered due to an overflow.
+ */
+#define UPMSR_OVF_POS 2
+#else /* UNCORE_VERSION >= 2 */
+#define UPMSR_OVF_POS 1
+#endif /* UNCORE_VERSION < 2 */
+#define UPMSR_OVF(R, CTR) ((R) >> ((CTR) + UPMSR_OVF_POS) & 0x1)
+#define UPMSR_OVF_MASK (((UINT64_C(1) << UNCORE_NCTRS) - 1) << UPMSR_OVF_POS)
+
+#define UPMPCM "s3_7_c15_c5_4"
+#define UPMPCM_CORE(ID) (UINT64_C(1) << (ID))
+
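
As an aside, the UPMSR overflow macros above pack one overflow flag per counter starting at UPMSR_OVF_POS. Below is a minimal standalone sketch of that decoding, compiled outside the kernel; the 8-counter width and the version-2 overflow position (bit 2) are assumptions chosen only for illustration.

/*
 * Illustrative sketch only (not part of the diff): decode which counters
 * overflowed from a UPMSR-style status word.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_NCTRS       8
#define EX_OVF_POS     2
#define EX_OVF(R, CTR) ((R) >> ((CTR) + EX_OVF_POS) & 0x1)

int
main(void)
{
	/* pretend counters 1 and 5 overflowed */
	uint64_t upmsr = (1ULL << (EX_OVF_POS + 1)) | (1ULL << (EX_OVF_POS + 5));

	for (unsigned int ctr = 0; ctr < EX_NCTRS; ctr++) {
		if (EX_OVF(upmsr, ctr)) {
			printf("counter %u overflowed\n", ctr);
		}
	}
	return 0;
}
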
+/*
+ * The uncore_pmi_mask is a bitmask of CPUs that receive uncore PMIs. It's
+ * initialized by uncore_init and controllable by the uncore_pmi_mask boot-arg.
+ */
+static int32_t uncore_pmi_mask = 0;
+
+/*
+ * The uncore_active_ctrs is a bitmask of uncore counters that are currently
+ * requested.
+ */
+static uint16_t uncore_active_ctrs = 0;
+static_assert(sizeof(uncore_active_ctrs) * CHAR_BIT >= UNCORE_NCTRS,
+ "counter mask should fit the full range of counters");
+
+/*
+ * mt_uncore_enabled is true when any uncore counters are active.
+ */
+bool mt_uncore_enabled = false;
+
+/*
+ * Each uncore unit has its own monitor, corresponding to the memory hierarchy
+ * of the LLCs.
+ */
+#if UNCORE_PER_CLUSTER
+#define UNCORE_NMONITORS (__ARM_CLUSTER_COUNT__)
+#else /* UNCORE_PER_CLUSTER */
+#define UNCORE_NMONITORS (1)
+#endif /* !UNCORE_PER_CLUSTER */
+
+/*
+ * The uncore_events are the event configurations for each uncore counter -- as
+ * a union to make it easy to program the hardware registers.
+ */
+static struct uncore_config {
+ union {
+ uint8_t uce_ctrs[UNCORE_NCTRS];
+ uint64_t uce_regs[UNCORE_NCTRS / 8];
+ } uc_events;
+ union {
+ uint16_t uccm_masks[UNCORE_NCTRS];
+ uint64_t uccm_regs[UNCORE_NCTRS / 4];
+ } uc_cpu_masks[UNCORE_NMONITORS];
+} uncore_config;
+
+static struct uncore_monitor {
+ /*
+ * The last snapshot of each of the hardware counter values.
+ */
+ uint64_t um_snaps[UNCORE_NCTRS];
+
+ /*
+ * The accumulated counts for each counter.
+ */
+ uint64_t um_counts[UNCORE_NCTRS];
+
+ /*
+ * Protects accessing the hardware registers and fields in this structure.
+ */
+ lck_spin_t um_lock;
+
+ /*
+ * Whether this monitor needs its registers restored after wake.
+ */
+ bool um_sleeping;
+} uncore_monitors[UNCORE_NMONITORS];
+
+static unsigned int
+uncmon_get_curid(void)
+{
+#if UNCORE_PER_CLUSTER
+ return cpu_cluster_id();
+#else /* UNCORE_PER_CLUSTER */
+ return 0;
+#endif /* !UNCORE_PER_CLUSTER */
+}
+
+/*
+ * Per-monitor locks are required to prevent races with the PMI handlers, not
+ * from other CPUs that are configuring (those are serialized with monotonic's
+ * per-device lock).
+ */
+
+static int
+uncmon_lock(struct uncore_monitor *mon)
+{
+ int intrs_en = ml_set_interrupts_enabled(FALSE);
+ lck_spin_lock(&mon->um_lock);
+ return intrs_en;
+}
+
+static void
+uncmon_unlock(struct uncore_monitor *mon, int intrs_en)
+{
+ lck_spin_unlock(&mon->um_lock);
+ (void)ml_set_interrupts_enabled(intrs_en);
+}
+
+/*
+ * Helper functions for accessing the hardware -- these require the monitor be
+ * locked to prevent other CPUs' PMI handlers from making local modifications
+ * or updating the counts.
+ */
+
+#if UNCORE_VERSION >= 2
+#define UPMCR0_INTEN_POS 20
+#define UPMCR0_INTGEN_POS 16
+#else /* UNCORE_VERSION >= 2 */
+#define UPMCR0_INTEN_POS 12
+#define UPMCR0_INTGEN_POS 8
+#endif /* UNCORE_VERSION < 2 */
+enum {
+ UPMCR0_INTGEN_OFF = 0,
+ /* fast PMIs are only supported on core CPMU */
+ UPMCR0_INTGEN_AIC = 2,
+ UPMCR0_INTGEN_HALT = 3,
+ UPMCR0_INTGEN_FIQ = 4,
+};
+/* always enable interrupts for all counters */
+#define UPMCR0_INTEN (((1ULL << UNCORE_NCTRS) - 1) << UPMCR0_INTEN_POS)
+/* route uncore PMIs through the FIQ path */
+#define UPMCR0_INIT (UPMCR0_INTEN | (UPMCR0_INTGEN_FIQ << UPMCR0_INTGEN_POS))
+
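
As a side note, UPMCR0_INIT above is just the per-counter interrupt-enable bits OR'd with the FIQ interrupt-generation mode. Here is a standalone sketch of the same arithmetic; the version-2 field positions and the 16-counter width are assumptions for the example.

/*
 * Illustrative sketch only (not part of the diff): build a UPMCR0_INIT-style
 * control value from the interrupt-enable and interrupt-generation fields.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_NCTRS      16
#define EX_INTEN_POS  20
#define EX_INTGEN_POS 16
#define EX_INTGEN_FIQ 4ULL

int
main(void)
{
	uint64_t inten = ((1ULL << EX_NCTRS) - 1) << EX_INTEN_POS;
	uint64_t init = inten | (EX_INTGEN_FIQ << EX_INTGEN_POS);

	printf("control value: 0x%llx\n", (unsigned long long)init); /* 0xffff40000 */
	return 0;
}
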
+/*
+ * Turn counting on for counters set in `enctrmask`, and off otherwise.
+ */
+static inline void
+uncmon_set_counting_locked_l(__unused unsigned int monid, uint64_t enctrmask)
+{
+ /*
+ * UPMCR0 controls which counters are enabled and how interrupts are generated
+ * for overflows.
+ */
+#define UPMCR0 "s3_7_c15_c0_4"
+ __builtin_arm_wsr64(UPMCR0, UPMCR0_INIT | enctrmask);
+}
+
+#if UNCORE_PER_CLUSTER
+
+/*
+ * Turn counting on for counters set in `enctrmask`, and off otherwise.
+ */
+static inline void
+uncmon_set_counting_locked_r(unsigned int monid, uint64_t enctrmask)
+{
+ const uintptr_t upmcr0_offset = 0x4180;
+ *(uint64_t *)(cpm_impl[monid] + upmcr0_offset) = UPMCR0_INIT | enctrmask;
+}
+
+#endif /* UNCORE_PER_CLUSTER */
+
+/*
+ * The uncore performance monitoring counters (UPMCs) are 48 bits wide. The
+ * high bit is an overflow bit, triggering a PMI, providing 47 usable bits.
+ */
+
+#define UPMC_MAX ((UINT64_C(1) << 48) - 1)
+
+/*
+ * The `__builtin_arm_{r,w}sr` functions require constant strings, since the
+ * MSR/MRS instructions encode the registers as immediates. Otherwise, this
+ * would be indexing into an array of strings.
+ */
+
+#define UPMC0 "s3_7_c15_c7_4"
+#define UPMC1 "s3_7_c15_c8_4"
+#define UPMC2 "s3_7_c15_c9_4"
+#define UPMC3 "s3_7_c15_c10_4"
+#define UPMC4 "s3_7_c15_c11_4"
+#define UPMC5 "s3_7_c15_c12_4"
+#define UPMC6 "s3_7_c15_c13_4"
+#define UPMC7 "s3_7_c15_c14_4"
+#if UNCORE_NCTRS > 8
+#define UPMC8 "s3_7_c15_c0_5"
+#define UPMC9 "s3_7_c15_c1_5"
+#define UPMC10 "s3_7_c15_c2_5"
+#define UPMC11 "s3_7_c15_c3_5"
+#define UPMC12 "s3_7_c15_c4_5"
+#define UPMC13 "s3_7_c15_c5_5"
+#define UPMC14 "s3_7_c15_c6_5"
+#define UPMC15 "s3_7_c15_c7_5"
+#endif /* UNCORE_NCTRS > 8 */
+
+#define UPMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
+ X(6, A); X(7, A)
+#if UNCORE_NCTRS <= 8
+#define UPMC_ALL(X, A) UPMC_0_7(X, A)
+#else /* UNCORE_NCTRS <= 8 */
+#define UPMC_8_15(X, A) X(8, A); X(9, A); X(10, A); X(11, A); X(12, A); \
+ X(13, A); X(14, A); X(15, A)
+#define UPMC_ALL(X, A) UPMC_0_7(X, A); UPMC_8_15(X, A)
+#endif /* UNCORE_NCTRS > 8 */
+
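
The UPMC_ALL X-macro exists because the `__builtin_arm_{r,w}sr` builtins need a distinct string literal per register, so the reader and writer below expand into one switch case per counter. The following is a userspace sketch of the same expansion pattern, with the system-register access replaced by a placeholder constant since MRS cannot be issued from a normal process.

/*
 * Illustrative sketch only (not part of the diff): an X-macro that expands
 * into one case per counter, mirroring the UPMC_ALL pattern above.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
	X(6, A); X(7, A)

static uint64_t
ex_read_counter(unsigned int ctr)
{
	switch (ctr) {
#define EX_RD(CTR, UNUSED) case (CTR): return 100 + (CTR) /* stand-in for reading UPMC##CTR */
	EX_0_7(EX_RD, 0);
#undef EX_RD
	default:
		return 0;
	}
}

int
main(void)
{
	printf("%llu\n", (unsigned long long)ex_read_counter(3)); /* prints 103 */
	return 0;
}
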
+static inline uint64_t
+uncmon_read_counter_locked_l(__unused unsigned int monid, unsigned int ctr)
+{
+ assert(ctr < UNCORE_NCTRS);
+ switch (ctr) {
+#define UPMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(UPMC ## CTR)
+ UPMC_ALL(UPMC_RD, 0);
+#undef UPMC_RD
+ default:
+ panic("monotonic: invalid counter read %u", ctr);
+ __builtin_unreachable();
+ }
+}
+
+static inline void
+uncmon_write_counter_locked_l(__unused unsigned int monid, unsigned int ctr,
+ uint64_t count)
+{
+ assert(count < UPMC_MAX);
+ assert(ctr < UNCORE_NCTRS);
+ switch (ctr) {
+#define UPMC_WR(CTR, COUNT) case (CTR): \
+ return __builtin_arm_wsr64(UPMC ## CTR, (COUNT))
+ UPMC_ALL(UPMC_WR, count);
+#undef UPMC_WR
+ default:
+ panic("monotonic: invalid counter write %u", ctr);
+ }
+}
+
+#if UNCORE_PER_CLUSTER
+
+static const uint8_t clust_offs[__ARM_CLUSTER_COUNT__] = CPU_CLUSTER_OFFSETS;
+
+uintptr_t upmc_offs[UNCORE_NCTRS] = {
+ [0] = 0x4100, [1] = 0x4248, [2] = 0x4110, [3] = 0x4250, [4] = 0x4120,
+ [5] = 0x4258, [6] = 0x4130, [7] = 0x4260, [8] = 0x4140, [9] = 0x4268,
+ [10] = 0x4150, [11] = 0x4270, [12] = 0x4160, [13] = 0x4278,
+ [14] = 0x4170, [15] = 0x4280,
+};
+
+static inline uint64_t
+uncmon_read_counter_locked_r(unsigned int mon_id, unsigned int ctr)
+{
+ assert(mon_id < __ARM_CLUSTER_COUNT__);
+ assert(ctr < UNCORE_NCTRS);
+ return *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]);
+}
+
+static inline void
+uncmon_write_counter_locked_r(unsigned int mon_id, unsigned int ctr,
+ uint64_t count)
+{
+ assert(count < UPMC_MAX);
+ assert(ctr < UNCORE_NCTRS);
+ assert(mon_id < __ARM_CLUSTER_COUNT__);
+ *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]) = count;
+}
+
+#endif /* UNCORE_PER_CLUSTER */
+
+static inline void
+uncmon_update_locked(unsigned int monid, unsigned int curid, unsigned int ctr)
+{
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+ uint64_t snap = 0;
+ if (curid == monid) {
+ snap = uncmon_read_counter_locked_l(monid, ctr);
+ } else {
+#if UNCORE_PER_CLUSTER
+ snap = uncmon_read_counter_locked_r(monid, ctr);
+#endif /* UNCORE_PER_CLUSTER */
+ }
+ /* counters should increase monotonically */
+ assert(snap >= mon->um_snaps[ctr]);
+ mon->um_counts[ctr] += snap - mon->um_snaps[ctr];
+ mon->um_snaps[ctr] = snap;
+}
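
uncmon_update_locked() folds the raw hardware counter value into a 64-bit software total by accumulating the delta against the last snapshot. A standalone sketch of that snapshot/delta pattern, with a fake counter value standing in for the hardware read:

/*
 * Illustrative sketch only (not part of the diff): snapshot/delta
 * accumulation of a monotonically increasing hardware counter.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t snap;  /* last raw hardware value seen */
static uint64_t count; /* accumulated 64-bit total */

static void
ex_update(uint64_t hw_value)
{
	/* the counter only moves forward between updates */
	count += hw_value - snap;
	snap = hw_value;
}

int
main(void)
{
	ex_update(1000);
	ex_update(1500); /* counter advanced by 500 */
	printf("%llu\n", (unsigned long long)count); /* 1500 */
	return 0;
}
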
+
+static inline void
+uncmon_program_events_locked_l(unsigned int monid)
+{
+ /*
+ * UPMESR[01] is the event selection register that determines which event a
+ * counter will count.
+ */
+#define UPMESR0 "s3_7_c15_c1_4"
+ CTRL_REG_SET(UPMESR0, uncore_config.uc_events.uce_regs[0]);
+
+#if UNCORE_NCTRS > 8
+#define UPMESR1 "s3_7_c15_c11_5"
+ CTRL_REG_SET(UPMESR1, uncore_config.uc_events.uce_regs[1]);
+#endif /* UNCORE_NCTRS > 8 */
+
+ /*
+ * UPMECM[0123] are the event core masks for each counter -- whether or not
+ * that counter counts events generated by an agent. These are set to all
+ * ones so the uncore counters count events from all cores.
+ *
+ * The bits are based off the start of the cluster -- e.g. even if a core
+ * has a CPU ID of 4, it might be the first CPU in a cluster. Shift the
+ * registers right by the ID of the first CPU in the cluster.
+ */
+#define UPMECM0 "s3_7_c15_c3_4"
+#define UPMECM1 "s3_7_c15_c4_4"
+
+ CTRL_REG_SET(UPMECM0,
+ uncore_config.uc_cpu_masks[monid].uccm_regs[0]);
+ CTRL_REG_SET(UPMECM1,
+ uncore_config.uc_cpu_masks[monid].uccm_regs[1]);
+
+#if UNCORE_NCTRS > 8
+#define UPMECM2 "s3_7_c15_c8_5"
+#define UPMECM3 "s3_7_c15_c9_5"
+
+ CTRL_REG_SET(UPMECM2,
+ uncore_config.uc_cpu_masks[monid].uccm_regs[2]);
+ CTRL_REG_SET(UPMECM3,
+ uncore_config.uc_cpu_masks[monid].uccm_regs[3]);
+#endif /* UNCORE_NCTRS > 8 */
+}
+
+#if UNCORE_PER_CLUSTER
+
+static inline void
+uncmon_program_events_locked_r(unsigned int monid)
+{
+ const uintptr_t upmesr_offs[2] = {[0] = 0x41b0, [1] = 0x41b8, };
+
+ for (unsigned int i = 0; i < sizeof(upmesr_offs) / sizeof(upmesr_offs[0]);
+ i++) {
+ *(uint64_t *)(cpm_impl[monid] + upmesr_offs[i]) =
+ uncore_config.uc_events.uce_regs[i];
+ }
+
+ const uintptr_t upmecm_offs[4] = {
+ [0] = 0x4190, [1] = 0x4198, [2] = 0x41a0, [3] = 0x41a8,
+ };
+
+ for (unsigned int i = 0; i < sizeof(upmecm_offs) / sizeof(upmecm_offs[0]);
+ i++) {
+ *(uint64_t *)(cpm_impl[monid] + upmecm_offs[i]) =
+ uncore_config.uc_cpu_masks[monid].uccm_regs[i];
+ }
+}
+
+#endif /* UNCORE_PER_CLUSTER */
+
+static void
+uncmon_clear_int_locked_l(__unused unsigned int monid)
+{
+ __builtin_arm_wsr64(UPMSR, 0);
+}
+
+#if UNCORE_PER_CLUSTER
+
+static void
+uncmon_clear_int_locked_r(unsigned int monid)
+{
+ const uintptr_t upmsr_off = 0x41c0;
+ *(uint64_t *)(cpm_impl[monid] + upmsr_off) = 0;
+}
+
+#endif /* UNCORE_PER_CLUSTER */
+
+/*
+ * Get the PMI mask for the provided `monid` -- that is, the bitmap of CPUs
+ * that should be sent PMIs for a particular monitor.
+ */
+static uint64_t
+uncmon_get_pmi_mask(unsigned int monid)
+{
+ uint64_t pmi_mask = uncore_pmi_mask;
+
+#if UNCORE_PER_CLUSTER
+ /*
+ * Set up the mask for the high bits.
+ */
+ uint64_t clust_cpumask;
+ if (monid == __ARM_CLUSTER_COUNT__ - 1) {
+ clust_cpumask = UINT64_MAX;
+ } else {
+ clust_cpumask = ((1ULL << clust_offs[monid + 1]) - 1);
+ }
+
+ /*
+ * Mask off the low bits, if necessary.
+ */
+ if (clust_offs[monid] != 0) {
+ clust_cpumask &= ~((1ULL << clust_offs[monid]) - 1);
+ }
+
+ pmi_mask &= clust_cpumask;
+#else /* UNCORE_PER_CLUSTER */
+#pragma unused(monid)
+#endif /* !UNCORE_PER_CLUSTER */
+
+ return pmi_mask;
+}
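
For the per-cluster case, uncmon_get_pmi_mask() carves the global PMI CPU bitmap into the slice owned by one cluster using the first-CPU offsets. A standalone sketch of that mask arithmetic, assuming a hypothetical two-cluster topology whose clusters start at CPUs 0 and 4:

/*
 * Illustrative sketch only (not part of the diff): per-cluster slicing of a
 * global PMI CPU bitmap, mirroring the high/low masking above.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_NCLUSTERS 2
static const uint8_t ex_offs[EX_NCLUSTERS] = { 0, 4 }; /* hypothetical */

static uint64_t
ex_pmi_mask(unsigned int monid, uint64_t pmi_mask)
{
	uint64_t clust_cpumask;

	if (monid == EX_NCLUSTERS - 1) {
		clust_cpumask = UINT64_MAX;
	} else {
		clust_cpumask = (1ULL << ex_offs[monid + 1]) - 1;
	}
	if (ex_offs[monid] != 0) {
		clust_cpumask &= ~((1ULL << ex_offs[monid]) - 1);
	}
	return pmi_mask & clust_cpumask;
}

int
main(void)
{
	uint64_t pmi_mask = 0x11; /* CPUs 0 and 4 take uncore PMIs */

	printf("cluster 0: 0x%llx\n", (unsigned long long)ex_pmi_mask(0, pmi_mask)); /* 0x1  */
	printf("cluster 1: 0x%llx\n", (unsigned long long)ex_pmi_mask(1, pmi_mask)); /* 0x10 */
	return 0;
}
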
+
+/*
+ * Initialization routines for the uncore counters.
+ */
+
+static void
+uncmon_init_locked_l(unsigned int monid)
+{
+ /*
+ * UPMPCM defines the PMI core mask for the UPMCs -- which cores should
+ * receive interrupts on overflow.
+ */
+ CTRL_REG_SET(UPMPCM, uncmon_get_pmi_mask(monid));
+ uncmon_set_counting_locked_l(monid,
+ mt_uncore_enabled ? uncore_active_ctrs : 0);
+}
+
+#if UNCORE_PER_CLUSTER
+
+static vm_size_t acc_impl_size = 0;
+static uintptr_t acc_impl[__ARM_CLUSTER_COUNT__] = {};
+static uintptr_t acc_impl_phys[__ARM_CLUSTER_COUNT__] = {};
+
+static void
+uncmon_init_locked_r(unsigned int monid)
+{
+ const uintptr_t upmpcm_off = 0x1010;
+
+ *(uint64_t *)(acc_impl[monid] + upmpcm_off) = uncmon_get_pmi_mask(monid);
+ uncmon_set_counting_locked_r(monid,
+ mt_uncore_enabled ? uncore_active_ctrs : 0);
+}
+
+#endif /* UNCORE_PER_CLUSTER */
+
+/*
+ * Initialize the uncore device for monotonic.
+ */
+static int
+uncore_init(__unused mt_device_t dev)
+{
+#if DEVELOPMENT || DEBUG
+ /*
+ * Development and debug kernels observe the `uncore_pmi_mask` boot-arg,
+ * allowing PMIs to be routed to the CPUs present in the supplied bitmap.
+ * Do some sanity checks on the value provided.
+ */
+ bool parsed_arg = PE_parse_boot_argn("uncore_pmi_mask", &uncore_pmi_mask,
+ sizeof(uncore_pmi_mask));
+ if (parsed_arg) {
+#if UNCORE_PER_CLUSTER
+ if (__builtin_popcount(uncore_pmi_mask) != __ARM_CLUSTER_COUNT__) {
+ panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
+ }
+ for (unsigned int i = 0; i < __ARM_CLUSTER_COUNT__; i++) {
+ if (__builtin_popcountll(uncmon_get_pmi_mask(i)) != 1) {
+ panic("monotonic: invalid uncore PMI CPU for cluster %d in mask 0x%x",
+ i, uncore_pmi_mask);
+ }
+ }
+#else /* UNCORE_PER_CLUSTER */
+ if (__builtin_popcount(uncore_pmi_mask) != 1) {
+ panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
+ }
+#endif /* !UNCORE_PER_CLUSTER */
+ } else
+#endif /* DEVELOPMENT || DEBUG */
+ {
+#if UNCORE_PER_CLUSTER
+ for (int i = 0; i < __ARM_CLUSTER_COUNT__; i++) {
+ /* route to the first CPU in each cluster */
+ uncore_pmi_mask |= (1ULL << clust_offs[i]);
+ }
+#else /* UNCORE_PER_CLUSTER */
+ /* arbitrarily route to core 0 */
+ uncore_pmi_mask |= 1;
+#endif /* !UNCORE_PER_CLUSTER */
+ }
+ assert(uncore_pmi_mask != 0);
+
+ unsigned int curmonid = uncmon_get_curid();
+
+ for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) {
+#if UNCORE_PER_CLUSTER
+ cpm_impl[monid] = (uintptr_t)ml_io_map(cpm_impl_phys[monid],
+ cpm_impl_size);
+ assert(cpm_impl[monid] != 0);
+
+ acc_impl[monid] = (uintptr_t)ml_io_map(acc_impl_phys[monid],
+ acc_impl_size);
+ assert(acc_impl[monid] != 0);
+#endif /* UNCORE_PER_CLUSTER */
+
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+ lck_spin_init(&mon->um_lock, mt_lock_grp, NULL);
+
+ int intrs_en = uncmon_lock(mon);
+ if (monid != curmonid) {
+#if UNCORE_PER_CLUSTER
+ uncmon_init_locked_r(monid);
+#endif /* UNCORE_PER_CLUSTER */
+ } else {
+ uncmon_init_locked_l(monid);
+ }
+ uncmon_unlock(mon, intrs_en);
+ }
+
+ mt_uncore_initted = true;
+
+ return 0;
+}
+
+/*
+ * Support for monotonic's mtd_read function.
+ */
+
+static void
+uncmon_read_all_counters(unsigned int monid, unsigned int curmonid,
+ uint64_t ctr_mask, uint64_t *counts)
+{
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+
+ int intrs_en = uncmon_lock(mon);
+
+ for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
+ if (ctr_mask & (1ULL << ctr)) {
+ uncmon_update_locked(monid, curmonid, ctr);
+ counts[ctr] = mon->um_counts[ctr];
+ }
+ }
+
+ uncmon_unlock(mon, intrs_en);
+}
+
+/*
+ * Read all monitors' counters.
+ */
+static int
+uncore_read(uint64_t ctr_mask, uint64_t *counts_out)
+{
+ assert(ctr_mask != 0);
+ assert(counts_out != NULL);
+
+ if (!uncore_active_ctrs) {
+ return EPWROFF;
+ }
+ if (ctr_mask & ~uncore_active_ctrs) {
+ return EINVAL;
+ }
+
+ unsigned int curmonid = uncmon_get_curid();
+ for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) {
+ /*
+ * Find this monitor's starting offset into the `counts_out` array.
+ */
+ uint64_t *counts = counts_out + (UNCORE_NCTRS * monid);
+
+ uncmon_read_all_counters(monid, curmonid, ctr_mask, counts);
+ }
+
+ return 0;
+}
+
+/*
+ * Support for monotonic's mtd_add function.
+ */
+
+/*
+ * Add an event to the current uncore configuration. This doesn't take effect
+ * until the counters are enabled again, so there's no need to involve the
+ * monitors.
+ */
+static int
+uncore_add(struct monotonic_config *config, uint32_t *ctr_out)
+{
+ if (mt_uncore_enabled) {
+ return EBUSY;
+ }
+
+ uint32_t available = ~uncore_active_ctrs & config->allowed_ctr_mask;
+
+ if (available == 0) {
+ return ENOSPC;
+ }
+
+ uint32_t valid_ctrs = (UINT32_C(1) << UNCORE_NCTRS) - 1;
+ if ((available & valid_ctrs) == 0) {
+ return E2BIG;
+ }
+
+ uint32_t ctr = __builtin_ffsll(available) - 1;
+
+ uncore_active_ctrs |= UINT64_C(1) << ctr;
+ uncore_config.uc_events.uce_ctrs[ctr] = config->event;
+ uint64_t cpu_mask = UINT64_MAX;
+ if (config->cpu_mask != 0) {
+ cpu_mask = config->cpu_mask;
+ }
+ for (int i = 0; i < UNCORE_NMONITORS; i++) {
+#if UNCORE_PER_CLUSTER
+ const unsigned int shift = clust_offs[i];
+#else /* UNCORE_PER_CLUSTER */
+ const unsigned int shift = 0;
+#endif /* !UNCORE_PER_CLUSTER */
+ uncore_config.uc_cpu_masks[i].uccm_masks[ctr] = cpu_mask >> shift;
+ }
+
+ *ctr_out = ctr;
+ return 0;
+}
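
The allocation step in uncore_add() picks the lowest free counter permitted by the caller's allowed mask, via `__builtin_ffsll` on the complement of the active mask. A standalone sketch of that selection with hypothetical mask values:

/*
 * Illustrative sketch only (not part of the diff): pick the lowest free
 * counter that the caller's allowed mask permits.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint16_t active = 0x0007;  /* counters 0-2 already in use */
	uint32_t allowed = 0x00f0; /* caller only allows counters 4-7 */
	uint32_t available = (uint32_t)~active & allowed;

	if (available == 0) {
		printf("no counter available\n");
		return 1;
	}
	uint32_t ctr = (uint32_t)__builtin_ffsll(available) - 1;
	printf("allocated counter %u\n", ctr); /* counter 4 */
	return 0;
}
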
+
+/*
+ * Support for monotonic's mtd_reset function.
+ */
+
+/*
+ * Reset all configuration and disable the counters if they're currently
+ * counting.
+ */
+static void
+uncore_reset(void)
+{
+ mt_uncore_enabled = false;
+
+ unsigned int curmonid = uncmon_get_curid();
+
+ for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) {
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+ bool remote = monid != curmonid;
+
+ int intrs_en = uncmon_lock(mon);
+ if (remote) {
+#if UNCORE_PER_CLUSTER
+ uncmon_set_counting_locked_r(monid, 0);
+#endif /* UNCORE_PER_CLUSTER */
+ } else {
+ uncmon_set_counting_locked_l(monid, 0);
+ }
+
+ for (int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
+ if (uncore_active_ctrs & (1U << ctr)) {
+ if (remote) {
+#if UNCORE_PER_CLUSTER
+ uncmon_write_counter_locked_r(monid, ctr, 0);
+#endif /* UNCORE_PER_CLUSTER */
+ } else {
+ uncmon_write_counter_locked_l(monid, ctr, 0);
+ }
+ }
+ }
+
+ memset(&mon->um_snaps, 0, sizeof(mon->um_snaps));
+ memset(&mon->um_counts, 0, sizeof(mon->um_counts));
+ if (remote) {
+#if UNCORE_PER_CLUSTER
+ uncmon_clear_int_locked_r(monid);
+#endif /* UNCORE_PER_CLUSTER */
+ } else {
+ uncmon_clear_int_locked_l(monid);
+ }
+
+ uncmon_unlock(mon, intrs_en);
+ }
+
+ uncore_active_ctrs = 0;
+ memset(&uncore_config, 0, sizeof(uncore_config));
+
+ for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) {
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+ bool remote = monid != curmonid;
+
+ int intrs_en = uncmon_lock(mon);
+ if (remote) {
+#if UNCORE_PER_CLUSTER
+ uncmon_program_events_locked_r(monid);
+#endif /* UNCORE_PER_CLUSTER */
+ } else {
+ uncmon_program_events_locked_l(monid);
+ }
+ uncmon_unlock(mon, intrs_en);
+ }
+}
+
+/*
+ * Support for monotonic's mtd_enable function.
+ */
+
+static void
+uncmon_set_enabled_l(unsigned int monid, bool enable)
+{
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+ int intrs_en = uncmon_lock(mon);
+
+ if (enable) {
+ uncmon_program_events_locked_l(monid);
+ uncmon_set_counting_locked_l(monid, uncore_active_ctrs);
+ } else {
+ uncmon_set_counting_locked_l(monid, 0);
+ }
+
+ uncmon_unlock(mon, intrs_en);
+}
+
+#if UNCORE_PER_CLUSTER
+
+static void
+uncmon_set_enabled_r(unsigned int monid, bool enable)
+{
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+ int intrs_en = uncmon_lock(mon);
+
+ if (enable) {
+ uncmon_program_events_locked_r(monid);
+ uncmon_set_counting_locked_r(monid, uncore_active_ctrs);
+ } else {
+ uncmon_set_counting_locked_r(monid, 0);
+ }
+
+ uncmon_unlock(mon, intrs_en);
+}
+
+#endif /* UNCORE_PER_CLUSTER */
+
+static void
+uncore_set_enabled(bool enable)
+{
+ mt_uncore_enabled = enable;
+
+ unsigned int curmonid = uncmon_get_curid();
+ for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) {
+ if (monid != curmonid) {
+#if UNCORE_PER_CLUSTER
+ uncmon_set_enabled_r(monid, enable);
+#endif /* UNCORE_PER_CLUSTER */
+ } else {
+ uncmon_set_enabled_l(monid, enable);
+ }
+ }
+}
+
+/*
+ * Hooks in the machine layer.
+ */
+
+static void
+uncore_fiq(uint64_t upmsr)
+{
+ /*
+ * Determine which counters overflowed.
+ */
+ uint64_t disable_ctr_mask = (upmsr & UPMSR_OVF_MASK) >> UPMSR_OVF_POS;
+ /* should not receive interrupts from inactive counters */
+ assert(!(disable_ctr_mask & ~uncore_active_ctrs));
+
+ unsigned int monid = uncmon_get_curid();
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+
+ int intrs_en = uncmon_lock(mon);
+
+ /*
+ * Disable any counters that overflowed.
+ */
+ uncmon_set_counting_locked_l(monid,
+ uncore_active_ctrs & ~disable_ctr_mask);
+
+ /*
+ * With the overflowing counters disabled, capture their counts and reset
+ * the UPMCs and their snapshots to 0.
+ */
+ for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
+ if (UPMSR_OVF(upmsr, ctr)) {
+ uncmon_update_locked(monid, monid, ctr);
+ mon->um_snaps[ctr] = 0;
+ uncmon_write_counter_locked_l(monid, ctr, 0);
+ }
+ }
+
+ /*
+ * Acknowledge the interrupt, now that any overflowed PMCs have been reset.
+ */
+ uncmon_clear_int_locked_l(monid);
+
+ /*
+ * Re-enable all active counters.
+ */
+ uncmon_set_counting_locked_l(monid, uncore_active_ctrs);
+
+ uncmon_unlock(mon, intrs_en);
+}
+
+static void
+uncore_save(void)
+{
+ if (!uncore_active_ctrs) {
+ return;
+ }
+
+ unsigned int curmonid = uncmon_get_curid();
+
+ for (unsigned int monid = 0; monid < UNCORE_NMONITORS; monid++) {
+ struct uncore_monitor *mon = &uncore_monitors[monid];
+ int intrs_en = uncmon_lock(mon);
+
+ if (mt_uncore_enabled) {
+ if (monid != curmonid) {
+#if UNCORE_PER_CLUSTER
+ uncmon_set_counting_locked_r(monid, 0);
+#endif /* UNCORE_PER_CLUSTER */
+ } else {
+ uncmon_set_counting_locked_l(monid, 0);
+ }
+ }
+
+ for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
+ if (uncore_active_ctrs & (1U << ctr)) {
+ uncmon_update_locked(monid, curmonid, ctr);
+ }
+ }
+
+ mon->um_sleeping = true;
+ uncmon_unlock(mon, intrs_en);
+ }
+}
+
+static void
+uncore_restore(void)
+{
+ if (!uncore_active_ctrs) {
+ return;
+ }
+ unsigned int curmonid = uncmon_get_curid();
+
+ struct uncore_monitor *mon = &uncore_monitors[curmonid];
+ int intrs_en = uncmon_lock(mon);
+ if (!mon->um_sleeping) {
+ goto out;
+ }
+
+ for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
+ if (uncore_active_ctrs & (1U << ctr)) {
+ uncmon_write_counter_locked_l(curmonid, ctr, mon->um_snaps[ctr]);
+ }
+ }
+ uncmon_program_events_locked_l(curmonid);
+ uncmon_init_locked_l(curmonid);
+ mon->um_sleeping = false;
+
+out:
+ uncmon_unlock(mon, intrs_en);
+}
+
+static void
+uncore_early_init(void)
+{
+#if UNCORE_PER_CLUSTER
+ /*
+ * Initialize the necessary PIO physical regions from the device tree.
+ */
+ DTEntry armio_entry = NULL;
+ if ((DTFindEntry("name", "arm-io", &armio_entry) != kSuccess)) {
+ panic("unable to find arm-io DT entry");
+ }
+
+ uint64_t *regs;
+ unsigned int regs_size = 0;
+ if (DTGetProperty(armio_entry, "acc-impl", (void **)®s, ®s_size) !=
+ kSuccess) {
+ panic("unable to find acc-impl DT property");
+ }
+ /*
+ * Two 8-byte values are expected for each cluster -- the physical address
+ * of the region and its size.
+ */
+ const unsigned int expected_size =
+ (typeof(expected_size))sizeof(uint64_t) * __ARM_CLUSTER_COUNT__ * 2;
+ if (regs_size != expected_size) {
+ panic("invalid size for acc-impl DT property");
+ }
+ for (int i = 0; i < __ARM_CLUSTER_COUNT__; i++) {
+ acc_impl_phys[i] = regs[i * 2];
+ }
+ acc_impl_size = regs[1];
+
+ regs_size = 0;
+ if (DTGetProperty(armio_entry, "cpm-impl", (void **)®s, ®s_size) !=
+ kSuccess) {
+ panic("unable to find cpm-impl property");
+ }
+ if (regs_size != expected_size) {
+ panic("invalid size for cpm-impl DT property");
+ }
+ for (int i = 0; i < __ARM_CLUSTER_COUNT__; i++) {
+ cpm_impl_phys[i] = regs[i * 2];
+ }
+ cpm_impl_size = regs[1];
+#endif /* UNCORE_PER_CLUSTER */
+}
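
uncore_early_init() expects the acc-impl/cpm-impl device-tree properties to be arrays of (physical address, size) pairs, one pair per cluster; it records each cluster's base and takes a single shared mapping size. A standalone sketch of that parsing, using made-up addresses purely for illustration:

/*
 * Illustrative sketch only (not part of the diff): parse a per-cluster
 * (physical address, size) pair layout like the acc-impl/cpm-impl properties.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_NCLUSTERS 2

int
main(void)
{
	/* regs[] = { phys0, size0, phys1, size1 } -- values are hypothetical */
	uint64_t regs[EX_NCLUSTERS * 2] = {
		0x210000000ULL, 0x1000ULL,
		0x211000000ULL, 0x1000ULL,
	};
	uint64_t phys[EX_NCLUSTERS];

	for (int i = 0; i < EX_NCLUSTERS; i++) {
		phys[i] = regs[i * 2];
	}
	uint64_t size = regs[1]; /* one mapping size shared by all clusters */

	for (int i = 0; i < EX_NCLUSTERS; i++) {
		printf("cluster %d: phys 0x%llx size 0x%llx\n",
		    i, (unsigned long long)phys[i], (unsigned long long)size);
	}
	return 0;
}
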
+
+#endif /* HAS_UNCORE_CTRS */
#pragma mark common hooks
void
mt_early_init(void)
{
+#if HAS_UNCORE_CTRS
+ uncore_early_init();
+#endif /* HAS_UNCORE_CTRS */
}
void
void
mt_sleep(void)
{
+#if HAS_UNCORE_CTRS
+ uncore_save();
+#endif /* HAS_UNCORE_CTRS */
}
void
mt_wake_per_core(void)
{
+#if HAS_UNCORE_CTRS
+ if (mt_uncore_initted) {
+ uncore_restore();
+ }
+#endif /* HAS_UNCORE_CTRS */
}
uint64_t
mt_cpu_pmi(cpu, pmcr0);
#endif /* !CPMU_AIC_PMI */
+#if HAS_UNCORE_CTRS
+ uncore_fiq(upmsr);
+#else /* HAS_UNCORE_CTRS */
#pragma unused(upmsr)
+#endif /* !HAS_UNCORE_CTRS */
}
static uint32_t mt_xc_sync;
.mtd_name = "core",
.mtd_init = core_init,
},
+#if HAS_UNCORE_CTRS
+ [1] = {
+ .mtd_name = "uncore",
+ .mtd_init = uncore_init,
+ .mtd_add = uncore_add,
+ .mtd_reset = uncore_reset,
+ .mtd_enable = uncore_set_enabled,
+ .mtd_read = uncore_read,
+
+ .mtd_nmonitors = UNCORE_NMONITORS,
+ .mtd_ncounters = UNCORE_NCTRS,
+ }
+#endif /* HAS_UNCORE_CTRS */
};
static_assert(
#endif /* defined(KERNEL_INTEGRITY_KTRR) */
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
.text
.section __LAST,__pinst
check_instruction x2, x3, __pinst_spsel_1, 0xd65f03c0d50041bf
b __pinst_spsel_1
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#if __APRR_SUPPORTED__
+
+/*
+ * APRR registers aren't covered by VMSA lockdown, so we'll keep these
+ * gadgets in pinst for protection against undesired execution.
+ */
+
+ .text
+ .section __LAST,__pinst
+ .align 2
+
+__pinst_set_aprr_el0:
+ msr APRR_EL0, x0
+ ret
+
+__pinst_set_aprr_el1:
+ msr APRR_EL1, x0
+ ret
+
+__pinst_set_aprr_shadow_mask_en_el1:
+ msr APRR_SHADOW_MASK_EN_EL1, x0
+
+ ret
+
+ .text
+ .section __TEXT_EXEC,__text
+ .align 2
+
+ .globl _pinst_set_aprr_el0
+_pinst_set_aprr_el0:
+ check_instruction x2, x3, __pinst_set_aprr_el0, 0xd65f03c0d51cf200
+ b __pinst_set_aprr_el0
+
+ .globl _pinst_set_aprr_el1
+_pinst_set_aprr_el1:
+ check_instruction x2, x3, __pinst_set_aprr_el1, 0xd65f03c0d51cf220
+ b __pinst_set_aprr_el1
+
+ .globl _pinst_set_aprr_shadow_mask_en_el1
+_pinst_set_aprr_shadow_mask_en_el1:
+ check_instruction x2, x3, __pinst_set_aprr_shadow_mask_en_el1, 0xd65f03c0d51cf2c0
+ b __pinst_set_aprr_shadow_mask_en_el1
+#endif /* __APRR_SUPPORTED__ */
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
#include <ptrauth.h>
kern_return_t arm64_ropjop_test(void);
#endif
+#if defined(KERNEL_INTEGRITY_CTRR)
+kern_return_t ctrr_test(void);
+kern_return_t ctrr_test_cpu(void);
+#endif
#if HAS_TWO_STAGE_SPR_LOCK
kern_return_t arm64_spr_lock_test(void);
extern void arm64_msr_lock_test(uint64_t);
lck_rw_done(&lt_rwlock);
}
+#if __AMP__
+const int limit = 1000000;
+static int lt_stress_local_counters[MAX_CPUS];
+
+lck_ticket_t lt_ticket_lock;
+
+static void
+lt_stress_ticket_lock()
+{
+ int local_counter = 0;
+
+ uint cpuid = current_processor()->cpu_id;
+
+ kprintf("%s>cpu %d starting\n", __FUNCTION__, cpuid);
+
+ lck_ticket_lock(&lt_ticket_lock);
+ lt_counter++;
+ local_counter++;
+ lck_ticket_unlock(&lt_ticket_lock);
+
+ while (lt_counter < lt_target_done_threads) {
+ ;
+ }
+
+ kprintf("%s>cpu %d started\n", __FUNCTION__, cpuid);
+
+ while (lt_counter < limit) {
+ lck_ticket_lock(&lt_ticket_lock);
+ if (lt_counter < limit) {
+ lt_counter++;
+ local_counter++;
+ }
+ lck_ticket_unlock(&lt_ticket_lock);
+ }
+
+ lt_stress_local_counters[cpuid] = local_counter;
+
+ kprintf("%s>final counter %d cpu %d incremented the counter %d times\n", __FUNCTION__, lt_counter, cpuid, local_counter);
+}
+#endif
static void
lt_grab_hw_lock()
thread_deallocate(thread);
}
+#if __AMP__
+static void
+lt_bound_thread(void *arg, wait_result_t wres __unused)
+{
+ void (*func)(void) = (void (*)(void))arg;
+
+ int cpuid = OSIncrementAtomic((volatile SInt32 *)&lt_cpu_bind_id);
+
+ processor_t processor = processor_list;
+ while ((processor != NULL) && (processor->cpu_id != cpuid)) {
+ processor = processor->processor_list;
+ }
+
+ if (processor != NULL) {
+ thread_bind(processor);
+ }
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ func();
+
+ OSIncrementAtomic((volatile SInt32*) &lt_done_threads);
+}
+
+static void
+lt_e_thread(void *arg, wait_result_t wres __unused)
+{
+ void (*func)(void) = (void (*)(void))arg;
+
+ thread_t thread = current_thread();
+
+ spl_t s = splsched();
+ thread_lock(thread);
+ thread->sched_flags |= TH_SFLAG_ECORE_ONLY;
+ thread_unlock(thread);
+ splx(s);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ func();
+
+ OSIncrementAtomic((volatile SInt32*) &lt_done_threads);
+}
+
+static void
+lt_p_thread(void *arg, wait_result_t wres __unused)
+{
+ void (*func)(void) = (void (*)(void))arg;
+
+ thread_t thread = current_thread();
+
+ spl_t s = splsched();
+ thread_lock(thread);
+ thread->sched_flags |= TH_SFLAG_PCORE_ONLY;
+ thread_unlock(thread);
+ splx(s);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ func();
+
+ OSIncrementAtomic((volatile SInt32*) &lt_done_threads);
+}
+
+static void
+lt_start_lock_thread_e(thread_continue_t func)
+{
+ thread_t thread;
+ kern_return_t kr;
+
+ kr = kernel_thread_start(lt_e_thread, func, &thread);
+ assert(kr == KERN_SUCCESS);
+
+ thread_deallocate(thread);
+}
+
+static void
+lt_start_lock_thread_p(thread_continue_t func)
+{
+ thread_t thread;
+ kern_return_t kr;
+
+ kr = kernel_thread_start(lt_p_thread, func, &thread);
+ assert(kr == KERN_SUCCESS);
+
+ thread_deallocate(thread);
+}
+
+static void
+lt_start_lock_thread_bound(thread_continue_t func)
+{
+ thread_t thread;
+ kern_return_t kr;
+
+ kr = kernel_thread_start(lt_bound_thread, func, &thread);
+ assert(kr == KERN_SUCCESS);
+
+ thread_deallocate(thread);
+}
+#endif
static kern_return_t
lt_test_locks()
lt_wait_for_lock_test_threads();
T_EXPECT_EQ_UINT(lt_counter, LOCK_TEST_ITERATIONS * lt_target_done_threads, NULL);
+#if __AMP__
+ /* Ticket locks stress test */
+ T_LOG("Running Ticket locks stress test with lck_ticket_lock()");
+ extern unsigned int real_ncpus;
+ lck_ticket_init(&lt_ticket_lock);
+ lt_reset();
+ lt_target_done_threads = real_ncpus;
+ for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) {
+ lt_start_lock_thread_bound(lt_stress_ticket_lock);
+ }
+ lt_wait_for_lock_test_threads();
+ bool starvation = false;
+ uint total_local_count = 0;
+ for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) {
+ starvation = starvation || (lt_stress_local_counters[processor->cpu_id] < 10);
+ total_local_count += lt_stress_local_counters[processor->cpu_id];
+ }
+ if (total_local_count != lt_counter) {
+ T_FAIL("Lock failure\n");
+ } else if (starvation) {
+ T_FAIL("Lock starvation found\n");
+ } else {
+ T_PASS("Ticket locks stress test with lck_ticket_lock()");
+ }
+
+ /* AMP ticket locks stress test */
+ T_LOG("Running AMP Ticket locks stress test bound to clusters with lck_ticket_lock()");
+ lt_reset();
+ lt_target_done_threads = real_ncpus;
+ for (processor_t processor = processor_list; processor != NULL; processor = processor->processor_list) {
+ processor_set_t pset = processor->processor_set;
+ if (pset->pset_cluster_type == PSET_AMP_P) {
+ lt_start_lock_thread_p(lt_stress_ticket_lock);
+ } else if (pset->pset_cluster_type == PSET_AMP_E) {
+ lt_start_lock_thread_e(lt_stress_ticket_lock);
+ } else {
+ lt_start_lock_thread(lt_stress_ticket_lock);
+ }
+ }
+ lt_wait_for_lock_test_threads();
+#endif
/* HW locks: trylocks */
T_LOG("Running test with hw_lock_try()");
return 0;
}
+#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
+SECURITY_READ_ONLY_LATE(uint64_t) ctrr_ro_test;
+uint64_t ctrr_nx_test = 0xd65f03c0; /* RET */
+volatile uint64_t ctrr_exception_esr;
+vm_offset_t ctrr_test_va;
+vm_offset_t ctrr_test_page;
+
+kern_return_t
+ctrr_test(void)
+{
+ processor_t p;
+ boolean_t ctrr_disable = FALSE;
+
+ PE_parse_boot_argn("-unsafe_kernel_text", &ctrr_disable, sizeof(ctrr_disable));
+
+ if (ctrr_disable) {
+ T_LOG("Skipping CTRR test when -unsafe_kernel_text boot-arg present");
+ return KERN_SUCCESS;
+ }
+
+ T_LOG("Running CTRR test.");
+
+ for (p = processor_list; p != NULL; p = p->processor_list) {
+ thread_bind(p);
+ thread_block(THREAD_CONTINUE_NULL);
+ T_LOG("Running CTRR test on cpu %d\n", p->cpu_id);
+ ctrr_test_cpu();
+ }
+
+ /* unbind thread from specific cpu */
+ thread_bind(PROCESSOR_NULL);
+ thread_block(THREAD_CONTINUE_NULL);
+
+ return KERN_SUCCESS;
+}
+
+/* test CTRR on a cpu, caller to bind thread to desired cpu */
+/* ctrr_test_page was reserved during bootstrap process */
+kern_return_t
+ctrr_test_cpu(void)
+{
+ ppnum_t ro_pn, nx_pn;
+ uint64_t *ctrr_ro_test_ptr;
+ void (*ctrr_nx_test_ptr)(void);
+ kern_return_t kr;
+ uint64_t prot = 0;
+ extern uint64_t rorgn_begin, rorgn_end;
+ extern vm_offset_t virtual_space_start;
+
+ /* rorgn = [rorgn_begin_va, rorgn_end_va) */
+
+ vm_offset_t rorgn_begin_va = phystokv(rorgn_begin);
+ vm_offset_t rorgn_end_va = phystokv(rorgn_end) + PAGE_SIZE;
+ vm_offset_t ro_test_va = (vm_offset_t)&ctrr_ro_test;
+ vm_offset_t nx_test_va = (vm_offset_t)&ctrr_nx_test;
+
+ T_EXPECT(rorgn_begin_va <= ro_test_va && ro_test_va < rorgn_end_va, "Expect ro_test_va to be inside the CTRR region");
+ T_EXPECT((nx_test_va < rorgn_begin_va) ^ (nx_test_va >= rorgn_end_va), "Expect nx_test_va to be outside the CTRR region");
+
+ ro_pn = pmap_find_phys(kernel_pmap, ro_test_va);
+ nx_pn = pmap_find_phys(kernel_pmap, nx_test_va);
+ T_EXPECT(ro_pn && nx_pn, "Expect ro page number and nx page number to be non zero");
+
+ T_LOG("test virtual page: %p, ctrr_ro_test: %p, ctrr_nx_test: %p, ro_pn: %x, nx_pn: %x ",
+ (void *)ctrr_test_page, &ctrr_ro_test, &ctrr_nx_test, ro_pn, nx_pn);
+
+ prot = pmap_get_arm64_prot(kernel_pmap, ctrr_test_page);
+ T_EXPECT(~prot & ARM_TTE_VALID, "Expect ctrr_test_page to be unmapped");
+
+ T_LOG("Read only region test mapping virtual page %p to CTRR RO page number %d", ctrr_test_page, ro_pn);
+ kr = pmap_enter(kernel_pmap, ctrr_test_page, ro_pn,
+ VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ T_EXPECT(kr == KERN_SUCCESS, "Expect pmap_enter of RW mapping to succeed");
+
+ // assert entire mmu prot path (Hierarchical protection model) is NOT RO
+ // fetch effective block level protections from table/block entries
+ prot = pmap_get_arm64_prot(kernel_pmap, ctrr_test_page);
+ T_EXPECT(ARM_PTE_EXTRACT_AP(prot) == AP_RWNA && (prot & ARM_PTE_PNX), "Mapping is EL1 RWNX");
+
+ ctrr_test_va = ctrr_test_page + (ro_test_va & PAGE_MASK);
+ ctrr_ro_test_ptr = (void *)ctrr_test_va;
+
+ T_LOG("Read only region test writing to %p to provoke data abort", ctrr_ro_test_ptr);
+
+ // should cause data abort
+ *ctrr_ro_test_ptr = 1;
+
+ // ensure write permission fault at expected level
+ // data abort handler will set ctrr_exception_esr when ctrr_test_va takes a permission fault
+
+ T_EXPECT(ESR_EC(ctrr_exception_esr) == ESR_EC_DABORT_EL1, "Data Abort from EL1 expected");
+ T_EXPECT(ISS_DA_FSC(ESR_ISS(ctrr_exception_esr)) == FSC_PERMISSION_FAULT_L3, "Permission Fault Expected");
+ T_EXPECT(ESR_ISS(ctrr_exception_esr) & ISS_DA_WNR, "Write Fault Expected");
+
+ ctrr_test_va = 0;
+ ctrr_exception_esr = 0;
+ pmap_remove(kernel_pmap, ctrr_test_page, ctrr_test_page + PAGE_SIZE);
+
+ T_LOG("No execute test mapping virtual page %p to CTRR PXN page number %d", ctrr_test_page, nx_pn);
+
+ kr = pmap_enter(kernel_pmap, ctrr_test_page, nx_pn,
+ VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ T_EXPECT(kr == KERN_SUCCESS, "Expect pmap_enter of RX mapping to succeed");
+
+ // assert entire mmu prot path (Hierarchical protection model) is NOT XN
+ prot = pmap_get_arm64_prot(kernel_pmap, ctrr_test_page);
+ T_EXPECT(ARM_PTE_EXTRACT_AP(prot) == AP_RONA && (~prot & ARM_PTE_PNX), "Mapping is EL1 ROX");
+
+ ctrr_test_va = ctrr_test_page + (nx_test_va & PAGE_MASK);
+ ctrr_nx_test_ptr = (void *)ctrr_test_va;
+
+ T_LOG("No execute test calling ctrr_nx_test_ptr(): %p to provoke instruction abort", ctrr_nx_test_ptr);
+
+#if __has_feature(ptrauth_calls)
+ // must sign before calling if we're creating function pointers out of thin air
+ ctrr_nx_test_ptr = ptrauth_sign_unauthenticated(ctrr_nx_test_ptr, ptrauth_key_function_pointer, 0);
+#endif
+ // should cause prefetch abort
+ ctrr_nx_test_ptr();
+
+ // TODO: ensure execute permission fault at expected level
+ T_EXPECT(ESR_EC(ctrr_exception_esr) == ESR_EC_IABORT_EL1, "Instruction abort from EL1 Expected");
+ T_EXPECT(ISS_DA_FSC(ESR_ISS(ctrr_exception_esr)) == FSC_PERMISSION_FAULT_L3, "Permission Fault Expected");
+
+ ctrr_test_va = 0;
+ ctrr_exception_esr = 0;
+ pmap_remove(kernel_pmap, ctrr_test_page, ctrr_test_page + PAGE_SIZE);
+ return KERN_SUCCESS;
+}
+#endif /* defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST) */
#if HAS_TWO_STAGE_SPR_LOCK
* global mappings would be visible to userspace unless we invalidate them on
* eret.
*/
+#if XNU_MONITOR
+/*
+ * Please note that because we indirect through the thread register in order to
+ * locate the kernel, and because we unmap most of the kernel, the security
+ * model of the PPL is undermined by __ARM_KERNEL_PROTECT__, as we rely on
+ * kernel controlled data to direct codeflow in the exception vectors.
+ *
+ * If we want to ship XNU_MONITOR paired with __ARM_KERNEL_PROTECT__, we will
+ * need to find a performant solution to this problem.
+ */
+#endif
#endif /* __ARM_KERNEL_PROTECT__ */
/*
#define CORESIGHT_REGIONS 4
#define CORESIGHT_SIZE 0x1000
+#if __APRR_SUPPORTED__
+/*
+ * APRR_EL0/APRR_EL1
+ *
+ * 63 0
+ * +--------------------+
+ * | Attr[15:0]RWX[3:0] |
+ * +--------------------+
+ *
+ * These registers consist of 16 4-bit fields.
+ *
+ * The attribute index consists of the access protection
+ * and execution protections on a mapping. The index
+ * for a given mapping type is constructed as follows.
+ *
+ * Attribute Index
+ *
+ * 3 2 1 0
+ * +-------+-------+-----+----+
+ * | AP[1] | AP[0] | PXN | XN |
+ * +-------+-------+-----+----+
+ *
+ * The attribute for a given index determines what
+ * protections are disabled for that mappings type
+ * (protections beyond the scope of the standard ARM
+ * protections for a mapping cannot be granted via
+ * APRR).
+ *
+ * Attribute
+ *
+ * 3 2 1 0
+ * +----------+---+---+---+
+ * | Reserved | R | W | X |
+ * +----------+---+---+---+
+ *
+ * Where:
+ * R: Read is allowed.
+ * W: Write is allowed.
+ * X: Execute is allowed.
+ */
+
+#define APRR_IDX_XN (1ULL)
+#define APRR_IDX_PXN (2ULL)
+
+
+#define APRR_IDX_XN_SHIFT (0ULL)
+#define APRR_IDX_PXN_SHIFT (1ULL)
+#define APRR_IDX_APSHIFT (2ULL)
+
+#endif /* __APRR_SUPPORTED__ */
+
+
+#if __APRR_SUPPORTED__
+
+#define APRR_ATTR_X (1ULL)
+#define APRR_ATTR_W (2ULL)
+#define APRR_ATTR_R (4ULL)
+
+#define APRR_ATTR_WX (APRR_ATTR_W | APRR_ATTR_X)
+#define APRR_ATTR_RX (APRR_ATTR_R | APRR_ATTR_X)
+#define APRR_ATTR_RWX (APRR_ATTR_R | APRR_ATTR_W | APRR_ATTR_X)
+
+#define APRR_ATTR_NONE (0ULL)
+#define APRR_ATTR_MASK (APRR_ATTR_RWX)
+
+#define APRR_RESERVED_MASK (0x8888888888888888ULL)
+#endif /* __APRR_SUPPORTED__ */
+
+#if __APRR_SUPPORTED__
+#define XPRR_FIRM_RX_PERM (0ULL)
+#define XPRR_PPL_RW_PERM (1ULL)
+#define XPRR_FIRM_RO_PERM (2ULL)
+#define XPRR_KERN_RW_PERM (3ULL)
+#define XPRR_FIRM_RW_PERM (4ULL)
+#define XPRR_USER_JIT_PERM (5ULL)
+#define XPRR_KERN0_RW_PERM (6ULL)
+#define XPRR_USER_RW_PERM (7ULL)
+#define XPRR_PPL_RX_PERM (8ULL)
+#define XPRR_PPL_RO_PERM (9ULL)
+#define XPRR_KERN_RX_PERM (10ULL)
+#define XPRR_KERN_RO_PERM (11ULL)
+#define XPRR_KERN0_RX_PERM (12ULL)
+#define XPRR_USER_RX_PERM (13ULL)
+#define XPRR_KERN0_RO_PERM (14ULL)
+#define XPRR_USER_RO_PERM (15ULL)
+#define XPRR_MAX_PERM (15ULL)
+
+#define XPRR_VERSION_NONE (0ULL)
+#define XPRR_VERSION_APRR (1ULL)
+
+
+#endif /* __APRR_SUPPORTED__ */
+
+#if __APRR_SUPPORTED__
+/* Indices for attributes, named based on how we intend to use them. */
+#define APRR_FIRM_RX_INDEX (0ULL) /* AP_RWNA, PX, X */
+#define APRR_FIRM_RO_INDEX (1ULL) /* AP_RWNA, PX, XN */
+#define APRR_PPL_RW_INDEX (2ULL) /* AP_RWNA, PXN, X */
+#define APRR_KERN_RW_INDEX (3ULL) /* AP_RWNA, PXN, XN */
+#define APRR_FIRM_RW_INDEX (4ULL) /* AP_RWRW, PX, X */
+#define APRR_KERN0_RW_INDEX (5ULL) /* AP_RWRW, PX, XN */
+#define APRR_USER_JIT_INDEX (6ULL) /* AP_RWRW, PXN, X */
+#define APRR_USER_RW_INDEX (7ULL) /* AP_RWRW, PXN, XN */
+#define APRR_PPL_RX_INDEX (8ULL) /* AP_RONA, PX, X */
+#define APRR_KERN_RX_INDEX (9ULL) /* AP_RONA, PX, XN */
+#define APRR_PPL_RO_INDEX (10ULL) /* AP_RONA, PXN, X */
+#define APRR_KERN_RO_INDEX (11ULL) /* AP_RONA, PXN, XN */
+#define APRR_KERN0_RX_INDEX (12ULL) /* AP_RORO, PX, X */
+#define APRR_KERN0_RO_INDEX (13ULL) /* AP_RORO, PX, XN */
+#define APRR_USER_RX_INDEX (14ULL) /* AP_RORO, PXN, X */
+#define APRR_USER_RO_INDEX (15ULL) /* AP_RORO, PXN, XN */
+#define APRR_MAX_INDEX (15ULL) /* For sanity checking index values */
+#endif /* __APRR_SUPPORTED__ */
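
The attribute index for a mapping is the 4-bit value AP[1]:AP[0]:PXN:XN described earlier, which is how the per-use-case indices in the table above are derived. A standalone sketch of that packing; the AP encodings (0=RWNA, 1=RWRW, 2=RONA, 3=RORO) are inferred from the ordering of the table, not stated elsewhere in the diff.

/*
 * Illustrative sketch only (not part of the diff): pack the 4-bit APRR
 * attribute index as AP[1]:AP[0]:PXN:XN.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_APRR_IDX(ap, pxn, xn) \
	(((uint64_t)(ap) << 2) | ((uint64_t)(pxn) << 1) | (uint64_t)(xn))

int
main(void)
{
	/* AP_RONA, PXN, XN -> 11, matching APRR_KERN_RO_INDEX above */
	printf("%llu\n", (unsigned long long)EX_APRR_IDX(2, 1, 1));
	return 0;
}
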
+
+
+#if __APRR_SUPPORTED__
+#define APRR_SHIFT_FOR_IDX(x) \
+ ((x) << 2ULL)
+
+/* Shifts for attributes, named based on how we intend to use them. */
+#define APRR_FIRM_RX_SHIFT (0ULL) /* AP_RWNA, PX, X */
+#define APRR_FIRM_RO_SHIFT (4ULL) /* AP_RWNA, PX, XN */
+#define APRR_PPL_RW_SHIFT (8ULL) /* AP_RWNA, PXN, X */
+#define APRR_KERN_RW_SHIFT (12ULL) /* AP_RWNA, PXN, XN */
+#define APRR_FIRM_RW_SHIFT (16ULL) /* AP_RWRW, PX, X */
+#define APRR_KERN0_RW_SHIFT (20ULL) /* AP_RWRW, PX, XN */
+#define APRR_USER_JIT_SHIFT (24ULL) /* AP_RWRW, PXN, X */
+#define APRR_USER_RW_SHIFT (28ULL) /* AP_RWRW, PXN, XN */
+#define APRR_PPL_RX_SHIFT (32ULL) /* AP_RONA, PX, X */
+#define APRR_KERN_RX_SHIFT (36ULL) /* AP_RONA, PX, XN */
+#define APRR_PPL_RO_SHIFT (40ULL) /* AP_RONA, PXN, X */
+#define APRR_KERN_RO_SHIFT (44ULL) /* AP_RONA, PXN, XN */
+#define APRR_KERN0_RX_SHIFT (48ULL) /* AP_RORO, PX, X */
+#define APRR_KERN0_RO_SHIFT (52ULL) /* AP_RORO, PX, XN */
+#define APRR_USER_RX_SHIFT (56ULL) /* AP_RORO, PXN, X */
+#define APRR_USER_RO_SHIFT (60ULL) /* AP_RORO, PXN, XN */
+
+#define ARM_PTE_APRR_MASK \
+ (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)
+
+#define ARM_PTE_XPRR_MASK ARM_PTE_APRR_MASK
+
+#define APRR_INDEX_TO_PTE(x) \
+ ((pt_entry_t) \
+ (((x) & 0x8) ? ARM_PTE_AP(0x2) : 0) | \
+ (((x) & 0x4) ? ARM_PTE_AP(0x1) : 0) | \
+ (((x) & 0x2) ? ARM_PTE_PNX : 0) | \
+ (((x) & 0x1) ? ARM_PTE_NX : 0))
+
+#define PTE_TO_APRR_INDEX(x) \
+ ((ARM_PTE_EXTRACT_AP(x) << APRR_IDX_APSHIFT) | \
+ (((x) & ARM_PTE_PNXMASK) ? APRR_IDX_PXN : 0) | \
+ (((x) & ARM_PTE_NXMASK) ? APRR_IDX_XN : 0))
+
+#endif /* __APRR_SUPPORTED__ */
+
+#if __APRR_SUPPORTED__
+
+#define APRR_EXTRACT_IDX_ATTR(_aprr_value, _idx) \
+ (((_aprr_value) >> APRR_SHIFT_FOR_IDX(_idx)) & APRR_ATTR_MASK)
+
+#define APRR_REMOVE(x) (~(x))
+
+#define APRR_EL1_UNRESTRICTED (0x4455445566666677ULL)
+
+#define APRR_EL1_RESET \
+ APRR_EL1_UNRESTRICTED
+
+#define APRR_EL1_BASE \
+ APRR_EL1_UNRESTRICTED
+
+#if XNU_MONITOR
+#define APRR_EL1_DEFAULT \
+ (APRR_EL1_BASE & \
+ (APRR_REMOVE((APRR_ATTR_WX << APRR_PPL_RW_SHIFT) | \
+ (APRR_ATTR_WX << APRR_PPL_RO_SHIFT) | \
+ (APRR_ATTR_WX << APRR_PPL_RX_SHIFT))))
+
+#define APRR_EL1_PPL \
+ (APRR_EL1_BASE & \
+ (APRR_REMOVE((APRR_ATTR_X << APRR_PPL_RW_SHIFT) | \
+ (APRR_ATTR_WX << APRR_PPL_RO_SHIFT) | \
+ (APRR_ATTR_W << APRR_PPL_RX_SHIFT))))
+#else
+#define APRR_EL1_DEFAULT \
+ APRR_EL1_BASE
+#endif
+#define APRR_EL0_UNRESTRICTED (0x4545010167670101ULL)
+#define APRR_EL0_RESET \
+ APRR_EL0_UNRESTRICTED
+#if XNU_MONITOR
+#define APRR_EL0_BASE \
+ (APRR_EL0_UNRESTRICTED & \
+ (APRR_REMOVE((APRR_ATTR_RWX << APRR_PPL_RW_SHIFT) | \
+ (APRR_ATTR_RWX << APRR_PPL_RX_SHIFT) | \
+ (APRR_ATTR_RWX << APRR_PPL_RO_SHIFT))))
+#else
+#define APRR_EL0_BASE \
+ APRR_EL0_UNRESTRICTED
+#endif
+#define APRR_EL0_JIT_RW \
+ (APRR_EL0_BASE & APRR_REMOVE(APRR_ATTR_X << APRR_USER_JIT_SHIFT))
+#define APRR_EL0_JIT_RX \
+ (APRR_EL0_BASE & APRR_REMOVE(APRR_ATTR_W << APRR_USER_JIT_SHIFT))
+#define APRR_EL0_JIT_RWX \
+ APRR_EL0_BASE
+#define APRR_EL0_DEFAULT \
+ APRR_EL0_BASE
+
+#endif /* __APRR_SUPPORTED__ */
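
Each APRR value above packs one attribute nibble per index (R=4, W=2, X=1, with bit 3 reserved), which APRR_EXTRACT_IDX_ATTR pulls back out. A standalone sketch that decodes APRR_EL1_UNRESTRICTED with the same shift-and-mask arithmetic:

/*
 * Illustrative sketch only (not part of the diff): decode a packed APRR
 * value, printing the R/W/X attribute kept for each of the 16 indices.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_ATTR_MASK       0x7ULL
#define EX_SHIFT_FOR(idx)  ((uint64_t)(idx) << 2)
#define EX_EXTRACT(v, idx) (((v) >> EX_SHIFT_FOR(idx)) & EX_ATTR_MASK)

int
main(void)
{
	uint64_t aprr_el1_unrestricted = 0x4455445566666677ULL;

	for (unsigned int idx = 0; idx < 16; idx++) {
		uint64_t attr = EX_EXTRACT(aprr_el1_unrestricted, idx);
		printf("index %2u: %c%c%c\n", idx,
		    (attr & 0x4) ? 'R' : '-',
		    (attr & 0x2) ? 'W' : '-',
		    (attr & 0x1) ? 'X' : '-');
	}
	return 0;
}
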
/*
#define MSR(reg, src) __asm__ volatile ("msr " reg ", %0" :: "r" (src))
#define MRS(dest, reg) __asm__ volatile ("mrs %0, " reg : "=r" (dest))
+#if XNU_MONITOR
+#define __ARM_PTE_PHYSMAP__ 1
+#define PPL_STATE_KERNEL 0
+#define PPL_STATE_DISPATCH 1
+#define PPL_STATE_PANIC 2
+#define PPL_STATE_EXCEPTION 3
+#endif
#endif /* _ARM64_PROC_REG_H_ */
#define CPU_NAME "Twister"
#elif defined(APPLEHURRICANE)
#define CPU_NAME "Hurricane"
+#elif defined(APPLELIGHTNING)
+#define CPU_NAME "Lightning"
#else
#define CPU_NAME "Unknown"
#endif
#define WT_REASON_REG_VIOLATION 8
#endif
+#if defined(HAS_IPI)
+void cpu_signal_handler(void);
+extern unsigned int gFastIPI;
+#endif /* defined(HAS_IPI) */
extern vm_offset_t static_memory_end;
thread_exception_return();
case ESR_EC_IABORT_EL1:
+#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
+ {
+ extern volatile vm_offset_t ctrr_test_va;
+ if (ctrr_test_va && far == ctrr_test_va) {
+ extern volatile uint64_t ctrr_exception_esr;
+ ctrr_exception_esr = esr;
+ /* return to the instruction immediately after the call to NX page */
+ set_saved_state_pc(state, get_saved_state_lr(state));
+ break;
+ }
+ }
+#endif
panic_with_thread_kernel_state("Kernel instruction fetch abort", state);
}
}
-#if __ARM_PAN_AVAILABLE__
+#if __ARM_PAN_AVAILABLE__ || defined(KERNEL_INTEGRITY_CTRR)
static int
is_permission_fault(fault_status_t status)
{
* when running with KTRR.
*/
+#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
+ extern volatile vm_offset_t ctrr_test_va;
+ if (ctrr_test_va && fault_addr == ctrr_test_va && is_permission_fault(fault_code)) {
+ extern volatile uint64_t ctrr_exception_esr;
+ ctrr_exception_esr = esr;
+ add_saved_state_pc(state, 4);
+ return;
+ }
+#endif
#if __ARM_PAN_AVAILABLE__ && defined(CONFIG_XNUPOST)
if (is_permission_fault(fault_code) && !(get_saved_state_cpsr(state) & PSR64_PAN) &&
uint64_t pmcr0 = 0, upmsr = 0;
#endif /* MONOTONIC_FIQ */
+#if defined(HAS_IPI)
+ boolean_t is_ipi = FALSE;
+ uint64_t ipi_sr = 0;
+
+ if (gFastIPI) {
+ MRS(ipi_sr, ARM64_REG_IPI_SR);
+
+ if (ipi_sr & 1) {
+ is_ipi = TRUE;
+ }
+ }
+
+ if (is_ipi) {
+ type = DBG_INTR_TYPE_IPI;
+ } else
+#endif /* defined(HAS_IPI) */
#if MONOTONIC_FIQ
if (mt_pmi_pending(&pmcr0, &upmsr)) {
type = DBG_INTR_TYPE_PMI;
sleh_interrupt_handler_prologue(state, type);
+#if defined(HAS_IPI)
+ if (is_ipi) {
+ /*
+ * Order is important here: we must ack the IPI by writing IPI_SR
+ * before we call cpu_signal_handler(). Otherwise, there will be
+ * a window between the completion of pending-signal processing in
+ * cpu_signal_handler() and the ack during which a newly-issued
+ * IPI to this CPU may be lost. ISB is required to ensure the msr
+ * is retired before execution of cpu_signal_handler().
+ */
+ MSR(ARM64_REG_IPI_SR, ipi_sr);
+ __builtin_arm_isb(ISB_SY);
+ cpu_signal_handler();
+ } else
+#endif /* defined(HAS_IPI) */
#if MONOTONIC_FIQ
if (type == DBG_INTR_TYPE_PMI) {
mt_fiq(getCpuDatap(), pmcr0, upmsr);
#endif /* __ARM_KERNEL_PROTECT__ */
+#if __APRR_SUPPORTED__
+
+.macro MSR_APRR_EL1_X0
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
+ bl EXT(pinst_set_aprr_el1)
+#else
+ msr APRR_EL1, x0
+#endif
+.endmacro
+
+.macro MSR_APRR_EL0_X0
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
+ bl EXT(pinst_set_aprr_el0)
+#else
+ msr APRR_EL0, x0
+#endif
+.endmacro
+
+.macro MSR_APRR_SHADOW_MASK_EN_EL1_X0
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
+ bl EXT(pinst_set_aprr_shadow_mask_en_el1)
+#else
+ msr APRR_SHADOW_MASK_EN_EL1, x0
+#endif
+.endmacro
+
+#endif /* __APRR_SUPPORTED__ */
.macro MSR_VBAR_EL1_X0
#if defined(KERNEL_INTEGRITY_KTRR)
msr OSLAR_EL1, xzr
msr DAIFSet, #(DAIFSC_ALL) // Disable all interrupts
-#if !(defined(KERNEL_INTEGRITY_KTRR))
+#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
// Set low reset vector before attempting any loads
adrp x0, EXT(LowExceptionVectorBase)@page
add x0, x0, EXT(LowExceptionVectorBase)@pageoff
msr VBAR_EL1, x0
#endif
+#if __APRR_SUPPORTED__
+ MOV64 x0, APRR_EL1_DEFAULT
+#if XNU_MONITOR
+ adrp x4, EXT(pmap_ppl_locked_down)@page
+ ldrb w5, [x4, #EXT(pmap_ppl_locked_down)@pageoff]
+ cmp w5, #0
+ b.ne 1f
+
+ // If the PPL is not locked down, we start in PPL mode.
+ MOV64 x0, APRR_EL1_PPL
+1:
+#endif /* XNU_MONITOR */
+
+ MSR_APRR_EL1_X0
+
+ // Load up the default APRR_EL0 value.
+ MOV64 x0, APRR_EL0_DEFAULT
+ MSR_APRR_EL0_X0
+#endif /* __APRR_SUPPORTED__ */
#if defined(KERNEL_INTEGRITY_KTRR)
/*
adrp x19, EXT(ResetHandlerData)@page // Get address of the reset handler data
add x19, x19, EXT(ResetHandlerData)@pageoff
mrs x15, MPIDR_EL1 // Load MPIDR to get CPU number
+#if HAS_CLUSTER
+ and x0, x15, #0xFFFF // CPU number in Affinity0, cluster ID in Affinity1
+#else
and x0, x15, #0xFF // CPU number is in MPIDR Affinity Level 0
+#endif
ldr x1, [x19, CPU_DATA_ENTRIES] // Load start of data entries
add x3, x1, MAX_CPUS * 16 // end addr of data entries = start + (16 * MAX_CPUS)
Lcheck_cpu_data_entry:
b.eq Lskip_cpu_reset_handler // Not found
b Lcheck_cpu_data_entry // loop
Lfound_cpu_data_entry:
+#if defined(KERNEL_INTEGRITY_CTRR)
+ /*
+ * Program and lock CTRR if this CPU is a non-boot cluster master. The boot cluster is locked
+ * in machine_lockdown. pinst insns are protected by VMSA_LOCK.
+ * The A_PXN and A_MMUON_WRPROTECT options provide something close to KTRR behavior.
+ */
+
+ /* spin until bootstrap core has completed machine lockdown */
+ adrp x17, EXT(lockdown_done)@page
+1:
+ ldr x18, [x17, EXT(lockdown_done)@pageoff]
+ cbz x18, 1b
+
+ // load stashed rorgn_begin
+ adrp x17, EXT(rorgn_begin)@page
+ add x17, x17, EXT(rorgn_begin)@pageoff
+ ldr x17, [x17]
+ // if rorgn_begin is zero, we're debugging. skip enabling ctrr
+ cbz x17, Lskip_ctrr
+
+ // load stashed rorgn_end
+ adrp x19, EXT(rorgn_end)@page
+ add x19, x19, EXT(rorgn_end)@pageoff
+ ldr x19, [x19]
+ cbz x19, Lskip_ctrr
+
+ mrs x18, ARM64_REG_CTRR_LOCK_EL1
+ cbnz x18, Lskip_ctrr /* don't touch if already locked */
+ ldr w18, [x21, CLUSTER_MASTER] /* cluster master is unsigned int (32bit) */
+ cbz w18, Lspin_ctrr_unlocked /* non-cluster master spins if CTRR unlocked (unexpected) */
+ msr ARM64_REG_CTRR_A_LWR_EL1, x17
+ msr ARM64_REG_CTRR_A_UPR_EL1, x19
+ mov x18, #(CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT)
+ msr ARM64_REG_CTRR_CTL_EL1, x18
+ mov x18, #1
+ msr ARM64_REG_CTRR_LOCK_EL1, x18
+
+
+ isb
+ tlbi vmalle1
+ dsb ish
+ isb
+Lspin_ctrr_unlocked:
+ /* we shouldn't ever be here, as cpu start is serialized by cluster in cpu_start(),
+ * and the first core started in a cluster is designated cluster master and locks
+ * both core and cluster. subsequent cores in the same cluster will run locked
+ * from the reset vector */
+ mrs x18, ARM64_REG_CTRR_LOCK_EL1
+ cbz x18, Lspin_ctrr_unlocked
+Lskip_ctrr:
+#endif
adrp x20, EXT(const_boot_args)@page
add x20, x20, EXT(const_boot_args)@pageoff
ldr x0, [x21, CPU_RESET_HANDLER] // Call CPU reset handler
bne Lskip_cpu_reset_handler
1:
+#if HAS_NEX_PG
+ bl EXT(set_nex_pg)
+#endif
+#if HAS_BP_RET
+ bl EXT(set_bp_ret)
+#endif
#if __ARM_KERNEL_PROTECT__ && defined(KERNEL_INTEGRITY_KTRR)
/*
b .
.align 12, 0
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
/*
* Provide a global symbol so that we can narrow the V=P mapping to cover
* this page during arm_vm_init.
.globl EXT(bootstrap_instructions)
LEXT(bootstrap_instructions)
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
.align 2
.globl EXT(resume_idle_cpu)
LEXT(resume_idle_cpu)
.align 2
start_cpu:
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
// This is done right away in reset vector for pre-KTRR devices
// Set low reset vector now that we are in the KTRR-free zone
adrp x0, EXT(LowExceptionVectorBase)@page
add x0, x0, EXT(LowExceptionVectorBase)@pageoff
MSR_VBAR_EL1_X0
-#endif /* defined(KERNEL_INTEGRITY_KTRR)*/
+#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
// x20 set to BootArgs phys address
// x21 set to cpu data phys address
// Set SP_EL1 to exception stack
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
mov x1, lr
bl EXT(pinst_spsel_1)
mov lr, x1
add x0, x0, EXT(LowExceptionVectorBase)@pageoff
MSR_VBAR_EL1_X0
+#if __APRR_SUPPORTED__
+ // Save the LR
+ mov x1, lr
+
+#if XNU_MONITOR
+ // If the PPL is supported, we start out in PPL mode.
+ MOV64 x0, APRR_EL1_PPL
+#else
+ // Otherwise, we start out in default mode.
+ MOV64 x0, APRR_EL1_DEFAULT
+#endif
+
+ // Set the APRR state for EL1.
+ MSR_APRR_EL1_X0
+
+ // Set the APRR state for EL0.
+ MOV64 x0, APRR_EL0_DEFAULT
+ MSR_APRR_EL0_X0
+
+
+ // Restore the LR.
+ mov lr, x1
+#endif /* __APRR_SUPPORTED__ */
// Get the kernel memory parameters from the boot args
ldr x22, [x20, BA_VIRT_BASE] // Get the kernel virt base
sub x0, x0, x23
// Set SP_EL1 to exception stack
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
bl EXT(pinst_spsel_1)
#else
msr SPSel, #1
* TTBR0 - V=P table @ top of kernel
* TTBR1 - KVA table @ top of kernel + 1 page
*/
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
/* Note that for KTRR configurations, the V=P map will be modified by
* arm_vm_init.c.
*/
#endif /* defined(APPLEMONSOON) */
+#if defined(APPLEVORTEX)
+ ARM64_IS_PCORE x15
+ // Skip if not P-core
+ cbz x15, Lskip_cyprus_pcore_only
+ mrs x12, ARM64_REG_HID1
+
+ mrs x13, MIDR_EL1
+ ubfx x14, x13, #MIDR_EL1_PNUM_SHIFT, #12
+ // Should be applied to all Aruba variants, but only Cyprus variants B0 and later
+ cmp x14, #0xb // Part number 11 => Cyprus, 16 => Aruba
+ bne Lbr_kill
+ ubfx x14, x13, #MIDR_EL1_VAR_SHIFT, #4
+ cbz x14, Lskip_br_kill // variant 0 => Cyprus AX, 1 => Cyprus BX
+
+Lbr_kill:
+
+ // rdar://problem/36716477: data corruption due to incorrect branch predictor resolution
+ orr x12, x12, ARM64_REG_HID1_enaBrKillLimit
+
+Lskip_br_kill:
+
+ // rdar://problem/34435356: segfaults due to IEX clock-gating
+ orr x12, x12, ARM64_REG_HID1_rccForceAllIexL3ClksOn
+ msr ARM64_REG_HID1, x12
+
+#if ARM64_BOARD_CONFIG_T8027
+ // rdar://problem/40695685: Enable BIF fill buffer stall logic to prevent skid buffer overflow (Aruba A1 only)
+ mrs x12, ARM64_REG_HID5
+ orr x12, x12, ARM64_REG_HID5_EnableDnFIFORdStall
+ msr ARM64_REG_HID5, x12
+
+#endif /* ARM64_BOARD_CONFIG_T8027 */
+
+ // Prevent ordered loads from being dispatched from LSU until all prior loads have completed.
+ // rdar://problem/34095873: AF2 ordering rules allow ARM device ordering violations
+ mrs x12, ARM64_REG_HID4
+ orr x12, x12, ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd
+ msr ARM64_REG_HID4, x12
+
+ // rdar://problem/38482968: [Cyprus Tunable] Poisoned cache line crossing younger load is not redirected by older load-barrier
+ mrs x12, ARM64_REG_HID3
+ orr x12, x12, ARM64_REG_HID3_DisColorOpt
+ msr ARM64_REG_HID3, x12
+
+ // rdar://problem/41056604: disable faster launches of uncacheable unaligned stores to workaround load/load ordering violation
+ mrs x12, ARM64_REG_HID11
+ orr x12, x12, ARM64_REG_HID11_DisX64NTLnchOpt
+ msr ARM64_REG_HID11, x12
+
+ b Lskip_cyprus_ecore_only
+
+Lskip_cyprus_pcore_only:
+
+ // Prevent ordered loads from being dispatched from LSU until all prior loads have completed.
+ // rdar://problem/34095873: AF2 ordering rules allow ARM device ordering violations
+ mrs x12, ARM64_REG_EHID4
+ orr x12, x12, ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd
+ msr ARM64_REG_EHID4, x12
+
+ // rdar://problem/36595004: Poisoned younger load is not redirected by older load-acquire
+ mrs x12, ARM64_REG_EHID3
+ orr x12, x12, ARM64_REG_EHID3_DisColorOpt
+ msr ARM64_REG_EHID3, x12
+
+ // rdar://problem/37949166: Disable the extension of prefetcher training pipe clock gating, revert to default gating
+ mrs x12, ARM64_REG_EHID10
+ orr x12, x12, ARM64_REG_EHID10_rccDisPwrSavePrfClkOff
+ msr ARM64_REG_EHID10, x12
+
+Lskip_cyprus_ecore_only:
+
+#endif /* defined (APPLEVORTEX) */
+
+#if defined(ARM64_BOARD_CONFIG_T8030)
+ // Cebu <B0 is deprecated and unsupported (see rdar://problem/42835678)
+ SKIP_IF_CPU_VERSION_LESS_THAN x12, LIGHTNING_CPU_VERSION_B0, .
+
+ ARM64_IS_PCORE x15
+
+ // Skip if not P-core
+ cbz x15, Lskip_cebu_pcore_only
+
+ // rdar://problem/50664291: [Cebu B0/B1 Tunables][PerfVerif][LSU] Post-silicon tuning of STNT widget contiguous counter threshold
+ mrs x12, ARM64_REG_HID4
+ and x12, x12, ~ARM64_REG_HID4_CnfCntrThresh_mask
+ orr x12, x12, 3 << ARM64_REG_HID4_CnfCntrThresh_shift
+ msr ARM64_REG_HID4, x12
+
+ mrs x12, ARM64_REG_HID9
+ // rdar://problem/47744434: Barrier Load Ordering property is not satisfied for x64-loads
+ orr x12, x12, ARM64_REG_HID9_EnableFixBug47221499
+ // rdar://problem/50664291: [Cebu B0/B1 Tunables][PerfVerif][LSU] Post-silicon tuning of STNT widget contiguous counter threshold
+ orr x12, x12, ARM64_REG_HID9_DisSTNTWidgetForUnalign
+ msr ARM64_REG_HID9, x12
+
+ // rdar://problem/47865629: RF bank and Multipass conflict forward progress widget does not handle 3+ cycle livelock
+ mrs x12, ARM64_REG_HID16
+ orr x12, x12, ARM64_REG_HID16_EnRs4Sec
+ and x12, x12, ~ARM64_REG_HID16_DisxPickRs45
+ orr x12, x12, ARM64_REG_HID16_EnMPxPick45
+ orr x12, x12, ARM64_REG_HID16_EnMPCyc7
+ msr ARM64_REG_HID16, x12
+
+ mrs x12, ARM64_REG_HID4
+ // Prevent ordered loads from being dispatched from LSU until all prior loads have completed.
+ // rdar://problem/34095873: AF2 ordering rules allow ARM device ordering violations
+ orr x12, x12, ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd
+ // rdar://problem/51690962: Disable Store-Non-Temporal downgrade widget
+ orr x12, x12, ARM64_REG_HID4_DisSTNTWidget
+ msr ARM64_REG_HID4, x12
+
+ // rdar://problem/41056604: disable faster launches of uncacheable unaligned stores to workaround load/load ordering violation
+ mrs x12, ARM64_REG_HID11
+ orr x12, x12, ARM64_REG_HID11_DisX64NTLnchOpt
+ msr ARM64_REG_HID11, x12
+
+ // rdar://problem/41029832: configure dummy cycles to work around incorrect temp sensor readings on NEX power gating
+ mrs x12, ARM64_REG_HID13
+ and x12, x12, ~ARM64_REG_HID13_PreCyc_mask
+ orr x12, x12, 4 << ARM64_REG_HID13_PreCyc_shift
+ msr ARM64_REG_HID13, x12
+
+ // rdar://problem/45024523: enable aggressive LEQ throttling to work around LEQ credit leak
+ mrs x12, ARM64_REG_HID16
+ orr x12, x12, ARM64_REG_HID16_leqThrottleAggr
+ msr ARM64_REG_HID16, x12
+
+ b Lskip_cebu_ecore_only
+
+Lskip_cebu_pcore_only:
+
+ // Prevent ordered loads from being dispatched from LSU until all prior loads have completed.
+ // rdar://problem/34095873: AF2 ordering rules allow ARM device ordering violations
+ mrs x12, ARM64_REG_EHID4
+ orr x12, x12, ARM64_REG_HID4_ForceNsOrdLdReqNoOlderLd
+ msr ARM64_REG_EHID4, x12
+
+ // rdar://problem/37949166: Disable the extension of prefetcher training pipe clock gating, revert to default gating
+ mrs x12, ARM64_REG_EHID10
+ orr x12, x12, ARM64_REG_EHID10_rccDisPwrSavePrfClkOff
+ msr ARM64_REG_EHID10, x12
+
+Lskip_cebu_ecore_only:
+#endif /* defined(ARM64_BOARD_CONFIG_T8030) */
+
+#if defined(APPLELIGHTNING)
+ // rdar://54225210 (Incorrect fusing of a direct branch with AMX/EAS instruction at cross-beat location)
+ ARM64_IS_PCORE x15
+ cbz x15, not_cebu_pcore
+
+ mrs x12, ARM64_REG_HID0
+ orr x12, x12, ARM64_REG_HID0_CacheFusionDisable
+ msr ARM64_REG_HID0, x12
+
+not_cebu_pcore:
+#endif /* defined(APPLELIGHTNING) */
+
+#if defined(APPLELIGHTNING)
+
+ // rdar://53907283 ([Cebu ACC Errata] Sibling Merge in LLC can cause UC load to violate ARM Memory Ordering Rules.)
+ mrs x12, ARM64_REG_HID5
+ orr x12, x12, ARM64_REG_HID5_DisFill2cMerge
+ msr ARM64_REG_HID5, x12
+
+ // Skip if not E-core or not a two-cluster CPU
+#if defined(CPU_CLUSTER_OFFSETS)
+ ARM64_IS_PCORE x15
+ cbnz x15, Lskip_h12_h13_ecore_only
+
+ // rdar://problem/48476033: Prevent store-to-load forwarding for UC memory to avoid barrier ordering violation
+ mrs x12, ARM64_REG_EHID10
+ orr x12, x12, ARM64_REG_EHID10_ForceWStDrainUc
+ msr ARM64_REG_EHID10, x12
+
+Lskip_h12_h13_ecore_only:
+#endif /* defined(CPU_CLUSTER_OFFSETS) */
+#endif /* defined(APPLELIGHTNING)*/
mov x19, lr
+#if defined(HAS_VMSA_LOCK)
+ bl EXT(vmsa_lock)
+#endif
// Convert CPU data PA to VA and set as first argument
mov x0, x21
bl EXT(phystokv)
osfmk/kern/processor_data.c standard
osfmk/kern/restartable.c standard
osfmk/kern/sched_average.c standard
+#ifdef __AMP__
+osfmk/kern/sched_amp.c optional config_sched_multiq
+osfmk/kern/sched_amp_common.c optional config_sched_multiq
+#endif
osfmk/kern/sched_dualq.c optional config_sched_multiq
osfmk/kern/sched_clutch.c optional config_clutch
osfmk/kern/sched_prim.c standard
#endif
}
+#if XNU_MONITOR
+ vm_offset_t cpu_base = (vm_offset_t)pmap_stacks_start;
+ vm_offset_t cpu_top = (vm_offset_t)pmap_stacks_end;
+
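+ /*
+  * If exactly one of prevfp and fp lies within the PPL stack range, the
+  * frame chain has crossed into or out of the PPL stack.
+  */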
+ if (((prevfp >= cpu_base) && (prevfp < cpu_top)) !=
+ ((fp >= cpu_base) && (fp < cpu_top))) {
+ switched_stacks = TRUE;
+ break;
+ }
+#endif
}
if (!switched_stacks) {
#include <os/log.h>
uint32_t hz_tick_interval = 1;
+#if !HAS_CONTINUOUS_HWCLOCK
static uint64_t has_monotonic_clock = 0;
+#endif
decl_simple_lock_data(, clock_lock);
lck_grp_attr_t * settime_lock_grp_attr;
*nanosecs = ((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32;
}
+#if !defined(HAS_CONTINUOUS_HWCLOCK)
static __inline void
bintime2absolutetime(const struct bintime *_bt, uint64_t *abs)
{
extern int
kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+#endif
/*
* Time of day (calendar) variables.
*
struct bintime offset; /* cumulative offset expressed in (sec, 64 bits frac of a second) */
struct bintime bintime; /* cumulative offset (it includes bootime) expressed in (sec, 64 bits frac of a second) */
struct bintime boottime; /* boot time expressed in (sec, 64 bits frac of a second) */
+#if !HAS_CONTINUOUS_HWCLOCK
struct bintime basesleep;
+#endif
} clock_calend;
static uint64_t ticks_per_sec; /* ticks in a second (expressed in abs time) */
func, clock_calend_cp->boottime.sec, clock_calend_cp->boottime.frac,
(unsigned long)bootime_secs, bootime_microsecs);
+#if !HAS_CONTINUOUS_HWCLOCK
clock_sec_t basesleep_secs;
clock_usec_t basesleep_microsecs;
os_log(OS_LOG_DEFAULT, "%s basesleep.sec %ld basesleep.frac %llu basesleep_secs %lu basesleep_microsecs %d\n",
func, clock_calend_cp->basesleep.sec, clock_calend_cp->basesleep.frac,
(unsigned long)basesleep_secs, basesleep_microsecs);
+#endif
}
clock_usec_t utc_offset_microsecs;
spl_t s;
struct bintime bt;
+#if !HAS_CONTINUOUS_HWCLOCK
struct bintime monotonic_bt;
struct latched_time monotonic_time;
uint64_t monotonic_usec_total;
clock_usec_t microsys2, monotonic_usec;
size_t size;
+#endif
//Get the UTC time and corresponding sys time
PEGetUTCTimeOfDay(&secs, µsecs);
clock_get_system_microtime(&sys, µsys);
+#if !HAS_CONTINUOUS_HWCLOCK
/*
* If the platform has a monotonic clock, use kern.monotonicclock_usecs
* to estimate the sleep/wake time, otherwise use the UTC time to estimate
absolutetime_to_microtime(monotonic_time.mach_time, &sys2, µsys2);
os_log(OS_LOG_DEFAULT, "%s system has monotonic clock\n", __func__);
}
+#endif
s = splclock();
clock_lock();
clock_calend.s_scale_ns = NSEC_PER_SEC;
clock_calend.s_adj_nsx = 0;
+#if !HAS_CONTINUOUS_HWCLOCK
if (has_monotonic_clock) {
monotonic_sec = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC;
monotonic_usec = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC;
+ // set basesleep to the difference between the monotonic clock and sys
clock_calend.basesleep = monotonic_bt;
}
+#endif
commpage_update_mach_continuous_time(mach_absolutetime_asleep);
#if DEVELOPMENT || DEBUG
#endif
}
+#if HAS_CONTINUOUS_HWCLOCK
+
+static void
+scale_sleep_time(void)
+{
+ /* Apply the current NTP frequency adjustment to the time slept.
+ * The frequency adjustment remains stable between calls to ntp_adjtime(),
+ * and should thus provide a reasonable approximation of the total adjustment
+ * required for the time slept. */
+ struct bintime sleep_time;
+ uint64_t tick_scale_x, s_scale_ns;
+ int64_t s_adj_nsx;
+ int64_t sleep_adj = ntp_get_freq();
+ if (sleep_adj) {
+ get_scale_factors_from_adj(sleep_adj, &tick_scale_x, &s_scale_ns, &s_adj_nsx);
+ sleep_time = scale_delta(mach_absolutetime_last_sleep, tick_scale_x, s_scale_ns, s_adj_nsx);
+ } else {
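+ /*
+  * No NTP adjustment to apply: convert the ticks directly. tick_scale_x
+  * approximates 2^64 / ticks_per_sec (computed as (2^63 / ticks_per_sec) * 2
+  * to stay within 64 bits), so multiplying the leftover ticks by it yields
+  * the fractional second in bintime's 2^64-based fixed point.
+  */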
+ tick_scale_x = (uint64_t)1 << 63;
+ tick_scale_x /= ticks_per_sec;
+ tick_scale_x *= 2;
+ sleep_time.sec = mach_absolutetime_last_sleep / ticks_per_sec;
+ sleep_time.frac = (mach_absolutetime_last_sleep % ticks_per_sec) * tick_scale_x;
+ }
+ bintime_add(&clock_calend.offset, &sleep_time);
+ bintime_add(&clock_calend.bintime, &sleep_time);
+}
+
+void
+clock_wakeup_calendar(void)
+{
+ spl_t s;
+
+ s = splclock();
+ clock_lock();
+
+ commpage_disable_timestamp();
+
+ uint64_t abstime = mach_absolute_time();
+ uint64_t total_sleep_time = ml_get_hwclock() - abstime;
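+ /*
+  * The continuous hardware clock keeps advancing across sleep while
+  * mach_absolute_time() does not, so their difference is the cumulative
+  * time spent asleep; subtracting what has already been accounted for
+  * yields the duration of the most recent sleep.
+  */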
+
+ mach_absolutetime_last_sleep = total_sleep_time - mach_absolutetime_asleep;
+ mach_absolutetime_asleep = total_sleep_time;
+
+ scale_sleep_time();
+
+ KERNEL_DEBUG_CONSTANT(
+ MACHDBG_CODE(DBG_MACH_CLOCK, MACH_EPOCH_CHANGE) | DBG_FUNC_NONE,
+ (uintptr_t) mach_absolutetime_last_sleep,
+ (uintptr_t) mach_absolutetime_asleep,
+ (uintptr_t) (mach_absolutetime_last_sleep >> 32),
+ (uintptr_t) (mach_absolutetime_asleep >> 32),
+ 0);
+
+ commpage_update_mach_continuous_time(mach_absolutetime_asleep);
+ adjust_cont_time_thread_calls();
+
+ clock_unlock();
+ splx(s);
+
+ host_notify_calendar_change();
+
+#if CONFIG_DTRACE
+ clock_track_calend_nowait();
+#endif
+}
+
+#else /* HAS_CONTINUOUS_HWCLOCK */
void
clock_wakeup_calendar(void)
#endif
}
+#endif /* !HAS_CONTINUOUS_HWCLOCK */
/*
* clock_get_boottime_nanotime:
uint64_t
mach_continuous_time(void)
{
+#if HAS_CONTINUOUS_HWCLOCK
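+ /* The hardware clock keeps counting across sleep, so it is the continuous
+  * time by definition; no software accumulation of sleep time is needed. */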
+ return ml_get_hwclock();
+#else
while (1) {
uint64_t read1 = mach_absolutetime_asleep;
uint64_t absolute = mach_absolute_time();
return absolute + read1;
}
}
+#endif
}
uint64_t
mach_continuous_approximate_time(void)
{
+#if HAS_CONTINUOUS_HWCLOCK
+ return ml_get_hwclock();
+#else
while (1) {
uint64_t read1 = mach_absolutetime_asleep;
uint64_t absolute = mach_approximate_time();
return absolute + read1;
}
}
+#endif
}
/*
}
kern_return_t
-host_set_atm_diagnostic_flag(host_priv_t host_priv, uint32_t diagnostic_flag)
+host_set_atm_diagnostic_flag(host_t host, uint32_t diagnostic_flag)
{
- if (host_priv == HOST_PRIV_NULL) {
+ if (host == HOST_NULL) {
return KERN_INVALID_ARGUMENT;
}
- assert(host_priv == &realhost);
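+ /* Any host port is accepted; access is instead gated on the entitlement check below. */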
+ if (!IOTaskHasEntitlement(current_task(), "com.apple.private.set-atm-diagnostic-flag")) {
+ return KERN_NO_ACCESS;
+ }
#if CONFIG_ATM
return atm_set_diagnostic_config(diagnostic_flag);
ipc_kobject_type_t type,
ipc_kobject_alloc_options_t options)
{
- ipc_port_t port = ipc_port_alloc_kernel();
+ ipc_port_init_flags_t flags;
+ ipc_space_t space;
+ ipc_port_t port;
+ if (options & IPC_KOBJECT_ALLOC_IN_TRANSIT) {
+ /* kobject port intended to be copied out to user-space */
+ flags = IPC_PORT_INIT_MESSAGE_QUEUE;
+ space = IS_NULL;
+ } else {
+ /* true kernel-bound kobject port */
+ flags = IPC_PORT_INIT_NONE;
+ space = ipc_space_kernel;
+ }
+ port = ipc_port_alloc_special(space, flags);
if (port == IP_NULL) {
panic("ipc_kobject_alloc_port(): failed to allocate port");
}
if (options & IPC_KOBJECT_ALLOC_MAKE_SEND) {
ipc_port_make_send_locked(port);
}
- if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
- ipc_port_make_sonce_locked(port);
- port->ip_nsrequest = port;
- }
- if (options & IPC_KOBJECT_ALLOC_NO_GRANT) {
- port->ip_no_grant = 1;
+
+ if (options & IPC_KOBJECT_ALLOC_IN_TRANSIT) {
+ /* reset the port as if it had been copied in and circularity-checked */
+ if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
+ panic("ipc_kobject_alloc_port(): invalid option for user-space port");
+ }
+ port->ip_mscount = 0;
+ assert(port->ip_tempowner == 0);
+ assert(port->ip_receiver == IS_NULL);
+ port->ip_receiver = IS_NULL;
+ port->ip_receiver_name = MACH_PORT_NULL;
+ } else {
+ if (options & IPC_KOBJECT_ALLOC_NSREQUEST) {
+ ipc_port_make_sonce_locked(port);
+ port->ip_nsrequest = port;
+ }
}
if (options & IPC_KOBJECT_ALLOC_IMMOVABLE_SEND) {
port->ip_immovable_send = 1;
}
+ if (options & IPC_KOBJECT_ALLOC_NO_GRANT) {
+ port->ip_no_grant = 1;
+ }
return port;
}
IPC_KOBJECT_ALLOC_NO_GRANT = 0x00000004,
/* Make all the send rights immovable */
IPC_KOBJECT_ALLOC_IMMOVABLE_SEND = 0x00000008,
+ /* Make the port in-transit from the get-go */
+ IPC_KOBJECT_ALLOC_IN_TRANSIT = 0x00000010,
});
/* Allocates a kobject port, never fails */
stackshotbuf_size = get_stackshot_estsize(size_hint);
for (; stackshotbuf_size <= max_tracebuf_size; stackshotbuf_size <<= 1) {
- if (kmem_alloc(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
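+ /* KMA_ZERO zero-fills the buffer so unwritten regions cannot leak stale kernel memory into the copied-out stackshot. */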
+ if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size, VM_KERN_MEMORY_DIAG, KMA_ZERO) != KERN_SUCCESS) {
error = KERN_RESOURCE_SHORTAGE;
goto error_exit;
}
return MACH_PORT_NULL;
}
- result = mach_port_allocate_internal(myspace, MACH_PORT_RIGHT_RECEIVE,
- &mk_timer_qos, &name);
- if (result == KERN_SUCCESS) {
- result = ipc_port_translate_receive(myspace, name, &port);
- }
-
- if (result != KERN_SUCCESS) {
+ /* Pre-allocate a kmsg for the timer messages */
+ ipc_kmsg_t kmsg;
+ kmsg = ipc_kmsg_prealloc(mk_timer_qos.len + MAX_TRAILER_SIZE);
+ if (kmsg == IKM_NULL) {
zfree(mk_timer_zone, timer);
-
return MACH_PORT_NULL;
}
+ /* Allocate an in-transit kobject port with a send right */
+ ipc_kobject_alloc_options_t options;
+ options = (IPC_KOBJECT_ALLOC_IN_TRANSIT | IPC_KOBJECT_ALLOC_MAKE_SEND);
+ port = ipc_kobject_alloc_port((ipc_kobject_t)timer, IKOT_TIMER, options);
+ assert(port != IP_NULL);
+
+ /* Associate the kmsg */
+ ipc_kmsg_set_prealloc(kmsg, port);
+
+ /* Initialize the timer object and bind port to it */
simple_lock_init(&timer->lock, 0);
thread_call_setup(&timer->call_entry, mk_timer_expire, timer);
timer->is_armed = timer->is_dead = FALSE;
timer->active = 0;
-
timer->port = port;
- ipc_kobject_set_atomically(port, (ipc_kobject_t)timer, IKOT_TIMER);
- port->ip_srights++;
- ip_reference(port);
- ip_unlock(port);
+ /* Copyout the receive right for the timer port to user-space */
+ current_thread()->ith_knote = ITH_KNOTE_NULL;
+ result = ipc_object_copyout(myspace, ip_to_object(port),
+ MACH_MSG_TYPE_MOVE_RECEIVE,
+ NULL, NULL, &name);
+ if (result != KERN_SUCCESS) {
+ ipc_object_destroy(ip_to_object(port), MACH_MSG_TYPE_MOVE_RECEIVE);
+ /* should trigger mk_timer_port_destroy() call */
+ return MACH_PORT_NULL;
+ }
return name;
}
typedef enum {
PSET_SMP,
+#if __AMP__
+ PSET_AMP_E,
+ PSET_AMP_P,
+#endif
} pset_cluster_type_t;
typedef bitmap_t cpumap_t;
--- /dev/null
+/*
+ * Copyright (c) 2016 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/machine.h>
+
+#include <machine/machine_routines.h>
+#include <machine/sched_param.h>
+#include <machine/machine_cpu.h>
+
+#include <kern/kern_types.h>
+#include <kern/debug.h>
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
+#include <kern/processor.h>
+#include <kern/queue.h>
+#include <kern/sched.h>
+#include <kern/sched_prim.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/thread_group.h>
+#include <kern/sched_amp_common.h>
+
+#include <sys/kdebug.h>
+
+#if __AMP__
+
+static thread_t
+sched_amp_steal_thread(processor_set_t pset);
+
+static void
+sched_amp_thread_update_scan(sched_update_scan_context_t scan_context);
+
+static boolean_t
+sched_amp_processor_enqueue(processor_t processor, thread_t thread,
+ sched_options_t options);
+
+static boolean_t
+sched_amp_processor_queue_remove(processor_t processor, thread_t thread);
+
+static ast_t
+sched_amp_processor_csw_check(processor_t processor);
+
+static boolean_t
+sched_amp_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);
+
+static int
+sched_amp_runq_count(processor_t processor);
+
+static boolean_t
+sched_amp_processor_queue_empty(processor_t processor);
+
+static uint64_t
+sched_amp_runq_stats_count_sum(processor_t processor);
+
+static int
+sched_amp_processor_bound_count(processor_t processor);
+
+static void
+sched_amp_pset_init(processor_set_t pset);
+
+static void
+sched_amp_processor_init(processor_t processor);
+
+static thread_t
+sched_amp_choose_thread(processor_t processor, int priority, ast_t reason);
+
+static void
+sched_amp_processor_queue_shutdown(processor_t processor);
+
+static sched_mode_t
+sched_amp_initial_thread_sched_mode(task_t parent_task);
+
+static processor_t
+sched_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread);
+
+static bool
+sched_amp_thread_avoid_processor(processor_t processor, thread_t thread);
+
+static bool
+sched_amp_thread_should_yield(processor_t processor, thread_t thread);
+
+static void
+sched_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation);
+
+const struct sched_dispatch_table sched_amp_dispatch = {
+ .sched_name = "amp",
+ .init = sched_amp_init,
+ .timebase_init = sched_timeshare_timebase_init,
+ .processor_init = sched_amp_processor_init,
+ .pset_init = sched_amp_pset_init,
+ .maintenance_continuation = sched_timeshare_maintenance_continue,
+ .choose_thread = sched_amp_choose_thread,
+ .steal_thread_enabled = sched_amp_steal_thread_enabled,
+ .steal_thread = sched_amp_steal_thread,
+ .compute_timeshare_priority = sched_compute_timeshare_priority,
+ .choose_processor = sched_amp_choose_processor,
+ .processor_enqueue = sched_amp_processor_enqueue,
+ .processor_queue_shutdown = sched_amp_processor_queue_shutdown,
+ .processor_queue_remove = sched_amp_processor_queue_remove,
+ .processor_queue_empty = sched_amp_processor_queue_empty,
+ .priority_is_urgent = priority_is_urgent,
+ .processor_csw_check = sched_amp_processor_csw_check,
+ .processor_queue_has_priority = sched_amp_processor_queue_has_priority,
+ .initial_quantum_size = sched_timeshare_initial_quantum_size,
+ .initial_thread_sched_mode = sched_amp_initial_thread_sched_mode,
+ .can_update_priority = can_update_priority,
+ .update_priority = update_priority,
+ .lightweight_update_priority = lightweight_update_priority,
+ .quantum_expire = sched_default_quantum_expire,
+ .processor_runq_count = sched_amp_runq_count,
+ .processor_runq_stats_count_sum = sched_amp_runq_stats_count_sum,
+ .processor_bound_count = sched_amp_processor_bound_count,
+ .thread_update_scan = sched_amp_thread_update_scan,
+ .multiple_psets_enabled = TRUE,
+ .sched_groups_enabled = FALSE,
+ .avoid_processor_enabled = TRUE,
+ .thread_avoid_processor = sched_amp_thread_avoid_processor,
+ .processor_balance = sched_amp_balance,
+
+ .rt_runq = sched_amp_rt_runq,
+ .rt_init = sched_amp_rt_init,
+ .rt_queue_shutdown = sched_amp_rt_queue_shutdown,
+ .rt_runq_scan = sched_amp_rt_runq_scan,
+ .rt_runq_count_sum = sched_amp_rt_runq_count_sum,
+
+ .qos_max_parallelism = sched_amp_qos_max_parallelism,
+ .check_spill = sched_amp_check_spill,
+ .ipi_policy = sched_amp_ipi_policy,
+ .thread_should_yield = sched_amp_thread_should_yield,
+ .run_count_incr = sched_run_incr,
+ .run_count_decr = sched_run_decr,
+ .update_thread_bucket = sched_update_thread_bucket,
+ .pset_made_schedulable = sched_pset_made_schedulable,
+ .thread_group_recommendation_change = sched_amp_thread_group_recommendation_change,
+};
+
+extern processor_set_t ecore_set;
+extern processor_set_t pcore_set;
+
+__attribute__((always_inline))
+static inline run_queue_t
+amp_main_runq(processor_t processor)
+{
+ return &processor->processor_set->pset_runq;
+}
+
+__attribute__((always_inline))
+static inline run_queue_t
+amp_bound_runq(processor_t processor)
+{
+ return &processor->runq;
+}
+
+__attribute__((always_inline))
+static inline run_queue_t
+amp_runq_for_thread(processor_t processor, thread_t thread)
+{
+ if (thread->bound_processor == PROCESSOR_NULL) {
+ return amp_main_runq(processor);
+ } else {
+ assert(thread->bound_processor == processor);
+ return amp_bound_runq(processor);
+ }
+}
+
+static sched_mode_t
+sched_amp_initial_thread_sched_mode(task_t parent_task)
+{
+ if (parent_task == kernel_task) {
+ return TH_MODE_FIXED;
+ } else {
+ return TH_MODE_TIMESHARE;
+ }
+}
+
+static void
+sched_amp_processor_init(processor_t processor)
+{
+ run_queue_init(&processor->runq);
+}
+
+static void
+sched_amp_pset_init(processor_set_t pset)
+{
+ run_queue_init(&pset->pset_runq);
+}
+
+static thread_t
+sched_amp_choose_thread(
+ processor_t processor,
+ int priority,
+ __unused ast_t reason)
+{
+ processor_set_t pset = processor->processor_set;
+ bool spill_pending = false;
+ int spill_pri = -1;
+
+ if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
+ spill_pending = true;
+ spill_pri = pcore_set->pset_runq.highq;
+ }
+
+ run_queue_t main_runq = amp_main_runq(processor);
+ run_queue_t bound_runq = amp_bound_runq(processor);
+ run_queue_t chosen_runq;
+
+ if ((bound_runq->highq < priority) &&
+ (main_runq->highq < priority) &&
+ (spill_pri < priority)) {
+ return THREAD_NULL;
+ }
+
+ if ((spill_pri > bound_runq->highq) &&
+ (spill_pri > main_runq->highq)) {
+ /*
+ * There is a higher priority thread on the P-core runq,
+ * so returning THREAD_NULL here will cause thread_select()
+ * to call sched_amp_steal_thread() to try to get it.
+ */
+ return THREAD_NULL;
+ }
+
+ if (bound_runq->highq >= main_runq->highq) {
+ chosen_runq = bound_runq;
+ } else {
+ chosen_runq = main_runq;
+ }
+
+ return run_queue_dequeue(chosen_runq, SCHED_HEADQ);
+}
+
+static boolean_t
+sched_amp_processor_enqueue(
+ processor_t processor,
+ thread_t thread,
+ sched_options_t options)
+{
+ run_queue_t rq = amp_runq_for_thread(processor, thread);
+ boolean_t result;
+
+ result = run_queue_enqueue(rq, thread, options);
+ thread->runq = processor;
+
+ return result;
+}
+
+static boolean_t
+sched_amp_processor_queue_empty(processor_t processor)
+{
+ processor_set_t pset = processor->processor_set;
+ bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id);
+
+ return (amp_main_runq(processor)->count == 0) &&
+ (amp_bound_runq(processor)->count == 0) &&
+ !spill_pending;
+}
+
+static bool
+sched_amp_thread_should_yield(processor_t processor, thread_t thread)
+{
+ if (!sched_amp_processor_queue_empty(processor) || (rt_runq_count(processor->processor_set) > 0)) {
+ return true;
+ }
+
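+ /* A P-recommended thread running on an E-core yields whenever the P-cluster run queue is non-empty. */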
+ if ((processor->processor_set->pset_cluster_type == PSET_AMP_E) && (recommended_pset_type(thread) == PSET_AMP_P)) {
+ return pcore_set->pset_runq.count > 0;
+ }
+
+ return false;
+}
+
+static ast_t
+sched_amp_processor_csw_check(processor_t processor)
+{
+ boolean_t has_higher;
+ int pri;
+
+ run_queue_t main_runq = amp_main_runq(processor);
+ run_queue_t bound_runq = amp_bound_runq(processor);
+
+ assert(processor->active_thread != NULL);
+
+ processor_set_t pset = processor->processor_set;
+ bool spill_pending = false;
+ int spill_pri = -1;
+ int spill_urgency = 0;
+
+ if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
+ spill_pending = true;
+ spill_pri = pcore_set->pset_runq.highq;
+ spill_urgency = pcore_set->pset_runq.urgency;
+ }
+
+ pri = MAX(main_runq->highq, bound_runq->highq);
+ if (spill_pending) {
+ pri = MAX(pri, spill_pri);
+ }
+
+ if (processor->first_timeslice) {
+ has_higher = (pri > processor->current_pri);
+ } else {
+ has_higher = (pri >= processor->current_pri);
+ }
+
+ if (has_higher) {
+ if (main_runq->urgency > 0) {
+ return AST_PREEMPT | AST_URGENT;
+ }
+
+ if (bound_runq->urgency > 0) {
+ return AST_PREEMPT | AST_URGENT;
+ }
+
+ if (spill_urgency > 0) {
+ return AST_PREEMPT | AST_URGENT;
+ }
+
+ return AST_PREEMPT;
+ }
+
+ return AST_NONE;
+}
+
+static boolean_t
+sched_amp_processor_queue_has_priority(processor_t processor,
+ int priority,
+ boolean_t gte)
+{
+ bool spill_pending = false;
+ int spill_pri = -1;
+ processor_set_t pset = processor->processor_set;
+
+ if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
+ spill_pending = true;
+ spill_pri = pcore_set->pset_runq.highq;
+ }
+ run_queue_t main_runq = amp_main_runq(processor);
+ run_queue_t bound_runq = amp_bound_runq(processor);
+
+ int qpri = MAX(main_runq->highq, bound_runq->highq);
+ if (spill_pending) {
+ qpri = MAX(qpri, spill_pri);
+ }
+
+ if (gte) {
+ return qpri >= priority;
+ } else {
+ return qpri > priority;
+ }
+}
+
+static int
+sched_amp_runq_count(processor_t processor)
+{
+ return amp_main_runq(processor)->count + amp_bound_runq(processor)->count;
+}
+
+static uint64_t
+sched_amp_runq_stats_count_sum(processor_t processor)
+{
+ uint64_t bound_sum = amp_bound_runq(processor)->runq_stats.count_sum;
+
+ if (processor->cpu_id == processor->processor_set->cpu_set_low) {
+ return bound_sum + amp_main_runq(processor)->runq_stats.count_sum;
+ } else {
+ return bound_sum;
+ }
+}
+static int
+sched_amp_processor_bound_count(processor_t processor)
+{
+ return amp_bound_runq(processor)->count;
+}
+
+static void
+sched_amp_processor_queue_shutdown(processor_t processor)
+{
+ processor_set_t pset = processor->processor_set;
+ run_queue_t rq = amp_main_runq(processor);
+ thread_t thread;
+ queue_head_t tqueue;
+
+ /* We only need to migrate threads if this is the last active or last recommended processor in the pset */
+ if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) {
+ pset_unlock(pset);
+ return;
+ }
+
+ queue_init(&tqueue);
+
+ while (rq->count > 0) {
+ thread = run_queue_dequeue(rq, SCHED_HEADQ);
+ enqueue_tail(&tqueue, &thread->runq_links);
+ }
+
+ pset_unlock(pset);
+
+ qe_foreach_element_safe(thread, &tqueue, runq_links) {
+ remqueue(&thread->runq_links);
+
+ thread_lock(thread);
+
+ thread_setrun(thread, SCHED_TAILQ);
+
+ thread_unlock(thread);
+ }
+}
+
+static boolean_t
+sched_amp_processor_queue_remove(
+ processor_t processor,
+ thread_t thread)
+{
+ run_queue_t rq;
+ processor_set_t pset = processor->processor_set;
+
+ pset_lock(pset);
+
+ rq = amp_runq_for_thread(processor, thread);
+
+ if (processor == thread->runq) {
+ /*
+ * Thread is on a run queue and we have a lock on
+ * that run queue.
+ */
+ run_queue_remove(rq, thread);
+ } else {
+ /*
+ * The thread left the run queue before we could
+ * lock the run queue.
+ */
+ assert(thread->runq == PROCESSOR_NULL);
+ processor = PROCESSOR_NULL;
+ }
+
+ pset_unlock(pset);
+
+ return processor != PROCESSOR_NULL;
+}
+
+/*
+ * sched_amp_steal_thread()
+ *
+ */
+thread_t
+sched_amp_steal_thread(processor_set_t pset)
+{
+ thread_t thread = THREAD_NULL;
+ processor_set_t nset = pset;
+
+ assert(pset->pset_cluster_type != PSET_AMP_P);
+
+ processor_t processor = current_processor();
+ assert(pset == processor->processor_set);
+
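+ /* Consume any pending spill signal for this CPU; spill_pending selects the spill-steal threshold rather than the idle-steal threshold below. */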
+ bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id);
+ bit_clear(pset->pending_spill_cpu_mask, processor->cpu_id);
+
+ nset = pcore_set;
+
+ assert(nset != pset);
+
+ if (sched_get_pset_load_average(nset) >= sched_amp_steal_threshold(nset, spill_pending)) {
+ pset_unlock(pset);
+
+ pset = nset;
+
+ pset_lock(pset);
+
+ /* Allow steal if load average still OK, no idle cores, and more threads on runq than active cores DISPATCHING */
+ if ((sched_get_pset_load_average(pset) >= sched_amp_steal_threshold(pset, spill_pending)) &&
+ (pset->pset_runq.count > bit_count(pset->cpu_state_map[PROCESSOR_DISPATCHING])) &&
+ (bit_count(pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) == 0)) {
+ thread = run_queue_dequeue(&pset->pset_runq, SCHED_HEADQ);
+ KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_STEAL) | DBG_FUNC_NONE, spill_pending, 0, 0, 0);
+ sched_update_pset_load_average(pset);
+ }
+ }
+
+ pset_unlock(pset);
+ return thread;
+}
+
+
+
+static void
+sched_amp_thread_update_scan(sched_update_scan_context_t scan_context)
+{
+ boolean_t restart_needed = FALSE;
+ processor_t processor = processor_list;
+ processor_set_t pset;
+ thread_t thread;
+ spl_t s;
+
+ /*
+ * We update the threads associated with each processor (bound and idle threads)
+ * and then update the threads in each pset runqueue.
+ */
+
+ do {
+ do {
+ pset = processor->processor_set;
+
+ s = splsched();
+ pset_lock(pset);
+
+ restart_needed = runq_scan(amp_bound_runq(processor), scan_context);
+
+ pset_unlock(pset);
+ splx(s);
+
+ if (restart_needed) {
+ break;
+ }
+
+ thread = processor->idle_thread;
+ if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
+ if (thread_update_add_thread(thread) == FALSE) {
+ restart_needed = TRUE;
+ break;
+ }
+ }
+ } while ((processor = processor->processor_list) != NULL);
+
+ /* Ok, we now have a collection of candidates -- fix them. */
+ thread_update_process_threads();
+ } while (restart_needed);
+
+ pset_node_t node = &pset_node0;
+ pset = node->psets;
+
+ do {
+ do {
+ restart_needed = FALSE;
+ while (pset != NULL) {
+ s = splsched();
+ pset_lock(pset);
+
+ restart_needed = runq_scan(&pset->pset_runq, scan_context);
+
+ pset_unlock(pset);
+ splx(s);
+
+ if (restart_needed) {
+ break;
+ }
+
+ pset = pset->pset_list;
+ }
+
+ if (restart_needed) {
+ break;
+ }
+ } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));
+
+ /* Ok, we now have a collection of candidates -- fix them. */
+ thread_update_process_threads();
+ } while (restart_needed);
+}
+
+static bool
+pcores_recommended(thread_t thread)
+{
+ if (pcore_set->online_processor_count == 0) {
+ /* No pcores available */
+ return false;
+ }
+
+ if (!pset_is_recommended(ecore_set)) {
+ /* No E cores recommended, must use P cores */
+ return true;
+ }
+
+ if (recommended_pset_type(thread) == PSET_AMP_E) {
+ return false;
+ }
+
+ return pset_is_recommended(pcore_set);
+}
+
+/* Return true if this thread should not continue running on this processor */
+static bool
+sched_amp_thread_avoid_processor(processor_t processor, thread_t thread)
+{
+ if (processor->processor_set->pset_cluster_type == PSET_AMP_E) {
+ if (pcores_recommended(thread)) {
+ return true;
+ }
+ } else if (processor->processor_set->pset_cluster_type == PSET_AMP_P) {
+ if (!pcores_recommended(thread)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static processor_t
+sched_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread)
+{
+ /* Bound threads don't call this function */
+ assert(thread->bound_processor == PROCESSOR_NULL);
+
+ processor_set_t nset = pset;
+ bool choose_pcores;
+
+again:
+ choose_pcores = pcores_recommended(thread);
+
+ if (choose_pcores && (pset->pset_cluster_type != PSET_AMP_P)) {
+ nset = pcore_set;
+ assert(nset != NULL);
+ } else if (!choose_pcores && (pset->pset_cluster_type != PSET_AMP_E)) {
+ nset = ecore_set;
+ assert(nset != NULL);
+ }
+
+ if (nset != pset) {
+ pset_unlock(pset);
+ pset_lock(nset);
+ }
+
+ /* Now that the chosen pset is definitely locked, make sure nothing important has changed */
+ if (!pset_is_recommended(nset)) {
+ pset = nset;
+ goto again;
+ }
+
+ return choose_processor(nset, processor, thread);
+}
+
+void
+sched_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation)
+{
+ thread_group_update_recommendation(tg, new_recommendation);
+
+ if (new_recommendation != CLUSTER_TYPE_P) {
+ return;
+ }
+
+ sched_amp_bounce_thread_group_from_ecores(ecore_set, tg);
+}
+
+#if DEVELOPMENT || DEBUG
+extern int32_t sysctl_get_bound_cpuid(void);
+int32_t
+sysctl_get_bound_cpuid(void)
+{
+ int32_t cpuid = -1;
+ thread_t self = current_thread();
+
+ processor_t processor = self->bound_processor;
+ if (processor == NULL) {
+ cpuid = -1;
+ } else {
+ cpuid = processor->cpu_id;
+ }
+
+ return cpuid;
+}
+
+extern void sysctl_thread_bind_cpuid(int32_t cpuid);
+void
+sysctl_thread_bind_cpuid(int32_t cpuid)
+{
+ if (cpuid < 0 || cpuid >= MAX_SCHED_CPUS) {
+ return;
+ }
+
+ processor_t processor = processor_array[cpuid];
+ if (processor == PROCESSOR_NULL) {
+ return;
+ }
+
+ thread_bind(processor);
+
+ thread_block(THREAD_CONTINUE_NULL);
+}
+
+extern char sysctl_get_bound_cluster_type(void);
+char
+sysctl_get_bound_cluster_type(void)
+{
+ thread_t self = current_thread();
+
+ if (self->sched_flags & TH_SFLAG_ECORE_ONLY) {
+ return 'E';
+ } else if (self->sched_flags & TH_SFLAG_PCORE_ONLY) {
+ return 'P';
+ }
+
+ return '0';
+}
+
+extern void sysctl_thread_bind_cluster_type(char cluster_type);
+void
+sysctl_thread_bind_cluster_type(char cluster_type)
+{
+ thread_bind_cluster_type(cluster_type);
+}
+
+extern char sysctl_get_task_cluster_type(void);
+char
+sysctl_get_task_cluster_type(void)
+{
+ thread_t thread = current_thread();
+ task_t task = thread->task;
+
+ if (task->pset_hint == ecore_set) {
+ return 'E';
+ } else if (task->pset_hint == pcore_set) {
+ return 'P';
+ }
+
+ return '0';
+}
+
+extern void sysctl_task_set_cluster_type(char cluster_type);
+void
+sysctl_task_set_cluster_type(char cluster_type)
+{
+ thread_t thread = current_thread();
+ task_t task = thread->task;
+
+ switch (cluster_type) {
+ case 'e':
+ case 'E':
+ task->pset_hint = ecore_set;
+ break;
+ case 'p':
+ case 'P':
+ task->pset_hint = pcore_set;
+ break;
+ default:
+ break;
+ }
+
+ thread_block(THREAD_CONTINUE_NULL);
+}
+#endif
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/machine.h>
+#include <machine/machine_routines.h>
+#include <machine/sched_param.h>
+#include <machine/machine_cpu.h>
+#include <kern/kern_types.h>
+#include <kern/debug.h>
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
+#include <kern/processor.h>
+#include <kern/queue.h>
+#include <kern/sched.h>
+#include <kern/sched_prim.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <machine/atomic.h>
+#include <sys/kdebug.h>
+#include <kern/sched_amp_common.h>
+
+#if __AMP__
+
+/* Exported globals */
+processor_set_t ecore_set = NULL;
+processor_set_t pcore_set = NULL;
+
+static struct processor_set pset1;
+static struct pset_node pset_node1;
+
+#if DEVELOPMENT || DEBUG
+bool system_ecore_only = false;
+#endif /* DEVELOPMENT || DEBUG */
+
+/*
+ * sched_amp_init()
+ *
+ * Initialize the pcore_set and ecore_set globals which describe the
+ * P/E processor sets.
+ */
+void
+sched_amp_init(void)
+{
+ pset_init(&pset1, &pset_node1);
+ pset_node1.psets = &pset1;
+ pset_node0.node_list = &pset_node1;
+
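+ /* pset0 describes the boot cluster, so assign the P/E identities based on which cluster type the system booted on. */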
+ if (ml_get_boot_cluster() == CLUSTER_TYPE_P) {
+ pcore_set = &pset0;
+ ecore_set = &pset1;
+ } else {
+ ecore_set = &pset0;
+ pcore_set = &pset1;
+ }
+
+ ecore_set->pset_cluster_type = PSET_AMP_E;
+ ecore_set->pset_cluster_id = 0;
+
+ pcore_set->pset_cluster_type = PSET_AMP_P;
+ pcore_set->pset_cluster_id = 1;
+
+#if !CONFIG_SCHED_CLUTCH
+ /*
+ * For the non-clutch scheduler, allow the system to run E-core only.
+ * Clutch scheduler support for this feature still needs to be implemented.
+ */
+#if DEVELOPMENT || DEBUG
+ if (PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
+ system_ecore_only = true;
+ }
+#endif /* DEVELOPMENT || DEBUG */
+
+#endif /* !CONFIG_SCHED_CLUTCH */
+ sched_timeshare_init();
+}
+
+/* Spill threshold load average is (ncpus in pset) + (sched_amp_spill_count / (1 << PSET_LOAD_FRACTIONAL_SHIFT)) */
+int sched_amp_spill_count = 3;
+int sched_amp_idle_steal = 1;
+int sched_amp_spill_steal = 1;
+
+/*
+ * We see performance gains from using immediate IPIs to P-cores to run
+ * P-eligible threads, and fewer P-to-E migrations from using deferred IPIs
+ * for spill.
+ */
+int sched_amp_spill_deferred_ipi = 1;
+int sched_amp_pcores_preempt_immediate_ipi = 1;
+
+
+/*
+ * sched_amp_spill_threshold()
+ *
+ * Routine to calculate the spill threshold, which decides whether a cluster should spill.
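+ *
+ * For illustration (assumed values, not the actual tunables): with 4
+ * recommended CPUs and PSET_LOAD_FRACTIONAL_SHIFT == 8, the threshold is
+ * (4 << 8) + sched_amp_spill_count = 1024 + 3 = 1027 in fixed-point
+ * load-average units, i.e. just over one runnable thread per recommended CPU.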
+ */
+int
+sched_amp_spill_threshold(processor_set_t pset)
+{
+ int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);
+
+ return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + sched_amp_spill_count;
+}
+
+/*
+ * pset_signal_spill()
+ *
+ * Routine to signal a running/idle CPU to cause a spill onto that CPU.
+ * Called with pset locked, returns unlocked
+ */
+void
+pset_signal_spill(processor_set_t pset, int spilled_thread_priority)
+{
+ processor_t processor;
+ sched_ipi_type_t ipi_type = SCHED_IPI_NONE;
+
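+ /* First choice: an idle, recommended CPU; dispatch it directly so it can pick up the spilled thread. */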
+ uint64_t idle_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE];
+ for (int cpuid = lsb_first(idle_map); cpuid >= 0; cpuid = lsb_next(idle_map, cpuid)) {
+ processor = processor_array[cpuid];
+ if (bit_set_if_clear(pset->pending_spill_cpu_mask, processor->cpu_id)) {
+ KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 0, 0, 0);
+
+ processor->deadline = UINT64_MAX;
+ pset_update_processor_state(pset, processor, PROCESSOR_DISPATCHING);
+
+ if (processor == current_processor()) {
+ bit_set(pset->pending_AST_URGENT_cpu_mask, processor->cpu_id);
+ } else {
+ ipi_type = sched_ipi_action(processor, NULL, true, SCHED_IPI_EVENT_SPILL);
+ }
+ pset_unlock(pset);
+ sched_ipi_perform(processor, ipi_type);
+ return;
+ }
+ }
+
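+ /* No idle CPU available: look for a running CPU that is below the spilled thread's priority and is not already handling a spill. */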
+ processor_t ast_processor = NULL;
+ uint64_t running_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING];
+ for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
+ processor = processor_array[cpuid];
+ if (processor->current_recommended_pset_type == PSET_AMP_P) {
+ /* Already running a spilled P-core recommended thread */
+ continue;
+ }
+ if (bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
+ /* Already received a spill signal */
+ continue;
+ }
+ if (processor->current_pri >= spilled_thread_priority) {
+ /* Already running a higher or equal priority thread */
+ continue;
+ }
+
+ /* Found a suitable processor */
+ bit_set(pset->pending_spill_cpu_mask, processor->cpu_id);
+ KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 1, 0, 0);
+ if (processor == current_processor()) {
+ ast_on(AST_PREEMPT);
+ }
+ ipi_type = sched_ipi_action(processor, NULL, false, SCHED_IPI_EVENT_SPILL);
+ if (ipi_type != SCHED_IPI_NONE) {
+ ast_processor = processor;
+ }
+ break;
+ }
+
+ pset_unlock(pset);
+ sched_ipi_perform(ast_processor, ipi_type);
+}
+
+/*
+ * pset_should_accept_spilled_thread()
+ *
+ * Routine to decide if pset should accept spilled threads.
+ * This function must be safe to call (to use as a hint) without holding the pset lock.
+ */
+bool
+pset_should_accept_spilled_thread(processor_set_t pset, int spilled_thread_priority)
+{
+ if ((pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
+ return true;
+ }
+
+ uint64_t cpu_map = (pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING]);
+
+ for (int cpuid = lsb_first(cpu_map); cpuid >= 0; cpuid = lsb_next(cpu_map, cpuid)) {
+ processor_t processor = processor_array[cpuid];
+
+ if (processor->current_recommended_pset_type == PSET_AMP_P) {
+ /* This processor is already running a spilled thread */
+ continue;
+ }
+
+ if (processor->current_pri < spilled_thread_priority) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * should_spill_to_ecores()
+ *
+ * Spill policy is implemented here
+ */
+bool
+should_spill_to_ecores(processor_set_t nset, thread_t thread)
+{
+ if (nset->pset_cluster_type == PSET_AMP_E) {
+ /* Not relevant if ecores already preferred */
+ return false;
+ }
+
+ if (!pset_is_recommended(ecore_set)) {
+ /* E cores must be recommended */
+ return false;
+ }
+
+#if !CONFIG_SCHED_CLUTCH
+ /* Per-thread P-core scheduling support needs to be implemented for clutch scheduler */
+ if (thread->sched_flags & TH_SFLAG_PCORE_ONLY) {
+ return false;
+ }
+#endif /* !CONFIG_SCHED_CLUTCH */
+
+ if (thread->sched_pri >= BASEPRI_RTQUEUES) {
+ /* Never spill realtime threads */
+ return false;
+ }
+
+ if ((nset->recommended_bitmask & nset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
+ /* Don't spill if idle cores */
+ return false;
+ }
+
+ if ((sched_get_pset_load_average(nset) >= sched_amp_spill_threshold(nset)) && /* There is already a load on P cores */
+ pset_should_accept_spilled_thread(ecore_set, thread->sched_pri)) { /* There are lower priority E cores */
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * sched_amp_check_spill()
+ *
+ * Routine to check if the thread should be spilled and signal the pset if needed.
+ */
+void
+sched_amp_check_spill(processor_set_t pset, thread_t thread)
+{
+ /* pset is unlocked */
+
+ /* Bound threads don't call this function */
+ assert(thread->bound_processor == PROCESSOR_NULL);
+
+ if (should_spill_to_ecores(pset, thread)) {
+ pset_lock(ecore_set);
+
+ pset_signal_spill(ecore_set, thread->sched_pri);
+ /* returns with ecore_set unlocked */
+ }
+}
+
+/*
+ * sched_amp_steal_threshold()
+ *
+ * Routine to calculate the steal threshold
+ */
+int
+sched_amp_steal_threshold(processor_set_t pset, bool spill_pending)
+{
+ int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);
+
+ return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + (spill_pending ? sched_amp_spill_steal : sched_amp_idle_steal);
+}
+
+/*
+ * sched_amp_steal_thread_enabled()
+ *
+ */
+bool
+sched_amp_steal_thread_enabled(processor_set_t pset)
+{
+ return (pset->pset_cluster_type == PSET_AMP_E) && (pcore_set->online_processor_count > 0);
+}
+
+/*
+ * sched_amp_balance()
+ *
+ * Invoked with pset locked, returns with pset unlocked
+ */
+void
+sched_amp_balance(processor_t cprocessor, processor_set_t cpset)
+{
+ assert(cprocessor == current_processor());
+
+ pset_unlock(cpset);
+
+ if (cpset->pset_cluster_type == PSET_AMP_E || !cprocessor->is_recommended) {
+ return;
+ }
+
+ /*
+ * cprocessor is an idle, recommended P core processor.
+ * Look for P-eligible threads that have spilled to an E core
+ * and coax them to come back.
+ */
+
+ processor_set_t pset = ecore_set;
+
+ pset_lock(pset);
+
+ processor_t eprocessor;
+ uint64_t ast_processor_map = 0;
+
+ sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};
+ uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
+ for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
+ eprocessor = processor_array[cpuid];
+ if ((eprocessor->current_pri < BASEPRI_RTQUEUES) &&
+ (eprocessor->current_recommended_pset_type == PSET_AMP_P)) {
+ ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, false, SCHED_IPI_EVENT_REBALANCE);
+ if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
+ bit_set(ast_processor_map, eprocessor->cpu_id);
+ assert(eprocessor != cprocessor);
+ }
+ }
+ }
+
+ pset_unlock(pset);
+
+ for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
+ processor_t ast_processor = processor_array[cpuid];
+ sched_ipi_perform(ast_processor, ipi_type[cpuid]);
+ }
+}
+
+/*
+ * Helper function for sched_amp_thread_group_recommendation_change()
+ * Find all the cores in the pset running threads from the thread_group tg
+ * and send them a rebalance interrupt.
+ */
+void
+sched_amp_bounce_thread_group_from_ecores(processor_set_t pset, struct thread_group *tg)
+{
+ assert(pset->pset_cluster_type == PSET_AMP_E);
+ uint64_t ast_processor_map = 0;
+ sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};
+
+ spl_t s = splsched();
+ pset_lock(pset);
+
+ uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
+ for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
+ processor_t eprocessor = processor_array[cpuid];
+ if (eprocessor->current_thread_group == tg) {
+ ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, false, SCHED_IPI_EVENT_REBALANCE);
+ if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
+ bit_set(ast_processor_map, eprocessor->cpu_id);
+ } else if (eprocessor == current_processor()) {
+ ast_on(AST_PREEMPT);
+ bit_set(pset->pending_AST_PREEMPT_cpu_mask, eprocessor->cpu_id);
+ }
+ }
+ }
+
+ KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_RECOMMENDATION_CHANGE) | DBG_FUNC_NONE, tg, ast_processor_map, 0, 0);
+
+ pset_unlock(pset);
+
+ for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
+ processor_t ast_processor = processor_array[cpuid];
+ sched_ipi_perform(ast_processor, ipi_type[cpuid]);
+ }
+
+ splx(s);
+}
+
+/*
+ * sched_amp_ipi_policy()
+ */
+sched_ipi_type_t
+sched_amp_ipi_policy(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event)
+{
+ processor_set_t pset = dst->processor_set;
+ assert(bit_test(pset->pending_AST_URGENT_cpu_mask, dst->cpu_id) == false);
+ assert(dst != current_processor());
+
+ boolean_t deferred_ipi_supported = false;
+#if defined(CONFIG_SCHED_DEFERRED_AST)
+ deferred_ipi_supported = true;
+#endif /* CONFIG_SCHED_DEFERRED_AST */
+
+ switch (event) {
+ case SCHED_IPI_EVENT_SPILL:
+ /* For Spill event, use deferred IPIs if sched_amp_spill_deferred_ipi set */
+ if (deferred_ipi_supported && sched_amp_spill_deferred_ipi) {
+ return sched_ipi_deferred_policy(pset, dst, event);
+ }
+ break;
+ case SCHED_IPI_EVENT_PREEMPT:
+ /* For preemption, the default policy is to use deferred IPIs
+ * for Non-RT P-core preemption. Override that behavior if
+ * sched_amp_pcores_preempt_immediate_ipi is set
+ */
+ if (thread && thread->sched_pri < BASEPRI_RTQUEUES) {
+ if (sched_amp_pcores_preempt_immediate_ipi && (pset == pcore_set)) {
+ return dst_idle ? SCHED_IPI_IDLE : SCHED_IPI_IMMEDIATE;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ /* Default back to the global policy for all other scenarios */
+ return sched_ipi_policy(dst, thread, dst_idle, event);
+}
+
+/*
+ * sched_amp_qos_max_parallelism()
+ */
+uint32_t
+sched_amp_qos_max_parallelism(int qos, uint64_t options)
+{
+ uint32_t ecount = ecore_set->cpu_set_count;
+ uint32_t pcount = pcore_set->cpu_set_count;
+
+ if (options & QOS_PARALLELISM_REALTIME) {
+ /* For realtime threads on AMP, limit the parallelism
+ * width to just the P-cores, since we do not
+ * spill/rebalance RT threads.
+ */
+ return pcount;
+ }
+
+ /*
+ * The current AMP scheduler policy is to not run
+ * background and utility threads on the P-cores.
+ */
+ switch (qos) {
+ case THREAD_QOS_UTILITY:
+ case THREAD_QOS_BACKGROUND:
+ case THREAD_QOS_MAINTENANCE:
+ return ecount;
+ default:
+ return ecount + pcount;
+ }
+}
+
+/*
+ * sched_amp_rt_runq()
+ */
+rt_queue_t
+sched_amp_rt_runq(processor_set_t pset)
+{
+ return &pset->rt_runq;
+}
+
+/*
+ * sched_amp_rt_init()
+ */
+void
+sched_amp_rt_init(processor_set_t pset)
+{
+ pset_rt_init(pset);
+}
+
+/*
+ * sched_amp_rt_queue_shutdown()
+ */
+void
+sched_amp_rt_queue_shutdown(processor_t processor)
+{
+ processor_set_t pset = processor->processor_set;
+ thread_t thread;
+ queue_head_t tqueue;
+
+ pset_lock(pset);
+
+ /* We only need to migrate threads if this is the last active or last recommended processor in the pset */
+ if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) {
+ pset_unlock(pset);
+ return;
+ }
+
+ queue_init(&tqueue);
+
+ rt_lock_lock(pset);
+
+ while (rt_runq_count(pset) > 0) {
+ thread = qe_dequeue_head(&pset->rt_runq.queue, struct thread, runq_links);
+ thread->runq = PROCESSOR_NULL;
+ SCHED_STATS_RUNQ_CHANGE(&pset->rt_runq.runq_stats, pset->rt_runq.count);
+ rt_runq_count_decr(pset);
+ enqueue_tail(&tqueue, &thread->runq_links);
+ }
+ rt_lock_unlock(pset);
+ sched_update_pset_load_average(pset);
+ pset_unlock(pset);
+
+ qe_foreach_element_safe(thread, &tqueue, runq_links) {
+ remqueue(&thread->runq_links);
+
+ thread_lock(thread);
+
+ thread_setrun(thread, SCHED_TAILQ);
+
+ thread_unlock(thread);
+ }
+}
+
+/*
+ * sched_amp_rt_runq_scan()
+ *
+ * Assumes RT lock is not held, and acquires splsched/rt_lock itself
+ */
+void
+sched_amp_rt_runq_scan(sched_update_scan_context_t scan_context)
+{
+ thread_t thread;
+
+ pset_node_t node = &pset_node0;
+ processor_set_t pset = node->psets;
+
+ spl_t s = splsched();
+ do {
+ while (pset != NULL) {
+ rt_lock_lock(pset);
+
+ qe_foreach_element_safe(thread, &pset->rt_runq.queue, runq_links) {
+ if (thread->last_made_runnable_time < scan_context->earliest_rt_make_runnable_time) {
+ scan_context->earliest_rt_make_runnable_time = thread->last_made_runnable_time;
+ }
+ }
+
+ rt_lock_unlock(pset);
+
+ pset = pset->pset_list;
+ }
+ } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));
+ splx(s);
+}
+
+/*
+ * sched_amp_rt_runq_count_sum()
+ */
+int64_t
+sched_amp_rt_runq_count_sum(void)
+{
+ pset_node_t node = &pset_node0;
+ processor_set_t pset = node->psets;
+ int64_t count = 0;
+
+ do {
+ while (pset != NULL) {
+ count += pset->rt_runq.runq_stats.count_sum;
+
+ pset = pset->pset_list;
+ }
+ } while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));
+
+ return count;
+}
+
+#endif /* __AMP__ */
--- /dev/null
+/*
+ * Copyright (c) 2019 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _KERN_SCHED_AMP_COMMON_H_
+#define _KERN_SCHED_AMP_COMMON_H_
+
+#if __AMP__
+
+/* Routine to initialize processor sets on AMP platforms */
+void sched_amp_init(void);
+
+/*
+ * The AMP scheduler uses spill/steal/rebalance logic to make sure the most appropriate threads
+ * are scheduled on the P/E clusters. Here are the definitions of those terms:
+ *
+ * - Spill: Spill threads from an overcommitted P-cluster onto the E-cluster. This is needed to make sure
+ * that high priority P-recommended threads experience low scheduling latency in the presence of
+ * lots of P-recommended threads.
+ *
+ * - Steal: From an E-core, steal a thread from the P-cluster to provide low scheduling latency for
+ * P-recommended threads.
+ *
+ * - Rebalance: Once a P-core goes idle, check if the E-cores are running any P-recommended threads and
+ *   bring them back to run on their recommended cluster type.
+ */
+
+/* Spill logic */
+int sched_amp_spill_threshold(processor_set_t pset);
+void pset_signal_spill(processor_set_t pset, int spilled_thread_priority);
+bool pset_should_accept_spilled_thread(processor_set_t pset, int spilled_thread_priority);
+bool should_spill_to_ecores(processor_set_t nset, thread_t thread);
+void sched_amp_check_spill(processor_set_t pset, thread_t thread);
+
+/* Steal logic */
+int sched_amp_steal_threshold(processor_set_t pset, bool spill_pending);
+bool sched_amp_steal_thread_enabled(processor_set_t pset);
+
+/* Rebalance logic */
+void sched_amp_balance(processor_t cprocessor, processor_set_t cpset);
+
+/* IPI policy */
+sched_ipi_type_t sched_amp_ipi_policy(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event);
+
+/* AMP realtime runq management */
+rt_queue_t sched_amp_rt_runq(processor_set_t pset);
+void sched_amp_rt_init(processor_set_t pset);
+void sched_amp_rt_queue_shutdown(processor_t processor);
+void sched_amp_rt_runq_scan(sched_update_scan_context_t scan_context);
+int64_t sched_amp_rt_runq_count_sum(void);
+
+uint32_t sched_amp_qos_max_parallelism(int qos, uint64_t options);
+void sched_amp_bounce_thread_group_from_ecores(processor_set_t pset, struct thread_group *tg);
+
+#endif /* __AMP__ */
+
+#endif /* _KERN_SCHED_AMP_COMMON_H_ */
#include <kern/sched_clutch.h>
#include <sys/kdebug.h>
+#if __AMP__
+#include <kern/sched_amp_common.h>
+#endif /* __AMP__ */
#if CONFIG_SCHED_CLUTCH
static uint32_t sched_clutch_root_count_sum(sched_clutch_root_t);
static int sched_clutch_root_priority(sched_clutch_root_t);
+#if __AMP__
+/* System based routines */
+static bool sched_clutch_pset_available(processor_set_t);
+#endif /* __AMP__ */
/* Helper debugging routines */
static inline void sched_clutch_hierarchy_locked_assert(sched_clutch_root_t);
}
}
+#if __AMP__
+
+/*
+ * sched_clutch_pset_available()
+ *
+ * Routine to determine if a pset is available for scheduling.
+ */
+static bool
+sched_clutch_pset_available(processor_set_t pset)
+{
+ /* Check if cluster has none of the CPUs available */
+ if (pset->online_processor_count == 0) {
+ return false;
+ }
+
+ /* Check if the cluster is not recommended by CLPC */
+ if (!pset_is_recommended(pset)) {
+ return false;
+ }
+
+ return true;
+}
+
+#endif /* __AMP__ */
/*
* sched_clutch_root_init()
assert(os_atomic_load(&clutch->sc_thr_count, relaxed) == 0);
}
+#if __AMP__
+
+/*
+ * sched_clutch_bucket_foreign()
+ *
+ * Identifies whether the clutch bucket is foreign to (i.e. not recommended
+ * for) this hierarchy. This can happen when the recommended hierarchy/pset
+ * is not currently available for scheduling.
+ */
+static boolean_t
+sched_clutch_bucket_foreign(sched_clutch_root_t root_clutch, sched_clutch_bucket_t clutch_bucket)
+{
+ assert(clutch_bucket->scb_thr_count > 0);
+ if (!sched_clutch_pset_available(root_clutch->scr_pset)) {
+ /*
+ * Even though the pset was not available for scheduling, threads
+ * are still being put on its runq (this might be because the other
+ * pset is turned off and this is the master processor's pset).
+ * Mark the clutch bucket as foreign so that when the other pset
+ * becomes available, the clutch bucket is moved accordingly.
+ */
+ return true;
+ }
+ thread_t thread = run_queue_peek(&clutch_bucket->scb_runq);
+ pset_cluster_type_t pset_type = recommended_pset_type(thread);
+ return pset_type != root_clutch->scr_pset->pset_cluster_type;
+}
+
+#endif /* __AMP__ */
/*
* sched_clutch_bucket_hierarchy_insert()
/* Enqueue the timeshare clutch buckets into the global runnable clutch_bucket list; used for sched tick operations */
enqueue_tail(&root_clutch->scr_clutch_buckets, &clutch_bucket->scb_listlink);
}
+#if __AMP__
+ /* Check if the bucket is a foreign clutch bucket and add it to the foreign buckets list */
+ if (sched_clutch_bucket_foreign(root_clutch, clutch_bucket)) {
+ clutch_bucket->scb_foreign = true;
+ enqueue_tail(&root_clutch->scr_foreign_buckets, &clutch_bucket->scb_foreignlink);
+ }
+#endif /* __AMP__ */
sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];
/* If this is the first clutch bucket in the root bucket, insert the root bucket into the root priority queue */
/* Remove the timeshare clutch bucket from the globally runnable clutch_bucket list */
remqueue(&clutch_bucket->scb_listlink);
}
+#if __AMP__
+ if (clutch_bucket->scb_foreign) {
+ clutch_bucket->scb_foreign = false;
+ remqueue(&clutch_bucket->scb_foreignlink);
+ }
+#endif /* __AMP__ */
sched_clutch_root_bucket_t root_bucket = &root_clutch->scr_buckets[bucket];
}
}
+#if __AMP__
+
+/* Implementation of the AMP version of the clutch scheduler */
+
+static thread_t
+sched_clutch_amp_steal_thread(processor_set_t pset);
+
+static ast_t
+sched_clutch_amp_processor_csw_check(processor_t processor);
+
+static boolean_t
+sched_clutch_amp_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);
+
+static boolean_t
+sched_clutch_amp_processor_queue_empty(processor_t processor);
+
+static thread_t
+sched_clutch_amp_choose_thread(processor_t processor, int priority, ast_t reason);
+
+static void
+sched_clutch_amp_processor_queue_shutdown(processor_t processor);
+
+static processor_t
+sched_clutch_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread);
+
+static bool
+sched_clutch_amp_thread_avoid_processor(processor_t processor, thread_t thread);
+
+static bool
+sched_clutch_amp_thread_should_yield(processor_t processor, thread_t thread);
+
+static void
+sched_clutch_migrate_foreign_buckets(processor_t processor, processor_set_t dst_pset, boolean_t drop_lock);
+
+static void
+sched_clutch_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation);
+
+const struct sched_dispatch_table sched_clutch_amp_dispatch = {
+ .sched_name = "clutch_amp",
+ .init = sched_amp_init,
+ .timebase_init = sched_clutch_timebase_init,
+ .processor_init = sched_clutch_processor_init,
+ .pset_init = sched_clutch_pset_init,
+ .maintenance_continuation = sched_timeshare_maintenance_continue,
+ .choose_thread = sched_clutch_amp_choose_thread,
+ .steal_thread_enabled = sched_amp_steal_thread_enabled,
+ .steal_thread = sched_clutch_amp_steal_thread,
+ .compute_timeshare_priority = sched_compute_timeshare_priority,
+ .choose_processor = sched_clutch_amp_choose_processor,
+ .processor_enqueue = sched_clutch_processor_enqueue,
+ .processor_queue_shutdown = sched_clutch_amp_processor_queue_shutdown,
+ .processor_queue_remove = sched_clutch_processor_queue_remove,
+ .processor_queue_empty = sched_clutch_amp_processor_queue_empty,
+ .priority_is_urgent = priority_is_urgent,
+ .processor_csw_check = sched_clutch_amp_processor_csw_check,
+ .processor_queue_has_priority = sched_clutch_amp_processor_queue_has_priority,
+ .initial_quantum_size = sched_clutch_initial_quantum_size,
+ .initial_thread_sched_mode = sched_clutch_initial_thread_sched_mode,
+ .can_update_priority = can_update_priority,
+ .update_priority = update_priority,
+ .lightweight_update_priority = lightweight_update_priority,
+ .quantum_expire = sched_default_quantum_expire,
+ .processor_runq_count = sched_clutch_runq_count,
+ .processor_runq_stats_count_sum = sched_clutch_runq_stats_count_sum,
+ .processor_bound_count = sched_clutch_processor_bound_count,
+ .thread_update_scan = sched_clutch_thread_update_scan,
+ .multiple_psets_enabled = TRUE,
+ .sched_groups_enabled = FALSE,
+ .avoid_processor_enabled = TRUE,
+ .thread_avoid_processor = sched_clutch_amp_thread_avoid_processor,
+ .processor_balance = sched_amp_balance,
+
+ .rt_runq = sched_amp_rt_runq,
+ .rt_init = sched_amp_rt_init,
+ .rt_queue_shutdown = sched_amp_rt_queue_shutdown,
+ .rt_runq_scan = sched_amp_rt_runq_scan,
+ .rt_runq_count_sum = sched_amp_rt_runq_count_sum,
+
+ .qos_max_parallelism = sched_amp_qos_max_parallelism,
+ .check_spill = sched_amp_check_spill,
+ .ipi_policy = sched_amp_ipi_policy,
+ .thread_should_yield = sched_clutch_amp_thread_should_yield,
+ .run_count_incr = sched_clutch_run_incr,
+ .run_count_decr = sched_clutch_run_decr,
+ .update_thread_bucket = sched_clutch_update_thread_bucket,
+ .pset_made_schedulable = sched_clutch_migrate_foreign_buckets,
+ .thread_group_recommendation_change = sched_clutch_amp_thread_group_recommendation_change,
+};
+
+extern processor_set_t ecore_set;
+extern processor_set_t pcore_set;
+
+static thread_t
+sched_clutch_amp_choose_thread(
+ processor_t processor,
+ int priority,
+ __unused ast_t reason)
+{
+ processor_set_t pset = processor->processor_set;
+ bool spill_pending = false;
+ int spill_pri = -1;
+
+ if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
+ spill_pending = true;
+ spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root);
+ }
+
+ int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor));
+ run_queue_t bound_runq = sched_clutch_bound_runq(processor);
+ boolean_t choose_from_boundq = false;
+
+ if ((bound_runq->highq < priority) &&
+ (clutch_pri < priority) &&
+ (spill_pri < priority)) {
+ return THREAD_NULL;
+ }
+
+ if ((spill_pri > bound_runq->highq) &&
+ (spill_pri > clutch_pri)) {
+ /*
+ * There is a higher priority thread on the P-core runq,
+ * so returning THREAD_NULL here will cause thread_select()
+ * to call sched_clutch_amp_steal_thread() to try to get it.
+ */
+ return THREAD_NULL;
+ }
+
+ if (bound_runq->highq >= clutch_pri) {
+ choose_from_boundq = true;
+ }
+
+ thread_t thread = THREAD_NULL;
+ if (choose_from_boundq == false) {
+ sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
+ thread = sched_clutch_thread_highest(pset_clutch_root);
+ } else {
+ thread = run_queue_dequeue(bound_runq, SCHED_HEADQ);
+ }
+ return thread;
+}
+
+static boolean_t
+sched_clutch_amp_processor_queue_empty(processor_t processor)
+{
+ processor_set_t pset = processor->processor_set;
+ bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id);
+
+ return (sched_clutch_root_count(sched_clutch_processor_root_clutch(processor)) == 0) &&
+ (sched_clutch_bound_runq(processor)->count == 0) &&
+ !spill_pending;
+}
+
+static bool
+sched_clutch_amp_thread_should_yield(processor_t processor, thread_t thread)
+{
+ if (!sched_clutch_amp_processor_queue_empty(processor) || (rt_runq_count(processor->processor_set) > 0)) {
+ return true;
+ }
+
+ if ((processor->processor_set->pset_cluster_type == PSET_AMP_E) && (recommended_pset_type(thread) == PSET_AMP_P)) {
+ return sched_clutch_root_count(&pcore_set->pset_clutch_root) > 0;
+ }
+
+ return false;
+}
+
+static ast_t
+sched_clutch_amp_processor_csw_check(processor_t processor)
+{
+ boolean_t has_higher;
+ int pri;
+
+ int clutch_pri = sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor));
+ run_queue_t bound_runq = sched_clutch_bound_runq(processor);
+
+ assert(processor->active_thread != NULL);
+
+ processor_set_t pset = processor->processor_set;
+ bool spill_pending = false;
+ int spill_pri = -1;
+ int spill_urgency = 0;
+
+ if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
+ spill_pending = true;
+ spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root);
+ spill_urgency = sched_clutch_root_urgency(&pcore_set->pset_clutch_root);
+ }
+
+ pri = MAX(clutch_pri, bound_runq->highq);
+ if (spill_pending) {
+ pri = MAX(pri, spill_pri);
+ }
+
+ if (processor->first_timeslice) {
+ has_higher = (pri > processor->current_pri);
+ } else {
+ has_higher = (pri >= processor->current_pri);
+ }
+
+ if (has_higher) {
+ if (sched_clutch_root_urgency(sched_clutch_processor_root_clutch(processor)) > 0) {
+ return AST_PREEMPT | AST_URGENT;
+ }
+
+ if (bound_runq->urgency > 0) {
+ return AST_PREEMPT | AST_URGENT;
+ }
+
+ if (spill_urgency > 0) {
+ return AST_PREEMPT | AST_URGENT;
+ }
+
+ return AST_PREEMPT;
+ }
+
+ return AST_NONE;
+}
+
+static boolean_t
+sched_clutch_amp_processor_queue_has_priority(processor_t processor,
+ int priority,
+ boolean_t gte)
+{
+ bool spill_pending = false;
+ int spill_pri = -1;
+ processor_set_t pset = processor->processor_set;
+
+ if (pset == ecore_set && bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
+ spill_pending = true;
+ spill_pri = sched_clutch_root_priority(&pcore_set->pset_clutch_root);
+ }
+ run_queue_t bound_runq = sched_clutch_bound_runq(processor);
+
+ int qpri = MAX(sched_clutch_root_priority(sched_clutch_processor_root_clutch(processor)), bound_runq->highq);
+ if (spill_pending) {
+ qpri = MAX(qpri, spill_pri);
+ }
+
+ if (gte) {
+ return qpri >= priority;
+ } else {
+ return qpri > priority;
+ }
+}
+
+/*
+ * sched_clutch_hierarchy_thread_pset()
+ *
+ * Routine to determine where a thread should be enqueued, based on its
+ * recommendation if it is the first runnable thread in its clutch_bucket,
+ * or on the clutch bucket's existing hierarchy membership otherwise.
+ */
+static processor_set_t
+sched_clutch_hierarchy_thread_pset(thread_t thread)
+{
+ if (SCHED_CLUTCH_THREAD_ELIGIBLE(thread) == false) {
+ return (recommended_pset_type(thread) == PSET_AMP_P) ? pcore_set : ecore_set;
+ }
+
+ sched_clutch_t clutch = sched_clutch_for_thread(thread);
+ sched_clutch_bucket_t clutch_bucket = &(clutch->sc_clutch_buckets[thread->th_sched_bucket]);
+ sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed);
+ if (scb_root) {
+ /* Clutch bucket is already runnable, return the pset hierarchy it is part of */
+ return scb_root->scr_pset;
+ }
+ return (recommended_pset_type(thread) == PSET_AMP_E) ? ecore_set : pcore_set;
+}
+
+/*
+ * sched_clutch_thread_pset_recommended()
+ *
+ * Routine to determine if the thread should be placed on the provided pset.
+ * The routine first makes sure the cluster is available for scheduling. If
+ * it is available, it looks at the thread's recommendation. Called
+ * with the pset lock held.
+ */
+static bool
+sched_clutch_thread_pset_recommended(thread_t thread, processor_set_t pset)
+{
+ if (!sched_clutch_pset_available(pset)) {
+ return false;
+ }
+
+ /* At this point, all clusters should be available and recommended */
+ if (sched_clutch_hierarchy_thread_pset(thread) != pset) {
+ return false;
+ }
+
+ return true;
+}
+
+
+static void
+sched_clutch_amp_processor_queue_shutdown(processor_t processor)
+{
+ processor_set_t pset = processor->processor_set;
+ sched_clutch_root_t pset_clutch_root = sched_clutch_processor_root_clutch(processor);
+ thread_t thread;
+ queue_head_t tqueue;
+
+ /* We only need to migrate threads if this is the last active or last recommended processor in the pset */
+ if ((pset->online_processor_count > 0) && pset_is_recommended(pset)) {
+ pset_unlock(pset);
+ return;
+ }
+
+ queue_init(&tqueue);
+ while (sched_clutch_root_count(pset_clutch_root) > 0) {
+ thread = sched_clutch_thread_highest(pset_clutch_root);
+ enqueue_tail(&tqueue, &thread->runq_links);
+ }
+ pset_unlock(pset);
+
+ qe_foreach_element_safe(thread, &tqueue, runq_links) {
+ remqueue(&thread->runq_links);
+ thread_lock(thread);
+ thread_setrun(thread, SCHED_TAILQ);
+ thread_unlock(thread);
+ }
+}
+
+static thread_t
+sched_clutch_amp_steal_thread(processor_set_t pset)
+{
+ thread_t thread = THREAD_NULL;
+ processor_set_t nset = pset;
+
+ if (pcore_set->online_processor_count == 0) {
+ /* Nothing to steal from */
+ goto out;
+ }
+
+ if (pset->pset_cluster_type == PSET_AMP_P) {
+ /* P cores don't steal from E cores */
+ goto out;
+ }
+
+ processor_t processor = current_processor();
+ assert(pset == processor->processor_set);
+
+ bool spill_pending = bit_test(pset->pending_spill_cpu_mask, processor->cpu_id);
+ bit_clear(pset->pending_spill_cpu_mask, processor->cpu_id);
+
+ nset = pcore_set;
+
+ assert(nset != pset);
+
+ if (sched_get_pset_load_average(nset) >= sched_amp_steal_threshold(nset, spill_pending)) {
+ pset_unlock(pset);
+
+ pset = nset;
+
+ pset_lock(pset);
+
+ /* Allow steal if load average still OK, no idle cores, and more threads on runq than active cores DISPATCHING */
+ if ((sched_get_pset_load_average(pset) >= sched_amp_steal_threshold(pset, spill_pending)) &&
+ ((int)sched_clutch_root_count(&pset->pset_clutch_root) > bit_count(pset->cpu_state_map[PROCESSOR_DISPATCHING])) &&
+ (bit_count(pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) == 0)) {
+ thread = sched_clutch_thread_highest(&pset->pset_clutch_root);
+ KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_STEAL) | DBG_FUNC_NONE, spill_pending, 0, 0, 0);
+ sched_update_pset_load_average(pset);
+ }
+ }
+
+out:
+ pset_unlock(pset);
+ return thread;
+}
+
+/* Return true if this thread should not continue running on this processor */
+static bool
+sched_clutch_amp_thread_avoid_processor(processor_t processor, thread_t thread)
+{
+ if (processor->processor_set->pset_cluster_type == PSET_AMP_E) {
+ if (sched_clutch_thread_pset_recommended(thread, pcore_set)) {
+ return true;
+ }
+ } else if (processor->processor_set->pset_cluster_type == PSET_AMP_P) {
+ if (!sched_clutch_thread_pset_recommended(thread, pcore_set)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static processor_t
+sched_clutch_amp_choose_processor(processor_set_t pset, processor_t processor, thread_t thread)
+{
+ /* Bound threads don't call this function */
+ assert(thread->bound_processor == PROCESSOR_NULL);
+
+ processor_set_t nset;
+ processor_t chosen_processor = PROCESSOR_NULL;
+
+select_pset:
+ nset = (pset == ecore_set) ? pcore_set : ecore_set;
+ if (!sched_clutch_pset_available(pset)) {
+ /* If the current pset is not available for scheduling, just use the other pset */
+ pset_unlock(pset);
+ pset_lock(nset);
+ goto select_processor;
+ }
+
+ /* Check if the thread is recommended to run on this pset */
+ if (sched_clutch_thread_pset_recommended(thread, pset)) {
+ nset = pset;
+ goto select_processor;
+ } else {
+ /* pset not recommended; try the other pset */
+ pset_unlock(pset);
+ pset_lock(nset);
+ pset = nset;
+ goto select_pset;
+ }
+
+select_processor:
+ if (!sched_clutch_pset_available(nset)) {
+ /*
+ * It looks like neither pset is available for scheduling. In
+ * that case, just use the master processor's pset.
+ */
+ if (master_processor->processor_set != nset) {
+ pset_unlock(nset);
+ nset = master_processor->processor_set;
+ pset_lock(nset);
+ }
+ }
+ chosen_processor = choose_processor(nset, processor, thread);
+ assert(chosen_processor->processor_set == nset);
+ return chosen_processor;
+}
+
+/*
+ * AMP Clutch Scheduler Thread Migration
+ *
+ * For the AMP version of the clutch scheduler the thread is always scheduled via its
+ * thread group. So it is important to make sure that the thread group is part of the
+ * correct processor set hierarchy. In order to do that, the clutch scheduler moves
+ * all eligible clutch buckets to the correct hierarchy when the recommendation of a
+ * thread group is changed by CLPC.
+ */
+
+/*
+ * sched_clutch_recommended_pset()
+ *
+ * Routine to decide which hierarchy the thread group should be in based on the
+ * recommendation and other thread group and system properties. This routine is
+ * used to determine if thread group migration is necessary and should mimic the
+ * logic in sched_clutch_thread_pset_recommended() & recommended_pset_type().
+ */
+static processor_set_t
+sched_clutch_recommended_pset(sched_clutch_t sched_clutch, cluster_type_t recommendation)
+{
+ if (!sched_clutch_pset_available(pcore_set)) {
+ return ecore_set;
+ }
+
+ if (!sched_clutch_pset_available(ecore_set)) {
+ return pcore_set;
+ }
+
+ /*
+ * If all clusters are available and recommended, use the recommendation
+ * to decide which cluster to use.
+ */
+ pset_cluster_type_t type = thread_group_pset_recommendation(sched_clutch->sc_tg, recommendation);
+ return (type == PSET_AMP_E) ? ecore_set : pcore_set;
+}
+
+static void
+sched_clutch_bucket_threads_drain(sched_clutch_bucket_t clutch_bucket, sched_clutch_root_t root_clutch, queue_t clutch_threads)
+{
+ uint16_t thread_count = clutch_bucket->scb_thr_count;
+ thread_t thread;
+ uint64_t current_timestamp = mach_approximate_time();
+ while (thread_count > 0) {
+ thread = run_queue_peek(&clutch_bucket->scb_runq);
+ sched_clutch_thread_remove(root_clutch, thread, current_timestamp);
+ enqueue_tail(clutch_threads, &thread->runq_links);
+ thread_count--;
+ }
+
+ /*
+ * This operation should have drained the clutch bucket and pulled it out of the
+ * hierarchy.
+ */
+ assert(clutch_bucket->scb_thr_count == 0);
+ assert(clutch_bucket->scb_root == NULL);
+}
+
+/*
+ * sched_clutch_migrate_thread_group()
+ *
+ * Routine to implement the migration of threads when the thread group
+ * recommendation is updated. The migration works using a 2-phase
+ * algorithm.
+ *
+ * Phase 1: With the source pset (determined by sched_clutch_recommended_pset)
+ * locked, drain all the runnable threads into a local queue and update the TG
+ * recommendation.
+ *
+ * Phase 2: Call thread_setrun() on all the drained threads. Since the TG recommendation
+ * has been updated, these should all end up in the right hierarchy.
+ */
+static void
+sched_clutch_migrate_thread_group(sched_clutch_t sched_clutch, cluster_type_t new_recommendation)
+{
+ thread_t thread;
+
+ /* If the thread group is empty, just update the recommendation */
+ if (os_atomic_load(&sched_clutch->sc_thr_count, relaxed) == 0) {
+ thread_group_update_recommendation(sched_clutch->sc_tg, new_recommendation);
+ return;
+ }
+
+ processor_set_t dst_pset = sched_clutch_recommended_pset(sched_clutch, new_recommendation);
+ processor_set_t src_pset = (dst_pset == pcore_set) ? ecore_set : pcore_set;
+
+ queue_head_t clutch_threads;
+ queue_init(&clutch_threads);
+
+ /*
+ * Interrupts need to be disabled to make sure threads won't become
+ * runnable during the migration and attempt to grab the pset/thread locks.
+ */
+ spl_t s = splsched();
+
+ pset_lock(src_pset);
+ for (sched_bucket_t bucket = TH_BUCKET_FIXPRI; bucket < TH_BUCKET_SCHED_MAX; bucket++) {
+ sched_clutch_bucket_t clutch_bucket = &(sched_clutch->sc_clutch_buckets[bucket]);
+ sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed);
+ if ((scb_root == NULL) || (scb_root->scr_pset == dst_pset)) {
+ /* Clutch bucket not runnable or already in the right hierarchy; nothing to do here */
+ continue;
+ }
+ assert(scb_root->scr_pset == src_pset);
+ /* Now remove all the threads from the runq so that thread->runq is set correctly */
+ sched_clutch_bucket_threads_drain(clutch_bucket, scb_root, &clutch_threads);
+ }
+
+ /*
+ * Now that all the clutch buckets have been drained, update the TG recommendation.
+ * This operation needs to be done with the pset lock held to make sure that anyone
+ * coming in before the migration started would get the original pset as the root
+ * of this sched_clutch and attempt to hold the src_pset lock. Once the TG changes,
+ * all threads that are becoming runnable would find the clutch bucket empty and
+ * the TG recommendation would coax them to enqueue it in the new recommended
+ * hierarchy. This effectively synchronizes with other threads calling
+ * thread_setrun() and trying to decide which pset the thread/clutch_bucket
+ * belongs in.
+ */
+ thread_group_update_recommendation(sched_clutch->sc_tg, new_recommendation);
+ pset_unlock(src_pset);
+
+ /* Now setrun all the threads in the local queue */
+ qe_foreach_element_safe(thread, &clutch_threads, runq_links) {
+ remqueue(&thread->runq_links);
+ thread_lock(thread);
+ thread_setrun(thread, SCHED_TAILQ);
+ thread_unlock(thread);
+ }
+
+ splx(s);
+}
+
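/*
 * A minimal user-space sketch of the two-phase migration pattern used above,
 * assuming toy queues protected by pthread mutexes instead of pset locks and a
 * simple shared "recommendation" pointer instead of the thread group state:
 * phase 1 drains the source queue and flips the recommendation under the
 * source lock; phase 2 re-enqueues each drained item outside that lock, so it
 * lands on the newly recommended queue.
 * Typical use: toy_migrate(&toy_equeue, &toy_pqueue).
 */
#include <pthread.h>
#include <stddef.h>

struct toy_item {
	struct toy_item *next;
};

struct toy_queue {
	pthread_mutex_t  lock;
	struct toy_item *head;
};

static struct toy_queue toy_equeue = { PTHREAD_MUTEX_INITIALIZER, NULL };
static struct toy_queue toy_pqueue = { PTHREAD_MUTEX_INITIALIZER, NULL };
static struct toy_queue *toy_recommended = &toy_equeue;  /* shared recommendation */

static void
toy_enqueue(struct toy_queue *q, struct toy_item *item)
{
	pthread_mutex_lock(&q->lock);
	item->next = q->head;
	q->head = item;
	pthread_mutex_unlock(&q->lock);
}

static void
toy_migrate(struct toy_queue *src, struct toy_queue *dst)
{
	/* Phase 1: drain the source and update the recommendation under the source lock. */
	pthread_mutex_lock(&src->lock);
	struct toy_item *drained = src->head;
	src->head = NULL;
	toy_recommended = dst;
	pthread_mutex_unlock(&src->lock);

	/* Phase 2: re-enqueue outside the source lock; items follow the new recommendation. */
	while (drained != NULL) {
		struct toy_item *next = drained->next;
		toy_enqueue(toy_recommended, drained);
		drained = next;
	}
}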
+static void
+sched_clutch_amp_thread_group_recommendation_change(struct thread_group *tg, cluster_type_t new_recommendation)
+{
+ /*
+ * For the clutch scheduler, the change in recommendation moves the thread group
+ * to the right hierarchy. sched_clutch_migrate_thread_group() is also responsible
+ * for updating the recommendation of the thread group.
+ */
+ sched_clutch_migrate_thread_group(&tg->tg_sched_clutch, new_recommendation);
+
+ if (new_recommendation != CLUSTER_TYPE_P) {
+ return;
+ }
+
+ sched_amp_bounce_thread_group_from_ecores(ecore_set, tg);
+}
+
+/*
+ * sched_clutch_migrate_foreign_buckets()
+ *
+ * Routine to migrate all the clutch buckets which are not in their recommended
+ * pset hierarchy now that a new pset has become runnable. The algorithm is
+ * similar to sched_clutch_migrate_thread_group().
+ *
+ * Invoked with the newly recommended pset lock held and interrupts disabled.
+ */
+static void
+sched_clutch_migrate_foreign_buckets(__unused processor_t processor, processor_set_t dst_pset, boolean_t drop_lock)
+{
+ thread_t thread;
+ processor_set_t src_pset = (dst_pset == pcore_set) ? ecore_set : pcore_set;
+
+ if (!sched_clutch_pset_available(dst_pset)) {
+ /*
+ * It is possible that some state about the pset changed,
+ * but it's still not available for scheduling. Nothing to
+ * do here in that case.
+ */
+ if (drop_lock) {
+ pset_unlock(dst_pset);
+ }
+ return;
+ }
+ pset_unlock(dst_pset);
+
+ queue_head_t clutch_threads;
+ queue_init(&clutch_threads);
+ sched_clutch_root_t src_root = &src_pset->pset_clutch_root;
+
+ pset_lock(src_pset);
+ queue_t clutch_bucket_list = &src_pset->pset_clutch_root.scr_foreign_buckets;
+
+ if (sched_clutch_root_count(src_root) == 0) {
+ /* No threads present in this hierarchy */
+ pset_unlock(src_pset);
+ goto migration_complete;
+ }
+
+ sched_clutch_bucket_t clutch_bucket;
+ qe_foreach_element_safe(clutch_bucket, clutch_bucket_list, scb_foreignlink) {
+ sched_clutch_root_t scb_root = os_atomic_load(&clutch_bucket->scb_root, relaxed);
+ assert(scb_root->scr_pset == src_pset);
+ /* Now remove all the threads from the runq so that thread->runq is set correctly */
+ sched_clutch_bucket_threads_drain(clutch_bucket, scb_root, &clutch_threads);
+ assert(clutch_bucket->scb_foreign == false);
+ }
+ pset_unlock(src_pset);
+
+ /* Now setrun all the threads in the local queue */
+ qe_foreach_element_safe(thread, &clutch_threads, runq_links) {
+ remqueue(&thread->runq_links);
+ thread_lock(thread);
+ thread_setrun(thread, SCHED_TAILQ);
+ thread_unlock(thread);
+ }
+
+migration_complete:
+ if (!drop_lock) {
+ pset_lock(dst_pset);
+ }
+}
+
+#endif /* __AMP__ */
#endif /* CONFIG_SCHED_CLUTCH */
/* (P) linkage for all clutch_buckets in a root bucket; used for tick operations */
queue_chain_t scb_listlink;
+#if __AMP__
+ /* (P) linkage for all "foreign" clutch buckets in the root clutch */
+ queue_chain_t scb_foreignlink;
+#endif /* __AMP__ */
/* (P) timestamp for the last time the interactivity score was updated */
uint64_t scb_interactivity_ts;
* platforms, simply return the master_processor.
*/
fallback_processor = true;
+#if CONFIG_SCHED_CLUTCH && __AMP__
+ processor = processor_array[lsb_first(starting_pset->primary_map)];
+#else /* CONFIG_SCHED_CLUTCH && __AMP__ */
processor = master_processor;
+#endif /* CONFIG_SCHED_CLUTCH && __AMP__ */
}
/*
pset->load_average = new_load_average;
#if (DEVELOPMENT || DEBUG)
+#if __AMP__
+ if (pset->pset_cluster_type == PSET_AMP_P) {
+ KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PSET_LOAD_AVERAGE) | DBG_FUNC_NONE, sched_get_pset_load_average(pset), (bit_count(pset->cpu_state_map[PROCESSOR_RUNNING]) + pset->pset_runq.count + rt_runq_count(pset)));
+ }
+#endif
#endif
}
__private_extern__ void
thread_bind_cluster_type(char cluster_type)
{
+#if __AMP__
+ thread_t thread = current_thread();
+
+ spl_t s = splsched();
+ thread_lock(thread);
+ thread->sched_flags &= ~(TH_SFLAG_ECORE_ONLY | TH_SFLAG_PCORE_ONLY);
+ switch (cluster_type) {
+ case 'e':
+ case 'E':
+ thread->sched_flags |= TH_SFLAG_ECORE_ONLY;
+ break;
+ case 'p':
+ case 'P':
+ thread->sched_flags |= TH_SFLAG_PCORE_ONLY;
+ break;
+ default:
+ break;
+ }
+ thread_unlock(thread);
+ splx(s);
+
+ thread_block(THREAD_CONTINUE_NULL);
+#else /* __AMP__ */
(void)cluster_type;
+#endif /* __AMP__ */
}
#error Enable at least one scheduler algorithm in osfmk/conf/MASTER.XXX
#endif
+#if __AMP__
+extern const struct sched_dispatch_table sched_amp_dispatch;
+#define SCHED(f) (sched_amp_dispatch.f)
+
+#else /* __AMP__ */
#if CONFIG_SCHED_CLUTCH
extern const struct sched_dispatch_table sched_clutch_dispatch;
#define SCHED(f) (sched_dualq_dispatch.f)
#endif /* CONFIG_SCHED_CLUTCH */
+#endif /* __AMP__ */
struct sched_dispatch_table {
const char *sched_name;
#if defined(CONFIG_SCHED_MULTIQ)
extern const struct sched_dispatch_table sched_multiq_dispatch;
extern const struct sched_dispatch_table sched_dualq_dispatch;
+#if __AMP__
+extern const struct sched_dispatch_table sched_amp_dispatch;
+#endif
#endif
#if defined(CONFIG_SCHED_PROTO)
task_wakeups_rate_exceeded, NULL, NULL);
ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
+#if XNU_MONITOR
+ ledger_template_complete_secure_alloc(t);
+#else /* XNU_MONITOR */
ledger_template_complete(t);
+#endif /* XNU_MONITOR */
task_ledger_template = t;
}
return energy;
}
+#if __AMP__
+
+uint64_t
+task_cpu_ptime(
+ task_t task)
+{
+ uint64_t cpu_ptime = 0;
+ thread_t thread;
+
+ task_lock(task);
+ cpu_ptime += task->total_ptime;
+
+ queue_iterate(&task->threads, thread, thread_t, task_threads) {
+ cpu_ptime += timer_grab(&thread->ptime);
+ }
+
+ task_unlock(task);
+ return cpu_ptime;
+}
+
+#else /* __AMP__ */
uint64_t
task_cpu_ptime(
return 0;
}
+#endif /* __AMP__ */
/* This function updates the cpu time in the arrays for each
* effective and requested QoS class
#define TH_SFLAG_BASE_PRI_FROZEN 0x0800 /* (effective) base_pri is frozen */
#define TH_SFLAG_WAITQ_PROMOTED 0x1000 /* promote reason: waitq wakeup (generally for IPC receive) */
+#if __AMP__
+#define TH_SFLAG_ECORE_ONLY 0x2000 /* Bind thread to E core processor set */
+#define TH_SFLAG_PCORE_ONLY 0x4000 /* Bind thread to P core processor set */
+#endif
#define TH_SFLAG_EXEC_PROMOTED 0x8000 /* promote reason: thread is in an exec */
* Update the global ATM diagnostic flag, readable from the commpage
*/
routine host_set_atm_diagnostic_flag(
- host_priv : host_priv_t;
- in diagnostic_flag : uint32_t);
+ host : host_t;
+ in diagnostic_flag : uint32_t);
#if !KERNEL && LIBSYSCALL_INTERFACE
routine host_get_atm_diagnostic_flag(
#define CPUFAMILY_ARM_HURRICANE 0x67ceee93
#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
+#ifndef RC_HIDE_XNU_LIGHTNING
+#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
+#endif /* !RC_HIDE_XNU_LIGHTNING */
/* The following synonyms are deprecated: */
#define CPUFAMILY_INTEL_6_23 CPUFAMILY_INTEL_PENRYN
--- /dev/null
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+ <title>Mach Kernel Interface Reference Manual</title>
+</head>
+<body>
+<h3>Mach IPC Interface</h3>
+<blockquote>
+<p>
+Mach IPC presents itself in a few forms: message queues, lock-sets,
+and semaphores (more may be added in the future). All share one common
+characteristic: the capabilities presented by each are represented through
+a handle known as a Mach port. Specific rights represented in these
+Mach port capability handles allow the underlying IPC object to be used and
+manipulated in consistent ways.</p>
+
+<h4>Mach Message Queue Interface</h4>
+<blockquote>
+<p>
+<a href="mach_msg.html">mach_msg</a> - Send and/or receive a message from the target port.<br>
+<a href="mach_msg.html">mach_msg_overwrite</a> - Send and/or receive messages with possible overwrite.<br>
+</p>
+Mach Message Queue Data Structures
+<p>
+<a href="mach_msg_descriptor.html">mach_msg_descriptor</a> - Specifies an element of a complex IPC message.<br>
+<a href="mach_msg_header.html">mach_msg_header</a> - Specifies the content of an IPC message header.<br>
+</p>
+</blockquote>
+
+<h4>Mach Lock-Set Interface</h4>
+<blockquote>
+<p>
+<a href="lock_acquire.html">lock_acquire</a> - Acquire ownership a lock<br>
+<a href="lock_handoff.html">lock_handoff</a> - Hand-off ownership of a lock.<br>
+<a href="lock_handoff_accept.html">lock_handoff_accept</a> - Accept lock ownership from a handoff.<br>
+<a href="lock_make_stable.html">lock_make_stable</a> - Stabilize the state of the specified lock.<br>
+<a href="lock_release.html">lock_release</a> - Release ownership of a lock.<br>
+<a href="lock_set_create.html">lock_set_create</a> - Create a new lock set.<br>
+<a href="lock_set_destroy.html">lock_set_destroy</a> - Destroy a lock set and its associated locks.<br>
+<a href="lock_try.html">lock_try</a> - Attempt to acquire access rights to a lock.<br>
+</p>
+</blockquote>
+
+<h4>Mach Semaphore Interface</h4>
+<blockquote>
+<p>
+<a href="semaphore_create.html">semaphore_create</a> - Create a new semaphore.<br>
+<a href="semaphore_destroy.html">semaphore_destroy</a> - Destroy a semaphore.<br>
+<a href="semaphore_signal.html">semaphore_signal</a> - Increments the semaphore count.<br>
+<a href="semaphore_signal_all.html">semaphore_signal_all</a> - Wake up all threads blocked on a semaphore.<br>
+<a href="semaphore_wait.html">semaphore_wait</a> - Wait on the specified semaphore.<br>
+</p>
+</blockquote>
+
+<h4>Mach Port Management Interface</h4>
+<blockquote>
+<p>
+<a href="mach_port_allocate.html">mach_port_allocate</a> - Create caller-specified type of port right.<br>
+<a href="mach_port_allocate_full.html">mach_port_allocate_full</a> - Create a port right with full Mach port semantics.<br>
+<a href="mach_port_allocate_name.html">mach_port_allocate_name</a> - Create a port right with the caller-specified name.<br>
+<a href="mach_port_allocate_qos.html">mach_port_allocate_qos</a> - Allocate a port with specified "quality of service".<br>
+<a href="MP_allocate_subsystem.html">mach_port_allocate_subsystem</a> - Create a port right associated with the caller-specified subsystem.<br>
+<a href="mach_port_deallocate.html">mach_port_deallocate</a> - Decrement the target port right's user reference count.<br>
+<a href="mach_port_destroy.html">mach_port_destroy</a> - Deallocate all port rights associated with specified name.<br>
+<a href="mach_port_extract_right.html">mach_port_extract_right</a> - Remove the specified right from the target task and return it to the caller.<br>
+<a href="mach_port_get_attributes.html">mach_port_get_attributes</a> - Return information about target port as specified by the caller.<br>
+<a href="mach_port_get_refs.html">mach_port_get_refs</a> - Return the current count of user references on the target port right.<br>
+<a href="mach_port_get_set_status.html">mach_port_get_set_status</a> - Return the port right names contained in the target port set.<br>
+<a href="mach_port_insert_right.html">mach_port_insert_right</a> - Insert the specified port right into the target task.<br>
+<a href="mach_port_mod_refs.html">mach_port_mod_refs</a> - Modify the specified port right's count of user references.<br>
+<a href="mach_port_move_member.html">mach_port_move_member</a> - Move the specified receive right into or out of the specified port set.<br>
+<a href="mach_port_names.html">mach_port_names</a> - Return information about a task's port name space.<br>
+<a href="MP_request_notification.html">mach_port_request_notification</a> - Request notification of the specified port event type.<br>
+<a href="mach_port_set_attributes.html">mach_port_set_attributes</a> - Set the target port's attributes.<br>
+<a href="mach_port_set_mscount.html">mach_port_set_mscount</a> - Change the target port's make-send count.<br>
+<a href="mach_port_set_seqno.html">mach_port_set_seqno</a> - Change the current value of the target port's sequence number.<br>
+<a href="mach_port_type.html">mach_port_type</a> - Return the characteristics of the target port name.<br>
+<a href="mach_reply_port.html">mach_reply_port</a> - Allocate a new port and insert corresponding receive right in the calling task.<br>
+<a href="mach_subsystem_create.html"> mach_subsystem_create</a> - Used by a server to register information about an RPC subsystem with the kernel.<br>
+</p>
+Mach Port Data Structures
+<p>
+<a href="mach_port_limits.html">mach_port_limits</a> - Specifies a port's resource and message queue limits.<br>
+<a href="mach_port_qos.html">mach_port_qos</a> - Specifies a port's attributes with respect to "Quality Of Service."<br>
+<a href="mach_port_status.html">mach_port_status</a> - Used to present a port's current status with respect to various important attributes.<br>
+</p>
+Mach Port Notification Callbacks
+<p>
+<a href="do_mach_notify_dead_name.html">do_mach_notify_dead_name</a> - Handle the current instance of a dead-name notification.<br>
+<a href="do_mach_notify_no_senders.html">do_mach_notify_no_senders</a> - Handle the current instance of a no-more-senders notification.<br>
+<a href="DMN_port_deleted.html">do_mach_notify_port_deleted</a> - Handle the current instance of a port-deleted notification.<br>
+<a href="DMN_port_destroyed.html">do_mach_notify_port_destroyed</a> - Handle the current instance of a port-destroyed notification.<br>
+<a href="do_mach_notify_send_once.html">do_mach_notify_send_once</a> - Handle the current instance of a send-once notification.<br>
+</p>
+Mach Port Notification Callback Server Helpers
+<p>
+<a href="notify_server.html">notify_server</a> - Detect and handle a kernel-generated IPC notification.<br>
+</p>
+</blockquote>
+
+</blockquote>
+
+<h3>Mach Virtual Memory Interface</h3>
+<blockquote>
+<h4>Mach Virtual Memory Address Space Manipulation Interface</h4>
+<blockquote>
+<p>
+<a href="host_page_size.html">host_page_size</a> - Provide the system's virtual page size.<br>
+<a href="vm_allocate.html">vm_allocate</a> - Allocate a region of virtual memory.<br>
+<a href="vm_behavior_set.html">vm_behavior_set</a> - Specify expected access patterns for the target VM region.<br>
+<a href="vm_copy.html">vm_copy</a> - Copy a region of virtual memory.<br>
+<a href="vm_deallocate.html">vm_deallocate</a> - Deallocate a region of virtual memory.<br>
+<a href="vm_inherit.html">vm_inherit</a> - Set a VM region's inheritance attribute.<br>
+<a href="vm_machine_attribute.html">vm_machine_attribute</a> - Get/set the target memory region's special attributes.<br>
+<a href="vm_map.html">vm_map</a> - Map the specified memory object to a region of virtual memory.<br>
+<a href="vm_msync.html">vm_msync</a> - Synchronize the specified region of virtual memory.<br>
+<a href="vm_protect.html">vm_protect</a> - Set access privilege attribute for a region of virtual memory.<br>
+<a href="vm_read.html">vm_read</a> - Read the specified range of target task's address space.<br>
+<a href="vm_region.html">vm_region</a> - Return description of a virtual memory region.<br>
+<a href="vm_remap.html">vm_remap</a> - Map memory objects in one address space to that of another's.<br>
+<a href="vm_wire.html"> vm_wire</a> - Modify the target region's paging characteristics.<br>
+<a href="vm_write.html">vm_write</a> - Write data to the specified address in the target address space.<br>
+</p>
+Data Structures
+<p>
+<a href="vm_region_basic_info.html">vm_region_basic_info</a> - Defines the attributes of a task's memory region.<br>
+<a href="vm_statistics.html">vm_statistics</a> - Defines statistics for the kernel's use of virtual memory.<br>
+</p>
+</blockquote>
+
+<h4>External Memory Management Interface</h4>
+<blockquote>
+The External Memory Management Interface (EMMI) is undergoing significant change in the Darwin system.
+For this reason, the interface is not currently available to user-level programs. Even for kernel
+extensions, use of these interfaces is not supported. Instead, the BSD filesystem's Universal Buffer Cache (UBC)
+mechanism should be used.<br>
+<p>
+<a href="MO_change_attributes.html">memory_object_change_attributes</a> - Modify subset of memory object attributes.<br>
+<a href="memory_object_destroy.html">memory_object_destroy</a> - Shut down a memory object.<br>
+<a href="MO_get_attributes.html">memory_object_get_attributes</a> - Return current attributes for a memory object.<br>
+<a href="memory_object_lock_request.html">memory_object_lock_request</a> - Restrict access to memory object data.<br>
+<a href="MO_SY_completed.html">memory_object_synchronize_completed</a> - Synchronized data has been processed.<br>
+</p>
+Data Structures
+<p>
+<a href="memory_object_attr_info.html">memory_object_attr_info</a> - Defines memory object attributes.<br>
+<a href="memory_object_perf_info.html">memory_object_perf_info</a>- Specifies performance-related memory object attributes.<br>
+</p>
+External Memory Manager Interface Callbacks
+<p>
+<a href="memory_object_create.html">memory_object_create</a> - Assign a new memory object to the default memory manager.<br>
+<a href="MO_data_initialize.html">memory_object_data_initialize</a> - Provide initial data for a new memory object.<br>
+<a href="memory_object_data_request.html">memory_object_data_request</a> - Request that memory manager page-in specified data.<br>
+<a href="memory_object_data_return.html">memory_object_data_return</a> - Return memory object data to the appropriate memory manager.<br>
+<a href="memory_object_data_unlock.html">memory_object_data_unlock</a> - Request a memory manager release the lock on specific data.<br>
+<a href="memory_object_init.html">memory_object_init</a> - Inform a memory manager on first use of a memory object.<br>
+<a href="memory_object_synchronize.html">memory_object_synchronize</a> - Request synchronization of data with backing store.<br>
+<a href="memory_object_terminate.html">memory_object_terminate</a> - Relinquish access to a memory object.<br>
+</p>
+EMMI Callback Server Helpers
+<p>
+<a href="MO_default_server.html">memory_object_default_server</a> - Handle kernel operation request targeted for the default pager.<br>
+<a href="memory_object_server.html">memory_object_server</a> - Handle kernel operation request aimed at a given memory manager.<br>
+</p>
+</blockquote>
+
+<h4>Default Memory Management Interface</h4>
+<blockquote>
+<p>
+<a href="default_pager_add_segment.html">default_pager_add_segment</a> - Add additional backing storage for a default pager.<br>
+<a href="DP_backing_store_create.html">default_pager_backing_store_create</a> - Create a backing storage object.<br>
+<a href="DP_backing_store_delete.html"> default_pager_backing_store_delete</a> - Delete a backing storage object.<br>
+<a href="DP_backing_store_info.html">default_pager_backing_store_info</a> - Return information about a backing storage object.<br>
+<a href="default_pager_info.html">default_pager_info</a> - Furnish caller with information about the default pager.<br>
+<a href="DP_object_create.html">default_pager_object_create</a> - Initialize a non-persistent memory object.<br>
+<a href="HD_memory_manager.html">host_default_memory_manager</a> - Register/Lookup the host's default pager.<br>
+</p>
+</blockquote>
+
+</blockquote>
+
+<h3>Process Management Interface</h3>
+<blockquote>
+
+<h4>Task Interface</h4>
+<blockquote>
+<p>
+<a href="mach_ports_lookup.html">mach_ports_lookup</a> - Provide caller with an array of the target task's well-known ports.<br>
+<a href="mach_ports_register.html">mach_ports_register</a> - Register an array of well-known ports on behalf of the target task.<br>
+<a href="mach_task_self.html">mach_task_self</a> - Return a send right to the caller's task_self port.<br>
+<a href="task_create.html">task_create</a> - Create a new task.<br>
+<a href="task_get_emulation_vector.html">task_get_emulation_vector</a> - Return an array identifying the target task's user-level system call handlers.<br>
+<a href="task_get_exception_ports.html">task_get_exception_ports</a> - Return send rights to the target task's exception ports.<br>
+<a href="task_get_special_port.html">task_get_special_port</a> - Return a send write to the indicated special port.<br>
+<a href="task_info.html">task_info</a> - Return per-task information according to specified flavor.<br>
+<a href="task_resume.html">task_resume</a> - Decrement the target task's suspend count.<br>
+<a href="task_sample.html">task_sample</a> - Sample the target task's thread program counters periodically.<br>
+<a href="task_set_emulation.html">task_set_emulation</a> - Establish a user-level handler for a system call.<br>
+<a href="task_set_emulation_vector.html">task_set_emulation_vector</a> - Establish the target task's user-level system call handlers.<br>
+<a href="task_set_exception_ports.html">task_set_exception_ports</a> - Set target task's exception ports.<br>
+<a href="task_set_info.html">task_set_info</a> - Set task-specific information state.<br>
+<a href="task_set_port_space.html">task_set_port_space</a> - Set the size of the target task's port name space table.<br>
+<a href="task_set_special_port.html">task_set_special_port</a> - Set the indicated special port.<br>
+<a href="task_suspend.html">task_suspend</a> - Suspend the target task.<br>
+<a href="task_swap_exception_ports.html">task_swap_exception_ports</a> - Set target task's exception ports, returning the previous exception ports.<br>
+<a href="task_terminate.html">task_terminate</a> - Terminate the target task and deallocate its resources.<br>
+<a href="task_threads.html">task_threads</a> - Return the target task's list of threads.<br>
+</p>
+Task Data Structures
+<p>
+<a href="task_basic_info.html">task_basic_info</a> - Defines basic information for a task.<br>
+<a href="task_thread_times_info.html">task_thread_times_info</a> - Defines thread execution times information for tasks.<br>
+</p>
+</blockquote>
+
+<h4>Thread Interface</h4>
+<blockquote>
+<p>
+<a href="mach_thread_self.html">mach_thread_self</a> - Returns the thread self port.<br>
+<a href="thread_abort.html">thread_abort</a> - Abort a thread.<br>
+<a href="thread_abort_safely.html">thread_abort_safely</a> - Abort a thread, restartably.<br>
+<a href="thread_create.html">thread_create</a> - Create a thread within a task.<br>
+<a href="thread_create_running.html">thread_create_running</a> - Optimized creation of a running thread.<br>
+<a href="thread_depress_abort.html">thread_depress_abort</a> - Cancel thread scheduling depression.<br>
+<a href="thread_get_exception_ports.html">thread_get_exception_ports</a> - Return a send right to an exception port.<br>
+<a href="thread_get_special_port.html">thread_get_special_port</a> - Return a send right to the caller-specified special port.<br>
+<a href="thread_get_state.html">thread_get_state</a> - Return the execution state for a thread.<br>
+<a href="thread_info.html">thread_info</a> - Return information about a thread.<br>
+<a href="thread_resume.html">thread_resume</a> - Resume a thread.<br>
+<a href="thread_sample.html">thread_sample</a> - Perform periodic PC sampling for a thread.<br>
+<a href="thread_set_exception_ports.html">thread_set_exception_ports</a> - Set exception ports for a thread.<br>
+<a href="thread_set_special_port.html">thread_set_special_port</a> - Set caller-specified special port belonging to the target thread.<br>
+<a href="thread_set_state.html">thread_set_state</a> - Set the target thread's user-mode execution state.<br>
+<a href="thread_suspend.html">thread_suspend</a> - Suspend a thread.<br>
+<a href="TS_exception_ports.html">thread_swap_exception_ports</a> - Swap exception ports for a thread.<br>
+<a href="thread_terminate.html">thread_terminate</a> - Destroy a thread.<br>
+<a href="thread_wire.html">thread_wire</a> - Mark the thread as privileged with respect to kernel resources.<br>
+</p>
+Thread Data Structures
+<p>
+<a href="thread_basic_info.html">thread_basic_info</a> - Defines basic information for a thread.<br>
+</p>
+Thread Exception Callbacks
+<p>
+<a href="catch_exception_raise.html">catch_exception_raise</a> - Handles the occurrence of an exception within a thread.<br>
+</p>
+Thread Exception Callback Server Helpers
+<p>
+<a href="exc_server.html">exc_server</a> - Handle kernel-reported thread exception.<br>
+</p>
+</blockquote>
+
+<h4>Scheduling Interface</h4>
+<blockquote>
+<p>
+<a href="task_policy.html">task_policy</a> - Set target task's default scheduling policy state.<br>
+<a href="task_set_policy.html">task_set_policy</a> - Set target task's default scheduling policy state.<br>
+<a href="thread_policy.html">thread_policy</a> - Set target thread's scheduling policy state.<br>
+<a href="thread_set_policy.html">thread_set_policy</a> - Set target thread's scheduling policy state.<br>
+<a href="thread_switch.html">thread_switch</a> - Cause context switch with options.<br>
+</p>
+Scheduling Data Structures
+<p>
+<a href="policy_fifo_info.html">policy_fifo_info</a> - Specifies information associated with the system's First-In-First-Out scheduling policy.<br>
+<a href="policy_rr_info.html">policy_rr_info</a> - Specifies information associated with the system's Round Robin scheduling policy.<br>
+<a href="policy_timeshare_info.html">policy_timeshare_info</a> - Specifies information associated with the system's Timeshare scheduling policy.<br>
+</p>
+</blockquote>
+</blockquote>
+
+<h3>System Management Interface</h3>
+<blockquote>
+
+<h4>Host Interface</h4>
+<blockquote>
+<p>
+<a href="host_get_clock_service.html">host_get_clock_service</a> - Return a send right to a kernel clock's service port.<br>
+<a href="host_get_time.html">host_get_time</a> - Returns the current time as seen by that host.<br>
+<a href="host_info.html">host_info</a> - Return information about a host.<br>
+<a href="host_kernel_version.html">host_kernel_version</a> - Return kernel version information for a host.<br>
+<a href="host_statistics.html">host_statistics</a> - Return statistics for a host.<br>
+<a href="mach_host_self.html">mach_host_self</a> - Returns send rights to the task's host self port.<br>
+</p>
+Data Structures
+<p>
+<a href="host_basic_info.html">host_basic_info</a> - Used to present basic information about a host.<br>
+<a href="host_load_info.html">host_load_info</a> - Used to present a host's processor load information.<br>
+<a href="host_sched_info.html">host_sched_info</a> - - Used to present the set of scheduler limits associated with the host.<br>
+<a href="kernel_resource_sizes.html">kernel_resource_sizes</a> - Used to present the sizes of kernel's major structures.<br>
+</p>
+</blockquote>
+
+<h4>Host Control Interface</h4>
+<blockquote>
+<p>
+<a href="host_adjust_time.html">host_adjust_time</a> - Arranges for the time on a specified host to be gradually changed by an adjustment value.<br>
+<a href="HD_memory_manager.html">host_default_memory_manager</a> - Set the default memory manager.<br>
+<a href="host_get_boot_info.html">host_get_boot_info</a> - Return operator boot information.<br>
+<a href="host_get_clock_control.html">host_get_clock_control</a> - Return a send right to a kernel clock's control port.<br>
+<a href="host_processor_slots.html">host_processor_slots</a> - Return a list of numbers that map processor slots to active processors.<br>
+<a href="host_processors.html">host_processors</a> - Return a list of send rights representing all processor ports.<br>
+<a href="host_reboot.html">host_reboot</a> - Reboot this host.<br>
+<a href="host_set_time.html">host_set_time</a> - Establishes the time on the specified host.<br>
+</p>
+</blockquote>
+
+<h4>Host Security Interface</h4>
+<blockquote>
+<p>
+<a href="host_security_create_task_token.html">host_security_create_task_token</a> - Create a new task with an explicit security token.<br>
+<a href="host_security_set_task_token.html">host_security_set_task_token</a> - Change the target task's security token.<br>
+</p>
+</blockquote>
+
+<h4>Resource Accounting Interface</h4>
+<blockquote>
+<i>
+The Mach resource accounting mechanism is not functional in the current Mac OS X/Darwin system. It will become functional in a future release.
+</i>
+<p>
+<a href="ledger_create.html">ledger_create</a> - Create a subordinate ledger.<br>
+<a href="ledger_read.html">ledger_read</a> - Return the ledger limit and balance.<br>
+<a href="ledger_terminate.html">ledger_terminate</a> - Destroy a ledger.<br>
+<a href="ledger_transfer.html">ledger_transfer</a> - Transfer resources from a parent ledger to a child.<br>
+</p>
+</blockquote>
+
+<h4>Processor Management Interface</h4>
+<blockquote>
+<p>
+<a href="processor_control.html">processor_control</a> - Perform caller-specified operation on target processor.<br>
+<a href="processor_exit.html">processor_exit</a> - Exit a processor.<br>
+<a href="processor_info.html">processor_info</a> - Return information about a processor.<br>
+<a href="processor_start.html">processor_start</a> - Start a processor.<br>
+</p>
+Processor Data Structures
+<p>
+<a href="processor_basic_info.html">processor_basic_info</a> - Defines the basic information about a processor.<br>
+</p>
+</blockquote>
+
+<h4>Processor Set Interface</h4>
+<blockquote>
+<i>
+The processor set interface allows for the grouping of tasks and
+processors for the purpose of exclusive scheduling. These interfaces
+are <b>deprecated</b> and should not be used in code that isn't tied
+to a particular release of Mac OS X/Darwin. These will likely change
+or disappear in a future release.
+</i>
+<p>
+<a href="host_processor_sets.html">host_processor_sets</a> - Return a list of send rights representing all processor set name ports.<br>
+<a href="host_processor_set_priv.html">host_processor_set_priv</a> - Translate a processor set name port into a processor set control port.<br>
+<a href="processor_assign.html">processor_assign</a> - Assign a processor to a processor set.<br>
+<a href="processor_get_assignment.html">processor_get_assignment</a> - Get current assignment for a processor.<br>
+<a href="processor_set_create.html">processor_set_create</a> - Create a new processor set.<br>
+<a href="processor_set_default.html">processor_set_default</a> - Return the default processor set.<br>
+<a href="processor_set_destroy.html">processor_set_destroy</a> - Destroy the target processor set.<br>
+<a href="processor_set_info.html">processor_set_info</a> - Return processor set state according to caller-specified flavor.<br>
+<a href="processor_set_max_priority.html">processor_set_max_priority</a> - Sets the maximum scheduling priority for a processor set.<br>
+<a href="P_set_policy_control.html">processor_set_policy_control</a> - Set target processor set's scheduling policy state.<br>
+<a href="P_set_policy_disable.html">processor_set_policy_disable</a> - Enables a scheduling policy for a processor set.<br>
+<a href="P_set_policy_enable.html">processor_set_policy_enable</a> - Enables a scheduling policy for a processor set.<br>
+<a href="processor_set_statistics.html">processor_set_statistics</a> - Return scheduling statistics for a processor set.<br>
+<a href="processor_set_tasks.html">processor_set_tasks</a> - Return all tasks currently assigned to the target processor set.<br>
+<a href="processor_set_threads.html">processor_set_threads</a> - Return all threads currently assigned to the target processor set.<br>
+<a href="task_assign.html">task_assign</a> - Assign a task to a processor set.<br>
+<a href="task_assign_default.html">task_assign_default</a> - Assign a task to the default processor set.<br>
+<a href="task_get_assignment.html">task_get_assignment</a> - Create a new task with an explicit security token.<br>
+<a href="thread_assign.html">thread_assign</a> - Assign a thread to a processor set.<br>
+<a href="thread_assign_default.html">thread_assign_default</a> - Assign a thread to the default processor set.<br>
+<a href="thread_get_assignment.html">thread_get_assignment</a> - Return the processor set to which a thread is assigned.<br>
+</p>
+Processor Set Data Structures
+<p>
+<a href="processor_set_basic_info.html">processor_set_basic_info</a> - Defines the basic information about a processor set.<br>
+<a href="processor_set_load_info.html">processor_set_load_info</a> - Defines the scheduling statistics for a processor set.<br>
+</p>
+</blockquote>
+
+<h4>Clock Interface</h4>
+<blockquote>
+<p>
+<a href="clock_alarm.html">clock_alarm</a> - Set up an alarm.<br>
+<a href="clock_get_attributes.html">clock_get_attributes</a> - Return attributes of a clock.<br>
+<a href="clock_get_time.html">clock_get_time</a> - Return the current time.<br>
+<a href="clock_map_time.html">clock_map_time</a> - Return a memory object that maps a clock.<br>
+<a href="clock_set_attributes.html">clock_set_attributes</a> - Set a particular clock's attributes.<br>
+<a href="clock_set_time.html">clock_set_time</a> - Set the current time.<br>
+<a href="clock_sleep.html">clock_sleep</a> - Delay the invoking thread until a specified time.<br>
+</p>
+Clock Data Structures
+<p>
+<a href="mapped_tvalspec.html">mapped_tvalspec</a> - Specifies the format the kernel uses to maintain a mapped clock's time.<br>
+<a href="tvalspec.html">tvalspec</a> - Defines format of system time values.<br>
+</p>
+Clock Interface Callbacks
+<p>
+<a href="clock_alarm_reply.html">clock_alarm_reply</a> - Ring a preset alarm.<br>
+</p>
+Clock Callback Server Helpers
+<p>
+<a href="clock_reply_server.html"> clock_reply_server</a> - Handle kernel-generated alarm.<br>
+</p>
+</blockquote>
+
+<h4>Multi-Computer Support Interface</h4>
+<blockquote>
+<i>
+These multi-computer support interfaces are no longer supported by
+the Mac OS X/Darwin kernel. If and when multi-computer support is
+added back in, something like these will likely be added.
+</i>
+<p>
+<a href="host_page_size.html">host_page_size</a> - Returns the page size for the given host.<br>
+<a href="ledger_get_remote.html">ledger_get_remote</a> - Return send right to specified host's remote ledger port.<br>
+<a href="ledger_set_remote.html">ledger_set_remote</a> - Set this host's remote ledger port.<br>
+</p>
+</blockquote>
+
+</blockquote>
+
+<h3>Machine Specific Interface</h3>
+<blockquote>
+
+<h4>Intel 386 Support</h4>
+<blockquote>
+<p>
+<a href="i386_get_ldt.html">i386_get_ldt</a> - Returns per-thread segment descriptors from the local descriptor table (LDT).<br>
+<a href="i386_io_port_add.html">i386_io_port_add</a> - Adds a device to the I/O permission bitmap for a thread. <br>
+<a href="i386_io_port_list.html">i386_io_port_list</a> - Returns a list of the devices named in the thread's I/O permission bitmap.<br>
+<a href="i386_io_port_remove.html">i386_io_port_remove</a> - Removes the specified device from the thread's I/O permission bitmap.<br>
+<a href="i386_set_ldt.html">i386_set_ldt</a> - Allows a thread to have a private local descriptor table (LDT).<br>
+</p>
+</blockquote>
+
+<h4>PowerPC Support</h4>
+<blockquote>
+<p>
+</p>
+</blockquote>
+
+</blockquote>
+
+</body>
+
+</html>
+
#include <kern/thread.h>
#if defined(__arm64__)
#include <pexpert/arm64/board_config.h>
+#if XNU_MONITOR
+#include <arm64/ppl/tests/shart.h>
+#endif
#endif
extern ledger_template_t task_ledger_template;
kern_return_t
test_pmap_iommu_disconnect(void)
{
+#if XNU_MONITOR
+ kern_return_t kr = KERN_SUCCESS;
+ pmap_t new_pmap = pmap_create_wrapper(0);
+
+ vm_page_t m = vm_page_grab();
+
+ vm_page_lock_queues();
+ if (m != VM_PAGE_NULL) {
+ vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
+ }
+ vm_page_unlock_queues();
+
+ shart_ppl *iommu = NULL;
+ kr = pmap_iommu_init(shart_get_desc(), "sharttest0", NULL, 0, (ppl_iommu_state**)(&iommu));
+
+ if (kr != KERN_SUCCESS) {
+ goto cleanup;
+ }
+
+ if ((new_pmap == NULL) || (m == VM_PAGE_NULL) || (iommu == NULL)) {
+ kr = KERN_FAILURE;
+ goto cleanup;
+ }
+
+ ppnum_t phys_page = VM_PAGE_GET_PHYS_PAGE(m);
+
+ const ppl_iommu_seg shart_segs[] = {
+ {.iova = 0,
+ .paddr = ptoa(phys_page),
+ .nbytes = PAGE_SIZE,
+ .prot = VM_PROT_READ,
+ .refcon = 0},
+
+ {.iova = 1,
+ .paddr = ptoa(phys_page),
+ .nbytes = PAGE_SIZE,
+ .prot = VM_PROT_READ | VM_PROT_WRITE,
+ .refcon = 0},
+
+ {.iova = 2,
+ .paddr = ptoa(phys_page),
+ .nbytes = PAGE_SIZE,
+ .prot = VM_PROT_READ,
+ .refcon = 0},
+
+ {.iova = 3,
+ .paddr = ptoa(phys_page),
+ .nbytes = PAGE_SIZE,
+ .prot = VM_PROT_READ,
+ .refcon = 0}
+ };
+
+ /* Phase 1: one CPU mapping */
+ kr = pmap_enter(new_pmap, PMAP_TEST_VA, phys_page, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ assert(kr == KERN_SUCCESS);
+ assert(!pmap_verify_free(phys_page));
+ pmap_disconnect(phys_page);
+ assert(pmap_verify_free(phys_page));
+
+ /* Phase 2: two CPU mappings */
+ kr = pmap_enter(new_pmap, PMAP_TEST_VA, phys_page, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_enter(new_pmap, PMAP_TEST_VA + PAGE_SIZE, phys_page, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ assert(kr == KERN_SUCCESS);
+ assert(!pmap_verify_free(phys_page));
+ pmap_disconnect(phys_page);
+ assert(pmap_verify_free(phys_page));
+
+ /* Phase 3: one IOMMU mapping */
+ kr = pmap_iommu_map(&iommu->super, shart_segs, 1, 0, NULL);
+ assert(kr == KERN_SUCCESS);
+ assert(!pmap_verify_free(phys_page));
+ pmap_disconnect(phys_page);
+ assert(!pmap_verify_free(phys_page));
+ pmap_iommu_unmap(&iommu->super, shart_segs, 1, 0, NULL);
+ assert(pmap_verify_free(phys_page));
+
+ /* Phase 4: two IOMMU mappings */
+ kr = pmap_iommu_map(&iommu->super, shart_segs, 2, 0, NULL);
+ assert(kr == KERN_SUCCESS);
+ assert(!pmap_verify_free(phys_page));
+ pmap_disconnect(phys_page);
+ assert(!pmap_verify_free(phys_page));
+ pmap_iommu_unmap(&iommu->super, &shart_segs[1], 1, 0, NULL);
+ assert(!pmap_verify_free(phys_page));
+ pmap_disconnect(phys_page);
+ assert(!pmap_verify_free(phys_page));
+ pmap_iommu_unmap(&iommu->super, shart_segs, 1, 0, NULL);
+ assert(pmap_verify_free(phys_page));
+
+ /* Phase 5: combined CPU and IOMMU mappings */
+ kr = pmap_iommu_map(&iommu->super, shart_segs, 1, 0, NULL);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_enter(new_pmap, PMAP_TEST_VA, phys_page, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_iommu_map(&iommu->super, &shart_segs[1], 2, 0, NULL);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_enter(new_pmap, PMAP_TEST_VA + PAGE_SIZE, phys_page, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_iommu_map(&iommu->super, &shart_segs[3], 1, 0, NULL);
+ assert(kr == KERN_SUCCESS);
+ assert(!pmap_verify_free(phys_page));
+ pmap_disconnect(phys_page);
+ assert(!pmap_verify_free(phys_page));
+ pmap_iommu_unmap(&iommu->super, shart_segs, 4, 0, NULL);
+ assert(pmap_verify_free(phys_page));
+
+ /* Phase 6: differently combined CPU and IOMMU mappings */
+ kr = pmap_enter(new_pmap, PMAP_TEST_VA, phys_page, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_iommu_map(&iommu->super, &shart_segs[1], 3, 0, NULL);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_enter(new_pmap, PMAP_TEST_VA + PAGE_SIZE, phys_page, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_iommu_map(&iommu->super, shart_segs, 1, 0, NULL);
+ assert(kr == KERN_SUCCESS);
+ kr = pmap_enter(new_pmap, PMAP_TEST_VA + (2 * PAGE_SIZE), phys_page, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
+ assert(kr == KERN_SUCCESS);
+ assert(!pmap_verify_free(phys_page));
+ pmap_iommu_unmap(&iommu->super, &shart_segs[2], 1, 0, NULL);
+ assert(!pmap_verify_free(phys_page));
+ pmap_disconnect(phys_page);
+ assert(!pmap_verify_free(phys_page));
+ pmap_iommu_unmap(&iommu->super, shart_segs, 4, 0, NULL);
+ assert(pmap_verify_free(phys_page));
+ pmap_disconnect(phys_page);
+ assert(pmap_verify_free(phys_page));
+
+cleanup:
+
+ if (iommu != NULL) {
+ pmap_iommu_ioctl(&iommu->super, SHART_IOCTL_TEARDOWN, NULL, 0, NULL, 0);
+ }
+ vm_page_lock_queues();
+ if (m != VM_PAGE_NULL) {
+ vm_page_free(m);
+ }
+ vm_page_unlock_queues();
+ if (new_pmap != NULL) {
+ pmap_destroy(new_pmap);
+ }
+
+ return kr;
+#else
return KERN_SUCCESS;
+#endif
}
kern_return_t
if (!copy) {
if (src_entry->used_for_jit == TRUE) {
if (same_map) {
+#if __APRR_SUPPORTED__
+ /*
+ * Disallow re-mapping of any JIT regions on APRR devices.
+ */
+ result = KERN_PROTECTION_FAILURE;
+ break;
+#endif /* __APRR_SUPPORTED__*/
} else {
#if CONFIG_EMBEDDED
/*
#endif
+#if __AMP__
+int vm_compressor_ebound = 1;
+int vm_pgo_pbound = 0;
+extern void thread_bind_cluster_type(char);
+#endif /* __AMP__ */
/*
KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
q = cq->q;
+#if __AMP__
+ if (vm_compressor_ebound && (vm_pageout_state.vm_compressor_thread_count > 1)) {
+ local_batch_size = (q->pgo_maxlaundry >> 3);
+ local_batch_size = MAX(local_batch_size, 16);
+ } else {
+ local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
+ }
+#else
local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
+#endif
#if RECORD_THE_COMPRESSED_DATA
if (q->pgo_laundry) {
}
+#if __AMP__
+ if (vm_compressor_ebound) {
+ thread_bind_cluster_type('E');
+ }
+#endif /* __AMP__ */
thread_set_thread_name(current_thread(), "VM_compressor");
#if DEVELOPMENT || DEBUG
+#if __AMP__
+ PE_parse_boot_argn("vmpgo_pcluster", &vm_pgo_pbound, sizeof(vm_pgo_pbound));
+ if (vm_pgo_pbound) {
+ thread_bind_cluster_type('P');
+ }
+#endif /* __AMP__ */
splx(s);
PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
sizeof(vm_pageout_state.vm_compressor_thread_count));
+#if __AMP__
+ PE_parse_boot_argn("vmcomp_ecluster", &vm_compressor_ebound, sizeof(vm_compressor_ebound));
+ if (vm_compressor_ebound) {
+ vm_pageout_state.vm_compressor_thread_count = 2;
+ }
+#endif
if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus) {
vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1;
}
static struct tbd_ops t8015_funcs = {NULL, NULL, NULL};
#endif /* defined(ARM_BOARD_CLASS_T8015) */
+#if defined(ARM_BOARD_CLASS_T8020)
+static struct tbd_ops t8020_funcs = {NULL, NULL, NULL};
+#endif /* defined(ARM_BOARD_CLASS_T8020) */
+#if defined(ARM_BOARD_CLASS_T8006)
+static struct tbd_ops t8006_funcs = {NULL, NULL, NULL};
+#endif /* defined(ARM_BOARD_CLASS_T8006) */
+#if defined(ARM_BOARD_CLASS_T8027)
+static struct tbd_ops t8027_funcs = {NULL, NULL, NULL};
+#endif /* defined(ARM_BOARD_CLASS_T8027) */
+#if defined(ARM_BOARD_CLASS_T8028)
+static struct tbd_ops t8028_funcs = {NULL, NULL, NULL};
+#endif /* defined(ARM_BOARD_CLASS_T8028) */
+#if defined(ARM_BOARD_CLASS_T8030)
+static struct tbd_ops t8030_funcs = {NULL, NULL, NULL};
+#endif /* defined(ARM_BOARD_CLASS_T8030) */
tbd_funcs = &t8015_funcs;
} else
#endif
+#if defined(ARM_BOARD_CLASS_T8020)
+ if (!strcmp(gPESoCDeviceType, "t8020-io")) {
+ tbd_funcs = &t8020_funcs;
+ } else
+#endif
+#if defined(ARM_BOARD_CLASS_T8006)
+ if (!strcmp(gPESoCDeviceType, "t8006-io")) {
+ tbd_funcs = &t8006_funcs;
+ } else
+#endif
+#if defined(ARM_BOARD_CLASS_T8027)
+ if (!strcmp(gPESoCDeviceType, "t8027-io")) {
+ tbd_funcs = &t8027_funcs;
+ } else
+#endif
+#if defined(ARM_BOARD_CLASS_T8028)
+ if (!strcmp(gPESoCDeviceType, "t8028-io")) {
+ tbd_funcs = &t8028_funcs;
+ } else
+#endif
+#if defined(ARM_BOARD_CLASS_T8030)
+ if (!strcmp(gPESoCDeviceType, "t8030-io")) {
+ tbd_funcs = &t8030_funcs;
+ } else
+#endif
#if defined(ARM_BOARD_CLASS_BCM2837)
if (!strcmp(gPESoCDeviceType, "bcm2837-io")) {
tbd_funcs = &bcm2837_funcs;
#endif /* defined (HAS_KTRR) */
+#if defined(HAS_CTRR)
+#ifdef ASSEMBLER
+#define ARM64_REG_CTRR_A_LWR_EL1 S3_4_c15_c2_3
+#define ARM64_REG_CTRR_A_UPR_EL1 S3_4_c15_c2_4
+#define ARM64_REG_CTRR_CTL_EL1 S3_4_c15_c2_5
+#define ARM64_REG_CTRR_LOCK_EL1 S3_4_c15_c2_2
+
+#define ACC_CTRR_A_LWR_EL2 S3_4_c15_c11_0
+#define ACC_CTRR_A_UPR_EL2 S3_4_c15_c11_1
+#define ACC_CTRR_CTL_EL2 S3_4_c15_c11_4
+#define ACC_CTRR_LOCK_EL2 S3_4_c15_c11_5
+#else /* ASSEMBLER */
+#define ARM64_REG_CTRR_A_LWR_EL1 "S3_4_c15_c2_3"
+#define ARM64_REG_CTRR_A_UPR_EL1 "S3_4_c15_c2_4"
+#define ARM64_REG_CTRR_CTL_EL1 "S3_4_c15_c2_5"
+#define ARM64_REG_CTRR_LOCK_EL1 "S3_4_c15_c2_2"
+
+#define ACC_CTRR_A_LWR_EL2 "S3_4_c15_c11_0"
+#define ACC_CTRR_A_UPR_EL2 "S3_4_c15_c11_1"
+#define ACC_CTRR_CTL_EL2 "S3_4_c15_c11_4"
+#define ACC_CTRR_LOCK_EL2 "S3_4_c15_c11_5"
+#endif /* ASSEMBLER */
+
+#define CTRR_CTL_EL1_A_MMUOFF_WRPROTECT (1 << 0)
+#define CTRR_CTL_EL1_A_MMUON_WRPROTECT (1 << 1)
+#define CTRR_CTL_EL1_B_MMUOFF_WRPROTECT (1 << 2)
+#define CTRR_CTL_EL1_B_MMUON_WRPROTECT (1 << 3)
+#define CTRR_CTL_EL1_A_PXN (1 << 4)
+#define CTRR_CTL_EL1_B_PXN (1 << 5)
+#define CTRR_CTL_EL1_A_UXN (1 << 6)
+#define CTRR_CTL_EL1_B_UXN (1 << 7)
+
+#endif /* defined (HAS_CTRR) */
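The quoted forms in the non-ASSEMBLER branch are meant to be consumed by clang's special-register builtins. A hypothetical debug helper (not part of this change) reading the EL1 CTRR control and lock registers might look like:

static inline void
ctrr_dump_el1(void)
{
	/* __builtin_arm_rsr64() takes the "S3_..." encoding as a string literal. */
	uint64_t ctl  = __builtin_arm_rsr64(ARM64_REG_CTRR_CTL_EL1);
	uint64_t lock = __builtin_arm_rsr64(ARM64_REG_CTRR_LOCK_EL1);

	kprintf("CTRR ctl=0x%llx (A_PXN=%llu) lock=0x%llx\n",
	    ctl, (ctl & CTRR_CTL_EL1_A_PXN) >> 4, lock);
}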
+
+#if defined(HAS_IPI)
+
+#define ARM64_REG_IPI_RR_TYPE_IMMEDIATE (0 << 28)
+#define ARM64_REG_IPI_RR_TYPE_RETRACT (1 << 28)
+#define ARM64_REG_IPI_RR_TYPE_DEFERRED (2 << 28)
+#define ARM64_REG_IPI_RR_TYPE_NOWAKE (3 << 28)
+
+#if defined(HAS_CLUSTER)
+#define ARM64_REG_IPI_RR_LOCAL "S3_5_c15_c0_0"
+#define ARM64_REG_IPI_RR_GLOBAL "S3_5_c15_c0_1"
+#else /* defined(HAS_CLUSTER) */
+#define ARM64_REG_IPI_RR "S3_5_c15_c0_1"
+#endif /* defined(HAS_CLUSTER) */
+
+#define ARM64_REG_IPI_SR "S3_5_c15_c1_1"
+#define ARM64_REG_IPI_CR "S3_5_c15_c3_1"
+
+#endif /* defined(HAS_IPI) */
#endif /* APPLE_ARM64_ARCH_FAMILY */
+#if defined(HAS_NEX_PG)
+#define ARM64_REG_HID13 S3_0_c15_c14_0
+#define ARM64_REG_HID13_RstCyc_mask (0xfULL << 60)
+#define ARM64_REG_HID13_RstCyc_val (0xcULL << 60)
+
+#define ARM64_REG_HID14 S3_0_c15_c15_0
+#define ARM64_REG_HID14_NexPwgEn (1ULL << 32)
+#endif /* defined(HAS_NEX_PG) */
+#if defined(HAS_BP_RET)
+#define ARM64_REG_ACC_CFG S3_5_c15_c4_0
+#define ARM64_REG_ACC_CFG_bdpSlpEn (1ULL << 2)
+#define ARM64_REG_ACC_CFG_btpSlpEn (1ULL << 3)
+#define ARM64_REG_ACC_CFG_bpSlp_mask 3
+#define ARM64_REG_ACC_CFG_bpSlp_shift 2
+#endif /* defined(HAS_BP_RET) */
#if defined(HAS_APPLE_PAC)
#endif /* ASSEMBLER */
#endif /* HAS_APPLE_PAC */
+#if defined(HAS_VMSA_LOCK)
+
+#define ARM64_REG_VMSA_LOCK_EL1 S3_4_c15_c1_2
+
+#define VMSA_LOCK_VBAR_EL1 (1ULL << 0)
+#define VMSA_LOCK_SCTLR_EL1 (1ULL << 1)
+#define VMSA_LOCK_TCR_EL1 (1ULL << 2)
+#define VMSA_LOCK_TTBR0_EL1 (1ULL << 3)
+#define VMSA_LOCK_TTBR1_EL1 (1ULL << 4)
+#define VMSA_LOCK_SCTLR_M_BIT (1ULL << 63)
+
+#endif /* HAS_VMSA_LOCK */
#endif
#endif /* ARM64_BOARD_CONFIG_T8015 */
+#ifdef ARM64_BOARD_CONFIG_T8020
+/*
+ * The LLC size for Vortex is 8MB, but the LLC on Tempest is only 2MB.
+ * We use the larger cache size here. The expectation is
+ * that this may cause flushes from Tempest to be less efficient
+ * (cycles will be wasted on unnecessary way/set operations), but it
+ * will be technically correct... the best kind of correct.
+ */
+#define APPLE_ARM64_ARCH_FAMILY 1
+#define APPLEVORTEX
+#define ARM_ARCH_TIMER
+#define KERNEL_INTEGRITY_CTRR
+#include <pexpert/arm64/T8020.h>
+#define __ARM_L2CACHE_SIZE_LOG__ 23
+#define ARM_BOARD_WFE_TIMEOUT_NS 1000
+#define ARM_BOARD_CLASS_T8020
+#define CPU_COUNT 6
+#define CPU_CLUSTER_OFFSETS {0, 4}
+#define HAS_UNCORE_CTRS 1
+#define UNCORE_VERSION 2
+#define UNCORE_PER_CLUSTER 1
+#define UNCORE_NCTRS 16
+#define CORE_NCTRS 10
+#define PMAP_PV_LOAD_FACTOR 5
+#define PMAP_CS 1
+#define PMAP_CS_ENABLE 1
+#endif /* ARM64_BOARD_CONFIG_T8020 */
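For reference, these board configs encode the last-level cache size as a power of two: bytes = 1 << __ARM_L2CACHE_SIZE_LOG__, so the value 23 above corresponds to the 8MB Vortex LLC (1 << 23 = 8 MiB), while the Tempest-only T8006 below uses 21, i.e. 1 << 21 = 2 MiB.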
+#ifdef ARM64_BOARD_CONFIG_T8006
+/*
+ * The T8006 consists of 2 Tempest cores (i.e. T8020 eCores) and for most
+ * of our purposes here may be considered a functional subset of T8020.
+ */
+#define APPLE_ARM64_ARCH_FAMILY 1
+#define APPLEVORTEX
+#define ARM_ARCH_TIMER
+#define KERNEL_INTEGRITY_CTRR
+#include <pexpert/arm64/T8020.h>
+#define __ARM_L2CACHE_SIZE_LOG__ 21
+#define ARM_BOARD_WFE_TIMEOUT_NS 1000
+#define ARM_BOARD_CLASS_T8006
+#define PEXPERT_NO_3X_IMAGES 1
+#define CORE_NCTRS 10
+#define PMAP_PV_LOAD_FACTOR 5
+#define PMAP_CS 1
+#define PMAP_CS_ENABLE 1
+#endif /* ARM64_BOARD_CONFIG_T8006 */
+#ifdef ARM64_BOARD_CONFIG_T8027
+#define APPLE_ARM64_ARCH_FAMILY 1
+#define APPLEVORTEX
+#define ARM_ARCH_TIMER
+#define KERNEL_INTEGRITY_CTRR
+#include <pexpert/arm64/T8020.h>
+#define __ARM_L2CACHE_SIZE_LOG__ 23
+#define ARM_BOARD_WFE_TIMEOUT_NS 1000
+#define ARM_BOARD_CLASS_T8027
+#define CPU_COUNT 8
+#define CPU_CLUSTER_OFFSETS {0, 4}
+#define HAS_UNCORE_CTRS 1
+#define UNCORE_VERSION 2
+#define UNCORE_PER_CLUSTER 1
+#define UNCORE_NCTRS 16
+#define CORE_NCTRS 10
+#define PMAP_PV_LOAD_FACTOR 5
+#define PMAP_CS 1
+#define PMAP_CS_ENABLE 1
+#endif /* ARM64_BOARD_CONFIG_T8027 */
+#ifdef ARM64_BOARD_CONFIG_T8028
+#define APPLE_ARM64_ARCH_FAMILY 1
+#define APPLEVORTEX
+#define ARM_ARCH_TIMER
+#define KERNEL_INTEGRITY_CTRR
+#include <pexpert/arm64/T8020.h>
+#define __ARM_L2CACHE_SIZE_LOG__ 23
+#define ARM_BOARD_WFE_TIMEOUT_NS 1000
+#define ARM_BOARD_CLASS_T8028
+#define CPU_COUNT 8
+#define CPU_CLUSTER_OFFSETS {0, 4}
+#define HAS_UNCORE_CTRS 1
+#define UNCORE_VERSION 2
+#define UNCORE_PER_CLUSTER 1
+#define UNCORE_NCTRS 16
+#define CORE_NCTRS 10
+#define PMAP_PV_LOAD_FACTOR 5
+#define PMAP_CS 1
+#define PMAP_CS_ENABLE 1
+#endif /* ARM64_BOARD_CONFIG_T8028 */
+#ifdef ARM64_BOARD_CONFIG_T8030
+/*
+ * The LLC size for Lightning is 8MB, but the LLC on Thunder is only 4MB.
+ * We use the larger cache size here. The expectation is
+ * that this may cause flushes from Thunder to be less efficient
+ * (cycles will be wasted on unnecessary way/set operations), but it
+ * will be technically correct... the best kind of correct.
+ */
+#define APPLE_ARM64_ARCH_FAMILY 1
+#define APPLELIGHTNING
+#define ARM_ARCH_TIMER
+#define KERNEL_INTEGRITY_CTRR
+#include <pexpert/arm64/T8030.h>
+#define __ARM_L2CACHE_SIZE_LOG__ 23
+#define ARM_BOARD_WFE_TIMEOUT_NS 1000
+#define ARM_BOARD_CLASS_T8030
+#define CPU_COUNT 6
+#define CPU_CLUSTER_OFFSETS {0, 4}
+#define CPU_PIO_RO_CTL_OFFSETS {0x210055000, 0x210155000, 0x210255000, 0x210355000, 0x211055000, 0x211155000}
+#define CLUSTER_PIO_RO_CTL_OFFSETS {0x210e49000, 0x211e49000}
+#define HAS_UNCORE_CTRS 1
+#define UNCORE_VERSION 2
+#define UNCORE_PER_CLUSTER 1
+#define UNCORE_NCTRS 16
+#define CORE_NCTRS 10
+#define PMAP_PV_LOAD_FACTOR 7
+#define PMAP_CS 1
+#define PMAP_CS_ENABLE 1
+#endif /* ARM64_BOARD_CONFIG_T8030 */
/* Map the physical aperture */
kasan_map_shadow(kernel_vtop, physmap_vtop - kernel_vtop, true);
-#if defined(KERNEL_INTEGRITY_KTRR)
+#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
/* Pre-allocate all the L3 page table pages to avoid triggering KTRR */
kasan_map_shadow_internal(VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS + 1, false, false);
#endif
int mac_priv_check(kauth_cred_t cred, int priv);
int mac_priv_grant(kauth_cred_t cred, int priv);
int mac_proc_check_debug(proc_t proc1, proc_t proc2);
+int mac_proc_check_dump_core(proc_t proc);
int mac_proc_check_proc_info(proc_t curp, proc_t target, int callnum, int flavor);
int mac_proc_check_get_cs_info(proc_t curp, proc_t target, unsigned int op);
int mac_proc_check_set_cs_info(proc_t curp, proc_t target, unsigned int op);
kauth_cred_t cred,
int priv
);
+/**
+ * @brief Access control over process core dumps
+ * @param proc Subject process
+ *
+ * Determine whether a core dump may be written to disk for the subject
+ * identified.
+ *
+ * @return Return 0 if access is granted, otherwise an appropriate value for
+ * errno should be returned.
+ */
+typedef int mpo_proc_check_dump_core_t(
+ struct proc *proc
+ );
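For policy authors, a minimal sketch of supplying this hook from a policy module follows (the policy name and the pid-1 rule are purely illustrative, not part of this change):

#include <sys/errno.h>          /* EPERM */
#include <sys/proc.h>           /* proc_pid() */
#include <security/mac_policy.h>

/* Illustrative rule: never allow core dumps of launchd (pid 1). */
static int
example_proc_check_dump_core(struct proc *p)
{
	if (proc_pid(p) == 1) {
		return EPERM;
	}
	return 0;
}

static struct mac_policy_ops example_policy_ops = {
	.mpo_proc_check_dump_core = example_proc_check_dump_core,
	/* ... remaining hooks left NULL ... */
};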
/**
* @brief Access control check for debugging process
* @param cred Subject credential
* Please note that this should be kept in sync with the check assumptions
* policy in bsd/kern/policy_check.c (policy_ops struct).
*/
-#define MAC_POLICY_OPS_VERSION 58 /* inc when new reserved slots are taken */
+#define MAC_POLICY_OPS_VERSION 59 /* inc when new reserved slots are taken */
struct mac_policy_ops {
mpo_audit_check_postselect_t *mpo_audit_check_postselect;
mpo_audit_check_preselect_t *mpo_audit_check_preselect;
mpo_proc_check_setlcid_t *mpo_proc_check_setlcid;
mpo_proc_check_signal_t *mpo_proc_check_signal;
mpo_proc_check_wait_t *mpo_proc_check_wait;
+ mpo_proc_check_dump_core_t *mpo_proc_check_dump_core;
mpo_reserved_hook_t *mpo_reserved5;
- mpo_reserved_hook_t *mpo_reserved6;
mpo_socket_check_accept_t *mpo_socket_check_accept;
mpo_socket_check_accepted_t *mpo_socket_check_accepted;
return error;
}
+int
+mac_proc_check_dump_core(struct proc *proc)
+{
+ int error;
+
+#if SECURITY_MAC_CHECK_ENFORCE
+ /* 21167099 - only check if we allow write */
+ if (!mac_proc_enforce) {
+ return 0;
+ }
+#endif
+ if (!mac_proc_check_enforce(proc)) {
+ return 0;
+ }
+
+ MAC_CHECK(proc_check_dump_core, proc);
+
+ return error;
+}
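A caller that produces core dumps would consult this check before doing any work, along these lines (a sketch of the intended call site; `core_proc` stands in for the process being dumped):

#if CONFIG_MACF
	error = mac_proc_check_dump_core(core_proc);
	if (error != 0) {
		return error;
	}
#endif /* CONFIG_MACF */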
+
int
mac_proc_check_fork(proc_t curp)
{
atm_diagnostic_flag: OTHER_CFLAGS += drop_priv.c
+atm_diagnostic_flag_entitled: CODE_SIGN_ENTITLEMENTS = atm_diagnostic_flag.entitlements
+atm_diagnostic_flag_entitled: OTHER_CFLAGS += drop_priv.c
+
testposixshm: INVALID_ARCHS = i386
avx: INVALID_ARCHS = i386
}
}
-T_DECL(toggle_atm_diagnostic_flag,
- "change the atm_diagnostic_flag, which should use the commpage",
- T_META_ASROOT(true))
+static void
+_toggle_atm_diagnostic_flag(void)
{
T_ATEND(_reset_atm_diagnostic_flag);
uint32_t f = _save_atm_diagnostic_flag();
"Ignoring host_set_atm_diagnostic_flag functionality. "
"Bailing gracefully.");
}
- T_EXPECT_MACH_SUCCESS(kr, "Set atm_diagnostic_flag");
+ T_EXPECT_MACH_ERROR(KERN_NO_ACCESS, kr,
+ "Deny change to atm_diagnostic_flag");
+}
+
+T_DECL(atm_diagnostic_flag_unentitled_privileged,
+ "expect to fail to set the atm_diagnostic_flag (unentitled, privileged)",
+ T_META_ASROOT(true))
+{
+ _toggle_atm_diagnostic_flag();
}
-T_DECL(unprivileged_atm_diagnostic_flag,
- "expect to fail to set the atm_diagnostic_flag",
+T_DECL(atm_diagnostic_flag_unentitled_unprivileged,
+ "expect to fail to set the atm_diagnostic_flag (unentitled, unprivileged)",
T_META_ASROOT(false))
{
drop_priv();
- T_ATEND(_reset_atm_diagnostic_flag);
- uint32_t f = _save_atm_diagnostic_flag();
- f ^= LIBTRACE_PRIVATE_DATA;
- kern_return_t kr = _mutate_atm_diagnostic_flag(f);
- T_EXPECT_MACH_ERROR(KERN_INVALID_ARGUMENT, kr,
- "Deny change to atm_diagnostic_flag");
+ _toggle_atm_diagnostic_flag();
}
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>com.apple.private.set-atm-diagnostic-flag</key>
+ <true/>
+</dict>
+</plist>
--- /dev/null
+#include <darwintest.h>
+
+#include <mach/mach_error.h>
+#include <mach/mach_host.h>
+
+T_GLOBAL_META(T_META_NAMESPACE("xnu.debugging"));
+
+/*
+ * The low 8 bits may be in use, so modify one
+ * of the upper 8 bits to ensure round-tripping.
+ */
+#define LIBTRACE_PRIVATE_DATA 0x01000000
+
+extern void drop_priv(void);
+
+static bool _needs_reset;
+static uint32_t _original;
+
+static uint32_t
+_save_atm_diagnostic_flag(void)
+{
+ kern_return_t kr;
+ kr = host_get_atm_diagnostic_flag(mach_host_self(), &_original);
+ T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "host_get_atm_diagnostic_flag()");
+ T_LOG("Original ATM diagnostic flag: 0x%08x", _original);
+ return _original;
+}
+
+static kern_return_t
+_mutate_atm_diagnostic_flag(uint32_t v)
+{
+ T_LOG("Try to set ATM diagnostic flag to: 0x%08x", v);
+ kern_return_t kr = host_set_atm_diagnostic_flag(mach_host_self(), v);
+ if (kr == KERN_SUCCESS) {
+ _needs_reset = true;
+ }
+ return kr;
+}
+
+static void
+_reset_atm_diagnostic_flag(void)
+{
+ if (!_needs_reset) {
+ return;
+ }
+ T_LOG("Reset ATM diagnostic flag to: 0x%08x", _original);
+ kern_return_t kr;
+ kr = host_set_atm_diagnostic_flag(mach_host_self(), _original);
+ if (kr != KERN_SUCCESS) {
+ T_ASSERT_FAIL("host_set_atm_diagnostic_flag() failed: %s",
+ mach_error_string(kr));
+ }
+}
+
+static void
+_toggle_atm_diagnostic_flag(void)
+{
+ T_ATEND(_reset_atm_diagnostic_flag);
+ uint32_t f = _save_atm_diagnostic_flag();
+ f ^= LIBTRACE_PRIVATE_DATA;
+ kern_return_t kr = _mutate_atm_diagnostic_flag(f);
+ if (kr == KERN_NOT_SUPPORTED) {
+ T_SKIP("Seems ATM is disabled on this platform. "
+ "Ignoring host_set_atm_diagnostic_flag functionality. "
+ "Bailing gracefully.");
+ }
+ T_EXPECT_MACH_SUCCESS(kr, "Set atm_diagnostic_flag");
+}
+
+T_DECL(atm_diagnostic_flag_entitled_privileged,
+ "change the atm_diagnostic_flag (entitled, privileged)",
+ T_META_ASROOT(true))
+{
+ _toggle_atm_diagnostic_flag();
+}
+
+T_DECL(atm_diagnostic_flag_entitled_unprivileged,
+ "change the atm_diagnostic_flag (entitled, unprivileged)",
+ T_META_ASROOT(false))
+{
+ drop_priv();
+ _toggle_atm_diagnostic_flag();
+}
--- /dev/null
+/*
+ * Must come before including darwintest.h
+ */
+#ifdef T_NAMESPACE
+#undef T_NAMESPACE
+#endif /* defined(T_NAMESPACE) */
+
+#include <darwintest.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#ifndef PRIVATE
+/*
+ * Need new CPU families.
+ */
+#define PRIVATE
+#include <mach/machine.h>
+#undef PRIVATE
+#else /* !defined(PRIVATE) */
+#include <mach/machine.h>
+#endif /* defined(PRIVATE) */
+#include <stdint.h>
+#include <System/sys/guarded.h>
+#include <System/sys/monotonic.h>
+#include <sys/ioctl.h>
+#include <sys/sysctl.h>
+#include <unistd.h>
+
+T_GLOBAL_META(
+ T_META_NAMESPACE("xnu.monotonic"),
+ T_META_CHECK_LEAKS(false),
+ T_META_ENABLED(false)
+ );
+
+static bool
+device_supports_uncore(void)
+{
+ int r;
+ int type, subtype;
+ unsigned int family;
+ size_t size = sizeof(type);
+
+ /*
+ * Only arm64 devices with Monsoon or Vortex cores support uncore counters.
+ */
+
+ r = sysctlbyname("hw.cputype", &type, &size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(r, "sysctlbyname(\"hw.cputype\")");
+ r = sysctlbyname("hw.cpusubtype", &subtype, &size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(r, "sysctlbyname(\"hw.cpusubtype\")");
+ r = sysctlbyname("hw.cpufamily", &family, &size, NULL, 0);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(r, "sysctlbyname(\"hw.cpufamily\")");
+
+ if (type == CPU_TYPE_ARM64 &&
+ subtype == CPU_SUBTYPE_ARM64_V8 &&
+ (family == CPUFAMILY_ARM_MONSOON_MISTRAL ||
+ family == CPUFAMILY_ARM_VORTEX_TEMPEST)) {
+ return true;
+ }
+
+ return false;
+}
+
+#define UNCORE_DEV_PATH "/dev/monotonic/uncore"
+
+static int
+open_uncore_error(int *error)
+{
+ guardid_t guard;
+ int fd;
+
+ guard = 0xa5adcafe;
+
+ T_SETUPBEGIN;
+
+ fd = guarded_open_np(UNCORE_DEV_PATH, &guard,
+ GUARD_CLOSE | GUARD_DUP | GUARD_WRITE, O_CLOEXEC | O_EXCL);
+ if (fd < 0 && errno == ENOENT) {
+ T_ASSERT_FALSE(device_supports_uncore(),
+ "lack of dev node implies no uncore support");
+ T_SKIP("uncore counters are unsupported");
+ __builtin_unreachable();
+ }
+
+ if (error == NULL) {
+ T_ASSERT_POSIX_SUCCESS(fd, "open '%s'", UNCORE_DEV_PATH);
+ } else {
+ *error = errno;
+ }
+
+ T_SETUPEND;
+
+ return fd;
+}
+
+static void
+uncore_counts(int fd, uint64_t ctr_mask, uint64_t *counts)
+{
+ int r;
+ union monotonic_ctl_counts *cts_ctl;
+
+ cts_ctl = (union monotonic_ctl_counts *)counts;
+ cts_ctl->in.ctr_mask = ctr_mask;
+
+ r = ioctl(fd, MT_IOC_COUNTS, cts_ctl);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(r, "MT_IOC_COUNTS got counter values");
+}
+
+#define REF_TIMEBASE_EVENT 0x3
+#define CTRS_MAX 32
+
+T_DECL(uncore_max_counters,
+ "ensure that the maximum number of uncore countes is sane",
+ T_META_ASROOT(true))
+{
+ int nctrs = 0;
+ int fd;
+
+ fd = open_uncore_error(NULL);
+
+ do {
+ union monotonic_ctl_add add_ctl;
+ int r;
+
+ add_ctl.in.config.event = REF_TIMEBASE_EVENT;
+ add_ctl.in.config.allowed_ctr_mask = UINT64_MAX;
+
+ r = ioctl(fd, MT_IOC_ADD, &add_ctl);
+ if (r < 0 && errno == E2BIG) {
+ break;
+ }
+
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(r, "added reference timebase event to counters");
+ nctrs++;
+ } while (nctrs < CTRS_MAX);
+
+ T_EXPECT_LT(nctrs, CTRS_MAX,
+ "only able to allocate a reasonable number of counters");
+}
+
+static uint32_t
+uncore_add(int fd, uint64_t event, uint64_t allowed_ctrs, int error)
+{
+ int save_errno;
+ int r;
+ uint32_t ctr;
+ union monotonic_ctl_add add_ctl;
+
+ add_ctl.in.config.event = event;
+ add_ctl.in.config.allowed_ctr_mask = allowed_ctrs;
+ r = ioctl(fd, MT_IOC_ADD, &add_ctl);
+ if (error) {
+ save_errno = errno;
+ T_EXPECT_LT(r, 0, "adding event to counter should fail");
+ T_EXPECT_EQ(save_errno, error,
+ "adding event to counter should fail with %d: %s",
+ error, strerror(error));
+ return UINT32_MAX;
+ } else {
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(r,
+ "added event %#" PRIx64 " to counters", event);
+ }
+
+ ctr = add_ctl.out.ctr;
+ T_QUIET; T_ASSERT_LT(ctr, (uint32_t)CTRS_MAX, "counter returned should be sane");
+ return ctr;
+}
+
+T_DECL(uncore_collision,
+ "ensure that trying to add an event on the same counter fails",
+ T_META_ASROOT(true))
+{
+ int fd;
+ uint32_t ctr;
+
+ fd = open_uncore_error(NULL);
+
+ ctr = uncore_add(fd, REF_TIMEBASE_EVENT, UINT64_MAX, 0);
+ T_LOG("added event to uncore counter %d\n", ctr);
+
+ (void)uncore_add(fd, REF_TIMEBASE_EVENT, UINT64_C(1) << ctr, ENOSPC);
+}
+
+static void
+uncore_enable(int fd)
+{
+ union monotonic_ctl_enable en_ctl = {
+ .in = { .enable = true }
+ };
+
+ T_ASSERT_POSIX_SUCCESS(ioctl(fd, MT_IOC_ENABLE, &en_ctl),
+ "enabling counters");
+}
+
+T_DECL(uncore_enabled_busy,
+ "ensure that trying to add an event while enabled fails",
+ T_META_ASROOT(true))
+{
+ int fd;
+
+ fd = open_uncore_error(NULL);
+
+ (void)uncore_add(fd, REF_TIMEBASE_EVENT, UINT64_MAX, 0);
+
+ uncore_enable(fd);
+ (void)uncore_add(fd, REF_TIMEBASE_EVENT, UINT64_MAX, EBUSY);
+}
+
+T_DECL(uncore_reset,
+ "ensure that resetting the counters works")
+{
+ int fd;
+ int r;
+
+ fd = open_uncore_error(NULL);
+
+ (void)uncore_add(fd, REF_TIMEBASE_EVENT, UINT64_C(1), 0);
+ (void)uncore_add(fd, REF_TIMEBASE_EVENT, UINT64_C(1), ENOSPC);
+
+ r = ioctl(fd, MT_IOC_RESET);
+ T_ASSERT_POSIX_SUCCESS(r, "resetting succeeds");
+
+ T_LOG("adding event to same counter after reset");
+ (void)uncore_add(fd, REF_TIMEBASE_EVENT, UINT64_C(1), 0);
+}
+
+#define SLEEP_USECS (500 * 1000)
+
+static int
+uncore_add_all(int fd, uint64_t event, int *nmonitors)
+{
+ int nctrs = 0;
+ int r;
+
+ do {
+ union monotonic_ctl_add add_ctl;
+
+ add_ctl.in.config.event = event;
+ add_ctl.in.config.allowed_ctr_mask = UINT64_MAX;
+
+ r = ioctl(fd, MT_IOC_ADD, &add_ctl);
+ if (r < 0 && errno == E2BIG) {
+ break;
+ }
+
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(r, "added event %#" PRIx64 " to counters",
+ event);
+ nctrs++;
+ } while (nctrs < CTRS_MAX);
+
+ if (nmonitors) {
+ union monotonic_ctl_info info_ctl;
+ r = ioctl(fd, MT_IOC_GET_INFO, &info_ctl);
+ T_QUIET; T_ASSERT_POSIX_SUCCESS(r, "got info about uncore counters");
+
+ *nmonitors = (int)info_ctl.out.nmonitors;
+ }
+
+ return nctrs;
+}
+
+T_DECL(uncore_accuracy,
+ "ensure that the uncore counters count accurately",
+ T_META_ASROOT(true))
+{
+ int fd;
+ int nctrs = 0;
+ int nmonitors = 0;
+ uint64_t ctr_mask;
+ uint64_t counts[2][CTRS_MAX];
+ uint64_t times[2];
+
+ fd = open_uncore_error(NULL);
+
+ /*
+ * The reference timebase event counts the same as mach_continuous_time
+ * (on hardware supporting uncore counters). Make sure that the counter
+ * is close to the values returned from the trap.
+ *
+ * Fill all the counters with this event.
+ */
+ nctrs = uncore_add_all(fd, REF_TIMEBASE_EVENT, &nmonitors);
+ ctr_mask = (UINT64_C(1) << nctrs) - 1;
+
+ T_LOG("added %d counters to check", nctrs);
+
+ uncore_enable(fd);
+
+ /*
+ * First, make sure there's an upper bound on the counter -- take the
+ * time around getting the counter values.
+ */
+
+ times[0] = mach_absolute_time();
+ uncore_counts(fd, ctr_mask, counts[0]);
+
+ usleep(SLEEP_USECS);
+
+ uncore_counts(fd, ctr_mask, counts[1]);
+ times[1] = mach_absolute_time();
+
+ T_QUIET; T_EXPECT_GT(times[1], times[0],
+ "mach_absolute_time is monotonically increasing");
+ for (int i = 0; i < nctrs; i++) {
+ T_EXPECT_GT(counts[1][i], counts[0][i],
+ "uncore counter %d value is monotonically increasing", i);
+ T_EXPECT_LT(counts[1][i] - counts[0][i], times[1] - times[0],
+ "reference timebase on uncore counter %d satisfies upper bound "
+ "from mach_absolute_time", i);
+ }
+
+ /*
+ * Next, the lower bound -- put mach_absolute_time inside getting the
+ * counter values.
+ */
+
+ uncore_counts(fd, ctr_mask, counts[0]);
+ times[0] = mach_absolute_time();
+
+ volatile int iterations = 100000;
+ while (iterations--) {
+ ;
+ }
+
+ times[1] = mach_absolute_time();
+ uncore_counts(fd, ctr_mask, counts[1]);
+
+ for (int mon = 0; mon < nmonitors; mon++) {
+ for (int i = 0; i < nctrs; i++) {
+ T_QUIET;
+ T_EXPECT_GT(counts[1][i * mon], counts[0][i * mon],
+ "uncore %d counter %d value is monotonically increasing",
+ mon, i);
+ T_EXPECT_GT(counts[1][i * mon] - counts[0][i * mon],
+ times[1] - times[0],
+ "reference timebase on uncore %d counter %d satisfies "
+ "lower bound from mach_absolute_time", mon, i);
+ }
+ }
+}
+
+T_DECL(uncore_ownership,
+ "ensure the dev node cannot be open in two places",
+ T_META_ASROOT(true))
+{
+ int fd;
+ int other_fd;
+ int error;
+
+ fd = open_uncore_error(NULL);
+
+ other_fd = open_uncore_error(&error);
+ T_ASSERT_LT(other_fd, 0, "opening a second uncore fd should fail");
+ T_ASSERT_EQ(error, EBUSY, "failure should be EBUSY");
+}
+
+T_DECL(uncore_root_required,
+ "ensure the dev node cannot be opened by non-root users",
+ T_META_ASROOT(false))
+{
+ int fd;
+ int error = 0;
+
+ T_SKIP("libdarwintest doesn't drop privileges properly");
+
+ fd = open_uncore_error(&error);
+ T_ASSERT_LT(fd, 0, "opening dev node should not return an fd");
+ T_ASSERT_EQ(error, EPERM,
+ "opening dev node as non-root user should fail with EPERM");
+}
+
+T_DECL(perf_uncore,
+ "measure the latency of accessing the counters",
+ T_META_TAG_PERF)
+{
+ int fd;
+ int nctrs;
+ int nmonitors;
+ int r;
+ uint64_t ctr_mask;
+ dt_stat_thread_instructions_t counts_instrs;
+ dt_stat_t counter_deltas;
+
+ counts_instrs = dt_stat_thread_instructions_create("ioctl_counts");
+ counter_deltas = dt_stat_create("abs_time", "between_each_counter");
+
+ fd = open_uncore_error(NULL);
+
+ nctrs = uncore_add_all(fd, REF_TIMEBASE_EVENT, &nmonitors);
+ ctr_mask = (UINT64_C(1) << nctrs) - 1;
+
+ uncore_enable(fd);
+
+ do {
+ dt_stat_token token;
+ uint64_t counts[nctrs * nmonitors];
+ union monotonic_ctl_counts *cts_ctl;
+
+ cts_ctl = (union monotonic_ctl_counts *)counts;
+ cts_ctl->in.ctr_mask = ctr_mask;
+
+ token = dt_stat_thread_instructions_begin(counts_instrs);
+ r = ioctl(fd, MT_IOC_COUNTS, cts_ctl);
+ dt_stat_thread_instructions_end(counts_instrs, token);
+ T_QUIET;
+ T_ASSERT_POSIX_SUCCESS(r,
+ "getting uncore counter values %#" PRIx64, ctr_mask);
+
+ for (int i = 0; i < (nctrs - 1); i++) {
+ dt_stat_add(counter_deltas, (double)(counts[i + 1] - counts[i]));
+ }
+ } while (!dt_stat_stable(counts_instrs) || !dt_stat_stable(counter_deltas));
+
+ dt_stat_finalize(counts_instrs);
+ dt_stat_finalize(counter_deltas);
+}