/*
 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <arm64/proc_reg.h>
#include <arm/machine_cpu.h>
#include <arm/cpu_internal.h>
#include <arm/cpuid.h>
#include <arm/io_map_entries.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/caches_internal.h>
#include <arm/misc_protos.h>
#include <arm/machdep_call.h>
#include <arm/machine_routines.h>
#include <arm/rtclock.h>
#include <arm/cpuid_internal.h>
#include <arm/cpu_capabilities.h>
#include <console/serial_protos.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <prng/random.h>
#include <kern/startup.h>
#include <kern/thread.h>
#include <kern/timer_queue.h>
#include <mach/machine.h>
#include <machine/atomic.h>
#include <machine/config.h>
#include <vm/vm_page.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_map.h>
#include <sys/codesign.h>
#include <sys/kdebug.h>
#include <kern/coalition.h>
#include <pexpert/device_tree.h>
#include <IOKit/IOPlatformExpert.h>
#if HIBERNATION
#include <IOKit/IOHibernatePrivate.h>
#endif /* HIBERNATION */

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#include <arm64/amcc_rorgn.h>
#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */

#include <libkern/section_keywords.h>
/*
 * On supported hardware, debuggable builds make the HID bits read-only
 * without locking them. This lets people manually modify HID bits while
 * debugging, since they can use a debugging tool to first reset the HID
 * bits back to read/write. However, it will still catch xnu changes that
 * accidentally write to HID bits after they've been made read-only.
 */
#if HAS_TWO_STAGE_SPR_LOCK && !(DEVELOPMENT || DEBUG)
#define USE_TWO_STAGE_SPR_LOCK
#endif
#define MPIDR_CPU_ID(mpidr_el1_val)     (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
#define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)
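/*
 * Illustrative sketch (not part of the original source): how these macros
 * decompose an MPIDR_EL1 value, assuming the usual AFF0 = bits[7:0] and
 * AFF1 = bits[15:8] layout. Guarded out so it has no effect on the build.
 */
#if 0
static void
mpidr_decompose_example(void)
{
	uint64_t mpidr = 0x0000000000000103ULL;             /* AFF1 = 1, AFF0 = 3 */
	unsigned int cpu_id = MPIDR_CPU_ID(mpidr);          /* 3: core index within the cluster */
	unsigned int cluster_id = MPIDR_CLUSTER_ID(mpidr);  /* 1: cluster index */
	(void)cpu_id;
	(void)cluster_id;
}
#endif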
static uint8_t cluster_initialized = 0;

uint32_t LockTimeOutUsec;
uint64_t TLockTimeOut;

uint64_t low_MutexSpin;
int64_t high_MutexSpin;

static uint64_t ml_wfe_hint_max_interval;
#define MAX_WFE_HINT_INTERVAL_US (500ULL)

/* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);
extern vm_offset_t segLOWEST;
extern vm_offset_t segLOWESTTEXT;
extern vm_offset_t segLASTB;
extern unsigned long segSizeLAST;

/* ARM64 specific bounds; used to test for presence in the kernelcache. */
extern vm_offset_t vm_kernelcache_base;
extern vm_offset_t vm_kernelcache_top;
#if defined(HAS_IPI)
unsigned int gFastIPI = 1;
#define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
    kDeferredIPITimerDefault);
#endif /* defined(HAS_IPI) */
thread_t Idle_context(void);
SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
	.version = CPU_TOPOLOGY_VERSION,
	.cpus = topology_cpu_array,
	.clusters = topology_cluster_array,
};
/*
 * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
 * entries of an arbitrary data type. This is intended for use by specialized consumers
 * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
 * as follows:
 *     hypothetical_array[cluster_offsets[AFF1] + AFF0]
 * Most consumers should instead use general-purpose facilities such as PERCPU or
 * ml_get_cpu_number().
 */
SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];
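/*
 * Illustrative sketch (not in the original source): indexing a hypothetical
 * per-CPU array via cluster_offsets, exactly as the formula above describes.
 * The array name and element type are assumptions made purely for the example.
 */
#if 0
static uint64_t hypothetical_array[MAX_CPUS];

static uint64_t
read_percpu_slot_by_mpidr(uint64_t mpidr_el1_val)
{
	uint64_t aff0 = MPIDR_CPU_ID(mpidr_el1_val);
	uint64_t aff1 = MPIDR_CLUSTER_ID(mpidr_el1_val);
	/* cluster_offsets[AFF1] is -1 for clusters that were never discovered. */
	return hypothetical_array[cluster_offsets[aff1] + aff0];
}
#endif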
SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;

extern uint32_t lockdown_done;
/*
 * Represents regions of virtual address space that should be reserved
 * (pre-mapped) in each user address space.
 */
SECURITY_READ_ONLY_LATE(static struct vm_reserved_region) vm_reserved_regions[] = {
	{
		.vmrr_name = "GPU Carveout",
		.vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS,
		.vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS)
	},
	/*
	 * Reserve the virtual memory space representing the commpage nesting region
	 * to prevent user processes from allocating memory within it. The actual
	 * page table entries for the commpage are inserted by vm_commpage_enter().
	 * This vm_map_enter() just prevents userspace from allocating/deallocating
	 * anything within the entire commpage nested region.
	 */
	{
		.vmrr_name = "commpage nesting",
		.vmrr_addr = _COMM_PAGE64_NESTING_START,
		.vmrr_size = _COMM_PAGE64_NESTING_SIZE
	}
};
uint32_t get_arm_cpu_version(void);
#if defined(HAS_IPI)
static void
ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
{
#if HAS_CLUSTER
	uint64_t local_mpidr;
	/* NOTE: this logic expects that we are called in a non-preemptible
	 * context, or at least one in which the calling thread is bound
	 * to a single CPU. Otherwise we may migrate between choosing which
	 * IPI mechanism to use and issuing the IPI. */
	MRS(local_mpidr, "MPIDR_EL1");
	if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
		uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
		MSR(ARM64_REG_IPI_RR_LOCAL, x);
	} else {
#define IPI_RR_TARGET_CLUSTER_SHIFT 16
		uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
		MSR(ARM64_REG_IPI_RR_GLOBAL, x);
	}
#else
	uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
	MSR(ARM64_REG_IPI_RR, x);
#endif /* HAS_CLUSTER */
}
#endif /* defined(HAS_IPI) */
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
#else
	panic("Platform does not support ACC Fast IPI");
#endif
}
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
{
#if defined(HAS_IPI)
	/* adjust IPI_CR timer countdown value for deferred IPI
	 * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
	 * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
	 *
	 * global register, should only require a single write to update all
	 * CPU cores: from Skye ACC user spec section 5.7.3.3
	 *
	 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
	 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
	 */
	uint64_t abstime;

	nanoseconds_to_absolutetime(nanosecs, &abstime);

	abstime = MIN(abstime, 0xFFFF);

	/* update deferred_ipi_timer_ns with the new clamped value */
	absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);

	MSR(ARM64_REG_IPI_CR, abstime);
#else
	(void)nanosecs;
	panic("Platform does not support ACC Fast IPI");
#endif
}

uint64_t
ml_cpu_signal_deferred_get_timer()
{
#if defined(HAS_IPI)
	return deferred_ipi_timer_ns;
#else
	return 0;
#endif
}
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
#else
	panic("Platform does not support ACC Fast IPI deferral");
#endif
}
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
#else
	panic("Platform does not support ACC Fast IPI retraction");
#endif
}
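/*
 * Illustrative sketch (not part of the original source): how a caller on
 * HAS_IPI hardware might pair the deferred and retract signals. A deferred
 * IPI only fires once the IPI_CR countdown expires unless it is retracted
 * first. The target MPIDR value and decision flag are hypothetical.
 */
#if 0
static void
example_deferred_ipi(unsigned int target_cpu_mpidr, bool still_needed)
{
	ml_cpu_signal_deferred(target_cpu_mpidr);
	if (!still_needed) {
		ml_cpu_signal_retract(target_cpu_mpidr);
	}
}
#endif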
void
machine_idle(void)
{
	/* Interrupts are expected to be masked on entry or re-entry via
	 * Idle_load_context()
	 */
	assert((__builtin_arm_rsr("DAIF") & DAIF_IRQF) == DAIF_IRQF);
	Idle_context();
	__builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
}
294 OSSynchronizeIO(void)
296 __builtin_arm_dsb(DSB_SY
);
300 get_aux_control(void)
304 MRS(value
, "ACTLR_EL1");
309 get_mmu_control(void)
313 MRS(value
, "SCTLR_EL1");
322 MRS(value
, "TCR_EL1");
327 ml_get_interrupts_enabled(void)
332 if (value
& DAIF_IRQF
) {
343 MRS(value
, "TTBR0_EL1");
uint32_t
get_arm_cpu_version(void)
{
	uint32_t value = machine_read_midr();

	/* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
	return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
}
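/*
 * Illustrative note (not in the original source): assuming the usual MIDR_EL1
 * layout (revision in bits [3:0], variant in bits [23:20]), a MIDR value with
 * variant 1 and revision 2 (e.g. 0x00112022) makes this function return 0x12,
 * i.e. "r1p2".
 */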
bool
ml_feature_supported(uint32_t feature_bit)
{
	uint64_t aidr_el1_value = 0;

	MRS(aidr_el1_value, "AIDR_EL1");

	return aidr_el1_value & feature_bit;
}
/*
 * user_cont_hwclock_allowed()
 *
 * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
 * as a continuous time source (e.g. from mach_continuous_time)
 */
user_cont_hwclock_allowed(void)
{
#if HAS_CONTINUOUS_HWCLOCK
user_timebase_type(void)
{
	return USER_TIMEBASE_SPEC;
}
void
machine_startup(__unused boot_args * args)
{
#if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
	if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
		gFastIPI = 1;
	}
#endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG) */
	/*
	 * Kick off the kernel bootstrap.
	 */
	kernel_bootstrap();
	/* NOTREACHED */
}

typedef void (*invalidate_fn_t)(void);

static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL;

void set_invalidate_hmac_function(invalidate_fn_t fn);

void
set_invalidate_hmac_function(invalidate_fn_t fn)
{
	if (NULL != invalidate_hmac_function) {
		panic("Invalidate HMAC function already set");
	}

	invalidate_hmac_function = fn;
}
void
machine_lockdown(void)
{
	arm_vm_prot_finalize(PE_state.bootArgs);

#if CONFIG_KERNEL_INTEGRITY
#if KERNEL_INTEGRITY_WT
	/*
	 * Notify the monitor about the completion of early kernel bootstrap.
	 * From this point forward it will enforce the integrity of kernel text,
	 * rodata and page tables.
	 */
	monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
#endif /* KERNEL_INTEGRITY_WT */

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	/*
	 * Lock physical KTRR region. KTRR region is read-only. Memory outside
	 * the region is not executable at EL1.
	 */
#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */

#endif /* CONFIG_KERNEL_INTEGRITY */

	if (NULL != invalidate_hmac_function) {
		invalidate_hmac_function();
	}
}
	__unused vm_size_t size)
{
	return PE_boot_args();
}
void
slave_machine_init(__unused void *param)
{
	cpu_machine_init();     /* Initialize the processor */
	clock_init();           /* Init the clock */
}

/*
 *	Routine:        machine_processor_shutdown
 */
thread_t
machine_processor_shutdown(
	__unused thread_t thread,
	void (*doshutdown)(processor_t),
	processor_t processor)
{
	return Shutdown_context(doshutdown, processor);
}
/*
 *	Routine:        ml_init_lock_timeout
 */
void
ml_init_lock_timeout(void)
{
	uint64_t abstime;
	uint64_t mtxspin;
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
	uint32_t slto;

	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
	}

	nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
	LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
	LockTimeOut = (uint32_t)abstime;

	if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &abstime);
		TLockTimeOut = abstime;
	} else {
		TLockTimeOut = LockTimeOut >> 1;
	}

	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
		if (mtxspin > USEC_PER_SEC >> 4) {
			mtxspin = USEC_PER_SEC >> 4;
		}
		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
	} else {
		nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
	}
	MutexSpin = abstime;
	low_MutexSpin = MutexSpin;
	/*
	 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
	 * real_ncpus is not set at this time
	 *
	 * NOTE: active spinning is disabled in arm. It can be activated
	 * by setting high_MutexSpin through the sysctl.
	 */
	high_MutexSpin = low_MutexSpin;

	nanoseconds_to_absolutetime(MAX_WFE_HINT_INTERVAL_US * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
}
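/*
 * Illustrative note (not in the original source): booting with slto_us=500000
 * sets default_timeout_ns to 500000 * NSEC_PER_USEC = 0.5 s, so LockTimeOutUsec
 * becomes 500000 and LockTimeOut the corresponding number of timebase ticks;
 * with no tlto_us override, TLockTimeOut defaults to half of LockTimeOut.
 */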
/*
 *	This is called from the machine-independent routine cpu_up()
 *	to perform machine-dependent info updates.
 */
void
ml_cpu_up(void)
{
	os_atomic_inc(&machine_info.physical_cpu, relaxed);
	os_atomic_inc(&machine_info.logical_cpu, relaxed);
}
/*
 *	This is called from the machine-independent routine cpu_down()
 *	to perform machine-dependent info updates.
 */
void
ml_cpu_down(void)
{
	cpu_data_t *cpu_data_ptr;

	os_atomic_dec(&machine_info.physical_cpu, relaxed);
	os_atomic_dec(&machine_info.logical_cpu, relaxed);

	/*
	 * If we want to deal with outstanding IPIs, we need to
	 * do so relatively early in the processor_doshutdown path,
	 * as we pend decrementer interrupts using the IPI
	 * mechanism if we cannot immediately service them (if
	 * IRQ is masked). Do so now.
	 *
	 * We aren't on the interrupt stack here; would it make
	 * more sense to disable signaling and then enable
	 * interrupts? It might be a bit cleaner.
	 */
	cpu_data_ptr = getCpuDatap();
	cpu_data_ptr->cpu_running = FALSE;

	if (cpu_data_ptr != &BootCpuData) {
		/*
		 * Move all of this cpu's timers to the master/boot cpu,
		 * and poke it in case there's a sooner deadline for it to schedule.
		 */
		timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
		cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, NULL);
	}

	cpu_signal_handler_internal(TRUE);
}
/*
 *	Routine:        ml_cpu_get_info
 */
void
ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
{
	cache_info_t *cpuid_cache_info;

	cpuid_cache_info = cache_info();
	ml_cpu_info->vector_unit = 0;
	ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
	ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
	ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;

#if (__ARM_ARCH__ >= 7)
	ml_cpu_info->l2_settings = 1;
	ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
#else
	ml_cpu_info->l2_settings = 0;
	ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
#endif
	ml_cpu_info->l3_settings = 0;
	ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
}
ml_get_machine_mem(void)
{
	return machine_info.memory_size;
}
__attribute__((noreturn))
void
halt_all_cpus(boolean_t reboot)
{
	if (reboot) {
		printf("MACH Reboot\n");
		PEHaltRestart(kPERestartCPU);
	} else {
		printf("CPU halted\n");
		PEHaltRestart(kPEHaltCPU);
	}
	while (1) {
		;
	}
}

__attribute__((noreturn))
void
halt_cpu(void)
{
	halt_all_cpus(FALSE);
}
/*
 *	Routine:        machine_signal_idle
 */
void
machine_signal_idle(
	processor_t processor)
{
	cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}

void
machine_signal_idle_deferred(
	processor_t processor)
{
	cpu_signal_deferred(processor_to_cpu_datap(processor));
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}

void
machine_signal_idle_cancel(
	processor_t processor)
{
	cpu_signal_cancel(processor_to_cpu_datap(processor));
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}
/*
 *	Routine:        ml_install_interrupt_handler
 *	Function:       Initialize Interrupt Handler
 */
void
ml_install_interrupt_handler(
	void *nub,
	int source,
	void *target,
	IOInterruptHandler handler,
	void *refCon)
{
	cpu_data_t *cpu_data_ptr;
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);
	cpu_data_ptr = getCpuDatap();

	cpu_data_ptr->interrupt_nub = nub;
	cpu_data_ptr->interrupt_source = source;
	cpu_data_ptr->interrupt_target = target;
	cpu_data_ptr->interrupt_handler = handler;
	cpu_data_ptr->interrupt_refCon = refCon;

	(void) ml_set_interrupts_enabled(current_state);
}
/*
 *	Routine:        ml_init_interrupt
 *	Function:       Initialize Interrupts
 */
void
ml_init_interrupt(void)
{
#if defined(HAS_IPI)
	/*
	 * ml_init_interrupt will get called once for each CPU, but this is redundant
	 * because there is only one global copy of the register for Skye; do it only
	 * on the bootstrap cpu.
	 */
	if (getCpuDatap()->cluster_master) {
		ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
	}
#endif
}
/*
 *	Routine:        ml_init_timebase
 *	Function:       register and setup Timebase, Decrementer services
 */
void
ml_init_timebase(
	void *args,
	tbd_ops_t tbd_funcs,
	vm_offset_t int_address,
	vm_offset_t int_value __unused)
{
	cpu_data_t *cpu_data_ptr;

	cpu_data_ptr = (cpu_data_t *)args;

	if ((cpu_data_ptr == &BootCpuData)
	    && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
		rtclock_timebase_func = *tbd_funcs;
		rtclock_timebase_addr = int_address;
	}
}
#define ML_READPROP_MANDATORY UINT64_MAX

static uint64_t
ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
{
	void const *prop;
	unsigned int propSize;

	if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
		if (propSize == sizeof(uint8_t)) {
			return *((uint8_t const *)prop);
		} else if (propSize == sizeof(uint16_t)) {
			return *((uint16_t const *)prop);
		} else if (propSize == sizeof(uint32_t)) {
			return *((uint32_t const *)prop);
		} else if (propSize == sizeof(uint64_t)) {
			return *((uint64_t const *)prop);
		} else {
			panic("CPU property '%s' has bad size %u", propertyName, propSize);
		}
	} else {
		if (default_value == ML_READPROP_MANDATORY) {
			panic("Missing mandatory property '%s'", propertyName);
		}
		return default_value;
	}
}
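/*
 * Illustrative sketch (not in the original source): how the topology parser
 * below uses this helper. A missing optional property such as "die-id" falls
 * back to the supplied default, while passing ML_READPROP_MANDATORY (as is
 * done for "reg") turns a missing property into a panic. The local variable
 * names are illustrative only.
 */
#if 0
static void
example_readprop(const DTEntry cpu_node)
{
	uint64_t die_id  = ml_readprop(cpu_node, "die-id", 0);                  /* optional, default 0 */
	uint64_t phys_id = ml_readprop(cpu_node, "reg", ML_READPROP_MANDATORY); /* panics if absent */
	(void)die_id;
	(void)phys_id;
}
#endif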
775 ml_read_reg_range(const DTEntry entry
, const char *propertyName
, uint64_t *pa_ptr
, uint64_t *len_ptr
)
777 uint64_t const *prop
;
778 unsigned int propSize
;
780 if (SecureDTGetProperty(entry
, propertyName
, (void const **)&prop
, &propSize
) != kSuccess
) {
784 if (propSize
!= sizeof(uint64_t) * 2) {
785 panic("Wrong property size for %s", propertyName
);
794 ml_is_boot_cpu(const DTEntry entry
)
797 unsigned int propSize
;
799 if (SecureDTGetProperty(entry
, "state", &prop
, &propSize
) != kSuccess
) {
800 panic("unable to retrieve state for cpu");
803 if (strncmp((char const *)prop
, "running", propSize
) == 0) {
811 ml_read_chip_revision(unsigned int *rev __unused
)
813 // The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
814 #ifdef APPLE_ARM64_ARCH_FAMILY
817 if ((SecureDTFindEntry("name", "arm-io", &entryP
) == kSuccess
)) {
818 *rev
= (unsigned int)ml_readprop(entryP
, "chip-revision", CPU_VERSION_UNKNOWN
);
820 *rev
= CPU_VERSION_UNKNOWN
;
826 ml_parse_interrupt_prop(const DTEntry entry
, ml_topology_cpu_t
*cpu
)
828 uint32_t const *prop
;
829 unsigned int propSize
;
831 if (SecureDTGetProperty(entry
, "interrupts", (void const **)&prop
, &propSize
) != kSuccess
) {
835 if (propSize
== sizeof(uint32_t) * 1) {
836 cpu
->pmi_irq
= prop
[0];
838 } else if (propSize
== sizeof(uint32_t) * 3) {
839 cpu
->self_ipi_irq
= prop
[0];
840 cpu
->pmi_irq
= prop
[1];
841 cpu
->other_ipi_irq
= prop
[2];
void
ml_parse_cpu_topology(void)
{
	DTEntry entry, child __unused;
	OpaqueDTEntryIterator iter;
	uint32_t cpu_boot_arg;
	int err;

	int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
	int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
	cpu_boot_arg = MAX_CPUS;
	PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));

	err = SecureDTLookupEntry(NULL, "/cpus", &entry);
	assert(err == kSuccess);

	err = SecureDTInitEntryIterator(entry, &iter);
	assert(err == kSuccess);

	for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
		cluster_offsets[i] = -1;
		cluster_phys_to_logical[i] = -1;
		cluster_max_cpu_phys_id[i] = 0;
	}
	while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
		boolean_t is_boot_cpu = ml_is_boot_cpu(child);

		// If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
		// been added to the topology struct yet, and we only have one slot left, then skip
		// every other non-boot CPU in order to leave room for the boot CPU.
		//
		// e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
		// array will list CPU0, CPU1, and CPU4.  CPU2-CPU3 and CPU5-CPUn will be omitted.
		if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
			continue;
		}
		if (topology_info.num_cpus >= cpu_boot_arg) {
			break;
		}
889 ml_topology_cpu_t
*cpu
= &topology_info
.cpus
[topology_info
.num_cpus
];
891 cpu
->cpu_id
= topology_info
.num_cpus
++;
892 assert(cpu
->cpu_id
< MAX_CPUS
);
893 topology_info
.max_cpu_id
= MAX(topology_info
.max_cpu_id
, cpu
->cpu_id
);
895 cpu
->die_id
= (int)ml_readprop(child
, "die-id", 0);
896 topology_info
.max_die_id
= MAX(topology_info
.max_die_id
, cpu
->die_id
);
898 cpu
->phys_id
= (uint32_t)ml_readprop(child
, "reg", ML_READPROP_MANDATORY
);
900 cpu
->l2_access_penalty
= (uint32_t)ml_readprop(child
, "l2-access-penalty", 0);
901 cpu
->l2_cache_size
= (uint32_t)ml_readprop(child
, "l2-cache-size", 0);
902 cpu
->l2_cache_id
= (uint32_t)ml_readprop(child
, "l2-cache-id", 0);
903 cpu
->l3_cache_size
= (uint32_t)ml_readprop(child
, "l3-cache-size", 0);
904 cpu
->l3_cache_id
= (uint32_t)ml_readprop(child
, "l3-cache-id", 0);
906 ml_parse_interrupt_prop(child
, cpu
);
907 ml_read_reg_range(child
, "cpu-uttdbg-reg", &cpu
->cpu_UTTDBG_pa
, &cpu
->cpu_UTTDBG_len
);
908 ml_read_reg_range(child
, "cpu-impl-reg", &cpu
->cpu_IMPL_pa
, &cpu
->cpu_IMPL_len
);
909 ml_read_reg_range(child
, "coresight-reg", &cpu
->coresight_pa
, &cpu
->coresight_len
);
910 cpu
->cluster_type
= CLUSTER_TYPE_SMP
;
912 int cluster_type
= (int)ml_readprop(child
, "cluster-type", 0);
913 if (cluster_type
== 'E') {
914 cpu
->cluster_type
= CLUSTER_TYPE_E
;
915 } else if (cluster_type
== 'P') {
916 cpu
->cluster_type
= CLUSTER_TYPE_P
;
		/*
		 * Since we want to keep a linear cluster ID space, we cannot just rely
		 * on the value provided by EDT. Instead, use the MPIDR value to see if we have
		 * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
		 */
#if HAS_CLUSTER
		uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
#else
		uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P);
#endif
		assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
		cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
		    topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);
933 assert(cpu
->cluster_id
< MAX_CPU_CLUSTERS
);
935 ml_topology_cluster_t
*cluster
= &topology_info
.clusters
[cpu
->cluster_id
];
936 if (cluster
->num_cpus
== 0) {
937 assert(topology_info
.num_clusters
< MAX_CPU_CLUSTERS
);
939 topology_info
.num_clusters
++;
940 topology_info
.max_cluster_id
= MAX(topology_info
.max_cluster_id
, cpu
->cluster_id
);
942 cluster
->cluster_id
= cpu
->cluster_id
;
943 cluster
->cluster_type
= cpu
->cluster_type
;
944 cluster
->first_cpu_id
= cpu
->cpu_id
;
945 assert(cluster_phys_to_logical
[phys_cluster_id
] == -1);
946 cluster_phys_to_logical
[phys_cluster_id
] = cpu
->cluster_id
;
			// Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
			// If we wind up with a bunch of these, we might want to create separate per-cluster
			// EDT nodes and have the CPU nodes reference them through a phandle.
			ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
			ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
956 if (MPIDR_CPU_ID(cpu
->phys_id
) > cluster_max_cpu_phys_id
[phys_cluster_id
]) {
957 cluster_max_cpu_phys_id
[phys_cluster_id
] = MPIDR_CPU_ID(cpu
->phys_id
);
961 cpu
->die_cluster_id
= (int)ml_readprop(child
, "die-cluster-id", MPIDR_CLUSTER_ID(cpu
->phys_id
));
962 cpu
->cluster_core_id
= (int)ml_readprop(child
, "cluster-core-id", MPIDR_CPU_ID(cpu
->phys_id
));
965 cluster
->cpu_mask
|= 1ULL << cpu
->cpu_id
;
968 assert(topology_info
.boot_cpu
== NULL
);
969 topology_info
.boot_cpu
= cpu
;
970 topology_info
.boot_cluster
= cluster
;
#if HAS_CLUSTER
	/*
	 * Build the cluster offset array, ensuring that the region reserved
	 * for each physical cluster contains enough entries to be indexed
	 * by the maximum physical CPU ID (AFF0) within the cluster.
	 */
	unsigned int cur_cluster_offset = 0;
	for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
		if (cluster_phys_to_logical[i] != -1) {
			cluster_offsets[i] = cur_cluster_offset;
			cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
		}
	}
	assert(cur_cluster_offset <= MAX_CPUS);
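	/*
	 * Illustrative note (not in the original source): with two discovered
	 * physical clusters whose largest AFF0 values are 3 and 1, the loop above
	 * yields cluster_offsets[0] = 0 and cluster_offsets[1] = 4, so a
	 * hypothetical per-CPU array indexed by cluster_offsets[AFF1] + AFF0
	 * needs 4 + 2 = 6 entries.
	 */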
#else
	/*
	 * For H10, there are really 2 physical clusters, but they are not separated
	 * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering
	 * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing,
	 * treat H10 and earlier devices as though they contain a single cluster.
	 */
	cluster_offsets[0] = 0;
#endif
	assert(topology_info.boot_cpu != NULL);
	ml_read_chip_revision(&topology_info.chip_revision);

	/*
	 * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
	 * not be booting from cpu 0. Userspace will consume the current
	 * CPU number through this register. For non-boot cores, this is
	 * done in start.s (start_cpu) using the cpu_number field of the
	 * per-cpu data object.
	 */
	assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
	__builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)topology_info.boot_cpu->cpu_id);
}
const ml_topology_info_t *
ml_get_topology_info(void)
{
	return &topology_info;
}
1022 for (i
= 0; i
< topology_info
.num_cpus
; i
++) {
1023 ml_topology_cpu_t
*cpu
= &topology_info
.cpus
[i
];
1024 if (cpu
->cpu_IMPL_pa
) {
1025 cpu
->cpu_IMPL_regs
= (vm_offset_t
)ml_io_map(cpu
->cpu_IMPL_pa
, cpu
->cpu_IMPL_len
);
1026 cpu
->coresight_regs
= (vm_offset_t
)ml_io_map(cpu
->coresight_pa
, cpu
->coresight_len
);
1028 if (cpu
->cpu_UTTDBG_pa
) {
1029 cpu
->cpu_UTTDBG_regs
= (vm_offset_t
)ml_io_map(cpu
->cpu_UTTDBG_pa
, cpu
->cpu_UTTDBG_len
);
1033 for (i
= 0; i
< topology_info
.num_clusters
; i
++) {
1034 ml_topology_cluster_t
*cluster
= &topology_info
.clusters
[i
];
1035 if (cluster
->acc_IMPL_pa
) {
1036 cluster
->acc_IMPL_regs
= (vm_offset_t
)ml_io_map(cluster
->acc_IMPL_pa
, cluster
->acc_IMPL_len
);
1038 if (cluster
->cpm_IMPL_pa
) {
1039 cluster
->cpm_IMPL_regs
= (vm_offset_t
)ml_io_map(cluster
->cpm_IMPL_pa
, cluster
->cpm_IMPL_len
);
1045 ml_get_cpu_count(void)
1047 return topology_info
.num_cpus
;
1051 ml_get_cluster_count(void)
1053 return topology_info
.num_clusters
;
1057 ml_get_boot_cpu_number(void)
1059 return topology_info
.boot_cpu
->cpu_id
;
1063 ml_get_boot_cluster(void)
1065 return topology_info
.boot_cluster
->cluster_type
;
1069 ml_get_cpu_number(uint32_t phys_id
)
1071 phys_id
&= MPIDR_AFF1_MASK
| MPIDR_AFF0_MASK
;
1073 for (unsigned i
= 0; i
< topology_info
.num_cpus
; i
++) {
1074 if (topology_info
.cpus
[i
].phys_id
== phys_id
) {
1083 ml_get_cluster_number(uint32_t phys_id
)
1085 int cpu_id
= ml_get_cpu_number(phys_id
);
1090 ml_topology_cpu_t
*cpu
= &topology_info
.cpus
[cpu_id
];
1092 return cpu
->cluster_id
;
1096 ml_get_cpu_number_local(void)
1098 uint64_t mpidr_el1_value
= 0;
1101 /* We identify the CPU based on the constant bits of MPIDR_EL1. */
1102 MRS(mpidr_el1_value
, "MPIDR_EL1");
1103 cpu_id
= ml_get_cpu_number((uint32_t)mpidr_el1_value
);
1105 assert(cpu_id
<= (unsigned int)ml_get_max_cpu_number());
1111 ml_get_cluster_number_local()
1113 uint64_t mpidr_el1_value
= 0;
1114 unsigned cluster_id
;
1116 /* We identify the cluster based on the constant bits of MPIDR_EL1. */
1117 MRS(mpidr_el1_value
, "MPIDR_EL1");
1118 cluster_id
= ml_get_cluster_number((uint32_t)mpidr_el1_value
);
1120 assert(cluster_id
<= (unsigned int)ml_get_max_cluster_number());
1126 ml_get_max_cpu_number(void)
1128 return topology_info
.max_cpu_id
;
1132 ml_get_max_cluster_number(void)
1134 return topology_info
.max_cluster_id
;
1138 ml_get_first_cpu_id(unsigned int cluster_id
)
1140 return topology_info
.clusters
[cluster_id
].first_cpu_id
;
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	rorgn_stash_range();
#endif

kern_return_t
ml_lockdown_handler_register(lockdown_handler_t f, void *this)
{
	if (!f) {
		return KERN_FAILURE;
	}

	assert(lockdown_done);
	f(this); // XXX: f this whole function

	return KERN_SUCCESS;
}
1165 ml_processor_register(ml_processor_info_t
*in_processor_info
,
1166 processor_t
*processor_out
, ipi_handler_t
*ipi_handler_out
,
1167 perfmon_interrupt_handler_func
*pmi_handler_out
)
1169 cpu_data_t
*this_cpu_datap
;
1170 processor_set_t pset
;
1171 boolean_t is_boot_cpu
;
1172 static unsigned int reg_cpu_count
= 0;
1174 if (in_processor_info
->log_id
> (uint32_t)ml_get_max_cpu_number()) {
1175 return KERN_FAILURE
;
1178 if ((unsigned)OSIncrementAtomic((SInt32
*)®_cpu_count
) >= topology_info
.num_cpus
) {
1179 return KERN_FAILURE
;
1182 if (in_processor_info
->log_id
!= (uint32_t)ml_get_boot_cpu_number()) {
1183 is_boot_cpu
= FALSE
;
1184 this_cpu_datap
= cpu_data_alloc(FALSE
);
1185 cpu_data_init(this_cpu_datap
);
1187 this_cpu_datap
= &BootCpuData
;
1191 assert(in_processor_info
->log_id
<= (uint32_t)ml_get_max_cpu_number());
1193 this_cpu_datap
->cpu_id
= in_processor_info
->cpu_id
;
1195 this_cpu_datap
->cpu_console_buf
= console_cpu_alloc(is_boot_cpu
);
1196 if (this_cpu_datap
->cpu_console_buf
== (void *)(NULL
)) {
1197 goto processor_register_error
;
1201 this_cpu_datap
->cpu_number
= (unsigned short)(in_processor_info
->log_id
);
1203 if (cpu_data_register(this_cpu_datap
) != KERN_SUCCESS
) {
1204 goto processor_register_error
;
1208 this_cpu_datap
->cpu_idle_notify
= in_processor_info
->processor_idle
;
1209 this_cpu_datap
->cpu_cache_dispatch
= (cache_dispatch_t
)in_processor_info
->platform_cache_dispatch
;
1210 nanoseconds_to_absolutetime((uint64_t) in_processor_info
->powergate_latency
, &this_cpu_datap
->cpu_idle_latency
);
1211 this_cpu_datap
->cpu_reset_assist
= kvtophys(in_processor_info
->powergate_stub_addr
);
1213 this_cpu_datap
->idle_timer_notify
= in_processor_info
->idle_timer
;
1214 this_cpu_datap
->idle_timer_refcon
= in_processor_info
->idle_timer_refcon
;
1216 this_cpu_datap
->platform_error_handler
= in_processor_info
->platform_error_handler
;
1217 this_cpu_datap
->cpu_regmap_paddr
= in_processor_info
->regmap_paddr
;
1218 this_cpu_datap
->cpu_phys_id
= in_processor_info
->phys_id
;
1219 this_cpu_datap
->cpu_l2_access_penalty
= in_processor_info
->l2_access_penalty
;
1221 this_cpu_datap
->cpu_cluster_type
= in_processor_info
->cluster_type
;
1222 this_cpu_datap
->cpu_cluster_id
= in_processor_info
->cluster_id
;
1223 this_cpu_datap
->cpu_l2_id
= in_processor_info
->l2_cache_id
;
1224 this_cpu_datap
->cpu_l2_size
= in_processor_info
->l2_cache_size
;
1225 this_cpu_datap
->cpu_l3_id
= in_processor_info
->l3_cache_id
;
1226 this_cpu_datap
->cpu_l3_size
= in_processor_info
->l3_cache_size
;
1229 this_cpu_datap
->cluster_master
= !OSTestAndSet(this_cpu_datap
->cpu_cluster_id
, &cluster_initialized
);
1230 #else /* HAS_CLUSTER */
1231 this_cpu_datap
->cluster_master
= is_boot_cpu
;
1232 #endif /* HAS_CLUSTER */
1234 pset
= pset_find(in_processor_info
->cluster_id
, processor_pset(master_processor
));
1236 assert(pset
!= NULL
);
1237 kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__
, in_processor_info
->cpu_id
, in_processor_info
->cluster_id
, this_cpu_datap
->cpu_number
, in_processor_info
->cluster_type
);
1239 processor_t processor
= PERCPU_GET_RELATIVE(processor
, cpu_data
, this_cpu_datap
);
1241 processor_init(processor
, this_cpu_datap
->cpu_number
, pset
);
1243 if (this_cpu_datap
->cpu_l2_access_penalty
) {
1245 * Cores that have a non-zero L2 access penalty compared
1246 * to the boot processor should be de-prioritized by the
1247 * scheduler, so that threads use the cores with better L2
1250 processor_set_primary(processor
, master_processor
);
1254 *processor_out
= processor
;
1255 *ipi_handler_out
= cpu_signal_handler
;
1256 #if CPMU_AIC_PMI && MONOTONIC
1257 *pmi_handler_out
= mt_cpmu_aic_pmi
;
1259 *pmi_handler_out
= NULL
;
1260 #endif /* CPMU_AIC_PMI && MONOTONIC */
1261 if (in_processor_info
->idle_tickle
!= (idle_tickle_t
*) NULL
) {
1262 *in_processor_info
->idle_tickle
= (idle_tickle_t
) cpu_idle_tickle
;
1266 if (kpc_register_cpu(this_cpu_datap
) != TRUE
) {
1267 goto processor_register_error
;
1272 random_cpu_init(this_cpu_datap
->cpu_number
);
1273 // now let next CPU register itself
1274 OSIncrementAtomic((SInt32
*)&real_ncpus
);
1277 return KERN_SUCCESS
;
1279 processor_register_error
:
1281 kpc_unregister_cpu(this_cpu_datap
);
1284 cpu_data_free(this_cpu_datap
);
1287 return KERN_FAILURE
;
1291 ml_init_arm_debug_interface(
1292 void * in_cpu_datap
,
1293 vm_offset_t virt_address
)
1295 ((cpu_data_t
*)in_cpu_datap
)->cpu_debug_interface_map
= virt_address
;
1300 * Routine: init_ast_check
1305 __unused processor_t processor
)
1310 * Routine: cause_ast_check
1315 processor_t processor
)
1317 if (current_processor() != processor
) {
1318 cpu_signal(processor_to_cpu_datap(processor
), SIGPast
, (void *)NULL
, (void *)NULL
);
1319 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_REMOTE_AST
), processor
->cpu_id
, 1 /* ast */, 0, 0, 0);
1323 extern uint32_t cpu_idle_count
;
1326 ml_get_power_state(boolean_t
*icp
, boolean_t
*pidlep
)
1328 *icp
= ml_at_interrupt_context();
1329 *pidlep
= (cpu_idle_count
== real_ncpus
);
1333 * Routine: ml_cause_interrupt
1334 * Function: Generate a fake interrupt
1337 ml_cause_interrupt(void)
1339 return; /* BS_XXX */
1342 /* Map memory map IO space */
1345 vm_offset_t phys_addr
,
1348 return io_map(phys_addr
, size
, VM_WIMG_IO
);
1351 /* Map memory map IO space (with protections specified) */
1353 ml_io_map_with_prot(
1354 vm_offset_t phys_addr
,
1358 return io_map_with_prot(phys_addr
, size
, VM_WIMG_IO
, prot
);
1363 vm_offset_t phys_addr
,
1366 return io_map(phys_addr
, size
, VM_WIMG_WCOMB
);
1370 ml_io_unmap(vm_offset_t addr
, vm_size_t sz
)
1372 pmap_remove(kernel_pmap
, addr
, addr
+ sz
);
1373 kmem_free(kernel_map
, addr
, sz
);
1376 /* boot memory allocation */
1379 __unused vm_size_t size
)
1381 return (vm_offset_t
) NULL
;
1386 vm_offset_t phys_addr
,
1389 return pmap_map_high_window_bd(phys_addr
, len
, VM_PROT_READ
| VM_PROT_WRITE
);
1396 return phystokv(paddr
);
1403 vm_offset_t slid_vaddr
= vaddr
+ vm_kernel_slide
;
1405 if ((slid_vaddr
< vm_kernelcache_base
) || (slid_vaddr
>= vm_kernelcache_top
)) {
1406 /* This is only intended for use on kernelcache addresses. */
1411 * Because the address is in the kernelcache, we can do a simple
1412 * slide calculation.
1421 if ((vaddr
< vm_kernelcache_base
) || (vaddr
>= vm_kernelcache_top
)) {
1422 /* This is only intended for use on kernelcache addresses. */
1426 return vaddr
- vm_kernel_slide
;
1429 extern tt_entry_t
*arm_kva_to_tte(vm_offset_t va
);
1433 vm_offset_t vaddr
, /* kernel virtual address */
1437 pt_entry_t arm_prot
= 0;
1438 pt_entry_t arm_block_prot
= 0;
1439 vm_offset_t vaddr_cur
;
1441 kern_return_t result
= KERN_SUCCESS
;
1443 if (vaddr
< VM_MIN_KERNEL_ADDRESS
) {
1444 panic("ml_static_protect(): %p < %p", (void *) vaddr
, (void *) VM_MIN_KERNEL_ADDRESS
);
1445 return KERN_FAILURE
;
1448 assert((vaddr
& (PAGE_SIZE
- 1)) == 0); /* must be page aligned */
1450 if ((new_prot
& VM_PROT_WRITE
) && (new_prot
& VM_PROT_EXECUTE
)) {
1451 panic("ml_static_protect(): WX request on %p", (void *) vaddr
);
1453 if (lockdown_done
&& (new_prot
& VM_PROT_EXECUTE
)) {
1454 panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr
);
1457 /* Set up the protection bits, and block bits so we can validate block mappings. */
1458 if (new_prot
& VM_PROT_WRITE
) {
1459 arm_prot
|= ARM_PTE_AP(AP_RWNA
);
1460 arm_block_prot
|= ARM_TTE_BLOCK_AP(AP_RWNA
);
1462 arm_prot
|= ARM_PTE_AP(AP_RONA
);
1463 arm_block_prot
|= ARM_TTE_BLOCK_AP(AP_RONA
);
1466 arm_prot
|= ARM_PTE_NX
;
1467 arm_block_prot
|= ARM_TTE_BLOCK_NX
;
1469 if (!(new_prot
& VM_PROT_EXECUTE
)) {
1470 arm_prot
|= ARM_PTE_PNX
;
1471 arm_block_prot
|= ARM_TTE_BLOCK_PNX
;
1474 for (vaddr_cur
= vaddr
;
1475 vaddr_cur
< trunc_page_64(vaddr
+ size
);
1476 vaddr_cur
+= PAGE_SIZE
) {
1477 ppn
= pmap_find_phys(kernel_pmap
, vaddr_cur
);
1478 if (ppn
!= (vm_offset_t
) NULL
) {
1484 assert(!pmap_is_monitor(ppn
));
1485 assert(!TEST_PAGE_RATIO_4
);
1488 tte2
= arm_kva_to_tte(vaddr_cur
);
1490 if (((*tte2
) & ARM_TTE_TYPE_MASK
) != ARM_TTE_TYPE_TABLE
) {
1491 if ((((*tte2
) & ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_BLOCK
) &&
1492 ((*tte2
& (ARM_TTE_BLOCK_NXMASK
| ARM_TTE_BLOCK_PNXMASK
| ARM_TTE_BLOCK_APMASK
)) == arm_block_prot
)) {
1494 * We can support ml_static_protect on a block mapping if the mapping already has
1495 * the desired protections. We still want to run checks on a per-page basis.
1500 result
= KERN_FAILURE
;
1504 pte_p
= (pt_entry_t
*)&((tt_entry_t
*)(phystokv((*tte2
) & ARM_TTE_TABLE_MASK
)))[(((vaddr_cur
) & ARM_TT_L3_INDEX_MASK
) >> ARM_TT_L3_SHIFT
)];
1507 if ((ptmp
& ARM_PTE_HINT_MASK
) && ((ptmp
& (ARM_PTE_APMASK
| ARM_PTE_PNXMASK
| ARM_PTE_NXMASK
)) != arm_prot
)) {
1509 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
1510 * protections do not match the desired protections, then we will fail (as we cannot update
1511 * this mapping without updating other mappings as well).
1513 result
= KERN_FAILURE
;
1517 __unreachable_ok_push
1518 if (TEST_PAGE_RATIO_4
) {
1521 pt_entry_t
*ptep_iter
;
1524 for (i
= 0; i
< 4; i
++, ptep_iter
++) {
1525 /* Note that there is a hole in the HINT sanity checking here. */
1528 /* We only need to update the page tables if the protections do not match. */
1529 if ((ptmp
& (ARM_PTE_APMASK
| ARM_PTE_PNXMASK
| ARM_PTE_NXMASK
)) != arm_prot
) {
1530 ptmp
= (ptmp
& ~(ARM_PTE_APMASK
| ARM_PTE_PNXMASK
| ARM_PTE_NXMASK
)) | arm_prot
;
1537 /* We only need to update the page tables if the protections do not match. */
1538 if ((ptmp
& (ARM_PTE_APMASK
| ARM_PTE_PNXMASK
| ARM_PTE_NXMASK
)) != arm_prot
) {
1539 ptmp
= (ptmp
& ~(ARM_PTE_APMASK
| ARM_PTE_PNXMASK
| ARM_PTE_NXMASK
)) | arm_prot
;
1543 __unreachable_ok_pop
1547 if (vaddr_cur
> vaddr
) {
1548 assert(((vaddr_cur
- vaddr
) & 0xFFFFFFFF00000000ULL
) == 0);
1549 flush_mmu_tlb_region(vaddr
, (uint32_t)(vaddr_cur
- vaddr
));
1557 * Routine: ml_static_mfree
1565 vm_offset_t vaddr_cur
;
1567 uint32_t freed_pages
= 0;
1568 uint32_t freed_kernelcache_pages
= 0;
1570 /* It is acceptable (if bad) to fail to free. */
1571 if (vaddr
< VM_MIN_KERNEL_ADDRESS
) {
1575 assert((vaddr
& (PAGE_SIZE
- 1)) == 0); /* must be page aligned */
1577 for (vaddr_cur
= vaddr
;
1578 vaddr_cur
< trunc_page_64(vaddr
+ size
);
1579 vaddr_cur
+= PAGE_SIZE
) {
1580 ppn
= pmap_find_phys(kernel_pmap
, vaddr_cur
);
1581 if (ppn
!= (vm_offset_t
) NULL
) {
1583 * It is not acceptable to fail to update the protections on a page
1584 * we will release to the VM. We need to either panic or continue.
1585 * For now, we'll panic (to help flag if there is memory we can
1588 if (ml_static_protect(vaddr_cur
, PAGE_SIZE
, VM_PROT_WRITE
| VM_PROT_READ
) != KERN_SUCCESS
) {
1589 panic("Failed ml_static_mfree on %p", (void *) vaddr_cur
);
1592 vm_page_create(ppn
, (ppn
+ 1));
1594 if (vaddr_cur
>= segLOWEST
&& vaddr_cur
< end_kern
) {
1595 freed_kernelcache_pages
++;
1599 vm_page_lockspin_queues();
1600 vm_page_wire_count
-= freed_pages
;
1601 vm_page_wire_count_initial
-= freed_pages
;
1602 vm_page_kernelcache_count
-= freed_kernelcache_pages
;
1603 vm_page_unlock_queues();
1605 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages
, (void *)vaddr
, (uint64_t)size
, ppn
);
1610 /* virtual to physical on wired pages */
1612 ml_vtophys(vm_offset_t vaddr
)
1614 return kvtophys(vaddr
);
/*
 *	Routine:        ml_nofault_copy
 *	Function:       Perform a physical mode copy if the source and destination have
 *	valid translations in the kernel pmap. If translations are present, they are
 *	assumed to be wired; i.e., no attempt is made to guarantee that the
 *	translations obtained remain valid for the duration of the copy process.
 */
1625 ml_nofault_copy(vm_offset_t virtsrc
, vm_offset_t virtdst
, vm_size_t size
)
1627 addr64_t cur_phys_dst
, cur_phys_src
;
1628 vm_size_t count
, nbytes
= 0;
1631 if (!(cur_phys_src
= kvtophys(virtsrc
))) {
1634 if (!(cur_phys_dst
= kvtophys(virtdst
))) {
1637 if (!pmap_valid_address(trunc_page_64(cur_phys_dst
)) ||
1638 !pmap_valid_address(trunc_page_64(cur_phys_src
))) {
1641 count
= PAGE_SIZE
- (cur_phys_src
& PAGE_MASK
);
1642 if (count
> (PAGE_SIZE
- (cur_phys_dst
& PAGE_MASK
))) {
1643 count
= PAGE_SIZE
- (cur_phys_dst
& PAGE_MASK
);
1649 bcopy_phys(cur_phys_src
, cur_phys_dst
, count
);
/*
 *	Routine:        ml_validate_nofault
 *	Function:       Validate that this address range has valid translations
 *	in the kernel pmap. If translations are present, they are
 *	assumed to be wired; i.e., no attempt is made to guarantee
 *	that the translations persist after the check.
 *	Returns: TRUE if the range is mapped and will not cause a fault,
 *	FALSE otherwise.
 */
boolean_t
ml_validate_nofault(
	vm_offset_t virtsrc, vm_size_t size)
{
1674 addr64_t cur_phys_src
;
1678 if (!(cur_phys_src
= kvtophys(virtsrc
))) {
1681 if (!pmap_valid_address(trunc_page_64(cur_phys_src
))) {
1684 count
= (uint32_t)(PAGE_SIZE
- (cur_phys_src
& PAGE_MASK
));
1686 count
= (uint32_t)size
;
1697 ml_get_bouncepool_info(vm_offset_t
* phys_addr
, vm_size_t
* size
)
1704 active_rt_threads(__unused boolean_t active
)
1709 cpu_qos_cb_default(__unused
int urgency
, __unused
uint64_t qos_param1
, __unused
uint64_t qos_param2
)
1714 cpu_qos_update_t cpu_qos_update
= cpu_qos_cb_default
;
1717 cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb
)
1719 if (cpu_qos_cb
!= NULL
) {
1720 cpu_qos_update
= cpu_qos_cb
;
1722 cpu_qos_update
= cpu_qos_cb_default
;
1727 thread_tell_urgency(thread_urgency_t urgency
, uint64_t rt_period
, uint64_t rt_deadline
, uint64_t sched_latency __unused
, __unused thread_t nthread
)
1729 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_URGENCY
) | DBG_FUNC_START
, urgency
, rt_period
, rt_deadline
, sched_latency
, 0);
1731 cpu_qos_update((int)urgency
, rt_period
, rt_deadline
);
1733 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
, MACH_URGENCY
) | DBG_FUNC_END
, urgency
, rt_period
, rt_deadline
, 0, 0);
1737 machine_run_count(__unused
uint32_t count
)
1742 machine_choose_processor(__unused processor_set_t pset
, processor_t processor
)
1748 vm_offset_t
ml_stack_base(void);
1749 vm_size_t
ml_stack_size(void);
1754 uintptr_t local
= (uintptr_t) &local
;
1755 vm_offset_t intstack_top_ptr
;
1757 intstack_top_ptr
= getCpuDatap()->intstack_top
;
1758 if ((local
< intstack_top_ptr
) && (local
> intstack_top_ptr
- INTSTACK_SIZE
)) {
1759 return intstack_top_ptr
- INTSTACK_SIZE
;
1761 return current_thread()->kernel_stack
;
1767 uintptr_t local
= (uintptr_t) &local
;
1768 vm_offset_t intstack_top_ptr
;
1770 intstack_top_ptr
= getCpuDatap()->intstack_top
;
1771 if ((local
< intstack_top_ptr
) && (local
> intstack_top_ptr
- INTSTACK_SIZE
)) {
1772 return INTSTACK_SIZE
;
1774 return kernel_stack_size
;
1780 machine_timeout_suspended(void)
1786 ml_interrupt_prewarm(__unused
uint64_t deadline
)
1788 return KERN_FAILURE
;
1792 * Assumes fiq, irq disabled.
1795 ml_set_decrementer(uint32_t dec_value
)
1797 cpu_data_t
*cdp
= getCpuDatap();
1799 assert(ml_get_interrupts_enabled() == FALSE
);
1800 cdp
->cpu_decrementer
= dec_value
;
1802 if (cdp
->cpu_set_decrementer_func
) {
1803 cdp
->cpu_set_decrementer_func(dec_value
);
1805 __builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value
);
1814 // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
1815 // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
1816 // to other instructions executed on the same processor."
1817 __builtin_arm_isb(ISB_SY
);
1818 timebase
= __builtin_arm_rsr64("CNTVCT_EL0");
1826 return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase
;
1830 * Get the speculative timebase without an ISB.
1833 ml_get_speculative_timebase()
1837 timebase
= __builtin_arm_rsr64("CNTVCT_EL0");
1839 return timebase
+ getCpuDatap()->cpu_base_timebase
;
1843 ml_get_timebase_entropy(void)
1845 return ml_get_speculative_timebase();
1849 ml_get_decrementer()
1851 cpu_data_t
*cdp
= getCpuDatap();
1854 assert(ml_get_interrupts_enabled() == FALSE
);
1856 if (cdp
->cpu_get_decrementer_func
) {
1857 dec
= cdp
->cpu_get_decrementer_func();
1861 wide_val
= __builtin_arm_rsr64("CNTV_TVAL_EL0");
1862 dec
= (uint32_t)wide_val
;
1863 assert(wide_val
== (uint64_t)dec
);
1870 ml_get_timer_pending()
1872 uint64_t cntv_ctl
= __builtin_arm_rsr64("CNTV_CTL_EL0");
1873 return ((cntv_ctl
& CNTV_CTL_EL0_ISTATUS
) != 0) ? TRUE
: FALSE
;
1877 cache_trap_error(thread_t thread
, vm_map_address_t fault_addr
)
1879 mach_exception_data_type_t exc_data
[2];
1880 arm_saved_state_t
*regs
= get_user_regs(thread
);
1882 set_saved_state_far(regs
, fault_addr
);
1884 exc_data
[0] = KERN_INVALID_ADDRESS
;
1885 exc_data
[1] = fault_addr
;
1887 exception_triage(EXC_BAD_ACCESS
, exc_data
, 2);
1891 cache_trap_recover()
1893 vm_map_address_t fault_addr
;
1895 __asm__
volatile ("mrs %0, FAR_EL1" : "=r"(fault_addr
));
1897 cache_trap_error(current_thread(), fault_addr
);
1901 set_cache_trap_recover(thread_t thread
)
1903 #if defined(HAS_APPLE_PAC)
1904 thread
->recover
= (vm_address_t
)ptrauth_auth_and_resign(&cache_trap_recover
,
1905 ptrauth_key_function_pointer
, 0,
1906 ptrauth_key_function_pointer
, ptrauth_blend_discriminator(&thread
->recover
, PAC_DISCRIMINATOR_RECOVER
));
1907 #else /* defined(HAS_APPLE_PAC) */
1908 thread
->recover
= (vm_address_t
)cache_trap_recover
;
1909 #endif /* defined(HAS_APPLE_PAC) */
1913 dcache_flush_trap(vm_map_address_t start
, vm_map_size_t size
)
1915 vm_map_address_t end
= start
+ size
;
1916 thread_t thread
= current_thread();
1917 vm_offset_t old_recover
= thread
->recover
;
1920 if (task_has_64Bit_addr(current_task())) {
1921 if (end
> MACH_VM_MAX_ADDRESS
) {
1922 cache_trap_error(thread
, end
& ((1 << ARM64_CLINE_SHIFT
) - 1));
1925 if (end
> VM_MAX_ADDRESS
) {
1926 cache_trap_error(thread
, end
& ((1 << ARM64_CLINE_SHIFT
) - 1));
1931 cache_trap_error(thread
, start
& ((1 << ARM64_CLINE_SHIFT
) - 1));
1934 set_cache_trap_recover(thread
);
1937 * We're coherent on Apple ARM64 CPUs, so this could be a nop. However,
1938 * if the region given us is bad, it would be good to catch it and
1939 * crash, ergo we still do the flush.
1941 FlushPoC_DcacheRegion(start
, (uint32_t)size
);
1943 /* Restore recovery function */
1944 thread
->recover
= old_recover
;
1946 /* Return (caller does exception return) */
1950 icache_invalidate_trap(vm_map_address_t start
, vm_map_size_t size
)
1952 vm_map_address_t end
= start
+ size
;
1953 thread_t thread
= current_thread();
1954 vm_offset_t old_recover
= thread
->recover
;
1957 if (task_has_64Bit_addr(current_task())) {
1958 if (end
> MACH_VM_MAX_ADDRESS
) {
1959 cache_trap_error(thread
, end
& ((1 << ARM64_CLINE_SHIFT
) - 1));
1962 if (end
> VM_MAX_ADDRESS
) {
1963 cache_trap_error(thread
, end
& ((1 << ARM64_CLINE_SHIFT
) - 1));
1968 cache_trap_error(thread
, start
& ((1 << ARM64_CLINE_SHIFT
) - 1));
1971 set_cache_trap_recover(thread
);
1973 /* Invalidate iCache to point of unification */
1974 InvalidatePoU_IcacheRegion(start
, (uint32_t)size
);
1976 /* Restore recovery function */
1977 thread
->recover
= old_recover
;
1979 /* Return (caller does exception return) */
1982 __attribute__((noreturn
))
1984 platform_syscall(arm_saved_state_t
*state
)
1988 #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
1990 code
= (uint32_t)get_saved_state_reg(state
, 3);
1994 platform_syscall_kprintf("icache flush requested.\n");
1995 icache_invalidate_trap(get_saved_state_reg(state
, 0), get_saved_state_reg(state
, 1));
1999 platform_syscall_kprintf("dcache flush requested.\n");
2000 dcache_flush_trap(get_saved_state_reg(state
, 0), get_saved_state_reg(state
, 1));
2004 platform_syscall_kprintf("set cthread self.\n");
2005 thread_set_cthread_self(get_saved_state_reg(state
, 0));
2009 platform_syscall_kprintf("get cthread self.\n");
2010 set_saved_state_reg(state
, 0, thread_get_cthread_self());
2013 platform_syscall_kprintf("unknown: %d\n", code
);
2017 thread_exception_return();
2021 _enable_timebase_event_stream(uint32_t bit_index
)
2023 uint64_t cntkctl
; /* One wants to use 32 bits, but "mrs" prefers it this way */
2025 if (bit_index
>= 64) {
2026 panic("%s: invalid bit index (%u)", __FUNCTION__
, bit_index
);
2029 __asm__
volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl
));
2031 cntkctl
|= (bit_index
<< CNTKCTL_EL1_EVENTI_SHIFT
);
2032 cntkctl
|= CNTKCTL_EL1_EVNTEN
;
2033 cntkctl
|= CNTKCTL_EL1_EVENTDIR
; /* 1->0; why not? */
2036 * If the SOC supports it (and it isn't broken), enable
2037 * EL0 access to the timebase registers.
2039 if (user_timebase_type() != USER_TIMEBASE_NONE
) {
2040 cntkctl
|= (CNTKCTL_EL1_PL0PCTEN
| CNTKCTL_EL1_PL0VCTEN
);
2043 __builtin_arm_wsr64("CNTKCTL_EL1", cntkctl
);
2047 * Turn timer on, unmask that interrupt.
2050 _enable_virtual_timer(void)
2052 uint64_t cntvctl
= CNTV_CTL_EL0_ENABLE
; /* One wants to use 32 bits, but "mrs" prefers it this way */
2054 __builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl
);
2055 /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
2056 __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED
);
2060 fiq_context_init(boolean_t enable_fiq __unused
)
2062 /* Interrupts still disabled. */
2063 assert(ml_get_interrupts_enabled() == FALSE
);
2064 _enable_virtual_timer();
2068 wfe_timeout_init(void)
2070 _enable_timebase_event_stream(arm64_eventi
);
2074 wfe_timeout_configure(void)
2076 /* Could fill in our own ops here, if we needed them */
2077 uint64_t ticks_per_sec
, ticks_per_event
, events_per_sec
= 0;
2080 if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec
, sizeof(events_per_sec
))) {
2081 if (events_per_sec
<= 0) {
2083 } else if (events_per_sec
> USEC_PER_SEC
) {
2084 events_per_sec
= USEC_PER_SEC
;
2087 #if defined(ARM_BOARD_WFE_TIMEOUT_NS)
2088 events_per_sec
= NSEC_PER_SEC
/ ARM_BOARD_WFE_TIMEOUT_NS
;
2089 #else /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2090 /* Default to 1usec (or as close as we can get) */
2091 events_per_sec
= USEC_PER_SEC
;
2092 #endif /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
	ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
	ticks_per_event = ticks_per_sec / events_per_sec;
	bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */

	/* Round up to power of two */
	if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
		bit_index++;
	}

	/*
	 * The timer can only trigger on rising or falling edge,
	 * not both; we don't care which we trigger on, but we
	 * do need to adjust which bit we are interested in to
	 * account for this.
	 */
	if (bit_index != 0) {
		bit_index--;
	}

	arm64_eventi = bit_index;
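	/*
	 * Illustrative note (not in the original source): assuming a 24 MHz
	 * timebase and the default events_per_sec of USEC_PER_SEC (one event per
	 * microsecond), ticks_per_event = 24 and flsll(24) - 1 = 4; since 24 is
	 * not a power of two the index is rounded up to 5 (32 ticks), then
	 * decremented to 4 to account for the single-edge trigger, so the event
	 * stream fires roughly every 1.3 microseconds.
	 */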
2118 ml_delay_should_spin(uint64_t interval
)
2120 cpu_data_t
*cdp
= getCpuDatap();
2122 if (cdp
->cpu_idle_latency
) {
2123 return (interval
< cdp
->cpu_idle_latency
) ? TRUE
: FALSE
;
2126 * Early boot, latency is unknown. Err on the side of blocking,
2127 * which should always be safe, even if slow
2134 ml_thread_is64bit(thread_t thread
)
2136 return thread_is_64bit_addr(thread
);
2140 ml_delay_on_yield(void)
2142 #if DEVELOPMENT || DEBUG
2143 if (yield_delay_us
) {
2144 delay(yield_delay_us
);
2150 ml_timer_evaluate(void)
2155 ml_timer_forced_evaluation(void)
2161 ml_energy_stat(thread_t t
)
2163 return t
->machine
.energy_estimate_nj
;
2168 ml_gpu_stat_update(__unused
uint64_t gpu_ns_delta
)
2171 * For now: update the resource coalition stats of the
2172 * current thread's coalition
2174 task_coalition_update_gpu_stats(current_task(), gpu_ns_delta
);
2178 ml_gpu_stat(__unused thread_t t
)
2183 #if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT
2186 timer_state_event(boolean_t switch_to_kernel
)
2188 thread_t thread
= current_thread();
2189 if (!thread
->precise_user_kernel_time
) {
2193 processor_t pd
= current_processor();
2194 uint64_t now
= ml_get_speculative_timebase();
2196 timer_stop(pd
->current_state
, now
);
2197 pd
->current_state
= (switch_to_kernel
) ? &pd
->system_state
: &pd
->user_state
;
2198 timer_start(pd
->current_state
, now
);
2200 timer_stop(pd
->thread_timer
, now
);
2201 pd
->thread_timer
= (switch_to_kernel
) ? &thread
->system_timer
: &thread
->user_timer
;
2202 timer_start(pd
->thread_timer
, now
);
2206 timer_state_event_user_to_kernel(void)
2208 timer_state_event(TRUE
);
2212 timer_state_event_kernel_to_user(void)
2214 timer_state_event(FALSE
);
2216 #endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT */
2219 * The following are required for parts of the kernel
2220 * that cannot resolve these functions as inlines:
2222 extern thread_t
current_act(void) __attribute__((const));
2226 return current_thread_fast();
2229 #undef current_thread
2230 extern thread_t
current_thread(void) __attribute__((const));
2232 current_thread(void)
2234 return current_thread_fast();
ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];

/*
 * Callback registration
 * Currently we support only one registered callback per class but
 * it should be possible to support more callbacks
 */
kern_return_t
ex_cb_register(
	ex_cb_class_t   cb_class,
	ex_cb_t         cb,
	void            *refcon)
{
	ex_cb_info_t *pInfo = &ex_cb_info[cb_class];

	if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX)) {
		return KERN_INVALID_VALUE;
	}

	if (NULL == pInfo->cb) {
		pInfo->cb = cb;
		pInfo->refcon = refcon;
		return KERN_SUCCESS;
	}
	return KERN_FAILURE;
}

/*
 * Called internally by platform kernel to invoke the registered callback for class
 */
ex_cb_action_t
ex_cb_invoke(
	ex_cb_class_t   cb_class,
	vm_offset_t     far)
{
	ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
	ex_cb_state_t state = {far};

	if (cb_class >= EXCB_CLASS_MAX) {
		panic("Invalid exception callback class 0x%x\n", cb_class);
	}

	if (pInfo->cb) {
		return pInfo->cb(cb_class, pInfo->refcon, &state);
	}
	return EXCB_ACTION_NONE;
}
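/*
 * Illustrative sketch (not part of the original source): registering an
 * exception callback. The handler body, the use of a test class constant, and
 * the variable names are assumptions made purely for the example; only the
 * first registration for a given class succeeds.
 */
#if 0
static ex_cb_action_t
example_ex_cb(ex_cb_class_t cb_class, void *refcon, const ex_cb_state_t *state)
{
	(void)cb_class;
	(void)refcon;
	/* Inspect state->far (the faulting address) and decide how to proceed. */
	(void)state;
	return EXCB_ACTION_NONE;
}

static void
example_register(void)
{
	kern_return_t kr = ex_cb_register(EXCB_CLASS_TEST1, example_ex_cb, NULL);
	assert(kr == KERN_SUCCESS);
}
#endif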
2291 #if defined(HAS_APPLE_PAC)
2293 cpu_supports_userkeyen()
2295 #if defined(APPLEFIRESTORM)
2296 return __builtin_arm_rsr64(ARM64_REG_APCTL_EL1
) & APCTL_EL1_UserKeyEn
;
2297 #elif HAS_APCTL_EL1_USERKEYEN
2305 * Returns the default JOP key. Depending on how the CPU diversifies userspace
2306 * JOP keys, this value may reflect either KERNKeyLo or APIAKeyLo.
2309 ml_default_jop_pid(void)
2311 if (cpu_supports_userkeyen()) {
2312 return KERNEL_KERNKEY_ID
;
2314 return KERNEL_JOP_ID
;
2319 ml_task_set_disable_user_jop(task_t task
, uint8_t disable_user_jop
)
2322 task
->disable_user_jop
= disable_user_jop
;
2326 ml_thread_set_disable_user_jop(thread_t thread
, uint8_t disable_user_jop
)
2329 thread
->machine
.disable_user_jop
= disable_user_jop
;
2333 ml_task_set_rop_pid(task_t task
, task_t parent_task
, boolean_t inherit
)
2336 task
->rop_pid
= parent_task
->rop_pid
;
2338 task
->rop_pid
= early_random();
2343 * jop_pid may be inherited from the parent task or generated inside the shared
2344 * region. Unfortunately these two parameters are available at very different
2345 * times during task creation, so we need to split this into two steps.
2348 ml_task_set_jop_pid(task_t task
, task_t parent_task
, boolean_t inherit
)
2351 task
->jop_pid
= parent_task
->jop_pid
;
2353 task
->jop_pid
= ml_default_jop_pid();
2358 ml_task_set_jop_pid_from_shared_region(task_t task
)
2360 vm_shared_region_t sr
= vm_shared_region_get(task
);
2362 * If there's no shared region, we can assign the key arbitrarily. This
2363 * typically happens when Mach-O image activation failed part of the way
2364 * through, and this task is in the middle of dying with SIGKILL anyway.
2366 if (__improbable(!sr
)) {
2367 task
->jop_pid
= early_random();
2370 vm_shared_region_deallocate(sr
);
2373 * Similarly we have to worry about jetsam having killed the task and
2374 * already cleared the shared_region_id.
2377 if (task
->shared_region_id
!= NULL
) {
2378 task
->jop_pid
= shared_region_find_key(task
->shared_region_id
);
2380 task
->jop_pid
= early_random();
2386 ml_thread_set_jop_pid(thread_t thread
, task_t task
)
2388 thread
->machine
.jop_pid
= task
->jop_pid
;
2390 #endif /* defined(HAS_APPLE_PAC) */
#if defined(HAS_APPLE_PAC)
#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
	asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier));

/*
 * ml_auth_ptr_unchecked: call this instead of ptrauth_auth_data
 * intrinsic when you don't want to trap on auth fail.
 */
void *
ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
{
	switch (key & 0x3) {
	case ptrauth_key_asia:
		_ml_auth_ptr_unchecked(ptr, ia, modifier);
		break;
	case ptrauth_key_asib:
		_ml_auth_ptr_unchecked(ptr, ib, modifier);
		break;
	case ptrauth_key_asda:
		_ml_auth_ptr_unchecked(ptr, da, modifier);
		break;
	case ptrauth_key_asdb:
		_ml_auth_ptr_unchecked(ptr, db, modifier);
		break;
	}

	return ptr;
}
#endif /* defined(HAS_APPLE_PAC) */
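/*
 * Illustrative sketch (not part of the original source): using the helper
 * above, on HAS_APPLE_PAC configurations, in place of the ptrauth_auth_data
 * intrinsic when a failed authentication should not be fatal. The function
 * and variable names are hypothetical.
 */
#if 0
static void *
example_auth_no_trap(void *signed_ptr, uint64_t discriminator)
{
	void *p = ml_auth_ptr_unchecked(signed_ptr, ptrauth_key_asda, discriminator);
	/* The caller is responsible for validating 'p' before dereferencing it. */
	return p;
}
#endif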
#ifdef CONFIG_XNUPOST
void
ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
{
	thread_t thread = current_thread();
	thread->machine.expected_fault_handler = expected_fault_handler;
	thread->machine.expected_fault_addr = expected_fault_addr;
}

void
ml_expect_fault_end(void)
{
	thread_t thread = current_thread();
	thread->machine.expected_fault_handler = NULL;
	thread->machine.expected_fault_addr = 0;
}
#endif /* CONFIG_XNUPOST */
void
ml_hibernate_active_pre(void)
{
#if HIBERNATION
	if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
		hibernate_rebuild_vm_structs();
	}
#endif /* HIBERNATION */
}

void
ml_hibernate_active_post(void)
{
#if HIBERNATION
	if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
		hibernate_machine_init();
		hibernate_vm_lock_end();
		current_cpu_datap()->cpu_hibernate = 0;
	}
#endif /* HIBERNATION */
}
/*
 * Return back a machine-dependent array of address space regions that should be
 * reserved by the VM (pre-mapped in the address space). This will prevent user
 * processes from allocating or deallocating from within these regions.
 *
 * @param vm_is64bit True if the process has a 64-bit address space.
 * @param regions An out parameter representing an array of regions to reserve.
 *
 * @return The number of reserved regions returned through `regions`.
 */
size_t
ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
{
	assert(regions != NULL);

	/*
	 * Reserved regions only apply to 64-bit address spaces. This is because
	 * we only expect to grow the maximum user VA address on 64-bit address spaces
	 * (we've essentially already reached the max for 32-bit spaces). The reserved
	 * regions should safely fall outside of the max user VA for 32-bit processes.
	 */
	if (vm_is64bit) {
		*regions = vm_reserved_regions;
		return ARRAY_COUNT(vm_reserved_regions);
	} else {
		/* Don't reserve any VA regions on arm64_32 processes. */
		*regions = NULL;
		return 0;
	}
}
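/*
 * Illustrative sketch (not part of the original source): how a VM-layer caller
 * might consume the reserved-region array. The function name and loop body are
 * hypothetical and exist only for the example.
 */
#if 0
static void
example_reserve_regions(bool is64bit)
{
	struct vm_reserved_region *regions = NULL;
	size_t count = ml_get_vm_reserved_regions(is64bit, &regions);

	for (size_t i = 0; i < count; i++) {
		/* e.g. pre-map regions[i].vmrr_addr .. vmrr_addr + vmrr_size */
		printf("reserve %s: 0x%llx + 0x%llx\n", regions[i].vmrr_name,
		    (unsigned long long)regions[i].vmrr_addr,
		    (unsigned long long)regions[i].vmrr_size);
	}
}
#endif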
/* These WFE recommendations are expected to be updated on a relatively
 * infrequent cadence, possibly from a different cluster, hence
 * false cacheline sharing isn't expected to be material
 */
static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];

uint32_t
ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
{
	assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
	assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
	os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
	return 0; /* Success */
}

uint64_t
ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
{
	/* This and its consumer do not synchronize vis-a-vis updates
	 * of the recommendation; races are acceptable.
	 */
	uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
	return wfet;
}