/*
 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm64/proc_reg.h>
#include <arm/machine_cpu.h>
#include <arm/cpu_internal.h>
#include <arm/cpuid.h>
#include <arm/io_map_entries.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/caches_internal.h>
#include <arm/misc_protos.h>
#include <arm/machdep_call.h>
#include <arm/machine_routines.h>
#include <arm/rtclock.h>
#include <arm/cpuid_internal.h>
#include <arm/cpu_capabilities.h>
#include <console/serial_protos.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <prng/random.h>
#include <kern/startup.h>
#include <kern/thread.h>
#include <kern/timer_queue.h>
#include <mach/machine.h>
#include <machine/atomic.h>
#include <machine/config.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_map.h>
#include <sys/codesign.h>
#include <sys/kdebug.h>
#include <kern/coalition.h>
#include <pexpert/device_tree.h>

#include <IOKit/IOPlatformExpert.h>
#if HIBERNATION
#include <IOKit/IOHibernatePrivate.h>
#endif /* HIBERNATION */

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#include <arm64/amcc_rorgn.h>
#endif

#include <libkern/section_keywords.h>

/**
 * On supported hardware, debuggable builds make the HID bits read-only
 * without locking them. This lets people manually modify HID bits while
 * debugging, since they can use a debugging tool to first reset the HID
 * bits back to read/write. However it will still catch xnu changes that
 * accidentally write to HID bits after they've been made read-only.
 */
#if HAS_TWO_STAGE_SPR_LOCK && !(DEVELOPMENT || DEBUG)
#define USE_TWO_STAGE_SPR_LOCK
#endif

#if KPC
#include <kern/kpc.h>
#endif

#define MPIDR_CPU_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
#define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)

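/*
 * Illustrative note (not part of the original source): with the usual
 * AFF0/AFF1 mask and shift definitions (AFF0 in bits [7:0], AFF1 in bits
 * [15:8]), a raw MPIDR_EL1 value of 0x0102 decomposes as:
 *
 *   MPIDR_CLUSTER_ID(0x0102) == 1   // AFF1: cluster 1
 *   MPIDR_CPU_ID(0x0102)     == 2   // AFF0: CPU 2 within that cluster
 */
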
#if HAS_CLUSTER
static uint8_t cluster_initialized = 0;
#endif

uint32_t LockTimeOut;
uint32_t LockTimeOutUsec;
uint64_t TLockTimeOut;
uint64_t MutexSpin;
uint64_t low_MutexSpin;
int64_t high_MutexSpin;

static uint64_t ml_wfe_hint_max_interval;
#define MAX_WFE_HINT_INTERVAL_US (500ULL)

/* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);

extern vm_offset_t segLOWEST;
extern vm_offset_t segLOWESTTEXT;
extern vm_offset_t segLASTB;
extern unsigned long segSizeLAST;

/* ARM64 specific bounds; used to test for presence in the kernelcache. */
extern vm_offset_t vm_kernelcache_base;
extern vm_offset_t vm_kernelcache_top;

#if defined(HAS_IPI)
unsigned int gFastIPI = 1;
#define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
    kDeferredIPITimerDefault);
#endif /* defined(HAS_IPI) */

thread_t Idle_context(void);

SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
	.version = CPU_TOPOLOGY_VERSION,
	.cpus = topology_cpu_array,
	.clusters = topology_cluster_array,
};
/**
 * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
 * entries of an arbitrary data type. This is intended for use by specialized consumers
 * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
 * as follows:
 *   hypothetical_array[cluster_offsets[AFF1] + AFF0]
 * Most consumers should instead use general-purpose facilities such as PERCPU or
 * ml_get_cpu_number().
 */
SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];
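/*
 * Illustrative sketch (not part of the original source): a specialized
 * consumer holding only MPIDR_EL1 could index per-CPU storage roughly as
 * follows, where my_percpu_array is a hypothetical MAX_CPUS-sized array:
 *
 *   uint64_t mpidr;
 *   MRS(mpidr, "MPIDR_EL1");
 *   my_percpu_array[cluster_offsets[MPIDR_CLUSTER_ID(mpidr)] + MPIDR_CPU_ID(mpidr)];
 */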

SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;

extern uint32_t lockdown_done;

/**
 * Represents regions of virtual address space that should be reserved
 * (pre-mapped) in each user address space.
 */
SECURITY_READ_ONLY_LATE(static struct vm_reserved_region) vm_reserved_regions[] = {
	{
		.vmrr_name = "GPU Carveout",
		.vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS,
		.vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS)
	},
	/*
	 * Reserve the virtual memory space representing the commpage nesting region
	 * to prevent user processes from allocating memory within it. The actual
	 * page table entries for the commpage are inserted by vm_commpage_enter().
	 * This vm_map_enter() just prevents userspace from allocating/deallocating
	 * anything within the entire commpage nested region.
	 */
	{
		.vmrr_name = "commpage nesting",
		.vmrr_addr = _COMM_PAGE64_NESTING_START,
		.vmrr_size = _COMM_PAGE64_NESTING_SIZE
	}
};

uint32_t get_arm_cpu_version(void);

#if defined(HAS_IPI)
static inline void
ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
{
#if HAS_CLUSTER
	uint64_t local_mpidr;
	/* NOTE: this logic expects that we are called in a non-preemptible
	 * context, or at least one in which the calling thread is bound
	 * to a single CPU. Otherwise we may migrate between choosing which
	 * IPI mechanism to use and issuing the IPI. */
	MRS(local_mpidr, "MPIDR_EL1");
	if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
		uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
		MSR("S3_5_C15_C0_0", x);
	} else {
#define IPI_RR_TARGET_CLUSTER_SHIFT 16
		uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
		MSR("S3_5_C15_C0_1", x);
	}
#else
	uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
	MSR("S3_5_C15_C0_1", x);
#endif
}
#endif

#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
#else
	panic("Platform does not support ACC Fast IPI");
#endif
}

#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
{
#if defined(HAS_IPI)
	/* adjust IPI_CR timer countdown value for deferred IPI
	 * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
	 * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
	 *
	 * global register, should only require a single write to update all
	 * CPU cores: from Skye ACC user spec section 5.7.3.3
	 *
	 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
	 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
	 */
	uint64_t abstime;

	nanoseconds_to_absolutetime(nanosecs, &abstime);

	abstime = MIN(abstime, 0xFFFF);

	/* update deferred_ipi_timer_ns with the new clamped value */
	absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);

	MSR("S3_5_C15_C3_1", abstime);
#else
	(void)nanosecs;
	panic("Platform does not support ACC Fast IPI");
#endif
}
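
/*
 * Illustrative note (not part of the original source): because the countdown
 * is clamped to 0xFFFF REFCLK ticks, the longest expressible deferral depends
 * on the timebase frequency. Assuming, for example, a 24 MHz reference clock,
 * 0xFFFF ticks is roughly 65535 / 24,000,000 s ~= 2.7 ms; longer requests are
 * silently clamped, and the clamped value is reported back through
 * deferred_ipi_timer_ns.
 */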

uint64_t
ml_cpu_signal_deferred_get_timer()
{
#if defined(HAS_IPI)
	return deferred_ipi_timer_ns;
#else
	return 0;
#endif
}

#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
#else
	panic("Platform does not support ACC Fast IPI deferral");
#endif
}

#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
#else
	panic("Platform does not support ACC Fast IPI retraction");
#endif
}

void
machine_idle(void)
{
	/* Interrupts are expected to be masked on entry or re-entry via
	 * Idle_load_context()
	 */
	assert((__builtin_arm_rsr("DAIF") & DAIF_IRQF) == DAIF_IRQF);
	Idle_context();
	__builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
}

void
OSSynchronizeIO(void)
{
	__builtin_arm_dsb(DSB_SY);
}

uint64_t
get_aux_control(void)
{
	uint64_t value;

	MRS(value, "ACTLR_EL1");
	return value;
}

uint64_t
get_mmu_control(void)
{
	uint64_t value;

	MRS(value, "SCTLR_EL1");
	return value;
}

uint64_t
get_tcr(void)
{
	uint64_t value;

	MRS(value, "TCR_EL1");
	return value;
}

boolean_t
ml_get_interrupts_enabled(void)
{
	uint64_t value;

	MRS(value, "DAIF");
	if (value & DAIF_IRQF) {
		return FALSE;
	}
	return TRUE;
}

pmap_paddr_t
get_mmu_ttb(void)
{
	pmap_paddr_t value;

	MRS(value, "TTBR0_EL1");
	return value;
}

uint32_t
get_arm_cpu_version(void)
{
	uint32_t value = machine_read_midr();

	/* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
	return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
}
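
/*
 * Illustrative note (not part of the original source): with the standard MIDR
 * layout (variant in bits [23:20], revision in bits [3:0]), a core reporting
 * variant 2, revision 1 makes this function return (2 << 4) | 1 == 0x21.
 */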

bool
ml_feature_supported(uint32_t feature_bit)
{
	uint64_t aidr_el1_value = 0;

	MRS(aidr_el1_value, "AIDR_EL1");


	return aidr_el1_value & feature_bit;
}

/*
 * user_cont_hwclock_allowed()
 *
 * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
 * as a continuous time source (e.g. from mach_continuous_time)
 */
boolean_t
user_cont_hwclock_allowed(void)
{
#if HAS_CONTINUOUS_HWCLOCK
	return TRUE;
#else
	return FALSE;
#endif
}


uint8_t
user_timebase_type(void)
{
	return USER_TIMEBASE_SPEC;
}

void
machine_startup(__unused boot_args * args)
{
#if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
	if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
		gFastIPI = 1;
	}
#endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/

	machine_conf();

	/*
	 * Kick off the kernel bootstrap.
	 */
	kernel_bootstrap();
	/* NOTREACHED */
}

typedef void (*invalidate_fn_t)(void);

static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL;

void set_invalidate_hmac_function(invalidate_fn_t fn);

void
set_invalidate_hmac_function(invalidate_fn_t fn)
{
	if (NULL != invalidate_hmac_function) {
		panic("Invalidate HMAC function already set");
	}

	invalidate_hmac_function = fn;
}

void
machine_lockdown(void)
{
	arm_vm_prot_finalize(PE_state.bootArgs);

#if CONFIG_KERNEL_INTEGRITY
#if KERNEL_INTEGRITY_WT
	/* Watchtower
	 *
	 * Notify the monitor about the completion of early kernel bootstrap.
	 * From this point forward it will enforce the integrity of kernel text,
	 * rodata and page tables.
	 */

#ifdef MONITOR
	monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
#endif
#endif /* KERNEL_INTEGRITY_WT */

#if XNU_MONITOR
	pmap_lockdown_ppl();
#endif

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	/* KTRR
	 *
	 * Lock physical KTRR region. KTRR region is read-only. Memory outside
	 * the region is not executable at EL1.
	 */

	rorgn_lockdown();
#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */


#endif /* CONFIG_KERNEL_INTEGRITY */


	if (NULL != invalidate_hmac_function) {
		invalidate_hmac_function();
	}

	lockdown_done = 1;
}


char *
machine_boot_info(
	__unused char *buf,
	__unused vm_size_t size)
{
	return PE_boot_args();
}

void
slave_machine_init(__unused void *param)
{
	cpu_machine_init();     /* Initialize the processor */
	clock_init();           /* Init the clock */
}

/*
 * Routine: machine_processor_shutdown
 * Function:
 */
thread_t
machine_processor_shutdown(
	__unused thread_t thread,
	void (*doshutdown)(processor_t),
	processor_t processor)
{
	return Shutdown_context(doshutdown, processor);
}


/*
 * Routine: ml_init_lock_timeout
 * Function:
 */
void
ml_init_lock_timeout(void)
{
	uint64_t abstime;
	uint64_t mtxspin;
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
	uint32_t slto;

	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
	}

	nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
	LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
	LockTimeOut = (uint32_t)abstime;

	if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &abstime);
		TLockTimeOut = abstime;
	} else {
		TLockTimeOut = LockTimeOut >> 1;
	}

	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
		if (mtxspin > USEC_PER_SEC >> 4) {
			mtxspin = USEC_PER_SEC >> 4;
		}
		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
	} else {
		nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
	}
	MutexSpin = abstime;
	low_MutexSpin = MutexSpin;


	/*
	 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
	 * real_ncpus is not set at this time
	 *
	 * NOTE: active spinning is disabled in arm. It can be activated
	 * by setting high_MutexSpin through the sysctl.
	 */
	high_MutexSpin = low_MutexSpin;

	nanoseconds_to_absolutetime(MAX_WFE_HINT_INTERVAL_US * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
}
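
/*
 * Illustrative note (not part of the original source): with no overriding
 * boot-args, default_timeout_ns is NSEC_PER_SEC >> 2 = 250,000,000 ns, so
 * LockTimeOutUsec ends up as 250,000 us, TLockTimeOut defaults to half of
 * LockTimeOut, and MutexSpin corresponds to a 10 us spin window.
 */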

/*
 * This is called when all of the ml_processor_info_t structures have been
 * initialized and all the processors have been started through processor_start().
 *
 * Required by the scheduler subsystem.
 */
void
ml_cpu_init_completed(void)
{
}

/*
 * This is called from the machine-independent routine cpu_up()
 * to perform machine-dependent info updates.
 */
void
ml_cpu_up(void)
{
	os_atomic_inc(&machine_info.physical_cpu, relaxed);
	os_atomic_inc(&machine_info.logical_cpu, relaxed);
}

/*
 * This is called from the machine-independent routine cpu_down()
 * to perform machine-dependent info updates.
 */
void
ml_cpu_down(void)
{
	cpu_data_t *cpu_data_ptr;

	os_atomic_dec(&machine_info.physical_cpu, relaxed);
	os_atomic_dec(&machine_info.logical_cpu, relaxed);

	/*
	 * If we want to deal with outstanding IPIs, we need to do so
	 * relatively early in the processor_doshutdown path,
	 * as we pend decrementer interrupts using the IPI
	 * mechanism if we cannot immediately service them (if
	 * IRQ is masked). Do so now.
	 *
	 * We aren't on the interrupt stack here; would it make
	 * more sense to disable signaling and then enable
	 * interrupts? It might be a bit cleaner.
	 */
	cpu_data_ptr = getCpuDatap();
	cpu_data_ptr->cpu_running = FALSE;

	if (cpu_data_ptr != &BootCpuData) {
		/*
		 * Move all of this cpu's timers to the master/boot cpu,
		 * and poke it in case there's a sooner deadline for it to schedule.
		 */
		timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
		cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, NULL);
	}

	cpu_signal_handler_internal(TRUE);
}

/*
 * Routine: ml_cpu_get_info
 * Function:
 */
void
ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
{
	cache_info_t *cpuid_cache_info;

	cpuid_cache_info = cache_info();
	ml_cpu_info->vector_unit = 0;
	ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
	ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
	ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;

#if (__ARM_ARCH__ >= 7)
	ml_cpu_info->l2_settings = 1;
	ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
#else
	ml_cpu_info->l2_settings = 0;
	ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
#endif
	ml_cpu_info->l3_settings = 0;
	ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
}

unsigned int
ml_get_machine_mem(void)
{
	return machine_info.memory_size;
}

__attribute__((noreturn))
void
halt_all_cpus(boolean_t reboot)
{
	if (reboot) {
		printf("MACH Reboot\n");
		PEHaltRestart(kPERestartCPU);
	} else {
		printf("CPU halted\n");
		PEHaltRestart(kPEHaltCPU);
	}
	while (1) {
		;
	}
}

__attribute__((noreturn))
void
halt_cpu(void)
{
	halt_all_cpus(FALSE);
}

/*
 * Routine: machine_signal_idle
 * Function:
 */
void
machine_signal_idle(
	processor_t processor)
{
	cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}

void
machine_signal_idle_deferred(
	processor_t processor)
{
	cpu_signal_deferred(processor_to_cpu_datap(processor));
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}

void
machine_signal_idle_cancel(
	processor_t processor)
{
	cpu_signal_cancel(processor_to_cpu_datap(processor));
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}

/*
 * Routine: ml_install_interrupt_handler
 * Function: Initialize Interrupt Handler
 */
void
ml_install_interrupt_handler(
	void *nub,
	int source,
	void *target,
	IOInterruptHandler handler,
	void *refCon)
{
	cpu_data_t *cpu_data_ptr;
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);
	cpu_data_ptr = getCpuDatap();

	cpu_data_ptr->interrupt_nub = nub;
	cpu_data_ptr->interrupt_source = source;
	cpu_data_ptr->interrupt_target = target;
	cpu_data_ptr->interrupt_handler = handler;
	cpu_data_ptr->interrupt_refCon = refCon;

	(void) ml_set_interrupts_enabled(current_state);
}

/*
 * Routine: ml_init_interrupt
 * Function: Initialize Interrupts
 */
void
ml_init_interrupt(void)
{
#if defined(HAS_IPI)
	/*
	 * ml_init_interrupt will get called once for each CPU, but this is redundant
	 * because there is only one global copy of the register for skye. do it only
	 * on the bootstrap cpu
	 */
	if (getCpuDatap()->cluster_master) {
		ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
	}
#endif
}

/*
 * Routine: ml_init_timebase
 * Function: register and set up Timebase, Decrementer services
 */
void
ml_init_timebase(
	void *args,
	tbd_ops_t tbd_funcs,
	vm_offset_t int_address,
	vm_offset_t int_value __unused)
{
	cpu_data_t *cpu_data_ptr;

	cpu_data_ptr = (cpu_data_t *)args;

	if ((cpu_data_ptr == &BootCpuData)
	    && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
		rtclock_timebase_func = *tbd_funcs;
		rtclock_timebase_addr = int_address;
	}
}

#define ML_READPROP_MANDATORY UINT64_MAX

static uint64_t
ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
{
	void const *prop;
	unsigned int propSize;

	if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
		if (propSize == sizeof(uint8_t)) {
			return *((uint8_t const *)prop);
		} else if (propSize == sizeof(uint16_t)) {
			return *((uint16_t const *)prop);
		} else if (propSize == sizeof(uint32_t)) {
			return *((uint32_t const *)prop);
		} else if (propSize == sizeof(uint64_t)) {
			return *((uint64_t const *)prop);
		} else {
			panic("CPU property '%s' has bad size %u", propertyName, propSize);
		}
	} else {
		if (default_value == ML_READPROP_MANDATORY) {
			panic("Missing mandatory property '%s'", propertyName);
		}
		return default_value;
	}
}
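
/*
 * Illustrative usage (not part of the original source): callers read scalar
 * device-tree properties with an explicit default, or mark them mandatory,
 * for example:
 *
 *   uint32_t die_id  = (uint32_t)ml_readprop(child, "die-id", 0);
 *   uint32_t phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
 *
 * A missing mandatory property panics rather than returning a default value.
 */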

static boolean_t
ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr)
{
	uint64_t const *prop;
	unsigned int propSize;

	if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
		return FALSE;
	}

	if (propSize != sizeof(uint64_t) * 2) {
		panic("Wrong property size for %s", propertyName);
	}

	*pa_ptr = prop[0];
	*len_ptr = prop[1];
	return TRUE;
}

static boolean_t
ml_is_boot_cpu(const DTEntry entry)
{
	void const *prop;
	unsigned int propSize;

	if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) {
		panic("unable to retrieve state for cpu");
	}

	if (strncmp((char const *)prop, "running", propSize) == 0) {
		return TRUE;
	} else {
		return FALSE;
	}
}

static void
ml_read_chip_revision(unsigned int *rev __unused)
{
	// The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
#ifdef APPLE_ARM64_ARCH_FAMILY
	DTEntry entryP;

	if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
		*rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
	} else {
		*rev = CPU_VERSION_UNKNOWN;
	}
#endif
}

void
ml_parse_cpu_topology(void)
{
	DTEntry entry, child __unused;
	OpaqueDTEntryIterator iter;
	uint32_t cpu_boot_arg;
	int err;

	int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
	int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
	cpu_boot_arg = MAX_CPUS;
	PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));

	err = SecureDTLookupEntry(NULL, "/cpus", &entry);
	assert(err == kSuccess);

	err = SecureDTInitEntryIterator(entry, &iter);
	assert(err == kSuccess);

	for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
		cluster_offsets[i] = -1;
		cluster_phys_to_logical[i] = -1;
		cluster_max_cpu_phys_id[i] = 0;
	}

	while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
		boolean_t is_boot_cpu = ml_is_boot_cpu(child);

		// If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
		// been added to the topology struct yet, and we only have one slot left, then skip
		// every other non-boot CPU in order to leave room for the boot CPU.
		//
		// e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
		// array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted.
		if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
			continue;
		}
		if (topology_info.num_cpus >= cpu_boot_arg) {
			break;
		}

		ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus];

		cpu->cpu_id = topology_info.num_cpus++;
		assert(cpu->cpu_id < MAX_CPUS);
		topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id);

		cpu->die_id = (int)ml_readprop(child, "die-id", 0);
		topology_info.max_die_id = MAX(topology_info.max_die_id, cpu->die_id);

		cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);

		cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
		cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
		cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
		cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
		cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);

		ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
		ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
		ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
		cpu->cluster_type = CLUSTER_TYPE_SMP;

		int cluster_type = (int)ml_readprop(child, "cluster-type", 0);
		if (cluster_type == 'E') {
			cpu->cluster_type = CLUSTER_TYPE_E;
		} else if (cluster_type == 'P') {
			cpu->cluster_type = CLUSTER_TYPE_P;
		}

		/*
		 * Since we want to keep a linear cluster ID space, we cannot just rely
		 * on the value provided by EDT. Instead, use the MPIDR value to see if we have
		 * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
		 */
#if HAS_CLUSTER
		uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
#else
		uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P);
#endif
		assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
		cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
		    topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);

		assert(cpu->cluster_id < MAX_CPU_CLUSTERS);

		ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id];
		if (cluster->num_cpus == 0) {
			assert(topology_info.num_clusters < MAX_CPU_CLUSTERS);

			topology_info.num_clusters++;
			topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id);

			cluster->cluster_id = cpu->cluster_id;
			cluster->cluster_type = cpu->cluster_type;
			cluster->first_cpu_id = cpu->cpu_id;
			assert(cluster_phys_to_logical[phys_cluster_id] == -1);
			cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id;

			// Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
			// If we wind up with a bunch of these, we might want to create separate per-cluster
			// EDT nodes and have the CPU nodes reference them through a phandle.
			ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
			ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
		}

#if HAS_CLUSTER
		if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) {
			cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id);
		}
#endif

		cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
		cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));

		cluster->num_cpus++;
		cluster->cpu_mask |= 1ULL << cpu->cpu_id;

		if (is_boot_cpu) {
			assert(topology_info.boot_cpu == NULL);
			topology_info.boot_cpu = cpu;
			topology_info.boot_cluster = cluster;
		}
	}

#if HAS_CLUSTER
	/*
	 * Build the cluster offset array, ensuring that the region reserved
	 * for each physical cluster contains enough entries to be indexed
	 * by the maximum physical CPU ID (AFF0) within the cluster.
	 */
	unsigned int cur_cluster_offset = 0;
	for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
		if (cluster_phys_to_logical[i] != -1) {
			cluster_offsets[i] = cur_cluster_offset;
			cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
		}
	}
	assert(cur_cluster_offset <= MAX_CPUS);
#else
	/*
	 * For H10, there are really 2 physical clusters, but they are not separated
	 * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering
	 * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing,
	 * treat H10 and earlier devices as though they contain a single cluster.
	 */
	cluster_offsets[0] = 0;
#endif
	assert(topology_info.boot_cpu != NULL);
	ml_read_chip_revision(&topology_info.chip_revision);

	/*
	 * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
	 * not be booting from cpu 0. Userspace will consume the current
	 * CPU number through this register. For non-boot cores, this is
	 * done in start.s (start_cpu) using the cpu_number field of the
	 * per-cpu data object.
	 */
	assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
	__builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)topology_info.boot_cpu->cpu_id);
}

const ml_topology_info_t *
ml_get_topology_info(void)
{
	return &topology_info;
}

void
ml_map_cpu_pio(void)
{
	unsigned int i;

	for (i = 0; i < topology_info.num_cpus; i++) {
		ml_topology_cpu_t *cpu = &topology_info.cpus[i];
		if (cpu->cpu_IMPL_pa) {
			cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
			cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
		}
		if (cpu->cpu_UTTDBG_pa) {
			cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
		}
	}

	for (i = 0; i < topology_info.num_clusters; i++) {
		ml_topology_cluster_t *cluster = &topology_info.clusters[i];
		if (cluster->acc_IMPL_pa) {
			cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
		}
		if (cluster->cpm_IMPL_pa) {
			cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
		}
	}
}

unsigned int
ml_get_cpu_count(void)
{
	return topology_info.num_cpus;
}

unsigned int
ml_get_cluster_count(void)
{
	return topology_info.num_clusters;
}

int
ml_get_boot_cpu_number(void)
{
	return topology_info.boot_cpu->cpu_id;
}

cluster_type_t
ml_get_boot_cluster(void)
{
	return topology_info.boot_cluster->cluster_type;
}

int
ml_get_cpu_number(uint32_t phys_id)
{
	phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK;

	for (unsigned i = 0; i < topology_info.num_cpus; i++) {
		if (topology_info.cpus[i].phys_id == phys_id) {
			return i;
		}
	}

	return -1;
}

int
ml_get_cluster_number(uint32_t phys_id)
{
	int cpu_id = ml_get_cpu_number(phys_id);
	if (cpu_id < 0) {
		return -1;
	}

	ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id];

	return cpu->cluster_id;
}

unsigned int
ml_get_cpu_number_local(void)
{
	uint64_t mpidr_el1_value = 0;
	unsigned cpu_id;

	/* We identify the CPU based on the constant bits of MPIDR_EL1. */
	MRS(mpidr_el1_value, "MPIDR_EL1");
	cpu_id = ml_get_cpu_number((uint32_t)mpidr_el1_value);

	assert(cpu_id <= (unsigned int)ml_get_max_cpu_number());

	return cpu_id;
}

int
ml_get_cluster_number_local()
{
	uint64_t mpidr_el1_value = 0;
	unsigned cluster_id;

	/* We identify the cluster based on the constant bits of MPIDR_EL1. */
	MRS(mpidr_el1_value, "MPIDR_EL1");
	cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value);

	assert(cluster_id <= (unsigned int)ml_get_max_cluster_number());

	return cluster_id;
}

int
ml_get_max_cpu_number(void)
{
	return topology_info.max_cpu_id;
}

int
ml_get_max_cluster_number(void)
{
	return topology_info.max_cluster_id;
}

unsigned int
ml_get_first_cpu_id(unsigned int cluster_id)
{
	return topology_info.clusters[cluster_id].first_cpu_id;
}

void
ml_lockdown_init()
{
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	rorgn_stash_range();
#endif
}

kern_return_t
ml_lockdown_handler_register(lockdown_handler_t f, void *this)
{
	if (!f) {
		return KERN_FAILURE;
	}

	assert(lockdown_done);
	f(this); // XXX: f this whole function

	return KERN_SUCCESS;
}

kern_return_t
ml_processor_register(ml_processor_info_t *in_processor_info,
    processor_t *processor_out, ipi_handler_t *ipi_handler_out,
    perfmon_interrupt_handler_func *pmi_handler_out)
{
	cpu_data_t *this_cpu_datap;
	processor_set_t pset;
	boolean_t is_boot_cpu;
	static unsigned int reg_cpu_count = 0;

	if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
		return KERN_FAILURE;
	}

	if ((unsigned)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= topology_info.num_cpus) {
		return KERN_FAILURE;
	}

	if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
		is_boot_cpu = FALSE;
		this_cpu_datap = cpu_data_alloc(FALSE);
		cpu_data_init(this_cpu_datap);
	} else {
		this_cpu_datap = &BootCpuData;
		is_boot_cpu = TRUE;
	}

	assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number());

	this_cpu_datap->cpu_id = in_processor_info->cpu_id;

	this_cpu_datap->cpu_console_buf = console_cpu_alloc(is_boot_cpu);
	if (this_cpu_datap->cpu_console_buf == (void *)(NULL)) {
		goto processor_register_error;
	}

	if (!is_boot_cpu) {
		this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id);

		if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
			goto processor_register_error;
		}
	}

	this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle;
	this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch;
	nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
	this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);

	this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer;
	this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;

	this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler;
	this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
	this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
	this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;

	this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
	this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
	this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
	this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
	this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
	this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;

#if HAS_CLUSTER
	this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
#else /* HAS_CLUSTER */
	this_cpu_datap->cluster_master = is_boot_cpu;
#endif /* HAS_CLUSTER */

#if !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2)
	{
		/* Workaround for the existing scheduler
		 * code, which only supports a limited number of psets.
		 *
		 * To get around that limitation, we distribute all cores into
		 * two psets according to their cluster type, instead of
		 * having a dedicated pset per cluster ID.
		 */

		pset_cluster_type_t pset_cluster_type;

		/* For this workaround, we don't expect seeing anything else
		 * than E or P clusters. */
		switch (in_processor_info->cluster_type) {
		case CLUSTER_TYPE_E:
			pset_cluster_type = PSET_AMP_E;
			break;
		case CLUSTER_TYPE_P:
			pset_cluster_type = PSET_AMP_P;
			break;
		default:
			panic("unknown/unsupported cluster type %d", in_processor_info->cluster_type);
		}

		pset = pset_find_first_by_cluster_type(pset_cluster_type);

		if (pset == NULL) {
			panic("no pset for cluster type %d/%d", in_processor_info->cluster_type, pset_cluster_type);
		}

		kprintf("%s>chosen pset with cluster id %d cluster type %d for core:\n",
		    __FUNCTION__, pset->pset_cluster_id, pset->pset_cluster_type);
	}
#else /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */
	pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
#endif /* !defined(RC_HIDE_XNU_FIRESTORM) && (MAX_CPU_CLUSTERS > 2) */

	assert(pset != NULL);
	kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);

	processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap);
	if (!is_boot_cpu) {
		processor_init(processor, this_cpu_datap->cpu_number, pset);

		if (this_cpu_datap->cpu_l2_access_penalty) {
			/*
			 * Cores that have a non-zero L2 access penalty compared
			 * to the boot processor should be de-prioritized by the
			 * scheduler, so that threads use the cores with better L2
			 * preferentially.
			 */
			processor_set_primary(processor, master_processor);
		}
	}

	*processor_out = processor;
	*ipi_handler_out = cpu_signal_handler;
#if CPMU_AIC_PMI && MONOTONIC
	*pmi_handler_out = mt_cpmu_aic_pmi;
#else
	*pmi_handler_out = NULL;
#endif /* CPMU_AIC_PMI && MONOTONIC */
	if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
		*in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
	}

#if KPC
	if (kpc_register_cpu(this_cpu_datap) != TRUE) {
		goto processor_register_error;
	}
#endif /* KPC */

	if (!is_boot_cpu) {
		random_cpu_init(this_cpu_datap->cpu_number);
		// now let next CPU register itself
		OSIncrementAtomic((SInt32*)&real_ncpus);
	}

	return KERN_SUCCESS;

processor_register_error:
#if KPC
	kpc_unregister_cpu(this_cpu_datap);
#endif /* KPC */
	if (!is_boot_cpu) {
		cpu_data_free(this_cpu_datap);
	}

	return KERN_FAILURE;
}

void
ml_init_arm_debug_interface(
	void * in_cpu_datap,
	vm_offset_t virt_address)
{
	((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
	do_debugid();
}

/*
 * Routine: init_ast_check
 * Function:
 */
void
init_ast_check(
	__unused processor_t processor)
{
}

/*
 * Routine: cause_ast_check
 * Function:
 */
void
cause_ast_check(
	processor_t processor)
{
	if (current_processor() != processor) {
		cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
	}
}

extern uint32_t cpu_idle_count;

void
ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
{
	*icp = ml_at_interrupt_context();
	*pidlep = (cpu_idle_count == real_ncpus);
}

/*
 * Routine: ml_cause_interrupt
 * Function: Generate a fake interrupt
 */
void
ml_cause_interrupt(void)
{
	return; /* BS_XXX */
}

/* Map memory map IO space */
vm_offset_t
ml_io_map(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_IO);
}

/* Map memory map IO space (with protections specified) */
vm_offset_t
ml_io_map_with_prot(
	vm_offset_t phys_addr,
	vm_size_t size,
	vm_prot_t prot)
{
	return io_map_with_prot(phys_addr, size, VM_WIMG_IO, prot);
}

vm_offset_t
ml_io_map_wcomb(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_WCOMB);
}

void
ml_io_unmap(vm_offset_t addr, vm_size_t sz)
{
	pmap_remove(kernel_pmap, addr, addr + sz);
	kmem_free(kernel_map, addr, sz);
}

/* boot memory allocation */
vm_offset_t
ml_static_malloc(
	__unused vm_size_t size)
{
	return (vm_offset_t) NULL;
}

vm_map_address_t
ml_map_high_window(
	vm_offset_t phys_addr,
	vm_size_t len)
{
	return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
}

vm_offset_t
ml_static_ptovirt(
	vm_offset_t paddr)
{
	return phystokv(paddr);
}

vm_offset_t
ml_static_slide(
	vm_offset_t vaddr)
{
	vm_offset_t slid_vaddr = vaddr + vm_kernel_slide;

	if ((slid_vaddr < vm_kernelcache_base) || (slid_vaddr >= vm_kernelcache_top)) {
		/* This is only intended for use on kernelcache addresses. */
		return 0;
	}

	/*
	 * Because the address is in the kernelcache, we can do a simple
	 * slide calculation.
	 */
	return slid_vaddr;
}

vm_offset_t
ml_static_unslide(
	vm_offset_t vaddr)
{
	if ((vaddr < vm_kernelcache_base) || (vaddr >= vm_kernelcache_top)) {
		/* This is only intended for use on kernelcache addresses. */
		return 0;
	}

	return vaddr - vm_kernel_slide;
}
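
/*
 * Illustrative note (not part of the original source): for a slid address
 * inside [vm_kernelcache_base, vm_kernelcache_top), these helpers are
 * inverses, i.e. ml_static_slide(ml_static_unslide(vaddr)) == vaddr.
 * ml_static_unslide() returns 0 for addresses outside that range, and
 * ml_static_slide() returns 0 when the slid result would fall outside it.
 */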

extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);

kern_return_t
ml_static_protect(
	vm_offset_t vaddr, /* kernel virtual address */
	vm_size_t size,
	vm_prot_t new_prot)
{
	pt_entry_t arm_prot = 0;
	pt_entry_t arm_block_prot = 0;
	vm_offset_t vaddr_cur;
	ppnum_t ppn;
	kern_return_t result = KERN_SUCCESS;

	if (vaddr < VM_MIN_KERNEL_ADDRESS) {
		panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) VM_MIN_KERNEL_ADDRESS);
		return KERN_FAILURE;
	}

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
		panic("ml_static_protect(): WX request on %p", (void *) vaddr);
	}
	if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) {
		panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr);
	}

	/* Set up the protection bits, and block bits so we can validate block mappings. */
	if (new_prot & VM_PROT_WRITE) {
		arm_prot |= ARM_PTE_AP(AP_RWNA);
		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
	} else {
		arm_prot |= ARM_PTE_AP(AP_RONA);
		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
	}

	arm_prot |= ARM_PTE_NX;
	arm_block_prot |= ARM_TTE_BLOCK_NX;

	if (!(new_prot & VM_PROT_EXECUTE)) {
		arm_prot |= ARM_PTE_PNX;
		arm_block_prot |= ARM_TTE_BLOCK_PNX;
	}

	for (vaddr_cur = vaddr;
	    vaddr_cur < trunc_page_64(vaddr + size);
	    vaddr_cur += PAGE_SIZE) {
		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		if (ppn != (vm_offset_t) NULL) {
			tt_entry_t *tte2;
			pt_entry_t *pte_p;
			pt_entry_t ptmp;

#if XNU_MONITOR
			assert(!pmap_is_monitor(ppn));
			assert(!TEST_PAGE_RATIO_4);
#endif

			tte2 = arm_kva_to_tte(vaddr_cur);

			if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
				if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
				    ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
					/*
					 * We can support ml_static_protect on a block mapping if the mapping already has
					 * the desired protections. We still want to run checks on a per-page basis.
					 */
					continue;
				}

				result = KERN_FAILURE;
				break;
			}

			pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
			ptmp = *pte_p;

			if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
				/*
				 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
				 * protections do not match the desired protections, then we will fail (as we cannot update
				 * this mapping without updating other mappings as well).
				 */
				result = KERN_FAILURE;
				break;
			}

			__unreachable_ok_push
			if (TEST_PAGE_RATIO_4) {
				{
					unsigned int i;
					pt_entry_t *ptep_iter;

					ptep_iter = pte_p;
					for (i = 0; i < 4; i++, ptep_iter++) {
						/* Note that there is a hole in the HINT sanity checking here. */
						ptmp = *ptep_iter;

						/* We only need to update the page tables if the protections do not match. */
						if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
							ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
							*ptep_iter = ptmp;
						}
					}
				}
			} else {
				ptmp = *pte_p;
				/* We only need to update the page tables if the protections do not match. */
				if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
					ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
					*pte_p = ptmp;
				}
			}
			__unreachable_ok_pop
		}
	}

	if (vaddr_cur > vaddr) {
		assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
		flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
	}


	return result;
}
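
/*
 * Illustrative usage (not part of the original source): a typical caller
 * re-protects a page-aligned, wired kernel range, for example
 *
 *   kern_return_t kr = ml_static_protect(vaddr, PAGE_SIZE, VM_PROT_READ);
 *
 * Requests combining VM_PROT_WRITE with VM_PROT_EXECUTE panic, and block or
 * contiguous-hint mappings whose existing permissions differ from the request
 * fail with KERN_FAILURE rather than being split.
 */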

/*
 * Routine: ml_static_mfree
 * Function:
 */
void
ml_static_mfree(
	vm_offset_t vaddr,
	vm_size_t size)
{
	vm_offset_t vaddr_cur;
	ppnum_t ppn;
	uint32_t freed_pages = 0;
	uint32_t bad_page_cnt = 0;
	uint32_t freed_kernelcache_pages = 0;

#if defined(__arm64__) && (DEVELOPMENT || DEBUG)
	/* For testing hitting a bad ram page */
	static int count = 0;
	static int bad_at_cnt = -1;
	static bool first = true;

	if (first) {
		(void)PE_parse_boot_argn("bad_static_mfree", &bad_at_cnt, sizeof(bad_at_cnt));
		first = false;
	}
#endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */

	/* It is acceptable (if bad) to fail to free. */
	if (vaddr < VM_MIN_KERNEL_ADDRESS) {
		return;
	}

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	for (vaddr_cur = vaddr;
	    vaddr_cur < trunc_page_64(vaddr + size);
	    vaddr_cur += PAGE_SIZE) {
		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		if (ppn != (vm_offset_t) NULL) {
			/*
			 * It is not acceptable to fail to update the protections on a page
			 * we will release to the VM. We need to either panic or continue.
			 * For now, we'll panic (to help flag if there is memory we can
			 * reclaim).
			 */
			if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
				panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
			}

#if defined(__arm64__)
			bool is_bad = pmap_is_bad_ram(ppn);
#if DEVELOPMENT || DEBUG
			is_bad |= (count++ == bad_at_cnt);
#endif /* DEVELOPMENT || DEBUG */

			if (is_bad) {
				++bad_page_cnt;
				vm_page_create_retired(ppn);
				continue;
			}
#endif /* defined(__arm64__) */

			vm_page_create(ppn, (ppn + 1));
			freed_pages++;
			if (vaddr_cur >= segLOWEST && vaddr_cur < end_kern) {
				freed_kernelcache_pages++;
			}
		}
	}
	vm_page_lockspin_queues();
	vm_page_wire_count -= freed_pages;
	vm_page_wire_count_initial -= freed_pages;
	vm_page_kernelcache_count -= freed_kernelcache_pages;
	vm_page_unlock_queues();
#if DEBUG
	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x, +%d bad\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn, bad_page_cnt);
#endif
}


/* virtual to physical on wired pages */
vm_offset_t
ml_vtophys(vm_offset_t vaddr)
{
	return kvtophys(vaddr);
}

/*
 * Routine: ml_nofault_copy
 * Function: Perform a physical mode copy if the source and destination have
 * valid translations in the kernel pmap. If translations are present, they are
 * assumed to be wired; e.g., no attempt is made to guarantee that the
 * translations obtained remain valid for the duration of the copy process.
 */
vm_size_t
ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
{
	addr64_t cur_phys_dst, cur_phys_src;
	vm_size_t count, nbytes = 0;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			break;
		}
		if (!(cur_phys_dst = kvtophys(virtdst))) {
			break;
		}
		if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
		    !pmap_valid_address(trunc_page_64(cur_phys_src))) {
			break;
		}
		count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
		if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
			count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
		}
		if (count > size) {
			count = size;
		}

		bcopy_phys(cur_phys_src, cur_phys_dst, count);

		nbytes += count;
		virtsrc += count;
		virtdst += count;
		size -= count;
	}

	return nbytes;
}
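
/*
 * Illustrative note (not part of the original source): each loop iteration
 * copies at most one page, bounded by whichever ends first: the source page,
 * the destination page, or the remaining size. For example, if virtsrc is
 * 0x300 bytes short of its page boundary and virtdst is 0x100 bytes short of
 * its own, the first chunk copied is 0x100 bytes.
 */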

/*
 * Routine: ml_validate_nofault
 * Function: Validate that this address range has valid translations
 * in the kernel pmap. If translations are present, they are
 * assumed to be wired; i.e. no attempt is made to guarantee
 * that the translations persist after the check.
 * Returns: TRUE if the range is mapped and will not cause a fault,
 * FALSE otherwise.
 */
1721
0a7de745
A
1722boolean_t
1723ml_validate_nofault(
5ba3f43e
A
1724 vm_offset_t virtsrc, vm_size_t size)
1725{
1726 addr64_t cur_phys_src;
1727 uint32_t count;
1728
1729 while (size > 0) {
0a7de745 1730 if (!(cur_phys_src = kvtophys(virtsrc))) {
5ba3f43e 1731 return FALSE;
0a7de745
A
1732 }
1733 if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
5ba3f43e 1734 return FALSE;
0a7de745 1735 }
5ba3f43e 1736 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
0a7de745 1737 if (count > size) {
5ba3f43e 1738 count = (uint32_t)size;
0a7de745 1739 }
5ba3f43e
A
1740
1741 virtsrc += count;
1742 size -= count;
1743 }
1744
1745 return TRUE;
1746}
1747
1748void
1749ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
1750{
1751 *phys_addr = 0;
1752 *size = 0;
1753}
1754
1755void
1756active_rt_threads(__unused boolean_t active)
1757{
1758}
1759
0a7de745
A
1760static void
1761cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
1762{
5ba3f43e
A
1763 return;
1764}
1765
1766cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
1767
0a7de745
A
1768void
1769cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
1770{
5ba3f43e
A
1771 if (cpu_qos_cb != NULL) {
1772 cpu_qos_update = cpu_qos_cb;
1773 } else {
1774 cpu_qos_update = cpu_qos_cb_default;
1775 }
1776}
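/*
 * Registration sketch. The callback body is hypothetical; its signature is
 * assumed to match cpu_qos_cb_default and the call in thread_tell_urgency
 * below.
 */
static void
example_cpu_qos_cb(int urgency, uint64_t rt_period, uint64_t rt_deadline)
{
	/* e.g. forward the urgency hint to a power-management/CLPC driver */
	(void)urgency; (void)rt_period; (void)rt_deadline;
}

static void
example_install_qos_cb(void)
{
	cpu_qos_update_register(example_cpu_qos_cb);	/* install */
	cpu_qos_update_register(NULL);			/* later: restore the default no-op */
}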
1777
1778void
0a7de745 1779thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
5ba3f43e 1780{
0a7de745 1781 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
5ba3f43e 1782
0a7de745 1783 cpu_qos_update((int)urgency, rt_period, rt_deadline);
5ba3f43e 1784
0a7de745 1785 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
1786}
1787
1788void
1789machine_run_count(__unused uint32_t count)
1790{
1791}
1792
1793processor_t
1794machine_choose_processor(__unused processor_set_t pset, processor_t processor)
1795{
0a7de745 1796 return processor;
1797}
1798
1799#if KASAN
1800vm_offset_t ml_stack_base(void);
1801vm_size_t ml_stack_size(void);
1802
1803vm_offset_t
1804ml_stack_base(void)
1805{
1806 uintptr_t local = (uintptr_t) &local;
1807 vm_offset_t intstack_top_ptr;
1808
1809 intstack_top_ptr = getCpuDatap()->intstack_top;
1810 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1811 return intstack_top_ptr - INTSTACK_SIZE;
5ba3f43e 1812 } else {
d9a64523 1813 return current_thread()->kernel_stack;
1814 }
1815}
1816vm_size_t
1817ml_stack_size(void)
1818{
1819 uintptr_t local = (uintptr_t) &local;
1820 vm_offset_t intstack_top_ptr;
1821
1822 intstack_top_ptr = getCpuDatap()->intstack_top;
1823 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1824 return INTSTACK_SIZE;
5ba3f43e 1825 } else {
d9a64523 1826 return kernel_stack_size;
1827 }
1828}
1829#endif
1830
1831boolean_t
1832machine_timeout_suspended(void)
1833{
1834 return FALSE;
1835}
1836
1837kern_return_t
1838ml_interrupt_prewarm(__unused uint64_t deadline)
1839{
1840 return KERN_FAILURE;
1841}
1842
1843/*
1844 * Assumes FIQ and IRQ are disabled.
1845 */
1846void
1847ml_set_decrementer(uint32_t dec_value)
1848{
0a7de745 1849 cpu_data_t *cdp = getCpuDatap();
1850
1851 assert(ml_get_interrupts_enabled() == FALSE);
1852 cdp->cpu_decrementer = dec_value;
1853
0a7de745 1854 if (cdp->cpu_set_decrementer_func) {
f427ee49 1855 cdp->cpu_set_decrementer_func(dec_value);
5ba3f43e 1856 } else {
f427ee49 1857 __builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value);
1858 }
1859}
1860
1861uint64_t
1862ml_get_hwclock()
1863{
1864 uint64_t timebase;
1865
1866 // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
f427ee49 1867 // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
5ba3f43e 1868 // to other instructions executed on the same processor."
cb323159 1869 __builtin_arm_isb(ISB_SY);
f427ee49 1870 timebase = __builtin_arm_rsr64("CNTVCT_EL0");
1871
1872 return timebase;
1873}
1874
1875uint64_t
1876ml_get_timebase()
1877{
0a7de745 1878 return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase;
1879}
1880
1881/*
1882 * Get the timebase without the ordering ISB; the counter read may be satisfied speculatively and out of order.
1883 */
2a1bd2d3 1884uint64_t
1885ml_get_speculative_timebase()
1886{
1887 uint64_t timebase;
1888
1889 timebase = __builtin_arm_rsr64("CNTVCT_EL0");
1890
1891 return timebase + getCpuDatap()->cpu_base_timebase;
1892}
1893
1894uint64_t
1895ml_get_timebase_entropy(void)
1896{
1897 return ml_get_speculative_timebase();
1898}
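/*
 * Which reader to use, as a sketch: ml_get_timebase() pays for an ISB so the
 * sample is ordered against surrounding work; the speculative variant is for
 * cheap samples (e.g. entropy) where reordering is acceptable. do_some_work()
 * is a stand-in for the code being timed.
 */
static uint64_t
example_time_work(void (*do_some_work)(void))
{
	uint64_t t0 = ml_get_timebase();	/* ordered read */
	do_some_work();
	uint64_t elapsed = ml_get_timebase() - t0;

	(void)ml_get_speculative_timebase();	/* unordered, cheaper sample */
	return elapsed;
}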
1899
1900uint32_t
1901ml_get_decrementer()
1902{
1903 cpu_data_t *cdp = getCpuDatap();
1904 uint32_t dec;
1905
1906 assert(ml_get_interrupts_enabled() == FALSE);
1907
1908 if (cdp->cpu_get_decrementer_func) {
f427ee49 1909 dec = cdp->cpu_get_decrementer_func();
1910 } else {
1911 uint64_t wide_val;
1912
f427ee49 1913 wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0");
1914 dec = (uint32_t)wide_val;
1915 assert(wide_val == (uint64_t)dec);
1916 }
1917
1918 return dec;
1919}
1920
1921boolean_t
1922ml_get_timer_pending()
1923{
1924 uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0");
1925 return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
1926}
1927
1928static void
1929cache_trap_error(thread_t thread, vm_map_address_t fault_addr)
1930{
1931 mach_exception_data_type_t exc_data[2];
1932 arm_saved_state_t *regs = get_user_regs(thread);
1933
1934 set_saved_state_far(regs, fault_addr);
1935
1936 exc_data[0] = KERN_INVALID_ADDRESS;
1937 exc_data[1] = fault_addr;
1938
1939 exception_triage(EXC_BAD_ACCESS, exc_data, 2);
1940}
1941
1942static void
c3c9b80d 1943cache_trap_recover(void)
1944{
1945 vm_map_address_t fault_addr;
1946
0a7de745 1947 __asm__ volatile ("mrs %0, FAR_EL1" : "=r"(fault_addr));
1948
1949 cache_trap_error(current_thread(), fault_addr);
1950}
1951
1952static void
1953set_cache_trap_recover(thread_t thread)
1954{
cb323159 1955#if defined(HAS_APPLE_PAC)
1956 void *fun = &cache_trap_recover;
1957 thread->recover = (vm_address_t)ptrauth_auth_and_resign(fun,
1958 ptrauth_key_function_pointer, 0,
1959 ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
1960#else /* defined(HAS_APPLE_PAC) */
0a7de745 1961 thread->recover = (vm_address_t)cache_trap_recover;
cb323159 1962#endif /* defined(HAS_APPLE_PAC) */
1963}
1964
1965static void
1966dcache_flush_trap(vm_map_address_t start, vm_map_size_t size)
1967{
1968 vm_map_address_t end = start + size;
1969 thread_t thread = current_thread();
1970 vm_offset_t old_recover = thread->recover;
1971
1972 /* Check bounds */
d9a64523 1973 if (task_has_64Bit_addr(current_task())) {
1974 if (end > MACH_VM_MAX_ADDRESS) {
1975 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1976 }
1977 } else {
1978 if (end > VM_MAX_ADDRESS) {
1979 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1980 }
1981 }
1982
1983 if (start > end) {
1984 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1985 }
1986
0a7de745 1987 set_cache_trap_recover(thread);
5ba3f43e 1988
1989 /*
1990 * We're coherent on Apple ARM64 CPUs, so this could be a nop. However,
1991 * if the region given to us is bad, it would be good to catch it and
1992 * crash, ergo we still do the flush.
1993 */
5ba3f43e 1994 FlushPoC_DcacheRegion(start, (uint32_t)size);
1995
1996 /* Restore recovery function */
1997 thread->recover = old_recover;
1998
1999 /* Return (caller does exception return) */
2000}
2001
2002static void
2003icache_invalidate_trap(vm_map_address_t start, vm_map_size_t size)
2004{
2005 vm_map_address_t end = start + size;
2006 thread_t thread = current_thread();
2007 vm_offset_t old_recover = thread->recover;
2008
2009 /* Check bounds */
d9a64523 2010 if (task_has_64Bit_addr(current_task())) {
2011 if (end > MACH_VM_MAX_ADDRESS) {
2012 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
2013 }
2014 } else {
2015 if (end > VM_MAX_ADDRESS) {
2016 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
2017 }
2018 }
2019
2020 if (start > end) {
2021 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
2022 }
2023
0a7de745 2024 set_cache_trap_recover(thread);
5ba3f43e 2025
5ba3f43e 2026 /* Invalidate iCache to point of unification */
2027 InvalidatePoU_IcacheRegion(start, (uint32_t)size);
2028
2029 /* Restore recovery function */
2030 thread->recover = old_recover;
2031
2032 /* Return (caller does exception return) */
2033}
2034
2035__attribute__((noreturn))
2036void
2037platform_syscall(arm_saved_state_t *state)
2038{
2039 uint32_t code;
2040
2041#define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
2042
2043 code = (uint32_t)get_saved_state_reg(state, 3);
2044 switch (code) {
2045 case 0:
2046 /* I-Cache flush */
2047 platform_syscall_kprintf("icache flush requested.\n");
2048 icache_invalidate_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
2049 break;
2050 case 1:
2051 /* D-Cache flush */
2052 platform_syscall_kprintf("dcache flush requested.\n");
2053 dcache_flush_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
2054 break;
2055 case 2:
2056 /* set cthread */
2057 platform_syscall_kprintf("set cthread self.\n");
2058 thread_set_cthread_self(get_saved_state_reg(state, 0));
2059 break;
2060 case 3:
2061 /* get cthread */
2062 platform_syscall_kprintf("get cthread self.\n");
2063 set_saved_state_reg(state, 0, thread_get_cthread_self());
2064 break;
2065 default:
2066 platform_syscall_kprintf("unknown: %d\n", code);
2067 break;
2068 }
2069
2070 thread_exception_return();
2071}
2072
2073static void
2074_enable_timebase_event_stream(uint32_t bit_index)
2075{
2076 uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */
2077
2078 if (bit_index >= 64) {
2079 panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
2080 }
2081
2082 __asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl));
2083
2084 cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
2085 cntkctl |= CNTKCTL_EL1_EVNTEN;
2086 cntkctl |= CNTKCTL_EL1_EVENTDIR; /* trigger on the 1->0 transition; either edge would work, the choice is arbitrary */
2087
2088 /*
2089 * If the SOC supports it (and it isn't broken), enable
f427ee49 2090 * EL0 access to the timebase registers.
5ba3f43e 2091 */
cb323159 2092 if (user_timebase_type() != USER_TIMEBASE_NONE) {
f427ee49 2093 cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
2094 }
2095
f427ee49 2096 __builtin_arm_wsr64("CNTKCTL_EL1", cntkctl);
2097}
2098
2099/*
2100 * Turn timer on, unmask that interrupt.
2101 */
2102static void
2103_enable_virtual_timer(void)
2104{
f427ee49 2105 uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "msr" prefers it this way */
5ba3f43e 2106
2107 __builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl);
2108 /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
2109 __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
2110}
2111
2112void
2113fiq_context_init(boolean_t enable_fiq __unused)
2114{
2115 /* Interrupts still disabled. */
2116 assert(ml_get_interrupts_enabled() == FALSE);
2117 _enable_virtual_timer();
2118}
2119
2120void
2121wfe_timeout_init(void)
2122{
2123 _enable_timebase_event_stream(arm64_eventi);
2124}
2125
2126void
2127wfe_timeout_configure(void)
d9a64523 2128{
5ba3f43e 2129 /* Could fill in our own ops here, if we needed them */
f427ee49 2130 uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0;
0a7de745 2131 uint32_t bit_index;
5ba3f43e 2132
2133 if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
2134 if (events_per_sec <= 0) {
2135 events_per_sec = 1;
2136 } else if (events_per_sec > USEC_PER_SEC) {
2137 events_per_sec = USEC_PER_SEC;
2138 }
2139 } else {
2140#if defined(ARM_BOARD_WFE_TIMEOUT_NS)
2141 events_per_sec = NSEC_PER_SEC / ARM_BOARD_WFE_TIMEOUT_NS;
2142#else /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2143 /* Default to 1usec (or as close as we can get) */
2144 events_per_sec = USEC_PER_SEC;
2145#endif /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2146 }
5ba3f43e 2147 ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2148 ticks_per_event = ticks_per_sec / events_per_sec;
2149 bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */
2150
2151 /* Round up to power of two */
0a7de745 2152 if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
5ba3f43e 2153 bit_index++;
0a7de745 2154 }
2155
2156 /*
2157 * The event stream can only trigger on the rising or falling edge,
2158 * not both; we don't care which we trigger on, but we
2159 * do need to adjust which bit we are interested in to
2160 * account for this.
2161 */
0a7de745 2162 if (bit_index != 0) {
5ba3f43e 2163 bit_index--;
0a7de745 2164 }
5ba3f43e 2165
2166 arm64_eventi = bit_index;
2167 wfe_timeout_init();
2168}
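/*
 * Worked example of the computation above, with assumed numbers: with a
 * 24 MHz timebase and the default target of one event per microsecond,
 * ticks_per_event = 24000000 / 1000000 = 24. flsll(24) - 1 = 4, and since
 * 24 is not a power of two, bit_index is rounded up to 5 (32 ticks). Because
 * the event stream fires on only one edge of the selected counter bit, the
 * final programmed index is 5 - 1 = 4: bit 4 toggles every 16 ticks, so a
 * single-direction edge occurs every 32 ticks, roughly every 1.33 usec,
 * the closest power-of-two interval at or above the request.
 */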
2169
2170boolean_t
2171ml_delay_should_spin(uint64_t interval)
2172{
2173 cpu_data_t *cdp = getCpuDatap();
2174
2175 if (cdp->cpu_idle_latency) {
2176 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2177 } else {
2178 /*
2179 * Early boot, latency is unknown. Err on the side of blocking,
2180 * which should always be safe, even if slow
2181 */
2182 return FALSE;
2183 }
2184}
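/*
 * Illustrative caller pattern for ml_delay_should_spin(); the helper
 * nanoseconds_to_absolutetime() and the wrapper below are assumptions, not
 * part of this file.
 */
static void
example_short_delay(uint64_t ns)
{
	uint64_t interval_abs;
	nanoseconds_to_absolutetime(ns, &interval_abs);
	if (ml_delay_should_spin(interval_abs)) {
		uint64_t deadline = ml_get_timebase() + interval_abs;
		while (ml_get_timebase() < deadline) {
			/* busy-wait; cheaper than idling for intervals below cpu_idle_latency */
		}
	} else {
		/* interval is long enough that blocking/idling is the better trade-off */
	}
}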
2185
2186boolean_t
2187ml_thread_is64bit(thread_t thread)
e8c3f781 2188{
0a7de745 2189 return thread_is_64bit_addr(thread);
2190}
2191
2192void
2193ml_delay_on_yield(void)
2194{
2195#if DEVELOPMENT || DEBUG
2196 if (yield_delay_us) {
2197 delay(yield_delay_us);
2198 }
2199#endif
2200}
2201
2202void
2203ml_timer_evaluate(void)
2204{
2205}
2206
2207boolean_t
2208ml_timer_forced_evaluation(void)
2209{
2210 return FALSE;
2211}
2212
2213uint64_t
2214ml_energy_stat(thread_t t)
2215{
2216 return t->machine.energy_estimate_nj;
2217}
2218
2219
2220void
2221ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
2222{
2223 /*
2224 * For now: update the resource coalition stats of the
2225 * current thread's coalition
2226 */
2227 task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2228}
2229
2230uint64_t
2231ml_gpu_stat(__unused thread_t t)
2232{
2233 return 0;
2234}
2235
2236#if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT
2237
2238static void
2239timer_state_event(boolean_t switch_to_kernel)
2240{
2241 thread_t thread = current_thread();
2242 if (!thread->precise_user_kernel_time) {
2243 return;
2244 }
5ba3f43e 2245
2246 processor_t pd = current_processor();
2247 uint64_t now = ml_get_speculative_timebase();
2248
2249 timer_stop(pd->current_state, now);
2250 pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
2251 timer_start(pd->current_state, now);
2252
2253 timer_stop(pd->thread_timer, now);
2254 pd->thread_timer = (switch_to_kernel) ? &thread->system_timer : &thread->user_timer;
2255 timer_start(pd->thread_timer, now);
2256}
2257
2258void
2259timer_state_event_user_to_kernel(void)
2260{
2261 timer_state_event(TRUE);
2262}
2263
2264void
2265timer_state_event_kernel_to_user(void)
2266{
2267 timer_state_event(FALSE);
2268}
f427ee49 2269#endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT */
2270
2271/*
2272 * The following are required for parts of the kernel
2273 * that cannot resolve these functions as inlines:
2274 */
cb323159 2275extern thread_t current_act(void) __attribute__((const));
2276thread_t
2277current_act(void)
2278{
2279 return current_thread_fast();
2280}
2281
2282#undef current_thread
cb323159 2283extern thread_t current_thread(void) __attribute__((const));
2284thread_t
2285current_thread(void)
2286{
2287 return current_thread_fast();
2288}
2289
2290typedef struct{
2291 ex_cb_t cb;
2292 void *refcon;
2293}
2294ex_cb_info_t;
2295
2296ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];
2297
2298/*
2299 * Callback registration
2300 * Currently we support only one registered callback per class but
2301 * it should be possible to support more callbacks
2302 */
2303kern_return_t
2304ex_cb_register(
2305 ex_cb_class_t cb_class,
2306 ex_cb_t cb,
2307 void *refcon)
2308{
2309 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2310
0a7de745 2311 if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX)) {
2312 return KERN_INVALID_VALUE;
2313 }
2314
0a7de745 2315 if (NULL == pInfo->cb) {
2316 pInfo->cb = cb;
2317 pInfo->refcon = refcon;
2318 return KERN_SUCCESS;
2319 }
2320 return KERN_FAILURE;
2321}
2322
2323/*
2324 * Called internally by platform kernel to invoke the registered callback for class
2325 */
2326ex_cb_action_t
2327ex_cb_invoke(
2328 ex_cb_class_t cb_class,
2329 vm_offset_t far)
2330{
2331 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2332 ex_cb_state_t state = {far};
2333
0a7de745 2334 if (cb_class >= EXCB_CLASS_MAX) {
2335 panic("Invalid exception callback class 0x%x\n", cb_class);
2336 }
2337
0a7de745 2338 if (pInfo->cb) {
2339 return pInfo->cb(cb_class, pInfo->refcon, &state);
2340 }
2341 return EXCB_ACTION_NONE;
2342}
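/*
 * Registration/invocation sketch. The callback signature is inferred from the
 * ex_cb_invoke() call above; EXCB_CLASS_EXAMPLE and the `far` field of
 * ex_cb_state_t are assumptions for illustration.
 */
static ex_cb_action_t
example_ex_cb(ex_cb_class_t cb_class, void *refcon, ex_cb_state_t *state)
{
	(void)cb_class; (void)refcon; (void)state;	/* e.g. inspect state->far */
	return EXCB_ACTION_NONE;
}

static void
example_register_ex_cb(void)
{
	if (ex_cb_register(EXCB_CLASS_EXAMPLE, example_ex_cb, NULL) != KERN_SUCCESS) {
		/* a callback for this class was already registered */
	}
	/* later, from the exception path: */
	(void)ex_cb_invoke(EXCB_CLASS_EXAMPLE, /* far */ 0);
}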
2343
2344#if defined(HAS_APPLE_PAC)
2345void
f427ee49 2346ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop)
2347{
2348 assert(task);
2349 task->disable_user_jop = disable_user_jop;
2350}
2351
2352void
f427ee49 2353ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop)
2354{
2355 assert(thread);
2356 thread->machine.disable_user_jop = disable_user_jop;
2357}
2358
2359void
2360ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2361{
2362 if (inherit) {
2363 task->rop_pid = parent_task->rop_pid;
2364 } else {
2365 task->rop_pid = early_random();
2366 }
2367}
2368
2369/**
2370 * jop_pid may be inherited from the parent task or generated inside the shared
2371 * region. Unfortunately these two parameters are available at very different
2372 * times during task creation, so we need to split this into two steps.
2373 */
2374void
2375ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit)
2376{
2377 if (inherit) {
2378 task->jop_pid = parent_task->jop_pid;
2379 } else {
2380 task->jop_pid = ml_default_jop_pid();
2381 }
2382}
2383
2384void
2385ml_task_set_jop_pid_from_shared_region(task_t task)
2386{
2387 vm_shared_region_t sr = vm_shared_region_get(task);
2388 /*
2389 * If there's no shared region, we can assign the key arbitrarily. This
2390 * typically happens when Mach-O image activation failed part of the way
2391 * through, and this task is in the middle of dying with SIGKILL anyway.
2392 */
2393 if (__improbable(!sr)) {
2394 task->jop_pid = early_random();
2395 return;
2396 }
2397 vm_shared_region_deallocate(sr);
2398
2399 /*
2400 * Similarly we have to worry about jetsam having killed the task and
2401 * already cleared the shared_region_id.
2402 */
2403 task_lock(task);
2404 if (task->shared_region_id != NULL) {
2405 task->jop_pid = shared_region_find_key(task->shared_region_id);
2406 } else {
2407 task->jop_pid = early_random();
2408 }
2409 task_unlock(task);
2410}
2411
2412void
2413ml_thread_set_jop_pid(thread_t thread, task_t task)
2414{
2415 thread->machine.jop_pid = task->jop_pid;
2416}
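/*
 * Assumed ordering sketch for the two-step scheme described above; the
 * surrounding task-creation code and the variable names are not from this
 * file.
 */
static void
example_jop_pid_setup(task_t new_task, task_t parent_task, thread_t new_thread, boolean_t inherit)
{
	/* Step 1: at task creation, inherit the parent's key or take the default. */
	ml_task_set_jop_pid(new_task, parent_task, inherit);

	/* Step 2: once the shared region is attached, rekey from it
	 * (falls back to early_random() if the region is already gone). */
	ml_task_set_jop_pid_from_shared_region(new_task);

	/* Each thread snapshots the task's key. */
	ml_thread_set_jop_pid(new_thread, new_task);
}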
2417#endif /* defined(HAS_APPLE_PAC) */
2418
cb323159 2419#if defined(HAS_APPLE_PAC)
2420#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
2421 asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier));
2422
2423/*
2424 * ml_auth_ptr_unchecked: call this instead of the ptrauth_auth_data
2425 * intrinsic when you don't want to trap on an authentication failure.
2426 *
2427 */
2428void *
2429ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
2430{
2431 switch (key & 0x3) {
2432 case ptrauth_key_asia:
f427ee49 2433 _ml_auth_ptr_unchecked(ptr, ia, modifier);
2434 break;
2435 case ptrauth_key_asib:
f427ee49 2436 _ml_auth_ptr_unchecked(ptr, ib, modifier);
2437 break;
2438 case ptrauth_key_asda:
f427ee49 2439 _ml_auth_ptr_unchecked(ptr, da, modifier);
2440 break;
2441 case ptrauth_key_asdb:
f427ee49 2442 _ml_auth_ptr_unchecked(ptr, db, modifier);
2443 break;
2444 }
2445
2446 return ptr;
2447}
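/*
 * Usage sketch: unlike the trapping intrinsic, a failed authentication here
 * is expected to leave a "poisoned" pointer that faults if used, so callers
 * typically compare or validate the result. `signed_fn`, `disc`, and
 * `expected_fn` are hypothetical.
 */
static bool
example_check_signed_ptr(void *signed_fn, uint64_t disc, void *expected_fn)
{
	void *candidate = ml_auth_ptr_unchecked(signed_fn, ptrauth_key_asia, disc);
	return candidate == expected_fn;
}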
2448#endif /* defined(HAS_APPLE_PAC) */
2449
2450#ifdef CONFIG_XNUPOST
2451void
2452ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
2453{
2454 thread_t thread = current_thread();
2455 thread->machine.expected_fault_handler = expected_fault_handler;
2456 thread->machine.expected_fault_addr = expected_fault_addr;
2457}
2458
2459void
2460ml_expect_fault_end(void)
2461{
2462 thread_t thread = current_thread();
2463 thread->machine.expected_fault_handler = NULL;
2464 thread->machine.expected_fault_addr = 0;
2465}
2466#endif /* CONFIG_XNUPOST */
2467
2468void
2469ml_hibernate_active_pre(void)
2470{
2471#if HIBERNATION
2472 if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
2473
2474 hibernate_rebuild_vm_structs();
2475 }
2476#endif /* HIBERNATION */
2477}
2478
2479void
2480ml_hibernate_active_post(void)
2481{
2482#if HIBERNATION
2483 if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
2484 hibernate_machine_init();
2485 hibernate_vm_lock_end();
2486 current_cpu_datap()->cpu_hibernate = 0;
2487 }
2488#endif /* HIBERNATION */
2489}
2490
2491/**
2492 * Return a machine-dependent array of address space regions that should be
2493 * reserved by the VM (pre-mapped in the address space). This will prevent user
2494 * processes from allocating or deallocating from within these regions.
2495 *
2496 * @param vm_is64bit True if the process has a 64-bit address space.
2497 * @param regions An out parameter representing an array of regions to reserve.
2498 *
2499 * @return The number of reserved regions returned through `regions`.
2500 */
2501size_t
2502ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
2503{
2504 assert(regions != NULL);
2505
2506 /**
2507 * Reserved regions only apply to 64-bit address spaces. This is because
2508 * we only expect to grow the maximum user VA address on 64-bit address spaces
2509 * (we've essentially already reached the max for 32-bit spaces). The reserved
2510 * regions should safely fall outside of the max user VA for 32-bit processes.
2511 */
2512 if (vm_is64bit) {
2513 *regions = vm_reserved_regions;
2514 return ARRAY_COUNT(vm_reserved_regions);
2515 } else {
2516 /* Don't reserve any VA regions on arm64_32 processes. */
2517 *regions = NULL;
2518 return 0;
2519 }
2520}
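/*
 * Sketch of the expected consumer in the VM layer (names other than
 * ml_get_vm_reserved_regions() are illustrative).
 */
static void
example_reserve_regions(bool is64bit)
{
	struct vm_reserved_region *regions = NULL;
	size_t count = ml_get_vm_reserved_regions(is64bit, &regions);
	for (size_t i = 0; i < count; i++) {
		/* pre-map / reserve regions[i] in the new address space */
	}
}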
2521/* These WFE recommendations are expected to be updated on a relatively
2522 * infrequent cadence, possibly from a different cluster, hence
2523 * false cacheline sharing isn't expected to be material
2524 */
2525static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];
2526
2527uint32_t
2528ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
2529{
2530 assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
2531 assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
2532 os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
2533 return 0; /* Success */
2534}
2535
2536uint64_t
2537ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
2538{
2539 /* This and its consumer do not synchronize vis-a-vis updates
2540 * of the recommendation; races are acceptable.
2541 */
2542 uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
2543 return wfet;
2544}
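/*
 * Illustrative producer/consumer pairing for the two routines above; the
 * cluster id, interval, and nanoseconds_to_absolutetime() helper are
 * assumptions for the sketch.
 */
static void
example_wfe_recommendation(void)
{
	/* Producer (e.g. a power-management policy): recommend a ~10us WFE
	 * timeout for cluster 0; must not exceed ml_wfe_hint_max_interval. */
	uint64_t wfe_abs;
	nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &wfe_abs);
	(void)ml_update_cluster_wfe_recommendation(0, wfe_abs, 0);

	/* Consumer (idle/WFE path); a value of 0 presumably means no
	 * recommendation has been set. */
	uint64_t timeout = ml_cluster_wfe_timeout(0);
	if (timeout != 0) {
		/* arm the WFE timeout using `timeout` absolute-time ticks */
	}
}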