]> git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/arm64/machine_routines.c
xnu-7195.50.7.100.1.tar.gz
[apple/xnu.git] / osfmk / arm64 / machine_routines.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <arm64/proc_reg.h>
30#include <arm/machine_cpu.h>
31#include <arm/cpu_internal.h>
32#include <arm/cpuid.h>
33#include <arm/io_map_entries.h>
34#include <arm/cpu_data.h>
35#include <arm/cpu_data_internal.h>
36#include <arm/caches_internal.h>
37#include <arm/misc_protos.h>
38#include <arm/machdep_call.h>
39#include <arm/machine_routines.h>
40#include <arm/rtclock.h>
41#include <arm/cpuid_internal.h>
42#include <arm/cpu_capabilities.h>
43#include <console/serial_protos.h>
44#include <kern/machine.h>
45#include <kern/misc_protos.h>
46#include <prng/random.h>
47#include <kern/startup.h>
48#include <kern/thread.h>
49#include <kern/timer_queue.h>
50#include <mach/machine.h>
51#include <machine/atomic.h>
52#include <machine/config.h>
53#include <vm/pmap.h>
54#include <vm/vm_page.h>
55#include <vm/vm_shared_region.h>
56#include <vm/vm_map.h>
57#include <sys/codesign.h>
58#include <sys/kdebug.h>
59#include <kern/coalition.h>
60#include <pexpert/device_tree.h>
61
62#include <IOKit/IOPlatformExpert.h>
63#if HIBERNATION
64#include <IOKit/IOHibernatePrivate.h>
65#include <arm64/hibernate_ppl_hmac.h>
66#include <arm64/ppl/ppl_hib.h>
67#endif /* HIBERNATION */
68
69#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
70#include <arm64/amcc_rorgn.h>
71#endif
72
73#include <libkern/section_keywords.h>
74
75/**
76 * On supported hardware, debuggable builds make the HID bits read-only
77 * without locking them. This lets people manually modify HID bits while
78 * debugging, since they can use a debugging tool to first reset the HID
79 * bits back to read/write. However it will still catch xnu changes that
80 * accidentally write to HID bits after they've been made read-only.
81 */
82#if HAS_TWO_STAGE_SPR_LOCK && !(DEVELOPMENT || DEBUG)
83#define USE_TWO_STAGE_SPR_LOCK
84#endif
85
86#if KPC
87#include <kern/kpc.h>
88#endif
89
90#define MPIDR_CPU_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
91#define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)
92
93#if HAS_CLUSTER
94static uint8_t cluster_initialized = 0;
95#endif
96
/*
 * Lock timeout and mutex spin tunables. All of them are assigned by
 * ml_init_lock_timeout().
 */
97uint32_t LockTimeOut;         /* spin lock timeout in absolute-time units, truncated to 32 bits */
98uint32_t LockTimeOutUsec;     /* the same spin lock timeout expressed in microseconds */
99uint64_t TLockTimeOut;        /* from the "tlto_us" boot-arg if present, else LockTimeOut / 2 */
100uint64_t MutexSpin;          /* mutex spin interval in absolute-time units ("mtxspin" boot-arg or 10us) */
101uint64_t low_MutexSpin;      /* initialized to MutexSpin */
102int64_t high_MutexSpin;      /* initialized to low_MutexSpin */
103
104static uint64_t ml_wfe_hint_max_interval;
105#define MAX_WFE_HINT_INTERVAL_US (500ULL)
106
107/* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
108TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);
109
110extern vm_offset_t segLOWEST;
111extern vm_offset_t segLOWESTTEXT;
112extern vm_offset_t segLASTB;
113extern unsigned long segSizeLAST;
114
115/* ARM64 specific bounds; used to test for presence in the kernelcache. */
116extern vm_offset_t vm_kernelcache_base;
117extern vm_offset_t vm_kernelcache_top;
118
119#if defined(HAS_IPI)
120unsigned int gFastIPI = 1;
121#define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
122static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
123 kDeferredIPITimerDefault);
124#endif /* defined(HAS_IPI) */
125
126thread_t Idle_context(void);
127
128SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
129SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
130SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
131 .version = CPU_TOPOLOGY_VERSION,
132 .cpus = topology_cpu_array,
133 .clusters = topology_cluster_array,
134};
135/**
136 * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
137 * entries of an arbitrary data type. This is intended for use by specialized consumers
138 * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
139 * as follows:
140 * hypothetical_array[cluster_offsets[AFF1] + AFF0]
141 * Most consumers should instead use general-purpose facilities such as PERCPU or
142 * ml_get_cpu_number().
143 */
144SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];
145
146SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;
147
148extern uint32_t lockdown_done;
149
150/**
151 * Represents regions of virtual address space that should be reserved
152 * (pre-mapped) in each user address space.
153 */
154SECURITY_READ_ONLY_LATE(static struct vm_reserved_region) vm_reserved_regions[] = {
155 /*
156 * Reserve the virtual memory space representing the commpage nesting region
157 * to prevent user processes from allocating memory within it. The actual
158 * page table entries for the commpage are inserted by vm_commpage_enter().
159 * This vm_map_enter() just prevents userspace from allocating/deallocating
160 * anything within the entire commpage nested region.
161 */
162 {
163 .vmrr_name = "commpage nesting",
164 .vmrr_addr = _COMM_PAGE64_NESTING_START,
165 .vmrr_size = _COMM_PAGE64_NESTING_SIZE
166 }
167};
168
169uint32_t get_arm_cpu_version(void);
170
#if defined(HAS_IPI)
/*
 * Write an IPI request of the given type for the CPU identified by cpu_mpidr.
 *
 * With HAS_CLUSTER, a target in the caller's own cluster (same AFF1) is
 * signalled through the local request register; any other target goes
 * through the global request register, which also carries the target
 * cluster ID. Without HAS_CLUSTER there is a single request register.
 *
 * NOTE: this logic expects to be called in a non-preemptible context, or at
 * least one in which the calling thread is bound to a single CPU. Otherwise
 * the thread may migrate between choosing the IPI mechanism and issuing
 * the IPI.
 */
static inline void
ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
{
	const uint64_t target_core = MPIDR_CPU_ID(cpu_mpidr);
#if HAS_CLUSTER
	#define IPI_RR_TARGET_CLUSTER_SHIFT 16
	const uint64_t target_cluster = MPIDR_CLUSTER_ID(cpu_mpidr);
	uint64_t self_mpidr;

	MRS(self_mpidr, "MPIDR_EL1");
	if (MPIDR_CLUSTER_ID(self_mpidr) != target_cluster) {
		uint64_t request = type | (target_cluster << IPI_RR_TARGET_CLUSTER_SHIFT) | target_core;
		MSR(ARM64_REG_IPI_RR_GLOBAL, request);
	} else {
		uint64_t request = type | target_core;
		MSR(ARM64_REG_IPI_RR_LOCAL, request);
	}
#else
	uint64_t request = type | target_core;
	MSR(ARM64_REG_IPI_RR, request);
#endif
}
#endif
196
197#if !defined(HAS_IPI)
198__dead2
199#endif
200void
201ml_cpu_signal(unsigned int cpu_mpidr __unused)
202{
203#if defined(HAS_IPI)
204 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
205#else
206 panic("Platform does not support ACC Fast IPI");
207#endif
208}
209
210#if !defined(HAS_IPI)
211__dead2
212#endif
213void
214ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
215{
216#if defined(HAS_IPI)
217 /* adjust IPI_CR timer countdown value for deferred IPI
218 * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
219 * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
220 *
221 * global register, should only require a single write to update all
222 * CPU cores: from Skye ACC user spec section 5.7.3.3
223 *
224 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
225 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
226 */
227 uint64_t abstime;
228
229 nanoseconds_to_absolutetime(nanosecs, &abstime);
230
231 abstime = MIN(abstime, 0xFFFF);
232
233 /* update deferred_ipi_timer_ns with the new clamped value */
234 absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
235
236 MSR(ARM64_REG_IPI_CR, abstime);
237#else
238 (void)nanosecs;
239 panic("Platform does not support ACC Fast IPI");
240#endif
241}
242
/*
 * Return the deferred-IPI countdown, in nanoseconds, as last recorded in
 * deferred_ipi_timer_ns. Returns 0 on builds without HAS_IPI.
 */
uint64_t
ml_cpu_signal_deferred_get_timer(void)
{
#if defined(HAS_IPI)
	return deferred_ipi_timer_ns;
#else
	return 0;
#endif
}
252
253#if !defined(HAS_IPI)
254__dead2
255#endif
256void
257ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
258{
259#if defined(HAS_IPI)
260 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
261#else
262 panic("Platform does not support ACC Fast IPI deferral");
263#endif
264}
265
266#if !defined(HAS_IPI)
267__dead2
268#endif
269void
270ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
271{
272#if defined(HAS_IPI)
273 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
274#else
275 panic("Platform does not support ACC Fast IPI retraction");
276#endif
277}
278
279void
280machine_idle(void)
281{
282 /* Interrupts are expected to be masked on entry or re-entry via
283 * Idle_load_context()
284 */
285 assert((__builtin_arm_rsr("DAIF") & DAIF_IRQF) == DAIF_IRQF);
286 Idle_context();
287 __builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
288}
289
290void
291OSSynchronizeIO(void)
292{
293 __builtin_arm_dsb(DSB_SY);
294}
295
296uint64_t
297get_aux_control(void)
298{
299 uint64_t value;
300
301 MRS(value, "ACTLR_EL1");
302 return value;
303}
304
305uint64_t
306get_mmu_control(void)
307{
308 uint64_t value;
309
310 MRS(value, "SCTLR_EL1");
311 return value;
312}
313
314uint64_t
315get_tcr(void)
316{
317 uint64_t value;
318
319 MRS(value, "TCR_EL1");
320 return value;
321}
322
323boolean_t
324ml_get_interrupts_enabled(void)
325{
326 uint64_t value;
327
328 MRS(value, "DAIF");
329 if (value & DAIF_IRQF) {
330 return FALSE;
331 }
332 return TRUE;
333}
334
335pmap_paddr_t
336get_mmu_ttb(void)
337{
338 pmap_paddr_t value;
339
340 MRS(value, "TTBR0_EL1");
341 return value;
342}
343
344uint32_t
345get_arm_cpu_version(void)
346{
347 uint32_t value = machine_read_midr();
348
349 /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
350 return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
351}
352
353bool
354ml_feature_supported(uint32_t feature_bit)
355{
356 uint64_t aidr_el1_value = 0;
357
358 MRS(aidr_el1_value, "AIDR_EL1");
359
360
361 return aidr_el1_value & feature_bit;
362}
363
364/*
365 * user_cont_hwclock_allowed()
366 *
367 * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
368 * as a continuous time source (e.g. from mach_continuous_time)
369 */
370boolean_t
371user_cont_hwclock_allowed(void)
372{
373#if HAS_CONTINUOUS_HWCLOCK
374 return TRUE;
375#else
376 return FALSE;
377#endif
378}
379
380
381uint8_t
382user_timebase_type(void)
383{
384 return USER_TIMEBASE_SPEC;
385}
386
387void
388machine_startup(__unused boot_args * args)
389{
390#if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
391 if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
392 gFastIPI = 1;
393 }
394#endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
395
396 machine_conf();
397
398 /*
399 * Kick off the kernel bootstrap.
400 */
401 kernel_bootstrap();
402 /* NOTREACHED */
403}
404
405
406void
407machine_lockdown(void)
408{
409 arm_vm_prot_finalize(PE_state.bootArgs);
410
411#if CONFIG_KERNEL_INTEGRITY
412#if KERNEL_INTEGRITY_WT
413 /* Watchtower
414 *
415 * Notify the monitor about the completion of early kernel bootstrap.
416 * From this point forward it will enforce the integrity of kernel text,
417 * rodata and page tables.
418 */
419
420#ifdef MONITOR
421 monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
422#endif
423#endif /* KERNEL_INTEGRITY_WT */
424
425#if XNU_MONITOR
426 pmap_lockdown_ppl();
427#endif
428
429#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
430 /* KTRR
431 *
432 * Lock physical KTRR region. KTRR region is read-only. Memory outside
433 * the region is not executable at EL1.
434 */
435
436 rorgn_lockdown();
437#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
438
439#if HIBERNATION
440 /* sign the kernel read-only region */
441 if (ppl_hmac_init() == KERN_SUCCESS) {
442 ppl_hmac_compute_rorgn_hmac();
443 }
444#endif /* HIBERNATION */
445
446#endif /* CONFIG_KERNEL_INTEGRITY */
447
448#if HIBERNATION
449 /* Avoid configuration security issues by panic'ing if hibernation is
450 * supported but we don't know how to invalidate SIO HMAC keys, see
451 * below. */
452 if (ppl_hib_hibernation_supported() &&
453 NULL == invalidate_hmac_function) {
454 panic("Invalidate HMAC function wasn't set when needed");
455 }
456#endif /* HIBERNATION */
457
458
459 lockdown_done = 1;
460}
461
462
463char *
464machine_boot_info(
465 __unused char *buf,
466 __unused vm_size_t size)
467{
468 return PE_boot_args();
469}
470
471void
472slave_machine_init(__unused void *param)
473{
474 cpu_machine_init(); /* Initialize the processor */
475 clock_init(); /* Init the clock */
476}
477
478/*
479 * Routine: machine_processor_shutdown
480 * Function:
481 */
482thread_t
483machine_processor_shutdown(
484 __unused thread_t thread,
485 void (*doshutdown)(processor_t),
486 processor_t processor)
487{
488 return Shutdown_context(doshutdown, processor);
489}
490
491/*
492 * Routine: ml_init_lock_timeout
493 * Function:
494 */
495void
496ml_init_lock_timeout(void)
497{
498 uint64_t abstime;
499 uint64_t mtxspin;
500 uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
501 uint32_t slto;
502
503 if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
504 default_timeout_ns = slto * NSEC_PER_USEC;
505 }
506
507 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
508 LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
509 LockTimeOut = (uint32_t)abstime;
510
511 if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
512 nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &abstime);
513 TLockTimeOut = abstime;
514 } else {
515 TLockTimeOut = LockTimeOut >> 1;
516 }
517
518 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
519 if (mtxspin > USEC_PER_SEC >> 4) {
520 mtxspin = USEC_PER_SEC >> 4;
521 }
522 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
523 } else {
524 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
525 }
526 MutexSpin = abstime;
527 low_MutexSpin = MutexSpin;
528 /*
529 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
530 * real_ncpus is not set at this time
531 *
532 * NOTE: active spinning is disabled in arm. It can be activated
533 * by setting high_MutexSpin through the sysctl.
534 */
535 high_MutexSpin = low_MutexSpin;
536
537 nanoseconds_to_absolutetime(MAX_WFE_HINT_INTERVAL_US * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
538}
539
540/*
541 * This is called from the machine-independent routine cpu_up()
542 * to perform machine-dependent info updates.
543 */
544void
545ml_cpu_up(void)
546{
547 os_atomic_inc(&machine_info.physical_cpu, relaxed);
548 os_atomic_inc(&machine_info.logical_cpu, relaxed);
549}
550
551/*
552 * This is called from the machine-independent routine cpu_down()
553 * to perform machine-dependent info updates.
554 */
555void
556ml_cpu_down(void)
557{
558 cpu_data_t *cpu_data_ptr;
559
560 os_atomic_dec(&machine_info.physical_cpu, relaxed);
561 os_atomic_dec(&machine_info.logical_cpu, relaxed);
562
563 /*
564 * If we want to deal with outstanding IPIs, we need to
565 * do relatively early in the processor_doshutdown path,
566 * as we pend decrementer interrupts using the IPI
567 * mechanism if we cannot immediately service them (if
568 * IRQ is masked). Do so now.
569 *
570 * We aren't on the interrupt stack here; would it make
571 * more sense to disable signaling and then enable
572 * interrupts? It might be a bit cleaner.
573 */
574 cpu_data_ptr = getCpuDatap();
575 cpu_data_ptr->cpu_running = FALSE;
576
577 if (cpu_data_ptr != &BootCpuData) {
578 /*
579 * Move all of this cpu's timers to the master/boot cpu,
580 * and poke it in case there's a sooner deadline for it to schedule.
581 */
582 timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
583 cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, NULL);
584 }
585
586 cpu_signal_handler_internal(TRUE);
587}
588
589/*
590 * Routine: ml_cpu_get_info
591 * Function:
592 */
593void
594ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
595{
596 cache_info_t *cpuid_cache_info;
597
598 cpuid_cache_info = cache_info();
599 ml_cpu_info->vector_unit = 0;
600 ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
601 ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
602 ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;
603
604#if (__ARM_ARCH__ >= 7)
605 ml_cpu_info->l2_settings = 1;
606 ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
607#else
608 ml_cpu_info->l2_settings = 0;
609 ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
610#endif
611 ml_cpu_info->l3_settings = 0;
612 ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
613}
614
615unsigned int
616ml_get_machine_mem(void)
617{
618 return machine_info.memory_size;
619}
620
621__attribute__((noreturn))
622void
623halt_all_cpus(boolean_t reboot)
624{
625 if (reboot) {
626 printf("MACH Reboot\n");
627 PEHaltRestart(kPERestartCPU);
628 } else {
629 printf("CPU halted\n");
630 PEHaltRestart(kPEHaltCPU);
631 }
632 while (1) {
633 ;
634 }
635}
636
637__attribute__((noreturn))
638void
639halt_cpu(void)
640{
641 halt_all_cpus(FALSE);
642}
643
644/*
645 * Routine: machine_signal_idle
646 * Function:
647 */
648void
649machine_signal_idle(
650 processor_t processor)
651{
652 cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
653 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
654}
655
656void
657machine_signal_idle_deferred(
658 processor_t processor)
659{
660 cpu_signal_deferred(processor_to_cpu_datap(processor));
661 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
662}
663
664void
665machine_signal_idle_cancel(
666 processor_t processor)
667{
668 cpu_signal_cancel(processor_to_cpu_datap(processor));
669 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
670}
671
672/*
673 * Routine: ml_install_interrupt_handler
674 * Function: Initialize Interrupt Handler
675 */
676void
677ml_install_interrupt_handler(
678 void *nub,
679 int source,
680 void *target,
681 IOInterruptHandler handler,
682 void *refCon)
683{
684 cpu_data_t *cpu_data_ptr;
685 boolean_t current_state;
686
687 current_state = ml_set_interrupts_enabled(FALSE);
688 cpu_data_ptr = getCpuDatap();
689
690 cpu_data_ptr->interrupt_nub = nub;
691 cpu_data_ptr->interrupt_source = source;
692 cpu_data_ptr->interrupt_target = target;
693 cpu_data_ptr->interrupt_handler = handler;
694 cpu_data_ptr->interrupt_refCon = refCon;
695
696 (void) ml_set_interrupts_enabled(current_state);
697}
698
/*
 *	Routine:        ml_init_interrupt
 *	Function:       Initialize Interrupts. With HAS_IPI this programs the
 *	                deferred-IPI timer; otherwise it does nothing.
 */
void
ml_init_interrupt(void)
{
#if defined(HAS_IPI)
	/*
	 * ml_init_interrupt is called once per CPU, but there is only one
	 * global copy of the timer register for skye, so programming it from
	 * every CPU would be redundant. Only CPUs whose per-CPU data has
	 * cluster_master set perform the write.
	 */
	if (!getCpuDatap()->cluster_master) {
		return;
	}

	ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
#endif
}
717
718/*
719 * Routine: ml_init_timebase
720 * Function: register and setup Timebase, Decremeter services
721 */
722void
723ml_init_timebase(
724 void *args,
725 tbd_ops_t tbd_funcs,
726 vm_offset_t int_address,
727 vm_offset_t int_value __unused)
728{
729 cpu_data_t *cpu_data_ptr;
730
731 cpu_data_ptr = (cpu_data_t *)args;
732
733 if ((cpu_data_ptr == &BootCpuData)
734 && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
735 rtclock_timebase_func = *tbd_funcs;
736 rtclock_timebase_addr = int_address;
737 }
738}
739
740#define ML_READPROP_MANDATORY UINT64_MAX
741
742static uint64_t
743ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
744{
745 void const *prop;
746 unsigned int propSize;
747
748 if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
749 if (propSize == sizeof(uint8_t)) {
750 return *((uint8_t const *)prop);
751 } else if (propSize == sizeof(uint16_t)) {
752 return *((uint16_t const *)prop);
753 } else if (propSize == sizeof(uint32_t)) {
754 return *((uint32_t const *)prop);
755 } else if (propSize == sizeof(uint64_t)) {
756 return *((uint64_t const *)prop);
757 } else {
758 panic("CPU property '%s' has bad size %u", propertyName, propSize);
759 }
760 } else {
761 if (default_value == ML_READPROP_MANDATORY) {
762 panic("Missing mandatory property '%s'", propertyName);
763 }
764 return default_value;
765 }
766}
767
768static boolean_t
769ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr)
770{
771 uint64_t const *prop;
772 unsigned int propSize;
773
774 if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
775 return FALSE;
776 }
777
778 if (propSize != sizeof(uint64_t) * 2) {
779 panic("Wrong property size for %s", propertyName);
780 }
781
782 *pa_ptr = prop[0];
783 *len_ptr = prop[1];
784 return TRUE;
785}
786
787static boolean_t
788ml_is_boot_cpu(const DTEntry entry)
789{
790 void const *prop;
791 unsigned int propSize;
792
793 if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) {
794 panic("unable to retrieve state for cpu");
795 }
796
797 if (strncmp((char const *)prop, "running", propSize) == 0) {
798 return TRUE;
799 } else {
800 return FALSE;
801 }
802}
803
804static void
805ml_read_chip_revision(unsigned int *rev __unused)
806{
807 // The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
808#ifdef APPLE_ARM64_ARCH_FAMILY
809 DTEntry entryP;
810
811 if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
812 *rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
813 } else {
814 *rev = CPU_VERSION_UNKNOWN;
815 }
816#endif
817}
818
819static boolean_t
820ml_parse_interrupt_prop(const DTEntry entry, ml_topology_cpu_t *cpu)
821{
822 uint32_t const *prop;
823 unsigned int propSize;
824
825 if (SecureDTGetProperty(entry, "interrupts", (void const **)&prop, &propSize) != kSuccess) {
826 return FALSE;
827 }
828
829 if (propSize == sizeof(uint32_t) * 1) {
830 cpu->pmi_irq = prop[0];
831 return TRUE;
832 } else if (propSize == sizeof(uint32_t) * 3) {
833 cpu->self_ipi_irq = prop[0];
834 cpu->pmi_irq = prop[1];
835 cpu->other_ipi_irq = prop[2];
836 return TRUE;
837 } else {
838 return FALSE;
839 }
840}
841
842void
843ml_parse_cpu_topology(void)
844{
845 DTEntry entry, child __unused;
846 OpaqueDTEntryIterator iter;
847 uint32_t cpu_boot_arg;
848 int err;
849
850 int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
851 int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
852 cpu_boot_arg = MAX_CPUS;
853 PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
854
855 err = SecureDTLookupEntry(NULL, "/cpus", &entry);
856 assert(err == kSuccess);
857
858 err = SecureDTInitEntryIterator(entry, &iter);
859 assert(err == kSuccess);
860
861 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
862 cluster_offsets[i] = -1;
863 cluster_phys_to_logical[i] = -1;
864 cluster_max_cpu_phys_id[i] = 0;
865 }
866
867 while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
868 boolean_t is_boot_cpu = ml_is_boot_cpu(child);
869
870 // If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
871 // been added to the topology struct yet, and we only have one slot left, then skip
872 // every other non-boot CPU in order to leave room for the boot CPU.
873 //
874 // e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
875 // array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted.
876 if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
877 continue;
878 }
879 if (topology_info.num_cpus >= cpu_boot_arg) {
880 break;
881 }
882
883 ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus];
884
885 cpu->cpu_id = topology_info.num_cpus++;
886 assert(cpu->cpu_id < MAX_CPUS);
887 topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id);
888
889 cpu->die_id = (int)ml_readprop(child, "die-id", 0);
890 topology_info.max_die_id = MAX(topology_info.max_die_id, cpu->die_id);
891
892 cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
893
894 cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
895 cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
896 cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
897 cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
898 cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);
899
900 ml_parse_interrupt_prop(child, cpu);
901 ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
902 ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
903 ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
904 cpu->cluster_type = CLUSTER_TYPE_SMP;
905
906
907 /*
908 * Since we want to keep a linear cluster ID space, we cannot just rely
909 * on the value provided by EDT. Instead, use the MPIDR value to see if we have
910 * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
911 */
912#if HAS_CLUSTER
913 uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
914#else
915 uint32_t phys_cluster_id = 0;
916#endif
917 assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
918 cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
919 topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);
920
921 assert(cpu->cluster_id < MAX_CPU_CLUSTERS);
922
923 ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id];
924 if (cluster->num_cpus == 0) {
925 assert(topology_info.num_clusters < MAX_CPU_CLUSTERS);
926
927 topology_info.num_clusters++;
928 topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id);
929
930 cluster->cluster_id = cpu->cluster_id;
931 cluster->cluster_type = cpu->cluster_type;
932 cluster->first_cpu_id = cpu->cpu_id;
933 assert(cluster_phys_to_logical[phys_cluster_id] == -1);
934 cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id;
935
936 // Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
937 // If we wind up with a bunch of these, we might want to create separate per-cluster
938 // EDT nodes and have the CPU nodes reference them through a phandle.
939 ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
940 ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
941 }
942
943#if HAS_CLUSTER
944 if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) {
945 cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id);
946 }
947#endif
948
949 cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
950 cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));
951
952 cluster->num_cpus++;
953 cluster->cpu_mask |= 1ULL << cpu->cpu_id;
954
955 if (is_boot_cpu) {
956 assert(topology_info.boot_cpu == NULL);
957 topology_info.boot_cpu = cpu;
958 topology_info.boot_cluster = cluster;
959 }
960 }
961
962#if HAS_CLUSTER
963 /*
964 * Build the cluster offset array, ensuring that the region reserved
965 * for each physical cluster contains enough entries to be indexed
966 * by the maximum physical CPU ID (AFF0) within the cluster.
967 */
968 unsigned int cur_cluster_offset = 0;
969 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
970 if (cluster_phys_to_logical[i] != -1) {
971 cluster_offsets[i] = cur_cluster_offset;
972 cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
973 }
974 }
975 assert(cur_cluster_offset <= MAX_CPUS);
976#else
977 /*
978 * For H10, there are really 2 physical clusters, but they are not separated
979 * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering
980 * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing,
981 * treat H10 and earlier devices as though they contain a single cluster.
982 */
983 cluster_offsets[0] = 0;
984#endif
985 assert(topology_info.boot_cpu != NULL);
986 ml_read_chip_revision(&topology_info.chip_revision);
987
988 /*
989 * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
990 * not be booting from cpu 0. Userspace will consume the current
991 * CPU number through this register. For non-boot cores, this is
992 * done in start.s (start_cpu) using the cpu_number field of the
993 * per-cpu data object.
994 */
995 assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
996 __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)topology_info.boot_cpu->cpu_id);
997}
998
999const ml_topology_info_t *
1000ml_get_topology_info(void)
1001{
1002 return &topology_info;
1003}
1004
1005void
1006ml_map_cpu_pio(void)
1007{
1008 unsigned int i;
1009
1010 for (i = 0; i < topology_info.num_cpus; i++) {
1011 ml_topology_cpu_t *cpu = &topology_info.cpus[i];
1012 if (cpu->cpu_IMPL_pa) {
1013 cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
1014 cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
1015 }
1016 if (cpu->cpu_UTTDBG_pa) {
1017 cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
1018 }
1019 }
1020
1021 for (i = 0; i < topology_info.num_clusters; i++) {
1022 ml_topology_cluster_t *cluster = &topology_info.clusters[i];
1023 if (cluster->acc_IMPL_pa) {
1024 cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
1025 }
1026 if (cluster->cpm_IMPL_pa) {
1027 cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
1028 }
1029 }
1030}
1031
1032unsigned int
1033ml_get_cpu_count(void)
1034{
1035 return topology_info.num_cpus;
1036}
1037
1038unsigned int
1039ml_get_cluster_count(void)
1040{
1041 return topology_info.num_clusters;
1042}
1043
1044int
1045ml_get_boot_cpu_number(void)
1046{
1047 return topology_info.boot_cpu->cpu_id;
1048}
1049
1050cluster_type_t
1051ml_get_boot_cluster(void)
1052{
1053 return topology_info.boot_cluster->cluster_type;
1054}
1055
1056int
1057ml_get_cpu_number(uint32_t phys_id)
1058{
1059 phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK;
1060
1061 for (unsigned i = 0; i < topology_info.num_cpus; i++) {
1062 if (topology_info.cpus[i].phys_id == phys_id) {
1063 return i;
1064 }
1065 }
1066
1067 return -1;
1068}
1069
1070int
1071ml_get_cluster_number(uint32_t phys_id)
1072{
1073 int cpu_id = ml_get_cpu_number(phys_id);
1074 if (cpu_id < 0) {
1075 return -1;
1076 }
1077
1078 ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id];
1079
1080 return cpu->cluster_id;
1081}
1082
/*
 * Routine:	ml_get_cpu_number_local
 * Function:	Determine the logical number of the executing CPU by reading
 *		MPIDR_EL1 and resolving its low 32 bits with ml_get_cpu_number().
 *		A failed lookup (-1) becomes a large unsigned value, which the
 *		range assertion below rejects.
 */
unsigned int
ml_get_cpu_number_local(void)
{
	uint64_t mpidr = 0;
	unsigned this_cpu;

	/* The CPU is identified by the constant bits of MPIDR_EL1. */
	MRS(mpidr, "MPIDR_EL1");
	this_cpu = ml_get_cpu_number((uint32_t)mpidr);

	assert(this_cpu <= (unsigned int)ml_get_max_cpu_number());

	return this_cpu;
}
1097
/*
 * Routine:	ml_get_cluster_number_local
 * Function:	Determine the cluster number of the executing CPU by reading
 *		MPIDR_EL1 and resolving its low 32 bits with
 *		ml_get_cluster_number(). A failed lookup (-1) becomes a large
 *		unsigned value, which the range assertion below rejects.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype, matching ml_get_cpu_number_local(void).
 */
int
ml_get_cluster_number_local(void)
{
	uint64_t mpidr_el1_value = 0;
	unsigned cluster_id;

	/* We identify the cluster based on the constant bits of MPIDR_EL1. */
	MRS(mpidr_el1_value, "MPIDR_EL1");
	cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value);

	assert(cluster_id <= (unsigned int)ml_get_max_cluster_number());

	return cluster_id;
}
1112
1113int
1114ml_get_max_cpu_number(void)
1115{
1116 return topology_info.max_cpu_id;
1117}
1118
1119int
1120ml_get_max_cluster_number(void)
1121{
1122 return topology_info.max_cluster_id;
1123}
1124
1125unsigned int
1126ml_get_first_cpu_id(unsigned int cluster_id)
1127{
1128 return topology_info.clusters[cluster_id].first_cpu_id;
1129}
1130
/*
 * Routine:	ml_lockdown_init
 * Function:	When KERNEL_INTEGRITY_KTRR or KERNEL_INTEGRITY_CTRR is defined,
 *		calls rorgn_stash_range(); otherwise does nothing.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype rather than an old-style unspecified-argument declaration.
 */
void
ml_lockdown_init(void)
{
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	rorgn_stash_range();
#endif
}
1138
1139kern_return_t
1140ml_lockdown_handler_register(lockdown_handler_t f, void *this)
1141{
1142 if (!f) {
1143 return KERN_FAILURE;
1144 }
1145
1146 assert(lockdown_done);
1147 f(this); // XXX: f this whole function
1148
1149 return KERN_SUCCESS;
1150}
1151
1152kern_return_t
1153ml_processor_register(ml_processor_info_t *in_processor_info,
1154 processor_t *processor_out, ipi_handler_t *ipi_handler_out,
1155 perfmon_interrupt_handler_func *pmi_handler_out)
1156{
1157 cpu_data_t *this_cpu_datap;
1158 processor_set_t pset;
1159 boolean_t is_boot_cpu;
1160 static unsigned int reg_cpu_count = 0;
1161
1162 if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
1163 return KERN_FAILURE;
1164 }
1165
1166 if ((unsigned)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= topology_info.num_cpus) {
1167 return KERN_FAILURE;
1168 }
1169
1170 if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
1171 is_boot_cpu = FALSE;
1172 this_cpu_datap = cpu_data_alloc(FALSE);
1173 cpu_data_init(this_cpu_datap);
1174 } else {
1175 this_cpu_datap = &BootCpuData;
1176 is_boot_cpu = TRUE;
1177 }
1178
1179 assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number());
1180
1181 this_cpu_datap->cpu_id = in_processor_info->cpu_id;
1182
1183 this_cpu_datap->cpu_console_buf = console_cpu_alloc(is_boot_cpu);
1184 if (this_cpu_datap->cpu_console_buf == (void *)(NULL)) {
1185 goto processor_register_error;
1186 }
1187
1188 if (!is_boot_cpu) {
1189 this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id);
1190
1191 if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
1192 goto processor_register_error;
1193 }
1194 }
1195
1196 this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle;
1197 this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch;
1198 nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
1199 this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
1200
1201 this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer;
1202 this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
1203
1204 this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler;
1205 this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
1206 this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
1207 this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
1208
1209 this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
1210 this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
1211 this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
1212 this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
1213 this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
1214 this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
1215
1216#if HAS_CLUSTER
1217 this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
1218#else /* HAS_CLUSTER */
1219 this_cpu_datap->cluster_master = is_boot_cpu;
1220#endif /* HAS_CLUSTER */
1221
1222 pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
1223
1224 assert(pset != NULL);
1225 kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
1226
1227 processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap);
1228 if (!is_boot_cpu) {
1229 processor_init(processor, this_cpu_datap->cpu_number, pset);
1230
1231 if (this_cpu_datap->cpu_l2_access_penalty) {
1232 /*
1233 * Cores that have a non-zero L2 access penalty compared
1234 * to the boot processor should be de-prioritized by the
1235 * scheduler, so that threads use the cores with better L2
1236 * preferentially.
1237 */
1238 processor_set_primary(processor, master_processor);
1239 }
1240 }
1241
1242 *processor_out = processor;
1243 *ipi_handler_out = cpu_signal_handler;
1244#if CPMU_AIC_PMI && MONOTONIC
1245 *pmi_handler_out = mt_cpmu_aic_pmi;
1246#else
1247 *pmi_handler_out = NULL;
1248#endif /* CPMU_AIC_PMI && MONOTONIC */
1249 if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
1250 *in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
1251 }
1252
1253#if KPC
1254 if (kpc_register_cpu(this_cpu_datap) != TRUE) {
1255 goto processor_register_error;
1256 }
1257#endif /* KPC */
1258
1259 if (!is_boot_cpu) {
1260 random_cpu_init(this_cpu_datap->cpu_number);
1261 // now let next CPU register itself
1262 OSIncrementAtomic((SInt32*)&real_ncpus);
1263 }
1264
1265 return KERN_SUCCESS;
1266
1267processor_register_error:
1268#if KPC
1269 kpc_unregister_cpu(this_cpu_datap);
1270#endif /* KPC */
1271 if (!is_boot_cpu) {
1272 cpu_data_free(this_cpu_datap);
1273 }
1274
1275 return KERN_FAILURE;
1276}
1277
1278void
1279ml_init_arm_debug_interface(
1280 void * in_cpu_datap,
1281 vm_offset_t virt_address)
1282{
1283 ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1284 do_debugid();
1285}
1286
1287/*
1288 * Routine: init_ast_check
1289 * Function:
1290 */
1291void
1292init_ast_check(
1293 __unused processor_t processor)
1294{
1295}
1296
1297/*
1298 * Routine: cause_ast_check
1299 * Function:
1300 */
1301void
1302cause_ast_check(
1303 processor_t processor)
1304{
1305 if (current_processor() != processor) {
1306 cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1307 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1308 }
1309}
1310
1311extern uint32_t cpu_idle_count;
1312
1313void
1314ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
1315{
1316 *icp = ml_at_interrupt_context();
1317 *pidlep = (cpu_idle_count == real_ncpus);
1318}
1319
/*
 *	Routine:        ml_cause_interrupt
 *	Function:	Generate a fake interrupt. Currently a no-op stub
 *			(marked BS_XXX in the original source).
 */
void
ml_cause_interrupt(void)
{
	/* BS_XXX: not implemented */
}
1329
1330/* Map memory map IO space */
1331vm_offset_t
1332ml_io_map(
1333 vm_offset_t phys_addr,
1334 vm_size_t size)
1335{
1336 return io_map(phys_addr, size, VM_WIMG_IO);
1337}
1338
1339/* Map memory map IO space (with protections specified) */
1340vm_offset_t
1341ml_io_map_with_prot(
1342 vm_offset_t phys_addr,
1343 vm_size_t size,
1344 vm_prot_t prot)
1345{
1346 return io_map_with_prot(phys_addr, size, VM_WIMG_IO, prot);
1347}
1348
1349vm_offset_t
1350ml_io_map_wcomb(
1351 vm_offset_t phys_addr,
1352 vm_size_t size)
1353{
1354 return io_map(phys_addr, size, VM_WIMG_WCOMB);
1355}
1356
1357void
1358ml_io_unmap(vm_offset_t addr, vm_size_t sz)
1359{
1360 pmap_remove(kernel_pmap, addr, addr + sz);
1361 kmem_free(kernel_map, addr, sz);
1362}
1363
1364/* boot memory allocation */
1365vm_offset_t
1366ml_static_malloc(
1367 __unused vm_size_t size)
1368{
1369 return (vm_offset_t) NULL;
1370}
1371
1372vm_map_address_t
1373ml_map_high_window(
1374 vm_offset_t phys_addr,
1375 vm_size_t len)
1376{
1377 return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1378}
1379
1380vm_offset_t
1381ml_static_ptovirt(
1382 vm_offset_t paddr)
1383{
1384 return phystokv(paddr);
1385}
1386
1387vm_offset_t
1388ml_static_slide(
1389 vm_offset_t vaddr)
1390{
1391 vm_offset_t slid_vaddr = vaddr + vm_kernel_slide;
1392
1393 if ((slid_vaddr < vm_kernelcache_base) || (slid_vaddr >= vm_kernelcache_top)) {
1394 /* This is only intended for use on kernelcache addresses. */
1395 return 0;
1396 }
1397
1398 /*
1399 * Because the address is in the kernelcache, we can do a simple
1400 * slide calculation.
1401 */
1402 return slid_vaddr;
1403}
1404
1405vm_offset_t
1406ml_static_unslide(
1407 vm_offset_t vaddr)
1408{
1409 if ((vaddr < vm_kernelcache_base) || (vaddr >= vm_kernelcache_top)) {
1410 /* This is only intended for use on kernelcache addresses. */
1411 return 0;
1412 }
1413
1414 return vaddr - vm_kernel_slide;
1415}
1416
1417extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
1418
1419kern_return_t
1420ml_static_protect(
1421 vm_offset_t vaddr, /* kernel virtual address */
1422 vm_size_t size,
1423 vm_prot_t new_prot)
1424{
1425 pt_entry_t arm_prot = 0;
1426 pt_entry_t arm_block_prot = 0;
1427 vm_offset_t vaddr_cur;
1428 ppnum_t ppn;
1429 kern_return_t result = KERN_SUCCESS;
1430
1431 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1432 panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) VM_MIN_KERNEL_ADDRESS);
1433 return KERN_FAILURE;
1434 }
1435
1436 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1437
1438 if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
1439 panic("ml_static_protect(): WX request on %p", (void *) vaddr);
1440 }
1441 if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) {
1442 panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr);
1443 }
1444
1445 /* Set up the protection bits, and block bits so we can validate block mappings. */
1446 if (new_prot & VM_PROT_WRITE) {
1447 arm_prot |= ARM_PTE_AP(AP_RWNA);
1448 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
1449 } else {
1450 arm_prot |= ARM_PTE_AP(AP_RONA);
1451 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
1452 }
1453
1454 arm_prot |= ARM_PTE_NX;
1455 arm_block_prot |= ARM_TTE_BLOCK_NX;
1456
1457 if (!(new_prot & VM_PROT_EXECUTE)) {
1458 arm_prot |= ARM_PTE_PNX;
1459 arm_block_prot |= ARM_TTE_BLOCK_PNX;
1460 }
1461
1462 for (vaddr_cur = vaddr;
1463 vaddr_cur < trunc_page_64(vaddr + size);
1464 vaddr_cur += PAGE_SIZE) {
1465 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1466 if (ppn != (vm_offset_t) NULL) {
1467 tt_entry_t *tte2;
1468 pt_entry_t *pte_p;
1469 pt_entry_t ptmp;
1470
1471#if XNU_MONITOR
1472 assert(!pmap_is_monitor(ppn));
1473 assert(!TEST_PAGE_RATIO_4);
1474#endif
1475
1476 tte2 = arm_kva_to_tte(vaddr_cur);
1477
1478 if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1479 if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
1480 ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
1481 /*
1482 * We can support ml_static_protect on a block mapping if the mapping already has
1483 * the desired protections. We still want to run checks on a per-page basis.
1484 */
1485 continue;
1486 }
1487
1488 result = KERN_FAILURE;
1489 break;
1490 }
1491
1492 pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
1493 ptmp = *pte_p;
1494
1495 if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
1496 /*
1497 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
1498 * protections do not match the desired protections, then we will fail (as we cannot update
1499 * this mapping without updating other mappings as well).
1500 */
1501 result = KERN_FAILURE;
1502 break;
1503 }
1504
1505 __unreachable_ok_push
1506 if (TEST_PAGE_RATIO_4) {
1507 {
1508 unsigned int i;
1509 pt_entry_t *ptep_iter;
1510
1511 ptep_iter = pte_p;
1512 for (i = 0; i < 4; i++, ptep_iter++) {
1513 /* Note that there is a hole in the HINT sanity checking here. */
1514 ptmp = *ptep_iter;
1515
1516 /* We only need to update the page tables if the protections do not match. */
1517 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1518 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1519 *ptep_iter = ptmp;
1520 }
1521 }
1522 }
1523 } else {
1524 ptmp = *pte_p;
1525 /* We only need to update the page tables if the protections do not match. */
1526 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1527 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1528 *pte_p = ptmp;
1529 }
1530 }
1531 __unreachable_ok_pop
1532 }
1533 }
1534
1535 if (vaddr_cur > vaddr) {
1536 assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
1537 flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
1538 }
1539
1540
1541 return result;
1542}
1543
1544/*
1545 * Routine: ml_static_mfree
1546 * Function:
1547 */
1548void
1549ml_static_mfree(
1550 vm_offset_t vaddr,
1551 vm_size_t size)
1552{
1553 vm_offset_t vaddr_cur;
1554 ppnum_t ppn;
1555 uint32_t freed_pages = 0;
1556 uint32_t freed_kernelcache_pages = 0;
1557
1558 /* It is acceptable (if bad) to fail to free. */
1559 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1560 return;
1561 }
1562
1563 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1564
1565 for (vaddr_cur = vaddr;
1566 vaddr_cur < trunc_page_64(vaddr + size);
1567 vaddr_cur += PAGE_SIZE) {
1568 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1569 if (ppn != (vm_offset_t) NULL) {
1570 /*
1571 * It is not acceptable to fail to update the protections on a page
1572 * we will release to the VM. We need to either panic or continue.
1573 * For now, we'll panic (to help flag if there is memory we can
1574 * reclaim).
1575 */
1576 if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
1577 panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
1578 }
1579
1580 vm_page_create(ppn, (ppn + 1));
1581 freed_pages++;
1582 if (vaddr_cur >= segLOWEST && vaddr_cur < end_kern) {
1583 freed_kernelcache_pages++;
1584 }
1585 }
1586 }
1587 vm_page_lockspin_queues();
1588 vm_page_wire_count -= freed_pages;
1589 vm_page_wire_count_initial -= freed_pages;
1590 vm_page_kernelcache_count -= freed_kernelcache_pages;
1591 vm_page_unlock_queues();
1592#if DEBUG
1593 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
1594#endif
1595}
1596
1597
1598/* virtual to physical on wired pages */
1599vm_offset_t
1600ml_vtophys(vm_offset_t vaddr)
1601{
1602 return kvtophys(vaddr);
1603}
1604
1605/*
1606 * Routine: ml_nofault_copy
1607 * Function: Perform a physical mode copy if the source and destination have
1608 * valid translations in the kernel pmap. If translations are present, they are
1609 * assumed to be wired; e.g., no attempt is made to guarantee that the
1610 * translations obtained remain valid for the duration of the copy process.
1611 */
1612vm_size_t
1613ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
1614{
1615 addr64_t cur_phys_dst, cur_phys_src;
1616 vm_size_t count, nbytes = 0;
1617
1618 while (size > 0) {
1619 if (!(cur_phys_src = kvtophys(virtsrc))) {
1620 break;
1621 }
1622 if (!(cur_phys_dst = kvtophys(virtdst))) {
1623 break;
1624 }
1625 if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
1626 !pmap_valid_address(trunc_page_64(cur_phys_src))) {
1627 break;
1628 }
1629 count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
1630 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
1631 count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
1632 }
1633 if (count > size) {
1634 count = size;
1635 }
1636
1637 bcopy_phys(cur_phys_src, cur_phys_dst, count);
1638
1639 nbytes += count;
1640 virtsrc += count;
1641 virtdst += count;
1642 size -= count;
1643 }
1644
1645 return nbytes;
1646}
1647
1648/*
1649 * Routine: ml_validate_nofault
1650 * Function: Validate that ths address range has a valid translations
1651 * in the kernel pmap. If translations are present, they are
1652 * assumed to be wired; i.e. no attempt is made to guarantee
1653 * that the translation persist after the check.
1654 * Returns: TRUE if the range is mapped and will not cause a fault,
1655 * FALSE otherwise.
1656 */
1657
1658boolean_t
1659ml_validate_nofault(
1660 vm_offset_t virtsrc, vm_size_t size)
1661{
1662 addr64_t cur_phys_src;
1663 uint32_t count;
1664
1665 while (size > 0) {
1666 if (!(cur_phys_src = kvtophys(virtsrc))) {
1667 return FALSE;
1668 }
1669 if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
1670 return FALSE;
1671 }
1672 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
1673 if (count > size) {
1674 count = (uint32_t)size;
1675 }
1676
1677 virtsrc += count;
1678 size -= count;
1679 }
1680
1681 return TRUE;
1682}
1683
1684void
1685ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
1686{
1687 *phys_addr = 0;
1688 *size = 0;
1689}
1690
1691void
1692active_rt_threads(__unused boolean_t active)
1693{
1694}
1695
1696static void
1697cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
1698{
1699 return;
1700}
1701
1702cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
1703
1704void
1705cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
1706{
1707 if (cpu_qos_cb != NULL) {
1708 cpu_qos_update = cpu_qos_cb;
1709 } else {
1710 cpu_qos_update = cpu_qos_cb_default;
1711 }
1712}
1713
1714void
1715thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
1716{
1717 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
1718
1719 cpu_qos_update((int)urgency, rt_period, rt_deadline);
1720
1721 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
1722}
1723
1724void
1725machine_run_count(__unused uint32_t count)
1726{
1727}
1728
1729processor_t
1730machine_choose_processor(__unused processor_set_t pset, processor_t processor)
1731{
1732 return processor;
1733}
1734
1735#if KASAN
1736vm_offset_t ml_stack_base(void);
1737vm_size_t ml_stack_size(void);
1738
1739vm_offset_t
1740ml_stack_base(void)
1741{
1742 uintptr_t local = (uintptr_t) &local;
1743 vm_offset_t intstack_top_ptr;
1744
1745 intstack_top_ptr = getCpuDatap()->intstack_top;
1746 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1747 return intstack_top_ptr - INTSTACK_SIZE;
1748 } else {
1749 return current_thread()->kernel_stack;
1750 }
1751}
1752vm_size_t
1753ml_stack_size(void)
1754{
1755 uintptr_t local = (uintptr_t) &local;
1756 vm_offset_t intstack_top_ptr;
1757
1758 intstack_top_ptr = getCpuDatap()->intstack_top;
1759 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1760 return INTSTACK_SIZE;
1761 } else {
1762 return kernel_stack_size;
1763 }
1764}
1765#endif
1766
1767boolean_t
1768machine_timeout_suspended(void)
1769{
1770 return FALSE;
1771}
1772
1773kern_return_t
1774ml_interrupt_prewarm(__unused uint64_t deadline)
1775{
1776 return KERN_FAILURE;
1777}
1778
1779/*
1780 * Assumes fiq, irq disabled.
1781 */
1782void
1783ml_set_decrementer(uint32_t dec_value)
1784{
1785 cpu_data_t *cdp = getCpuDatap();
1786
1787 assert(ml_get_interrupts_enabled() == FALSE);
1788 cdp->cpu_decrementer = dec_value;
1789
1790 if (cdp->cpu_set_decrementer_func) {
1791 cdp->cpu_set_decrementer_func(dec_value);
1792 } else {
1793 __builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value);
1794 }
1795}
1796
1797uint64_t
1798ml_get_hwclock()
1799{
1800 uint64_t timebase;
1801
1802 // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
1803 // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
1804 // to other instructions executed on the same processor."
1805 __builtin_arm_isb(ISB_SY);
1806 timebase = __builtin_arm_rsr64("CNTVCT_EL0");
1807
1808 return timebase;
1809}
1810
1811uint64_t
1812ml_get_timebase()
1813{
1814 return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase;
1815}
1816
1817/*
1818 * Get the speculative timebase without an ISB.
1819 */
1820__attribute__((unused))
1821static uint64_t
1822ml_get_speculative_timebase()
1823{
1824 uint64_t timebase;
1825
1826 timebase = __builtin_arm_rsr64("CNTVCT_EL0");
1827
1828 return timebase + getCpuDatap()->cpu_base_timebase;
1829}
1830
1831uint32_t
1832ml_get_decrementer()
1833{
1834 cpu_data_t *cdp = getCpuDatap();
1835 uint32_t dec;
1836
1837 assert(ml_get_interrupts_enabled() == FALSE);
1838
1839 if (cdp->cpu_get_decrementer_func) {
1840 dec = cdp->cpu_get_decrementer_func();
1841 } else {
1842 uint64_t wide_val;
1843
1844 wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0");
1845 dec = (uint32_t)wide_val;
1846 assert(wide_val == (uint64_t)dec);
1847 }
1848
1849 return dec;
1850}
1851
1852boolean_t
1853ml_get_timer_pending()
1854{
1855 uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0");
1856 return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
1857}
1858
1859static void
1860cache_trap_error(thread_t thread, vm_map_address_t fault_addr)
1861{
1862 mach_exception_data_type_t exc_data[2];
1863 arm_saved_state_t *regs = get_user_regs(thread);
1864
1865 set_saved_state_far(regs, fault_addr);
1866
1867 exc_data[0] = KERN_INVALID_ADDRESS;
1868 exc_data[1] = fault_addr;
1869
1870 exception_triage(EXC_BAD_ACCESS, exc_data, 2);
1871}
1872
1873static void
1874cache_trap_recover()
1875{
1876 vm_map_address_t fault_addr;
1877
1878 __asm__ volatile ("mrs %0, FAR_EL1" : "=r"(fault_addr));
1879
1880 cache_trap_error(current_thread(), fault_addr);
1881}
1882
1883static void
1884set_cache_trap_recover(thread_t thread)
1885{
1886#if defined(HAS_APPLE_PAC)
1887 thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover,
1888 ptrauth_key_function_pointer, 0,
1889 ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
1890#else /* defined(HAS_APPLE_PAC) */
1891 thread->recover = (vm_address_t)cache_trap_recover;
1892#endif /* defined(HAS_APPLE_PAC) */
1893}
1894
1895static void
1896dcache_flush_trap(vm_map_address_t start, vm_map_size_t size)
1897{
1898 vm_map_address_t end = start + size;
1899 thread_t thread = current_thread();
1900 vm_offset_t old_recover = thread->recover;
1901
1902 /* Check bounds */
1903 if (task_has_64Bit_addr(current_task())) {
1904 if (end > MACH_VM_MAX_ADDRESS) {
1905 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1906 }
1907 } else {
1908 if (end > VM_MAX_ADDRESS) {
1909 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1910 }
1911 }
1912
1913 if (start > end) {
1914 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1915 }
1916
1917 set_cache_trap_recover(thread);
1918
1919 /*
1920 * We're coherent on Apple ARM64 CPUs, so this could be a nop. However,
1921 * if the region given us is bad, it would be good to catch it and
1922 * crash, ergo we still do the flush.
1923 */
1924 FlushPoC_DcacheRegion(start, (uint32_t)size);
1925
1926 /* Restore recovery function */
1927 thread->recover = old_recover;
1928
1929 /* Return (caller does exception return) */
1930}
1931
1932static void
1933icache_invalidate_trap(vm_map_address_t start, vm_map_size_t size)
1934{
1935 vm_map_address_t end = start + size;
1936 thread_t thread = current_thread();
1937 vm_offset_t old_recover = thread->recover;
1938
1939 /* Check bounds */
1940 if (task_has_64Bit_addr(current_task())) {
1941 if (end > MACH_VM_MAX_ADDRESS) {
1942 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1943 }
1944 } else {
1945 if (end > VM_MAX_ADDRESS) {
1946 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1947 }
1948 }
1949
1950 if (start > end) {
1951 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1952 }
1953
1954 set_cache_trap_recover(thread);
1955
1956 /* Invalidate iCache to point of unification */
1957 InvalidatePoU_IcacheRegion(start, (uint32_t)size);
1958
1959 /* Restore recovery function */
1960 thread->recover = old_recover;
1961
1962 /* Return (caller does exception return) */
1963}
1964
1965__attribute__((noreturn))
1966void
1967platform_syscall(arm_saved_state_t *state)
1968{
1969 uint32_t code;
1970
1971#define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
1972
1973 code = (uint32_t)get_saved_state_reg(state, 3);
1974 switch (code) {
1975 case 0:
1976 /* I-Cache flush */
1977 platform_syscall_kprintf("icache flush requested.\n");
1978 icache_invalidate_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
1979 break;
1980 case 1:
1981 /* D-Cache flush */
1982 platform_syscall_kprintf("dcache flush requested.\n");
1983 dcache_flush_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
1984 break;
1985 case 2:
1986 /* set cthread */
1987 platform_syscall_kprintf("set cthread self.\n");
1988 thread_set_cthread_self(get_saved_state_reg(state, 0));
1989 break;
1990 case 3:
1991 /* get cthread */
1992 platform_syscall_kprintf("get cthread self.\n");
1993 set_saved_state_reg(state, 0, thread_get_cthread_self());
1994 break;
1995 default:
1996 platform_syscall_kprintf("unknown: %d\n", code);
1997 break;
1998 }
1999
2000 thread_exception_return();
2001}
2002
2003static void
2004_enable_timebase_event_stream(uint32_t bit_index)
2005{
2006 uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */
2007
2008 if (bit_index >= 64) {
2009 panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
2010 }
2011
2012 __asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl));
2013
2014 cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
2015 cntkctl |= CNTKCTL_EL1_EVNTEN;
2016 cntkctl |= CNTKCTL_EL1_EVENTDIR; /* 1->0; why not? */
2017
2018 /*
2019 * If the SOC supports it (and it isn't broken), enable
2020 * EL0 access to the timebase registers.
2021 */
2022 if (user_timebase_type() != USER_TIMEBASE_NONE) {
2023 cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
2024 }
2025
2026 __builtin_arm_wsr64("CNTKCTL_EL1", cntkctl);
2027}
2028
2029/*
2030 * Turn timer on, unmask that interrupt.
2031 */
2032static void
2033_enable_virtual_timer(void)
2034{
2035 uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */
2036
2037 __builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl);
2038 /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
2039 __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
2040}
2041
2042void
2043fiq_context_init(boolean_t enable_fiq __unused)
2044{
2045 /* Interrupts still disabled. */
2046 assert(ml_get_interrupts_enabled() == FALSE);
2047 _enable_virtual_timer();
2048}
2049
2050void
2051wfe_timeout_init(void)
2052{
2053 _enable_timebase_event_stream(arm64_eventi);
2054}
2055
2056void
2057wfe_timeout_configure(void)
2058{
2059 /* Could fill in our own ops here, if we needed them */
2060 uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0;
2061 uint32_t bit_index;
2062
2063 if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
2064 if (events_per_sec <= 0) {
2065 events_per_sec = 1;
2066 } else if (events_per_sec > USEC_PER_SEC) {
2067 events_per_sec = USEC_PER_SEC;
2068 }
2069 } else {
2070#if defined(ARM_BOARD_WFE_TIMEOUT_NS)
2071 events_per_sec = NSEC_PER_SEC / ARM_BOARD_WFE_TIMEOUT_NS;
2072#else /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2073 /* Default to 1usec (or as close as we can get) */
2074 events_per_sec = USEC_PER_SEC;
2075#endif /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2076 }
2077 ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2078 ticks_per_event = ticks_per_sec / events_per_sec;
2079 bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */
2080
2081 /* Round up to power of two */
2082 if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
2083 bit_index++;
2084 }
2085
2086 /*
2087 * The timer can only trigger on rising or falling edge,
2088 * not both; we don't care which we trigger on, but we
2089 * do need to adjust which bit we are interested in to
2090 * account for this.
2091 */
2092 if (bit_index != 0) {
2093 bit_index--;
2094 }
2095
2096 arm64_eventi = bit_index;
2097 wfe_timeout_init();
2098}
2099
2100boolean_t
2101ml_delay_should_spin(uint64_t interval)
2102{
2103 cpu_data_t *cdp = getCpuDatap();
2104
2105 if (cdp->cpu_idle_latency) {
2106 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2107 } else {
2108 /*
2109 * Early boot, latency is unknown. Err on the side of blocking,
2110 * which should always be safe, even if slow
2111 */
2112 return FALSE;
2113 }
2114}
2115
2116boolean_t
2117ml_thread_is64bit(thread_t thread)
2118{
2119 return thread_is_64bit_addr(thread);
2120}
2121
/*
 * On DEVELOPMENT or DEBUG kernels, delay for yield_delay_us microseconds when
 * that value is non-zero. Compiles to an empty function otherwise.
 */
void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
	if (yield_delay_us != 0) {
		delay(yield_delay_us);
	}
#endif
}
2131
/* Intentionally empty in this file. */
void
ml_timer_evaluate(void)
{
	/* nothing to do */
}
2136
2137boolean_t
2138ml_timer_forced_evaluation(void)
2139{
2140 return FALSE;
2141}
2142
2143uint64_t
2144ml_energy_stat(thread_t t)
2145{
2146 return t->machine.energy_estimate_nj;
2147}
2148
2149
/*
 * Account gpu_ns_delta against the current task through
 * task_coalition_update_gpu_stats().
 *
 * The parameter is no longer marked __unused: it is passed on below, so the
 * attribute was misleading.
 */
void
ml_gpu_stat_update(uint64_t gpu_ns_delta)
{
	/*
	 * For now: update the resource coalition stats of the
	 * current thread's coalition
	 */
	task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
}
2159
2160uint64_t
2161ml_gpu_stat(__unused thread_t t)
2162{
2163 return 0;
2164}
2165
2166#if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT
2167
2168static void
2169timer_state_event(boolean_t switch_to_kernel)
2170{
2171 thread_t thread = current_thread();
2172 if (!thread->precise_user_kernel_time) {
2173 return;
2174 }
2175
2176 processor_t pd = current_processor();
2177 uint64_t now = ml_get_speculative_timebase();
2178
2179 timer_stop(pd->current_state, now);
2180 pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
2181 timer_start(pd->current_state, now);
2182
2183 timer_stop(pd->thread_timer, now);
2184 pd->thread_timer = (switch_to_kernel) ? &thread->system_timer : &thread->user_timer;
2185 timer_start(pd->thread_timer, now);
2186}
2187
2188void
2189timer_state_event_user_to_kernel(void)
2190{
2191 timer_state_event(TRUE);
2192}
2193
2194void
2195timer_state_event_kernel_to_user(void)
2196{
2197 timer_state_event(FALSE);
2198}
2199#endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT */
2200
2201/*
2202 * The following are required for parts of the kernel
2203 * that cannot resolve these functions as inlines:
2204 */
2205extern thread_t current_act(void) __attribute__((const));
2206thread_t
2207current_act(void)
2208{
2209 return current_thread_fast();
2210}
2211
2212#undef current_thread
2213extern thread_t current_thread(void) __attribute__((const));
2214thread_t
2215current_thread(void)
2216{
2217 return current_thread_fast();
2218}
2219
2220typedef struct{
2221 ex_cb_t cb;
2222 void *refcon;
2223}
2224ex_cb_info_t;
2225
2226ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];
2227
2228/*
2229 * Callback registration
2230 * Currently we support only one registered callback per class but
2231 * it should be possible to support more callbacks
2232 */
2233kern_return_t
2234ex_cb_register(
2235 ex_cb_class_t cb_class,
2236 ex_cb_t cb,
2237 void *refcon)
2238{
2239 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2240
2241 if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX)) {
2242 return KERN_INVALID_VALUE;
2243 }
2244
2245 if (NULL == pInfo->cb) {
2246 pInfo->cb = cb;
2247 pInfo->refcon = refcon;
2248 return KERN_SUCCESS;
2249 }
2250 return KERN_FAILURE;
2251}
2252
2253/*
2254 * Called internally by platform kernel to invoke the registered callback for class
2255 */
2256ex_cb_action_t
2257ex_cb_invoke(
2258 ex_cb_class_t cb_class,
2259 vm_offset_t far)
2260{
2261 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2262 ex_cb_state_t state = {far};
2263
2264 if (cb_class >= EXCB_CLASS_MAX) {
2265 panic("Invalid exception callback class 0x%x\n", cb_class);
2266 }
2267
2268 if (pInfo->cb) {
2269 return pInfo->cb(cb_class, pInfo->refcon, &state);
2270 }
2271 return EXCB_ACTION_NONE;
2272}
2273
2274#if defined(HAS_APPLE_PAC)
/*
 * Compile-time capability query: true when the build defines
 * HAS_APCTL_EL1_USERKEYEN to a non-zero value, false otherwise.
 *
 * Declared with an explicit (void) parameter list so that this is a proper
 * prototype; an empty "()" leaves the parameters unspecified in C.
 */
static inline bool
cpu_supports_userkeyen(void)
{
#if HAS_APCTL_EL1_USERKEYEN
	return true;
#else
	return false;
#endif
}
2284
2285/**
2286 * Returns the default JOP key. Depending on how the CPU diversifies userspace
2287 * JOP keys, this value may reflect either KERNKeyLo or APIAKeyLo.
2288 */
2289uint64_t
2290ml_default_jop_pid(void)
2291{
2292 if (cpu_supports_userkeyen()) {
2293 return KERNEL_KERNKEY_ID;
2294 } else {
2295 return KERNEL_JOP_ID;
2296 }
2297}
2298
2299void
2300ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop)
2301{
2302 assert(task);
2303 task->disable_user_jop = disable_user_jop;
2304}
2305
2306void
2307ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop)
2308{
2309 assert(thread);
2310 thread->machine.disable_user_jop = disable_user_jop;
2311}
2312
2313void
2314ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2315{
2316 if (inherit) {
2317 task->rop_pid = parent_task->rop_pid;
2318 } else {
2319 task->rop_pid = early_random();
2320 }
2321}
2322
2323/**
2324 * jop_pid may be inherited from the parent task or generated inside the shared
2325 * region. Unfortunately these two parameters are available at very different
2326 * times during task creation, so we need to split this into two steps.
2327 */
2328void
2329ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit)
2330{
2331 if (inherit) {
2332 task->jop_pid = parent_task->jop_pid;
2333 } else {
2334 task->jop_pid = ml_default_jop_pid();
2335 }
2336}
2337
2338void
2339ml_task_set_jop_pid_from_shared_region(task_t task)
2340{
2341 vm_shared_region_t sr = vm_shared_region_get(task);
2342 /*
2343 * If there's no shared region, we can assign the key arbitrarily. This
2344 * typically happens when Mach-O image activation failed part of the way
2345 * through, and this task is in the middle of dying with SIGKILL anyway.
2346 */
2347 if (__improbable(!sr)) {
2348 task->jop_pid = early_random();
2349 return;
2350 }
2351 vm_shared_region_deallocate(sr);
2352
2353 /*
2354 * Similarly we have to worry about jetsam having killed the task and
2355 * already cleared the shared_region_id.
2356 */
2357 task_lock(task);
2358 if (task->shared_region_id != NULL) {
2359 task->jop_pid = shared_region_find_key(task->shared_region_id);
2360 } else {
2361 task->jop_pid = early_random();
2362 }
2363 task_unlock(task);
2364}
2365
2366void
2367ml_thread_set_jop_pid(thread_t thread, task_t task)
2368{
2369 thread->machine.jop_pid = task->jop_pid;
2370}
2371#endif /* defined(HAS_APPLE_PAC) */
2372
2373
2374#if defined(HAS_APPLE_PAC)
/*
 * Emit one "aut<_suffix>" instruction with _ptr as a read-write register
 * operand and _modifier as an input register operand.
 *
 * The expansion deliberately has no trailing semicolon: the call site
 * supplies it, so each use is exactly one statement rather than a statement
 * followed by a stray empty one.
 */
#define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
	asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier))
2377
2378/*
2379 * ml_auth_ptr_unchecked: call this instead of ptrauth_auth_data
2380 * instrinsic when you don't want to trap on auth fail.
2381 *
2382 */
2383void *
2384ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
2385{
2386 switch (key & 0x3) {
2387 case ptrauth_key_asia:
2388 _ml_auth_ptr_unchecked(ptr, ia, modifier);
2389 break;
2390 case ptrauth_key_asib:
2391 _ml_auth_ptr_unchecked(ptr, ib, modifier);
2392 break;
2393 case ptrauth_key_asda:
2394 _ml_auth_ptr_unchecked(ptr, da, modifier);
2395 break;
2396 case ptrauth_key_asdb:
2397 _ml_auth_ptr_unchecked(ptr, db, modifier);
2398 break;
2399 }
2400
2401 return ptr;
2402}
2403#endif /* defined(HAS_APPLE_PAC) */
2404
2405#ifdef CONFIG_XNUPOST
2406void
2407ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
2408{
2409 thread_t thread = current_thread();
2410 thread->machine.expected_fault_handler = expected_fault_handler;
2411 thread->machine.expected_fault_addr = expected_fault_addr;
2412}
2413
2414void
2415ml_expect_fault_end(void)
2416{
2417 thread_t thread = current_thread();
2418 thread->machine.expected_fault_handler = NULL;
2419 thread->machine.expected_fault_addr = 0;
2420}
2421#endif /* CONFIG_XNUPOST */
2422
/*
 * When built with HIBERNATION and the system is waking from hibernate,
 * compute the rorgn HMAC and then rebuild the VM structures. Otherwise this
 * is a no-op.
 */
void
ml_hibernate_active_pre(void)
{
#if HIBERNATION
	if (gIOHibernateState != kIOHibernateStateWakingFromHibernate) {
		return;
	}

	/* validate rorgn hmac */
	ppl_hmac_compute_rorgn_hmac();

	hibernate_rebuild_vm_structs();
#endif /* HIBERNATION */
}
2435
/*
 * When built with HIBERNATION and the system is waking from hibernate, run
 * hibernate_machine_init(), call hibernate_vm_lock_end(), and clear the
 * current CPU's cpu_hibernate flag. Otherwise this is a no-op.
 */
void
ml_hibernate_active_post(void)
{
#if HIBERNATION
	if (gIOHibernateState != kIOHibernateStateWakingFromHibernate) {
		return;
	}

	hibernate_machine_init();
	hibernate_vm_lock_end();
	current_cpu_datap()->cpu_hibernate = 0;
#endif /* HIBERNATION */
}
2447
2448/**
2449 * Return back a machine-dependent array of address space regions that should be
2450 * reserved by the VM (pre-mapped in the address space). This will prevent user
2451 * processes from allocating or deallocating from within these regions.
2452 *
2453 * @param vm_is64bit True if the process has a 64-bit address space.
2454 * @param regions An out parameter representing an array of regions to reserve.
2455 *
2456 * @return The number of reserved regions returned through `regions`.
2457 */
2458size_t
2459ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
2460{
2461 assert(regions != NULL);
2462
2463 /**
2464 * Reserved regions only apply to 64-bit address spaces. This is because
2465 * we only expect to grow the maximum user VA address on 64-bit address spaces
2466 * (we've essentially already reached the max for 32-bit spaces). The reserved
2467 * regions should safely fall outside of the max user VA for 32-bit processes.
2468 */
2469 if (vm_is64bit) {
2470 *regions = vm_reserved_regions;
2471 return ARRAY_COUNT(vm_reserved_regions);
2472 } else {
2473 /* Don't reserve any VA regions on arm64_32 processes. */
2474 *regions = NULL;
2475 return 0;
2476 }
2477}
2478/* These WFE recommendations are expected to be updated on a relatively
2479 * infrequent cadence, possibly from a different cluster, hence
2480 * false cacheline sharing isn't expected to be material
2481 */
2482static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];
2483
2484uint32_t
2485ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
2486{
2487 assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
2488 assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
2489 os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
2490 return 0; /* Success */
2491}
2492
2493uint64_t
2494ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
2495{
2496 /* This and its consumer does not synchronize vis-a-vis updates
2497 * of the recommendation; races are acceptable.
2498 */
2499 uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
2500 return wfet;
2501}