/*
 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <arm64/proc_reg.h>
#include <arm/machine_cpu.h>
#include <arm/cpu_internal.h>
#include <arm/cpuid.h>
#include <arm/io_map_entries.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/caches_internal.h>
#include <arm/misc_protos.h>
#include <arm/machdep_call.h>
#include <arm/machine_routines.h>
#include <arm/rtclock.h>
#include <arm/cpuid_internal.h>
#include <arm/cpu_capabilities.h>
#include <console/serial_protos.h>
#include <kern/machine.h>
#include <prng/random.h>
#include <kern/startup.h>
#include <kern/thread.h>
#include <kern/timer_queue.h>
#include <mach/machine.h>
#include <machine/atomic.h>
#include <vm/vm_page.h>
#include <sys/kdebug.h>
#include <kern/coalition.h>
#include <pexpert/device_tree.h>

#include <IOKit/IOPlatformExpert.h>

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#include <libkern/kernel_mach_header.h>
#endif

#include <libkern/section_keywords.h>
#if HAS_CLUSTER
static uint8_t cluster_initialized = 0;
#endif

static int max_cpus_initialized = 0;
#define MAX_CPUS_SET    0x1
#define MAX_CPUS_WAIT   0x2

uint32_t LockTimeOut;
uint32_t LockTimeOutUsec;
uint64_t TLockTimeOut;
uint64_t MutexSpin;
boolean_t is_clock_configured = FALSE;

uint32_t yield_delay_us = 0; /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */

#if CONFIG_NONFATAL_ASSERTS
extern int mach_assert;
#endif
extern volatile uint32_t debug_enabled;

extern vm_offset_t   segLOWEST;
extern vm_offset_t   segLOWESTTEXT;
extern vm_offset_t   segLASTB;
extern unsigned long segSizeLAST;

#if defined(HAS_IPI)
unsigned int gFastIPI = 1;
#define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
static uint64_t deferred_ipi_timer_ns = kDeferredIPITimerDefault;
#endif /* defined(HAS_IPI) */

void machine_conf(void);

thread_t Idle_context(void);

SECURITY_READ_ONLY_LATE(static uint32_t) cpu_phys_ids[MAX_CPUS] = {[0 ... MAX_CPUS - 1] = (uint32_t)-1};
SECURITY_READ_ONLY_LATE(static unsigned int) avail_cpus = 0;
SECURITY_READ_ONLY_LATE(static int) boot_cpu = -1;
SECURITY_READ_ONLY_LATE(static int) max_cpu_number = 0;
SECURITY_READ_ONLY_LATE(cluster_type_t) boot_cluster = CLUSTER_TYPE_SMP;

SECURITY_READ_ONLY_LATE(static uint32_t) fiq_eventi = UINT32_MAX;

lockdown_handler_t lockdown_handler;
void *lockdown_this;
lck_mtx_t lockdown_handler_lck;
lck_grp_t *lockdown_handler_grp;
int lockdown_done;

void ml_lockdown_init(void);
void ml_lockdown_run_handler(void);
uint32_t get_arm_cpu_version(void);
#if defined(HAS_IPI)
static inline void
ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
{
#if HAS_CLUSTER
	uint64_t local_mpidr;
	/* NOTE: this logic expects that we are called in a non-preemptible
	 * context, or at least one in which the calling thread is bound
	 * to a single CPU.  Otherwise we may migrate between choosing which
	 * IPI mechanism to use and issuing the IPI. */
	MRS(local_mpidr, "MPIDR_EL1");
	if ((local_mpidr & MPIDR_AFF1_MASK) == (cpu_mpidr & MPIDR_AFF1_MASK)) {
		uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
		MSR(ARM64_REG_IPI_RR_LOCAL, x);
	} else {
#define IPI_RR_TARGET_CLUSTER_SHIFT 16
		uint64_t x = type | ((cpu_mpidr & MPIDR_AFF1_MASK) << (IPI_RR_TARGET_CLUSTER_SHIFT - MPIDR_AFF1_SHIFT)) | (cpu_mpidr & MPIDR_AFF0_MASK);
		MSR(ARM64_REG_IPI_RR_GLOBAL, x);
	}
#else
	uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
	MSR(ARM64_REG_IPI_RR, x);
#endif /* HAS_CLUSTER */
}
#endif /* defined(HAS_IPI) */
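
/*
 * Illustrative example (values hypothetical, not part of the build): with
 * HAS_CLUSTER, a sender whose MPIDR_EL1 reads 0x101 (Aff1 = 1, Aff0 = 1)
 * signalling cpu_mpidr 0x102 shares Aff1 with the target, so the fast
 * same-cluster path is taken:
 *
 *	x = type | (0x102 & MPIDR_AFF0_MASK);	// CPU 2 within this cluster
 *	MSR(ARM64_REG_IPI_RR_LOCAL, x);
 *
 * Signalling cpu_mpidr 0x001 (Aff1 = 0) instead goes through
 * ARM64_REG_IPI_RR_GLOBAL, with the target cluster shifted up to bit 16.
 */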
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
#else
	panic("Platform does not support ACC Fast IPI");
#endif
}

#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
{
#if defined(HAS_IPI)
	/* adjust IPI_CR timer countdown value for deferred IPI
	 * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
	 * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
	 *
	 * global register, should only require a single write to update all
	 * CPU cores: from Skye ACC user spec section 5.7.3.3
	 *
	 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
	 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
	 */
	uint64_t abstime;

	nanoseconds_to_absolutetime(nanosecs, &abstime);

	abstime = MIN(abstime, 0xFFFF);

	/* update deferred_ipi_timer_ns with the new clamped value */
	absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);

	MSR(ARM64_REG_IPI_CR, abstime);
#else
	(void)nanosecs;
	panic("Platform does not support ACC Fast IPI");
#endif
}

uint64_t
ml_cpu_signal_deferred_get_timer()
{
#if defined(HAS_IPI)
	return deferred_ipi_timer_ns;
#else
	return 0;
#endif
}
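
/*
 * Worked example (assuming the typical 24 MHz REFCLK on Apple SoCs): the
 * kDeferredIPITimerDefault of 64 us converts to roughly 64 * 24 = 1536
 * REFCLK ticks, well under the 0xFFFF clamp; a request above ~2.7 ms would
 * be clamped to 0xFFFF ticks before the ARM64_REG_IPI_CR write.
 */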
#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
#else
	panic("Platform does not support ACC Fast IPI deferral");
#endif
}

#if !defined(HAS_IPI)
__dead2
#endif
void
ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
{
#if defined(HAS_IPI)
	ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
#else
	panic("Platform does not support ACC Fast IPI retraction");
#endif
}

void
machine_idle(void)
{
	__builtin_arm_wsr("DAIFSet", (DAIFSC_IRQF | DAIFSC_FIQF));
	Idle_context();
	__builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
}
boolean_t
get_vfp_enabled(void)
{
	return TRUE;
}

void
OSSynchronizeIO(void)
{
	__builtin_arm_dsb(DSB_SY);
}

uint64_t
get_aux_control(void)
{
	uint64_t value;

	MRS(value, "ACTLR_EL1");
	return value;
}

uint64_t
get_mmu_control(void)
{
	uint64_t value;

	MRS(value, "SCTLR_EL1");
	return value;
}

uint64_t
get_tcr(void)
{
	uint64_t value;

	MRS(value, "TCR_EL1");
	return value;
}

boolean_t
ml_get_interrupts_enabled(void)
{
	uint64_t value;

	MRS(value, "DAIF");
	if (value & DAIF_IRQF) {
		return FALSE;
	}
	return TRUE;
}

pmap_paddr_t
get_mmu_ttb(void)
{
	pmap_paddr_t value;

	MRS(value, "TTBR0_EL1");
	return value;
}

uint32_t
get_arm_cpu_version(void)
{
	uint32_t value = machine_read_midr();

	/* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
	return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
}
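
/*
 * Example: a core whose MIDR_EL1 reports variant 0x2 and revision 0x1
 * yields get_arm_cpu_version() == 0x21 -- variant in bits [7:4], revision
 * in bits [3:0].
 */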
/*
 * user_cont_hwclock_allowed()
 *
 * Indicates whether we allow EL0 to read the physical timebase (CNTPCT_EL0)
 * as a continuous time source (e.g. from mach_continuous_time)
 */
boolean_t
user_cont_hwclock_allowed(void)
{
#if HAS_CONTINUOUS_HWCLOCK
	return TRUE;
#else
	return FALSE;
#endif
}

uint8_t
user_timebase_type(void)
{
	return USER_TIMEBASE_SPEC;
}

boolean_t
arm64_wfe_allowed(void)
{
	return TRUE;
}

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)

uint64_t rorgn_begin __attribute__((section("__DATA, __const"))) = 0;
uint64_t rorgn_end __attribute__((section("__DATA, __const"))) = 0;
vm_offset_t amcc_base;

static void assert_unlocked(void);
static void assert_amcc_cache_disabled(void);
static void lock_amcc(void);
static void lock_mmu(uint64_t begin, uint64_t end);
void
rorgn_stash_range(void)
{
#if DEVELOPMENT || DEBUG
	boolean_t rorgn_disable = FALSE;

	PE_parse_boot_argn("-unsafe_kernel_text", &rorgn_disable, sizeof(rorgn_disable));

	if (rorgn_disable) {
		/* take early out if boot arg present, don't query any machine registers to avoid
		 * dependency on amcc DT entry
		 */
		return;
	}
#endif

	/* Get the AMC values, and stash them into rorgn_begin, rorgn_end.
	 * gPhysBase is the base of DRAM managed by xnu. we need DRAM_BASE as
	 * the AMCC RO region begin/end registers are in units of 16KB page
	 * numbers from DRAM_BASE so we'll truncate gPhysBase at 512MB granule
	 * and assert the value is the canonical DRAM_BASE PA of 0x8_0000_0000 for arm64.
	 */

	uint64_t dram_base = gPhysBase & ~0x1FFFFFFFULL; /* 512MB */
	assert(dram_base == 0x800000000ULL);

#if defined(KERNEL_INTEGRITY_KTRR)
	uint64_t soc_base = 0;
	DTEntry entryP = NULL;
	uintptr_t *reg_prop = NULL;
	uint32_t prop_size = 0;
	int rc;

	soc_base = pe_arm_get_soc_base_phys();
	rc = DTFindEntry("name", "mcc", &entryP);
	assert(rc == kSuccess);
	rc = DTGetProperty(entryP, "reg", (void **)&reg_prop, &prop_size);
	assert(rc == kSuccess);
	amcc_base = ml_io_map(soc_base + *reg_prop, *(reg_prop + 1));
#elif defined(KERNEL_INTEGRITY_CTRR)
	/* TODO: t8020 mcc entry not in device tree yet; we'll do it LIVE */
#define TEMP_AMCC_BASE_PA 0x200000000ULL
#define TEMP_AMCC_SZ      0x100000
	amcc_base = ml_io_map(TEMP_AMCC_BASE_PA, TEMP_AMCC_SZ);
#else
#error "KERNEL_INTEGRITY config error"
#endif

#if defined(KERNEL_INTEGRITY_KTRR)
	assert(rRORGNENDADDR > rRORGNBASEADDR);
	rorgn_begin = (rRORGNBASEADDR << AMCC_PGSHIFT) + dram_base;
	rorgn_end = (rRORGNENDADDR << AMCC_PGSHIFT) + dram_base;
#elif defined(KERNEL_INTEGRITY_CTRR)
	rorgn_begin = rCTRR_AMCC_PLANE_REG(0, CTRR_A_BASEADDR);
	rorgn_end = rCTRR_AMCC_PLANE_REG(0, CTRR_A_ENDADDR);
	assert(rorgn_end > rorgn_begin);

	for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
		uint32_t begin = rCTRR_AMCC_PLANE_REG(i, CTRR_A_BASEADDR);
		uint32_t end = rCTRR_AMCC_PLANE_REG(i, CTRR_A_ENDADDR);
		if (!(begin == rorgn_begin && end == rorgn_end)) {
#if DEVELOPMENT || DEBUG
			panic("iboot programmed CTRR bounds are inconsistent");
#else
			panic("Inconsistent memory configuration");
#endif
		}
	}

	// convert from page number from DRAM base to PA
	rorgn_begin = (rorgn_begin << AMCC_PGSHIFT) + dram_base;
	rorgn_end = (rorgn_end << AMCC_PGSHIFT) + dram_base;
#else
#error KERNEL_INTEGRITY config error
#endif /* defined (KERNEL_INTEGRITY_KTRR) */
}
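
/*
 * Example of the conversion above (illustrative values): with
 * AMCC_PGSHIFT == 14 (16KB AMCC pages) and dram_base == 0x800000000, a
 * begin register value of 0x10 decodes to
 * (0x10 << 14) + 0x800000000 == 0x800040000.
 */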
static void
assert_unlocked()
{
	uint64_t ktrr_lock = 0;
	uint32_t rorgn_lock = 0;

#if defined(KERNEL_INTEGRITY_KTRR)
	rorgn_lock = rRORGNLOCK;
	ktrr_lock = __builtin_arm_rsr64(ARM64_REG_KTRR_LOCK_EL1);
#elif defined(KERNEL_INTEGRITY_CTRR)
	for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
		rorgn_lock |= rCTRR_AMCC_PLANE_REG(i, CTRR_A_LOCK);
	}
	ktrr_lock = __builtin_arm_rsr64(ARM64_REG_CTRR_LOCK_EL1);
#else
#error KERNEL_INTEGRITY config error
#endif /* defined(KERNEL_INTEGRITY_KTRR) */

	assert(!ktrr_lock);
	assert(!rorgn_lock);
}

static void
lock_amcc()
{
#if defined(KERNEL_INTEGRITY_KTRR)
	rRORGNLOCK = 1;
	__builtin_arm_isb(ISB_SY);
#elif defined(KERNEL_INTEGRITY_CTRR)
	/* lockdown planes in reverse order as plane 0 should be locked last */
	for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
		rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_ENABLE) = 1;
		rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_LOCK) = 1;
		__builtin_arm_isb(ISB_SY);
	}
#else
#error KERNEL_INTEGRITY config error
#endif
}
static void
lock_mmu(uint64_t begin, uint64_t end)
{
#if defined(KERNEL_INTEGRITY_KTRR)

	__builtin_arm_wsr64(ARM64_REG_KTRR_LOWER_EL1, begin);
	__builtin_arm_wsr64(ARM64_REG_KTRR_UPPER_EL1, end);
	__builtin_arm_wsr64(ARM64_REG_KTRR_LOCK_EL1, 1ULL);

	/* flush TLB */

	__builtin_arm_isb(ISB_SY);
	flush_mmu_tlb();

#elif defined (KERNEL_INTEGRITY_CTRR)
	/* this will lock the entire bootstrap cluster. non bootstrap clusters
	 * will be locked by respective cluster master in start.s */

	__builtin_arm_wsr64(ARM64_REG_CTRR_A_LWR_EL1, begin);
	__builtin_arm_wsr64(ARM64_REG_CTRR_A_UPR_EL1, end);

#if !defined(APPLEVORTEX)
	/* H12 changed sequence, must invalidate TLB immediately after setting CTRR bounds */
	__builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
	flush_mmu_tlb();
#endif /* !defined(APPLEVORTEX) */

	__builtin_arm_wsr64(ARM64_REG_CTRR_CTL_EL1, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
	__builtin_arm_wsr64(ARM64_REG_CTRR_LOCK_EL1, 1ULL);

	uint64_t current_el = __builtin_arm_rsr64("CurrentEL");
	if (current_el == PSR64_MODE_EL2) {
		// CTRR v2 has explicit registers for cluster config. they can only be written in EL2

		__builtin_arm_wsr64(ACC_CTRR_A_LWR_EL2, begin);
		__builtin_arm_wsr64(ACC_CTRR_A_UPR_EL2, end);
		__builtin_arm_wsr64(ACC_CTRR_CTL_EL2, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
		__builtin_arm_wsr64(ACC_CTRR_LOCK_EL2, 1ULL);
	}

	__builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
#if defined(APPLEVORTEX)
	flush_mmu_tlb();
#endif /* defined(APPLEVORTEX) */

#else /* defined(KERNEL_INTEGRITY_KTRR) */
#error KERNEL_INTEGRITY config error
#endif /* defined(KERNEL_INTEGRITY_KTRR) */
}
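
/*
 * Note on the sequence above: once the KTRR/CTRR lock register is written
 * with 1, the bounds and control registers are expected to ignore further
 * writes until the next cold reset, so the ISB (and, where required, the
 * TLB invalidate) must complete the programming *before* the lock lands.
 */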
static void
assert_amcc_cache_disabled()
{
#if defined(KERNEL_INTEGRITY_KTRR)
	assert((rMCCGEN & 1) == 0); /* assert M$ disabled or LLC clean will be unreliable */
#elif defined(KERNEL_INTEGRITY_CTRR) && (defined(ARM64_BOARD_CONFIG_T8006))
	/*
	 * T8006 differentiates between data and tag ways being powered up, so
	 * make sure to check that both are zero on its single memory plane.
	 */
	assert((rCTRR_AMCC_PLANE_REG(0, CTRR_AMCC_PWRONWAYCNTSTATUS) &
	    (AMCC_CURTAGWAYCNT_MASK | AMCC_CURDATWAYCNT_MASK)) == 0);
#elif defined (KERNEL_INTEGRITY_CTRR)
	for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
		assert(rCTRR_AMCC_PLANE_REG(i, CTRR_AMCC_WAYONCNT) == 0);
	}
#else
#error KERNEL_INTEGRITY config error
#endif
}
/*
 * void rorgn_lockdown(void)
 *
 * Lock the MMU and AMCC RORegion within lower and upper boundaries if not already locked
 *
 * [ ] - ensure this is being called ASAP on secondary CPUs: KTRR programming and lockdown handled in
 *       start.s:start_cpu() for subsequent wake/resume of all cores
 */
void
rorgn_lockdown(void)
{
	vm_offset_t ktrr_begin, ktrr_end;
	unsigned long last_segsz;

#if DEVELOPMENT || DEBUG
	boolean_t ktrr_disable = FALSE;

	PE_parse_boot_argn("-unsafe_kernel_text", &ktrr_disable, sizeof(ktrr_disable));

	if (ktrr_disable) {
		/*
		 * take early out if boot arg present, since we may not have amcc DT entry present
		 * we can't assert that iboot hasn't programmed the RO region lockdown registers
		 */
		goto out;
	}
#endif /* DEVELOPMENT || DEBUG */

	assert_unlocked();

	/* [x] - Use final method of determining all kernel text range or expect crashes */
	ktrr_begin = segLOWEST;
	assert(ktrr_begin && gVirtBase && gPhysBase);

	ktrr_begin = kvtophys(ktrr_begin);

	ktrr_end = kvtophys(segLASTB);
	last_segsz = segSizeLAST;
#if defined(KERNEL_INTEGRITY_KTRR)
	/* __LAST is not part of the MMU KTRR region (it is however part of the AMCC KTRR region) */
	ktrr_end = (ktrr_end - 1) & ~AMCC_PGMASK;
	/* ensure that iboot and xnu agree on the ktrr range */
	assert(rorgn_begin == ktrr_begin && rorgn_end == (ktrr_end + last_segsz));
	/* assert that __LAST segment containing privileged insns is only a single page */
	assert(last_segsz == PAGE_SIZE);
#elif defined(KERNEL_INTEGRITY_CTRR)
	ktrr_end = (ktrr_end + last_segsz - 1) & ~AMCC_PGMASK;
	/* __LAST is part of MMU CTRR region. Can't use the KTRR style method of making
	 * __pinst no execute because PXN applies with MMU off in CTRR. */
	assert(rorgn_begin == ktrr_begin && rorgn_end == ktrr_end);
#endif

#if DEBUG || DEVELOPMENT
	printf("KTRR Begin: %p End: %p, setting lockdown\n", (void *)ktrr_begin, (void *)ktrr_end);
#endif

	/* [x] - ensure all in flight writes are flushed to AMCC before enabling RO Region Lock */

	assert_amcc_cache_disabled();

	CleanPoC_DcacheRegion_Force(phystokv(ktrr_begin),
	    (unsigned)((ktrr_end + last_segsz) - ktrr_begin + AMCC_PGMASK));

	lock_amcc();

	lock_mmu(ktrr_begin, ktrr_end);

#if DEVELOPMENT || DEBUG
out:
#endif

#if defined(KERNEL_INTEGRITY_CTRR)
	{
		/* wake any threads blocked on cluster master lockdown */
		cpu_data_t *cdp;
		uint64_t mpidr_el1_value;

		cdp = getCpuDatap();
		MRS(mpidr_el1_value, "MPIDR_EL1");
		cdp->cpu_cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
		assert(cdp->cpu_cluster_id < __ARM_CLUSTER_COUNT__);
		ctrr_cluster_locked[cdp->cpu_cluster_id] = 1;
		thread_wakeup(&ctrr_cluster_locked[cdp->cpu_cluster_id]);
	}
#endif

	/* now we can run lockdown handler */
	ml_lockdown_run_handler();
}

#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
void
machine_startup(__unused boot_args * args)
{
	int boot_arg;

#if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
	if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
		gFastIPI = 1;
	}

	PE_parse_boot_argn("fastipitimeout", &deferred_ipi_timer_ns, sizeof(deferred_ipi_timer_ns));
#endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/

#if CONFIG_NONFATAL_ASSERTS
	PE_parse_boot_argn("assert", &mach_assert, sizeof(mach_assert));
#endif

	if (PE_parse_boot_argn("preempt", &boot_arg, sizeof(boot_arg))) {
		default_preemption_rate = boot_arg;
	}
	if (PE_parse_boot_argn("bg_preempt", &boot_arg, sizeof(boot_arg))) {
		default_bg_preemption_rate = boot_arg;
	}

	PE_parse_boot_argn("yield_delay_us", &yield_delay_us, sizeof(yield_delay_us));

	machine_conf();

	/*
	 * Kick off the kernel bootstrap.
	 */
	kernel_bootstrap();
	/* NOTREACHED */
}

void
machine_lockdown_preflight(void)
{
#if CONFIG_KERNEL_INTEGRITY

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	rorgn_stash_range();
#endif

#endif
}

void
machine_lockdown(void)
{
#if CONFIG_KERNEL_INTEGRITY
#if KERNEL_INTEGRITY_WT
	/* Watchtower
	 *
	 * Notify the monitor about the completion of early kernel bootstrap.
	 * From this point forward it will enforce the integrity of kernel text,
	 * rodata and page tables.
	 */

#ifdef MONITOR
	monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
#endif
#endif /* KERNEL_INTEGRITY_WT */

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	/* KTRR
	 *
	 * Lock physical KTRR region. KTRR region is read-only. Memory outside
	 * the region is not executable at EL1.
	 */

	rorgn_lockdown();
#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */

#endif /* CONFIG_KERNEL_INTEGRITY */
}
char *
machine_boot_info(
	__unused char *buf,
	__unused vm_size_t size)
{
	return PE_boot_args();
}

void
machine_conf(void)
{
	/*
	 * This is known to be inaccurate. mem_size should always be capped at 2 GB
	 */
	machine_info.memory_size = (uint32_t)mem_size;
}

void
machine_init(void)
{
	debug_log_init();
	clock_config();
	is_clock_configured = TRUE;
	if (debug_enabled) {
		pmap_map_globals();
	}
}

void
slave_machine_init(__unused void *param)
{
	cpu_machine_init();     /* Initialize the processor */
	clock_init();           /* Init the clock */
}

/*
 *	Routine:        machine_processor_shutdown
 *	Function:
 */
thread_t
machine_processor_shutdown(
	__unused thread_t thread,
	void (*doshutdown)(processor_t),
	processor_t processor)
{
	return Shutdown_context(doshutdown, processor);
}
/*
 *	Routine:        ml_init_max_cpus
 *	Function:
 */
void
ml_init_max_cpus(unsigned int max_cpus)
{
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);
	if (max_cpus_initialized != MAX_CPUS_SET) {
		machine_info.max_cpus = max_cpus;
		machine_info.physical_cpu_max = max_cpus;
		machine_info.logical_cpu_max = max_cpus;
		if (max_cpus_initialized == MAX_CPUS_WAIT) {
			thread_wakeup((event_t) &max_cpus_initialized);
		}
		max_cpus_initialized = MAX_CPUS_SET;
	}
	(void) ml_set_interrupts_enabled(current_state);
}

/*
 *	Routine:        ml_get_max_cpus
 *	Function:
 */
unsigned int
ml_get_max_cpus(void)
{
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);
	if (max_cpus_initialized != MAX_CPUS_SET) {
		max_cpus_initialized = MAX_CPUS_WAIT;
		assert_wait((event_t) &max_cpus_initialized, THREAD_UNINT);
		(void) thread_block(THREAD_CONTINUE_NULL);
	}
	(void) ml_set_interrupts_enabled(current_state);
	return machine_info.max_cpus;
}
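
/*
 * Usage sketch (illustrative): the platform expert calls
 * ml_init_max_cpus(n) once the topology is known; any thread that reached
 * ml_get_max_cpus() first is parked on &max_cpus_initialized and is woken
 * by the thread_wakeup() in ml_init_max_cpus().
 */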
/*
 *	Routine:        ml_init_lock_timeout
 *	Function:
 */
void
ml_init_lock_timeout(void)
{
	uint64_t abstime;
	uint64_t mtxspin;
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
	uint32_t slto;

	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
	}

	nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
	LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
	LockTimeOut = (uint32_t)abstime;

	if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &abstime);
		TLockTimeOut = abstime;
	} else {
		TLockTimeOut = LockTimeOut >> 1;
	}

	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
		if (mtxspin > USEC_PER_SEC >> 4) {
			mtxspin = USEC_PER_SEC >> 4;
		}
		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
	} else {
		nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
	}
	MutexSpin = abstime;
}
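
/*
 * Worked example: with no "slto_us" boot-arg, default_timeout_ns is
 * NSEC_PER_SEC >> 2 (250 ms), so LockTimeOutUsec becomes 250000; with no
 * "tlto_us" boot-arg, TLockTimeOut defaults to half of LockTimeOut.
 */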
/*
 * This is called from the machine-independent routine cpu_up()
 * to perform machine-dependent info updates.
 */
void
ml_cpu_up(void)
{
	os_atomic_inc(&machine_info.physical_cpu, relaxed);
	os_atomic_inc(&machine_info.logical_cpu, relaxed);
}

/*
 * This is called from the machine-independent routine cpu_down()
 * to perform machine-dependent info updates.
 */
void
ml_cpu_down(void)
{
	cpu_data_t *cpu_data_ptr;

	os_atomic_dec(&machine_info.physical_cpu, relaxed);
	os_atomic_dec(&machine_info.logical_cpu, relaxed);

	/*
	 * If we want to deal with outstanding IPIs, we need to do so
	 * relatively early in the processor_doshutdown path, as we pend
	 * decrementer interrupts using the IPI mechanism if we cannot
	 * immediately service them (if IRQ is masked). Do so now.
	 *
	 * We aren't on the interrupt stack here; would it make more sense
	 * to disable signaling and then enable interrupts? It might be a
	 * bit cleaner.
	 */
	cpu_data_ptr = getCpuDatap();
	cpu_data_ptr->cpu_running = FALSE;

	if (cpu_data_ptr != &BootCpuData) {
		/*
		 * Move all of this cpu's timers to the master/boot cpu,
		 * and poke it in case there's a sooner deadline for it to schedule.
		 */
		timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
		cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, NULL);
	}

	cpu_signal_handler_internal(TRUE);
}
/*
 *	Routine:        ml_cpu_get_info
 *	Function:
 */
void
ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
{
	cache_info_t *cpuid_cache_info;

	cpuid_cache_info = cache_info();
	ml_cpu_info->vector_unit = 0;
	ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
	ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
	ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;

#if (__ARM_ARCH__ >= 7)
	ml_cpu_info->l2_settings = 1;
	ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
#else
	ml_cpu_info->l2_settings = 0;
	ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
#endif
	ml_cpu_info->l3_settings = 0;
	ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
}

unsigned int
ml_get_machine_mem(void)
{
	return machine_info.memory_size;
}
__attribute__((noreturn))
void
halt_all_cpus(boolean_t reboot)
{
	if (reboot) {
		printf("MACH Reboot\n");
		PEHaltRestart(kPERestartCPU);
	} else {
		printf("CPU halted\n");
		PEHaltRestart(kPEHaltCPU);
	}
	while (1) {
		;
	}
}

__attribute__((noreturn))
void
halt_cpu(void)
{
	halt_all_cpus(FALSE);
}

/*
 *	Routine:        machine_signal_idle
 *	Function:
 */
void
machine_signal_idle(
	processor_t processor)
{
	cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}

void
machine_signal_idle_deferred(
	processor_t processor)
{
	cpu_signal_deferred(processor_to_cpu_datap(processor));
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}

void
machine_signal_idle_cancel(
	processor_t processor)
{
	cpu_signal_cancel(processor_to_cpu_datap(processor));
	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
}
/*
 *	Routine:        ml_install_interrupt_handler
 *	Function:       Initialize Interrupt Handler
 */
void
ml_install_interrupt_handler(
	void *nub,
	int source,
	void *target,
	IOInterruptHandler handler,
	void *refCon)
{
	cpu_data_t *cpu_data_ptr;
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);
	cpu_data_ptr = getCpuDatap();

	cpu_data_ptr->interrupt_nub = nub;
	cpu_data_ptr->interrupt_source = source;
	cpu_data_ptr->interrupt_target = target;
	cpu_data_ptr->interrupt_handler = handler;
	cpu_data_ptr->interrupt_refCon = refCon;

	cpu_data_ptr->interrupts_enabled = TRUE;
	(void) ml_set_interrupts_enabled(current_state);

	initialize_screen(NULL, kPEAcquireScreen);
}

/*
 *	Routine:        ml_init_interrupt
 *	Function:       Initialize Interrupts
 */
void
ml_init_interrupt(void)
{
#if defined(HAS_IPI)
	/*
	 * ml_init_interrupt will get called once for each CPU, but this is redundant
	 * because there is only one global copy of the register for skye. do it only
	 * on the bootstrap cpu
	 */
	if (getCpuDatap()->cluster_master) {
		ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
	}
#endif
}
/*
 *	Routine:        ml_init_timebase
 *	Function:       register and setup Timebase, Decrementer services
 */
void
ml_init_timebase(
	void *args,
	tbd_ops_t tbd_funcs,
	vm_offset_t int_address,
	vm_offset_t int_value __unused)
{
	cpu_data_t *cpu_data_ptr;

	cpu_data_ptr = (cpu_data_t *)args;

	if ((cpu_data_ptr == &BootCpuData)
	    && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
		rtclock_timebase_func = *tbd_funcs;
		rtclock_timebase_addr = int_address;
	}
}
void
ml_parse_cpu_topology(void)
{
	DTEntry entry, child __unused;
	OpaqueDTEntryIterator iter;
	uint32_t cpu_boot_arg;
	int err;

	cpu_boot_arg = MAX_CPUS;

	PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));

	err = DTLookupEntry(NULL, "/cpus", &entry);
	assert(err == kSuccess);

	err = DTInitEntryIterator(entry, &iter);
	assert(err == kSuccess);

	while (kSuccess == DTIterateEntries(&iter, &child)) {
		unsigned int propSize;
		void *prop = NULL;
		int cpu_id = avail_cpus++;

		if (kSuccess == DTGetProperty(child, "cpu-id", &prop, &propSize)) {
			cpu_id = *((int32_t*)prop);
		}

		assert(cpu_id < MAX_CPUS);
		assert(cpu_phys_ids[cpu_id] == (uint32_t)-1);

		if (boot_cpu == -1) {
			if (kSuccess != DTGetProperty(child, "state", &prop, &propSize)) {
				panic("unable to retrieve state for cpu %d", cpu_id);
			}

			if (strncmp((char*)prop, "running", propSize) == 0) {
				boot_cpu = cpu_id;
			}
		}
		if (kSuccess != DTGetProperty(child, "reg", &prop, &propSize)) {
			panic("unable to retrieve physical ID for cpu %d", cpu_id);
		}

		cpu_phys_ids[cpu_id] = *((uint32_t*)prop);

		if ((cpu_id > max_cpu_number) && ((cpu_id == boot_cpu) || (avail_cpus <= cpu_boot_arg))) {
			max_cpu_number = cpu_id;
		}
	}

	if (avail_cpus > cpu_boot_arg) {
		avail_cpus = cpu_boot_arg;
	}

	if (avail_cpus == 0) {
		panic("No cpus found!");
	}

	if (boot_cpu == -1) {
		panic("unable to determine boot cpu!");
	}

	/*
	 * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
	 * not be booting from cpu 0. Userspace will consume the current
	 * CPU number through this register. For non-boot cores, this is
	 * done in start.s (start_cpu) using the cpu_number field of the
	 * per-cpu data object.
	 */
	assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
	__builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)boot_cpu);
}
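
/*
 * Illustrative device tree shape consumed above (node names hypothetical):
 *
 *	/cpus/cpu0 { cpu-id = <0>; reg = <0x000>; state = "running"; }
 *	/cpus/cpu1 { cpu-id = <1>; reg = <0x001>; state = "waiting"; }
 *
 * cpu0 becomes boot_cpu (its "state" is "running"), and cpu_phys_ids[]
 * maps each logical id to the MPIDR-style value in its "reg" property.
 */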
unsigned int
ml_get_cpu_count(void)
{
	return avail_cpus;
}

int
ml_get_boot_cpu_number(void)
{
	return boot_cpu;
}

cluster_type_t
ml_get_boot_cluster(void)
{
	return boot_cluster;
}

int
ml_get_cpu_number(uint32_t phys_id)
{
	for (int log_id = 0; log_id <= ml_get_max_cpu_number(); ++log_id) {
		if (cpu_phys_ids[log_id] == phys_id) {
			return log_id;
		}
	}
	return -1;
}

int
ml_get_max_cpu_number(void)
{
	return max_cpu_number;
}
void
ml_lockdown_init()
{
	lockdown_handler_grp = lck_grp_alloc_init("lockdown_handler", NULL);
	assert(lockdown_handler_grp != NULL);

	lck_mtx_init(&lockdown_handler_lck, lockdown_handler_grp, NULL);

#if defined(KERNEL_INTEGRITY_CTRR)
	init_ctrr_cpu_start_lock();
#endif
}

kern_return_t
ml_lockdown_handler_register(lockdown_handler_t f, void *this)
{
	if (lockdown_handler || !f) {
		return KERN_FAILURE;
	}

	lck_mtx_lock(&lockdown_handler_lck);
	lockdown_handler = f;
	lockdown_this = this;

#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
	lockdown_done = 1;
	lockdown_handler(this);
#else
	if (lockdown_done) {
		lockdown_handler(this);
	}
#endif
	lck_mtx_unlock(&lockdown_handler_lck);

	return KERN_SUCCESS;
}

void
ml_lockdown_run_handler()
{
	lck_mtx_lock(&lockdown_handler_lck);
	assert(!lockdown_done);

	lockdown_done = 1;
	if (lockdown_handler) {
		lockdown_handler(lockdown_this);
	}
	lck_mtx_unlock(&lockdown_handler_lck);
}
kern_return_t
ml_processor_register(ml_processor_info_t *in_processor_info,
    processor_t *processor_out, ipi_handler_t *ipi_handler_out,
    perfmon_interrupt_handler_func *pmi_handler_out)
{
	cpu_data_t *this_cpu_datap;
	processor_set_t pset;
	boolean_t is_boot_cpu;
	static unsigned int reg_cpu_count = 0;

	if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
		return KERN_FAILURE;
	}

	if ((unsigned int)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= avail_cpus) {
		return KERN_FAILURE;
	}

	if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
		is_boot_cpu = FALSE;
		this_cpu_datap = cpu_data_alloc(FALSE);
		cpu_data_init(this_cpu_datap);
	} else {
		this_cpu_datap = &BootCpuData;
		is_boot_cpu = TRUE;
	}

	assert(in_processor_info->log_id < MAX_CPUS);

	this_cpu_datap->cpu_id = in_processor_info->cpu_id;

	this_cpu_datap->cpu_console_buf = console_cpu_alloc(is_boot_cpu);
	if (this_cpu_datap->cpu_console_buf == (void *)(NULL)) {
		goto processor_register_error;
	}

	if (!is_boot_cpu) {
		this_cpu_datap->cpu_number = in_processor_info->log_id;

		if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
			goto processor_register_error;
		}
	}

	this_cpu_datap->cpu_idle_notify = (void *) in_processor_info->processor_idle;
	this_cpu_datap->cpu_cache_dispatch = in_processor_info->platform_cache_dispatch;
	nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
	this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);

	this_cpu_datap->idle_timer_notify = (void *) in_processor_info->idle_timer;
	this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;

	this_cpu_datap->platform_error_handler = (void *) in_processor_info->platform_error_handler;
	this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
	this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
	this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;

	this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
	this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
	this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
	this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
	this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
	this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;

#if HAS_CLUSTER
	this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
#else /* HAS_CLUSTER */
	this_cpu_datap->cluster_master = is_boot_cpu;
#endif /* HAS_CLUSTER */

	pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
	assert(pset != NULL);
	kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);

	if (!is_boot_cpu) {
		processor_init((struct processor *)this_cpu_datap->cpu_processor,
		    this_cpu_datap->cpu_number, pset);

		if (this_cpu_datap->cpu_l2_access_penalty) {
			/*
			 * Cores that have a non-zero L2 access penalty compared
			 * to the boot processor should be de-prioritized by the
			 * scheduler, so that threads use the cores with better L2
			 * preferentially.
			 */
			processor_set_primary(this_cpu_datap->cpu_processor,
			    master_processor);
		}
	}

	*processor_out = this_cpu_datap->cpu_processor;
	*ipi_handler_out = cpu_signal_handler;
#if CPMU_AIC_PMI && MONOTONIC
	*pmi_handler_out = mt_cpmu_aic_pmi;
#else
	*pmi_handler_out = NULL;
#endif /* CPMU_AIC_PMI && MONOTONIC */
	if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
		*in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
	}

#if KPC
	if (kpc_register_cpu(this_cpu_datap) != TRUE) {
		goto processor_register_error;
	}
#endif /* KPC */

	if (!is_boot_cpu) {
		random_cpu_init(this_cpu_datap->cpu_number);
		// now let next CPU register itself
		OSIncrementAtomic((SInt32*)&real_ncpus);
	}

	return KERN_SUCCESS;

processor_register_error:
#if KPC
	kpc_unregister_cpu(this_cpu_datap);
#endif /* KPC */
	if (!is_boot_cpu) {
		cpu_data_free(this_cpu_datap);
	}

	return KERN_FAILURE;
}
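
/*
 * Usage sketch (illustrative): the per-SoC CPU driver fills an
 * ml_processor_info_t with log_id/phys_id/cluster fields and calls
 * ml_processor_register(), receiving the processor_t plus the IPI and PMI
 * handlers it must wire up. KERN_FAILURE means an out-of-range log_id or
 * more registrations than avail_cpus.
 */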
void
ml_init_arm_debug_interface(
	void * in_cpu_datap,
	vm_offset_t virt_address)
{
	((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
	do_debugid();
}

/*
 *	Routine:        init_ast_check
 *	Function:
 */
void
init_ast_check(
	__unused processor_t processor)
{
}

/*
 *	Routine:        cause_ast_check
 *	Function:
 */
void
cause_ast_check(
	processor_t processor)
{
	if (current_processor() != processor) {
		cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
	}
}

extern uint32_t cpu_idle_count;

void
ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
{
	*icp = ml_at_interrupt_context();
	*pidlep = (cpu_idle_count == real_ncpus);
}

/*
 *	Routine:        ml_cause_interrupt
 *	Function:       Generate a fake interrupt
 */
void
ml_cause_interrupt(void)
{
	return; /* BS_XXX */
}
/* Map memory map IO space */
vm_offset_t
ml_io_map(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_IO);
}

/* Map memory map IO space (with protections specified) */
vm_offset_t
ml_io_map_with_prot(
	vm_offset_t phys_addr,
	vm_size_t size,
	vm_prot_t prot)
{
	return io_map_with_prot(phys_addr, size, VM_WIMG_IO, prot);
}

vm_offset_t
ml_io_map_wcomb(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_WCOMB);
}

/* boot memory allocation */
vm_offset_t
ml_static_malloc(
	__unused vm_size_t size)
{
	return (vm_offset_t) NULL;
}

vm_map_address_t
ml_map_high_window(
	vm_offset_t phys_addr,
	vm_size_t len)
{
	return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
}

vm_offset_t
ml_static_ptovirt(
	vm_offset_t paddr)
{
	return phystokv(paddr);
}

vm_offset_t
ml_static_slide(
	vm_offset_t vaddr)
{
	return phystokv(vaddr + vm_kernel_slide - gVirtBase + gPhysBase);
}

vm_offset_t
ml_static_unslide(
	vm_offset_t vaddr)
{
	return ml_static_vtop(vaddr) - gPhysBase + gVirtBase - vm_kernel_slide;
}
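
/*
 * Example: ml_static_slide() maps an unslid static kernel address to its
 * live (slid) VA and ml_static_unslide() inverts it, so
 * ml_static_unslide(ml_static_slide(v)) == v for any v in the static
 * region -- both are built from the same gVirtBase/gPhysBase/
 * vm_kernel_slide terms.
 */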
extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);

kern_return_t
ml_static_protect(
	vm_offset_t vaddr, /* kernel virtual address */
	vm_size_t size,
	vm_prot_t new_prot)
{
	pt_entry_t    arm_prot = 0;
	pt_entry_t    arm_block_prot = 0;
	vm_offset_t   vaddr_cur;
	ppnum_t       ppn;
	kern_return_t result = KERN_SUCCESS;

	if (vaddr < VM_MIN_KERNEL_ADDRESS) {
		panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) VM_MIN_KERNEL_ADDRESS);
		return KERN_FAILURE;
	}

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
		panic("ml_static_protect(): WX request on %p", (void *) vaddr);
	}

	/* Set up the protection bits, and block bits so we can validate block mappings. */
	if (new_prot & VM_PROT_WRITE) {
		arm_prot |= ARM_PTE_AP(AP_RWNA);
		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
	} else {
		arm_prot |= ARM_PTE_AP(AP_RONA);
		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
	}

	arm_prot |= ARM_PTE_NX;
	arm_block_prot |= ARM_TTE_BLOCK_NX;

	if (!(new_prot & VM_PROT_EXECUTE)) {
		arm_prot |= ARM_PTE_PNX;
		arm_block_prot |= ARM_TTE_BLOCK_PNX;
	}

	for (vaddr_cur = vaddr;
	    vaddr_cur < trunc_page_64(vaddr + size);
	    vaddr_cur += PAGE_SIZE) {
		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		if (ppn != (vm_offset_t) NULL) {
			tt_entry_t      *tte2;
			pt_entry_t      *pte_p;
			pt_entry_t      ptmp;

#if XNU_MONITOR
			assert(!TEST_PAGE_RATIO_4);
			assert(!pmap_is_monitor(ppn));
#endif

			tte2 = arm_kva_to_tte(vaddr_cur);

			if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
				if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
				    ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
					/*
					 * We can support ml_static_protect on a block mapping if the mapping already has
					 * the desired protections.  We still want to run checks on a per-page basis.
					 */
					continue;
				}

				result = KERN_FAILURE;
				break;
			}

			pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
			ptmp = *pte_p;

			if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
				/*
				 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
				 * protections do not match the desired protections, then we will fail (as we cannot update
				 * this mapping without updating other mappings as well).
				 */
				result = KERN_FAILURE;
				break;
			}

			__unreachable_ok_push
			if (TEST_PAGE_RATIO_4) {
				unsigned int    i;
				pt_entry_t      *ptep_iter;

				ptep_iter = pte_p;
				for (i = 0; i < 4; i++, ptep_iter++) {
					/* Note that there is a hole in the HINT sanity checking here. */
					ptmp = *ptep_iter;

					/* We only need to update the page tables if the protections do not match. */
					if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
						ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
						*ptep_iter = ptmp;
					}
				}
			} else {
				ptmp = *pte_p;

				/* We only need to update the page tables if the protections do not match. */
				if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
					ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
					*pte_p = ptmp;
				}
			}
			__unreachable_ok_pop
		}
	}

	if (vaddr_cur > vaddr) {
		assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
		flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
	}

	return result;
}
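
/*
 * Typical call (illustrative): write-protecting one page of kernel data
 * once initialization is complete:
 *
 *	kern_return_t kr = ml_static_protect(addr, PAGE_SIZE, VM_PROT_READ);
 *	assert(kr == KERN_SUCCESS);
 *
 * KERN_FAILURE means the range sits in a block or contiguous-hint mapping
 * whose existing permissions differ from the request.
 */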
/*
 *	Routine:        ml_static_mfree
 *	Function:
 */
void
ml_static_mfree(
	vm_offset_t vaddr,
	vm_size_t size)
{
	vm_offset_t vaddr_cur;
	ppnum_t     ppn;
	uint32_t    freed_pages = 0;

	/* It is acceptable (if bad) to fail to free. */
	if (vaddr < VM_MIN_KERNEL_ADDRESS) {
		return;
	}

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	for (vaddr_cur = vaddr;
	    vaddr_cur < trunc_page_64(vaddr + size);
	    vaddr_cur += PAGE_SIZE) {
		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		if (ppn != (vm_offset_t) NULL) {
			/*
			 * It is not acceptable to fail to update the protections on a page
			 * we will release to the VM. We need to either panic or continue.
			 * For now, we'll panic (to help flag if there is memory we can
			 * reclaim).
			 */
			if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
				panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
			}

			/*
			 * Must NOT tear down the "V==P" mapping for vaddr_cur as the zone alias scheme
			 * relies on the persistence of these mappings for all time.
			 */
			// pmap_remove(kernel_pmap, (addr64_t) vaddr_cur, (addr64_t) (vaddr_cur + PAGE_SIZE));

			vm_page_create(ppn, (ppn + 1));
			freed_pages++;
		}
	}
	vm_page_lockspin_queues();
	vm_page_wire_count -= freed_pages;
	vm_page_wire_count_initial -= freed_pages;
	vm_page_unlock_queues();
#if DEBUG
	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
#endif
}

/* virtual to physical on wired pages */
vm_offset_t
ml_vtophys(vm_offset_t vaddr)
{
	return kvtophys(vaddr);
}
/*
 * Routine: ml_nofault_copy
 * Function: Perform a physical mode copy if the source and destination have
 * valid translations in the kernel pmap. If translations are present, they are
 * assumed to be wired; e.g., no attempt is made to guarantee that the
 * translations obtained remain valid for the duration of the copy process.
 */
vm_size_t
ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
{
	addr64_t  cur_phys_dst, cur_phys_src;
	vm_size_t count, nbytes = 0;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			break;
		}
		if (!(cur_phys_dst = kvtophys(virtdst))) {
			break;
		}
		if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
		    !pmap_valid_address(trunc_page_64(cur_phys_src))) {
			break;
		}
		count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
		if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
			count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
		}
		if (count > size) {
			count = size;
		}

		bcopy_phys(cur_phys_src, cur_phys_dst, count);

		nbytes += count;
		virtsrc += count;
		virtdst += count;
		size -= count;
	}

	return nbytes;
}

/*
 *	Routine:        ml_validate_nofault
 *	Function: Validate that this address range has valid translations
 *			in the kernel pmap. If translations are present, they are
 *			assumed to be wired; i.e. no attempt is made to guarantee
 *			that the translations persist after the check.
 *	Returns: TRUE if the range is mapped and will not cause a fault,
 *			FALSE otherwise.
 */
boolean_t
ml_validate_nofault(
	vm_offset_t virtsrc, vm_size_t size)
{
	addr64_t cur_phys_src;
	uint32_t count;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			return FALSE;
		}
		if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
			return FALSE;
		}
		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
		if (count > size) {
			count = (uint32_t)size;
		}

		virtsrc += count;
		size -= count;
	}

	return TRUE;
}
void
ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
{
	*phys_addr = 0;
	*size = 0;
}

void
active_rt_threads(__unused boolean_t active)
{
}

static void
cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
{
	return;
}

cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;

void
cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
{
	if (cpu_qos_cb != NULL) {
		cpu_qos_update = cpu_qos_cb;
	} else {
		cpu_qos_update = cpu_qos_cb_default;
	}
}

void
thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
{
	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);

	cpu_qos_update((int)urgency, rt_period, rt_deadline);

	SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}

void
machine_run_count(__unused uint32_t count)
{
}

processor_t
machine_choose_processor(__unused processor_set_t pset, processor_t processor)
{
	return processor;
}
#if KASAN
vm_offset_t ml_stack_base(void);
vm_size_t ml_stack_size(void);

vm_offset_t
ml_stack_base(void)
{
	uintptr_t local = (uintptr_t) &local;
	vm_offset_t intstack_top_ptr;

	intstack_top_ptr = getCpuDatap()->intstack_top;
	if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
		return intstack_top_ptr - INTSTACK_SIZE;
	} else {
		return current_thread()->kernel_stack;
	}
}

vm_size_t
ml_stack_size(void)
{
	uintptr_t local = (uintptr_t) &local;
	vm_offset_t intstack_top_ptr;

	intstack_top_ptr = getCpuDatap()->intstack_top;
	if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
		return INTSTACK_SIZE;
	} else {
		return kernel_stack_size;
	}
}
#endif

boolean_t
machine_timeout_suspended(void)
{
	return FALSE;
}

kern_return_t
ml_interrupt_prewarm(__unused uint64_t deadline)
{
	return KERN_FAILURE;
}
/*
 * Assumes fiq, irq disabled.
 */
void
ml_set_decrementer(uint32_t dec_value)
{
	cpu_data_t *cdp = getCpuDatap();

	assert(ml_get_interrupts_enabled() == FALSE);
	cdp->cpu_decrementer = dec_value;

	if (cdp->cpu_set_decrementer_func) {
		((void (*)(uint32_t))cdp->cpu_set_decrementer_func)(dec_value);
	} else {
		__asm__ volatile ("msr CNTP_TVAL_EL0, %0" : : "r"((uint64_t)dec_value));
	}
}

uint64_t
ml_get_hwclock()
{
	uint64_t timebase;

	// ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
	// "Reads of CNTPCT[_EL0] can occur speculatively and out of order relative
	// to other instructions executed on the same processor."
	__builtin_arm_isb(ISB_SY);
	timebase = __builtin_arm_rsr64("CNTPCT_EL0");

	return timebase;
}

uint64_t
ml_get_timebase()
{
	return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase;
}

uint32_t
ml_get_decrementer()
{
	cpu_data_t *cdp = getCpuDatap();
	uint32_t dec;

	assert(ml_get_interrupts_enabled() == FALSE);

	if (cdp->cpu_get_decrementer_func) {
		dec = ((uint32_t (*)(void))cdp->cpu_get_decrementer_func)();
	} else {
		uint64_t wide_val;

		__asm__ volatile ("mrs %0, CNTP_TVAL_EL0" : "=r"(wide_val));
		dec = (uint32_t)wide_val;
		assert(wide_val == (uint64_t)dec);
	}

	return dec;
}

boolean_t
ml_get_timer_pending()
{
	uint64_t cntp_ctl;

	__asm__ volatile ("mrs %0, CNTP_CTL_EL0" : "=r"(cntp_ctl));
	return ((cntp_ctl & CNTP_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
}
boolean_t
ml_wants_panic_trap_to_debugger(void)
{
	boolean_t result = FALSE;
#if XNU_MONITOR
	/*
	 * This looks racy, but if we are in the PPL, preemption will be
	 * disabled.
	 */
	result = ((pmap_get_cpu_data()->ppl_state == PPL_STATE_DISPATCH) && pmap_ppl_locked_down);
#endif
	return result;
}
static void
cache_trap_error(thread_t thread, vm_map_address_t fault_addr)
{
	mach_exception_data_type_t exc_data[2];
	arm_saved_state_t *regs = get_user_regs(thread);

	set_saved_state_far(regs, fault_addr);

	exc_data[0] = KERN_INVALID_ADDRESS;
	exc_data[1] = fault_addr;

	exception_triage(EXC_BAD_ACCESS, exc_data, 2);
}

static void
cache_trap_recover()
{
	vm_map_address_t fault_addr;

	__asm__ volatile ("mrs %0, FAR_EL1" : "=r"(fault_addr));

	cache_trap_error(current_thread(), fault_addr);
}

static void
set_cache_trap_recover(thread_t thread)
{
#if defined(HAS_APPLE_PAC)
	thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover,
	    ptrauth_key_function_pointer, 0,
	    ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
#else /* defined(HAS_APPLE_PAC) */
	thread->recover = (vm_address_t)cache_trap_recover;
#endif /* defined(HAS_APPLE_PAC) */
}
static void
dcache_flush_trap(vm_map_address_t start, vm_map_size_t size)
{
	vm_map_address_t end = start + size;
	thread_t thread = current_thread();
	vm_offset_t old_recover = thread->recover;

	/* Check bounds */
	if (task_has_64Bit_addr(current_task())) {
		if (end > MACH_VM_MAX_ADDRESS) {
			cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
		}
	} else {
		if (end > VM_MAX_ADDRESS) {
			cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
		}
	}

	if (start > end) {
		cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
	}

	set_cache_trap_recover(thread);

	/*
	 * We're coherent on Apple ARM64 CPUs, so this could be a nop.  However,
	 * if the region given us is bad, it would be good to catch it and
	 * crash, ergo we still do the flush.
	 */
	FlushPoC_DcacheRegion(start, (uint32_t)size);

	/* Restore recovery function */
	thread->recover = old_recover;

	/* Return (caller does exception return) */
}

static void
icache_invalidate_trap(vm_map_address_t start, vm_map_size_t size)
{
	vm_map_address_t end = start + size;
	thread_t thread = current_thread();
	vm_offset_t old_recover = thread->recover;

	/* Check bounds */
	if (task_has_64Bit_addr(current_task())) {
		if (end > MACH_VM_MAX_ADDRESS) {
			cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
		}
	} else {
		if (end > VM_MAX_ADDRESS) {
			cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
		}
	}

	if (start > end) {
		cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
	}

	set_cache_trap_recover(thread);

	/* Invalidate iCache to point of unification */
	InvalidatePoU_IcacheRegion(start, (uint32_t)size);

	/* Restore recovery function */
	thread->recover = old_recover;

	/* Return (caller does exception return) */
}
__attribute__((noreturn))
void
platform_syscall(arm_saved_state_t *state)
{
	uint32_t code;

#define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */

	code = (uint32_t)get_saved_state_reg(state, 3);
	switch (code) {
	case 0:
		/* I-Cache flush */
		platform_syscall_kprintf("icache flush requested.\n");
		icache_invalidate_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
		break;
	case 1:
		/* D-Cache flush */
		platform_syscall_kprintf("dcache flush requested.\n");
		dcache_flush_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
		break;
	case 2:
		/* set cthread */
		platform_syscall_kprintf("set cthread self.\n");
		thread_set_cthread_self(get_saved_state_reg(state, 0));
		break;
	case 3:
		/* get cthread */
		platform_syscall_kprintf("get cthread self.\n");
		set_saved_state_reg(state, 0, thread_get_cthread_self());
		break;
	default:
		platform_syscall_kprintf("unknown: %d\n", code);
		break;
	}

	thread_exception_return();
}
static void
_enable_timebase_event_stream(uint32_t bit_index)
{
	uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */

	if (bit_index >= 64) {
		panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
	}

	__asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl));

	cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
	cntkctl |= CNTKCTL_EL1_EVNTEN;
	cntkctl |= CNTKCTL_EL1_EVENTDIR; /* 1->0; why not? */

	/*
	 * If the SOC supports it (and it isn't broken), enable
	 * EL0 access to the physical timebase register.
	 */
	if (user_timebase_type() != USER_TIMEBASE_NONE) {
		cntkctl |= CNTKCTL_EL1_PL0PCTEN;
	}

	__asm__ volatile ("msr CNTKCTL_EL1, %0" : : "r"(cntkctl));
}

/*
 * Turn timer on, unmask that interrupt.
 */
static void
_enable_virtual_timer(void)
{
	uint64_t cntvctl = CNTP_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */

	__asm__ volatile ("msr CNTP_CTL_EL0, %0" : : "r"(cntvctl));
}

uint64_t events_per_sec = 0;

void
fiq_context_init(boolean_t enable_fiq __unused)
{
	_enable_timebase_event_stream(fiq_eventi);

	/* Interrupts still disabled. */
	assert(ml_get_interrupts_enabled() == FALSE);
	_enable_virtual_timer();
}
void
fiq_context_bootstrap(boolean_t enable_fiq)
{
#if defined(APPLE_ARM64_ARCH_FAMILY) || defined(BCM2837)
	/* Could fill in our own ops here, if we needed them */
	uint64_t ticks_per_sec, ticks_per_event;
	uint32_t bit_index;

	ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
	ticks_per_event = ticks_per_sec / events_per_sec;
	bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */

	/* Round up to power of two */
	if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
		bit_index++;
	}

	/*
	 * The timer can only trigger on rising or falling edge,
	 * not both; we don't care which we trigger on, but we
	 * do need to adjust which bit we are interested in to
	 * account for this.
	 */
	if (bit_index != 0) {
		bit_index--;
	}

	fiq_eventi = bit_index;
#else
#error Need a board configuration.
#endif
	fiq_context_init(enable_fiq);
}
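
/*
 * Worked example (illustrative, assuming a 24 MHz timebase and
 * events_per_sec == 1000): ticks_per_event = 24000, so flsll() - 1 gives
 * bit 14; the remainder is non-zero, so it rounds up to 15 (2^15 = 32768
 * ticks), and the edge adjustment then selects bit 14 -- whose 1->0
 * transitions occur every 2^15 ticks, about 1.4 ms apart.
 */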
boolean_t
ml_delay_should_spin(uint64_t interval)
{
	cpu_data_t *cdp = getCpuDatap();

	if (cdp->cpu_idle_latency) {
		return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
	} else {
		/*
		 * Early boot, latency is unknown. Err on the side of blocking,
		 * which should always be safe, even if slow
		 */
		return FALSE;
	}
}

boolean_t
ml_thread_is64bit(thread_t thread)
{
	return thread_is_64bit_addr(thread);
}

void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
	if (yield_delay_us) {
		delay(yield_delay_us);
	}
#endif
}

void
ml_timer_evaluate(void)
{
}

boolean_t
ml_timer_forced_evaluation(void)
{
	return FALSE;
}

uint64_t
ml_energy_stat(thread_t t)
{
	return t->machine.energy_estimate_nj;
}

void
ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
{
#if CONFIG_EMBEDDED
	/*
	 * For now: update the resource coalition stats of the
	 * current thread's coalition
	 */
	task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
#endif
}

uint64_t
ml_gpu_stat(__unused thread_t t)
{
	return 0;
}
#if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME
static void
timer_state_event(boolean_t switch_to_kernel)
{
	thread_t thread = current_thread();
	if (!thread->precise_user_kernel_time) {
		return;
	}

	processor_data_t *pd = &getCpuDatap()->cpu_processor->processor_data;
	uint64_t now = ml_get_timebase();

	timer_stop(pd->current_state, now);
	pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
	timer_start(pd->current_state, now);

	timer_stop(pd->thread_timer, now);
	pd->thread_timer = (switch_to_kernel) ? &thread->system_timer : &thread->user_timer;
	timer_start(pd->thread_timer, now);
}

void
timer_state_event_user_to_kernel(void)
{
	timer_state_event(TRUE);
}

void
timer_state_event_kernel_to_user(void)
{
	timer_state_event(FALSE);
}
#endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME */

/*
 * The following are required for parts of the kernel
 * that cannot resolve these functions as inlines:
 */
extern thread_t current_act(void) __attribute__((const));
thread_t
current_act(void)
{
	return current_thread_fast();
}

#undef current_thread
extern thread_t current_thread(void) __attribute__((const));
thread_t
current_thread(void)
{
	return current_thread_fast();
}
ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];

/*
 * Callback registration
 * Currently we support only one registered callback per class but
 * it should be possible to support more callbacks
 */
kern_return_t
ex_cb_register(
	ex_cb_class_t   cb_class,
	ex_cb_t         cb,
	void            *refcon)
{
	ex_cb_info_t *pInfo = &ex_cb_info[cb_class];

	if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX)) {
		return KERN_INVALID_VALUE;
	}

	if (NULL == pInfo->cb) {
		pInfo->cb = cb;
		pInfo->refcon = refcon;
		return KERN_SUCCESS;
	}
	return KERN_FAILURE;
}

/*
 * Called internally by platform kernel to invoke the registered callback for class
 */
ex_cb_action_t
ex_cb_invoke(
	ex_cb_class_t   cb_class,
	vm_offset_t     far)
{
	ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
	ex_cb_state_t state = {far};

	if (cb_class >= EXCB_CLASS_MAX) {
		panic("Invalid exception callback class 0x%x\n", cb_class);
	}

	if (pInfo->cb) {
		return pInfo->cb(cb_class, pInfo->refcon, &state);
	}
	return EXCB_ACTION_NONE;
}
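
/*
 * Registration sketch (illustrative; handler name hypothetical):
 *
 *	static ex_cb_action_t
 *	my_cb(ex_cb_class_t cls, void *refcon, const ex_cb_state_t *state)
 *	{
 *		return EXCB_ACTION_NONE;
 *	}
 *	...
 *	ex_cb_register(cb_class, &my_cb, NULL);
 *
 * A second registration for the same class fails with KERN_FAILURE.
 */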
#if defined(HAS_APPLE_PAC)
void
ml_task_set_disable_user_jop(task_t task, boolean_t disable_user_jop)
{
	assert(task);
	task->disable_user_jop = disable_user_jop;
}

void
ml_thread_set_disable_user_jop(thread_t thread, boolean_t disable_user_jop)
{
	assert(thread);
	thread->machine.disable_user_jop = disable_user_jop;
}

void
ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
{
	if (inherit) {
		task->rop_pid = parent_task->rop_pid;
	} else {
		task->rop_pid = early_random();
	}
}
#endif /* defined(HAS_APPLE_PAC) */

#if defined(HAS_APPLE_PAC)

/*
 * ml_auth_ptr_unchecked: call this instead of the ptrauth_auth_data
 * intrinsic when you don't want to trap on auth fail.
 */
void *
ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
{
	switch (key & 0x3) {
	case ptrauth_key_asia:
		asm volatile ("autia %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
		break;
	case ptrauth_key_asib:
		asm volatile ("autib %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
		break;
	case ptrauth_key_asda:
		asm volatile ("autda %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
		break;
	case ptrauth_key_asdb:
		asm volatile ("autdb %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
		break;
	}

	return ptr;
}
#endif /* defined(HAS_APPLE_PAC) */
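
/*
 * Example (illustrative): authenticate a possibly-forged data pointer
 * without risking a trap:
 *
 *	void *p = ml_auth_ptr_unchecked(ptr, ptrauth_key_asda, modifier);
 *
 * On authentication failure the aut* instructions corrupt the pointer's
 * extension bits instead of faulting, so the caller can validate the
 * result rather than crash.
 */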