]> git.saurik.com Git - apple/xnu.git/blob - osfmk/arm64/machine_routines.c
xnu-4903.231.4.tar.gz
[apple/xnu.git] / osfmk / arm64 / machine_routines.c
1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm64/proc_reg.h>
30 #include <arm/machine_cpu.h>
31 #include <arm/cpu_internal.h>
32 #include <arm/cpuid.h>
33 #include <arm/io_map_entries.h>
34 #include <arm/cpu_data.h>
35 #include <arm/cpu_data_internal.h>
36 #include <arm/caches_internal.h>
37 #include <arm/misc_protos.h>
38 #include <arm/machdep_call.h>
39 #include <arm/rtclock.h>
40 #include <console/serial_protos.h>
41 #include <kern/machine.h>
42 #include <prng/random.h>
43 #include <kern/startup.h>
44 #include <kern/thread.h>
45 #include <mach/machine.h>
46 #include <machine/atomic.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_page.h>
49 #include <sys/kdebug.h>
50 #include <kern/coalition.h>
51 #include <pexpert/device_tree.h>
52
53 #include <IOKit/IOPlatformExpert.h>
54
55 #if defined(KERNEL_INTEGRITY_KTRR)
56 #include <libkern/kernel_mach_header.h>
57 #endif
58
59 #include <libkern/section_keywords.h>
60
61 #if KPC
62 #include <kern/kpc.h>
63 #endif
64
65
66 static int max_cpus_initialized = 0;
67 #define MAX_CPUS_SET 0x1
68 #define MAX_CPUS_WAIT 0x2
69
70 uint32_t LockTimeOut;
71 uint32_t LockTimeOutUsec;
72 uint64_t MutexSpin;
73 boolean_t is_clock_configured = FALSE;
74
75 uint32_t yield_delay_us = 42; /* Less than cpu_idle_latency to ensure ml_delay_should_spin is true */
76
77 extern int mach_assert;
78 extern volatile uint32_t debug_enabled;
79
80 extern vm_offset_t segEXTRADATA;
81 extern vm_offset_t segLOWESTTEXT;
82 extern vm_offset_t segLASTB;
83 extern unsigned long segSizeLAST;
84
85
86 void machine_conf(void);
87
88 thread_t Idle_context(void);
89
90 SECURITY_READ_ONLY_LATE(static uint32_t) cpu_phys_ids[MAX_CPUS] = {[0 ... MAX_CPUS - 1] = (uint32_t)-1};
91 SECURITY_READ_ONLY_LATE(static unsigned int) avail_cpus = 0;
92 SECURITY_READ_ONLY_LATE(static int) boot_cpu = -1;
93 SECURITY_READ_ONLY_LATE(static int) max_cpu_number = 0;
94 SECURITY_READ_ONLY_LATE(cluster_type_t) boot_cluster = CLUSTER_TYPE_SMP;
95
96 SECURITY_READ_ONLY_LATE(static uint32_t) fiq_eventi = UINT32_MAX;
97
98 lockdown_handler_t lockdown_handler;
99 void *lockdown_this;
100 lck_mtx_t lockdown_handler_lck;
101 lck_grp_t *lockdown_handler_grp;
102 int lockdown_done;
103
104 void ml_lockdown_init(void);
105 void ml_lockdown_run_handler(void);
106 uint32_t get_arm_cpu_version(void);
107
108
109 void ml_cpu_signal(unsigned int cpu_id __unused)
110 {
111 panic("Platform does not support ACC Fast IPI");
112 }
113
/*
 * Deferred-IPI timer adjustment is unsupported here; the requested value is
 * ignored and the call panics.
 */
void
ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
{
	(void)nanosecs;		/* unused: the feature is not implemented */
	panic("Platform does not support ACC Fast IPI");
}
118
/*
 * Reports the deferred-IPI timer value. Deferred Fast IPIs are not
 * implemented in this configuration, so the value is always 0.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype rather than an old-style declarator that accepts any arguments.
 */
uint64_t
ml_cpu_signal_deferred_get_timer(void)
{
	return 0;
}
122
123 void ml_cpu_signal_deferred(unsigned int cpu_id __unused)
124 {
125 panic("Platform does not support ACC Fast IPI deferral");
126 }
127
128 void ml_cpu_signal_retract(unsigned int cpu_id __unused)
129 {
130 panic("Platform does not support ACC Fast IPI retraction");
131 }
132
133 void machine_idle(void)
134 {
135 __asm__ volatile ("msr DAIFSet, %[mask]" ::[mask] "i" (DAIFSC_IRQF | DAIFSC_FIQF));
136 Idle_context();
137 __asm__ volatile ("msr DAIFClr, %[mask]" ::[mask] "i" (DAIFSC_IRQF | DAIFSC_FIQF));
138 }
139
/*
 * VFP initialisation hook. Intentionally empty in this file.
 */
void
init_vfp(void)
{
	/* nothing to do */
}
144
145 boolean_t get_vfp_enabled(void)
146 {
147 return TRUE;
148 }
149
150 void OSSynchronizeIO(void)
151 {
152 __builtin_arm_dsb(DSB_SY);
153 }
154
/*
 * Returns the current contents of ACTLR_EL1.
 */
uint64_t
get_aux_control(void)
{
	uint64_t actlr;

	MRS(actlr, "ACTLR_EL1");

	return actlr;
}
162
/*
 * Returns the current contents of SCTLR_EL1.
 */
uint64_t
get_mmu_control(void)
{
	uint64_t sctlr;

	MRS(sctlr, "SCTLR_EL1");

	return sctlr;
}
170
/*
 * Returns the current contents of TCR_EL1.
 */
uint64_t
get_tcr(void)
{
	uint64_t tcr;

	MRS(tcr, "TCR_EL1");

	return tcr;
}
178
179 boolean_t ml_get_interrupts_enabled(void)
180 {
181 uint64_t value;
182
183 MRS(value, "DAIF");
184 if (value & DAIF_IRQF)
185 return FALSE;
186 return TRUE;
187 }
188
189 pmap_paddr_t get_mmu_ttb(void)
190 {
191 pmap_paddr_t value;
192
193 MRS(value, "TTBR0_EL1");
194 return value;
195 }
196
/*
 * Reads MIDR_EL1 and returns its low 32 bits; the register itself is only
 * 32 bits wide, so nothing is lost by the narrowing cast.
 */
static uint32_t
get_midr_el1(void)
{
	uint64_t midr;

	MRS(midr, "MIDR_EL1");

	return (uint32_t) midr;
}
206
207 uint32_t get_arm_cpu_version(void)
208 {
209 uint32_t value = get_midr_el1();
210
211 /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
212 return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
213 }
214
215 /*
216 * user_cont_hwclock_allowed()
217 *
218 * Indicates whether we allow EL0 to read the physical timebase (CNTPCT_EL0)
219 * as a continuous time source (e.g. from mach_continuous_time)
220 */
221 boolean_t user_cont_hwclock_allowed(void)
222 {
223 return FALSE;
224 }
225
226 /*
227 * user_timebase_allowed()
228 *
229 * Indicates whether we allow EL0 to read the physical timebase (CNTPCT_EL0).
230 */
231 boolean_t user_timebase_allowed(void)
232 {
233 return TRUE;
234 }
235
236 boolean_t arm64_wfe_allowed(void)
237 {
238 return TRUE;
239 }
240
241 #if defined(KERNEL_INTEGRITY_KTRR)
242
243 uint64_t rorgn_begin __attribute__((section("__DATA, __const"))) = 0;
244 uint64_t rorgn_end __attribute__((section("__DATA, __const"))) = 0;
245 vm_offset_t amcc_base;
246
247 static void assert_unlocked(void);
248 static void assert_amcc_cache_disabled(void);
249 static void lock_amcc(void);
250 static void lock_mmu(uint64_t begin, uint64_t end);
251
252 void rorgn_stash_range(void)
253 {
254
255 #if DEVELOPMENT || DEBUG
256 boolean_t rorgn_disable = FALSE;
257
258 PE_parse_boot_argn("-unsafe_kernel_text", &rorgn_disable, sizeof(rorgn_disable));
259
260 if (rorgn_disable) {
261 /* take early out if boot arg present, don't query any machine registers to avoid
262 * dependency on amcc DT entry
263 */
264 return;
265 }
266 #endif
267
268 /* Get the AMC values, and stash them into rorgn_begin, rorgn_end.
269 * gPhysBase is the base of DRAM managed by xnu. we need DRAM_BASE as
270 * the AMCC RO region begin/end registers are in units of 16KB page
271 * numbers from DRAM_BASE so we'll truncate gPhysBase at 512MB granule
272 * and assert the value is the canonical DRAM_BASE PA of 0x8_0000_0000 for arm64.
273 */
274
275 uint64_t dram_base = gPhysBase & ~0x1FFFFFFFULL; /* 512MB */
276 assert(dram_base == 0x800000000ULL);
277
278 #if defined(KERNEL_INTEGRITY_KTRR)
279 uint64_t soc_base = 0;
280 DTEntry entryP = NULL;
281 uintptr_t *reg_prop = NULL;
282 uint32_t prop_size = 0;
283 int rc;
284
285 soc_base = pe_arm_get_soc_base_phys();
286 rc = DTFindEntry("name", "mcc", &entryP);
287 assert(rc == kSuccess);
288 rc = DTGetProperty(entryP, "reg", (void **)&reg_prop, &prop_size);
289 assert(rc == kSuccess);
290 amcc_base = ml_io_map(soc_base + *reg_prop, *(reg_prop + 1));
291 #else
292 #error "KERNEL_INTEGRITY config error"
293 #endif
294
295 #if defined(KERNEL_INTEGRITY_KTRR)
296 assert(rRORGNENDADDR > rRORGNBASEADDR);
297 rorgn_begin = (rRORGNBASEADDR << AMCC_PGSHIFT) + dram_base;
298 rorgn_end = (rRORGNENDADDR << AMCC_PGSHIFT) + dram_base;
299 #else
300 #error KERNEL_INTEGRITY config error
301 #endif /* defined (KERNEL_INTEGRITY_KTRR) */
302 }
303
304 static void assert_unlocked() {
305 uint64_t ktrr_lock = 0;
306 uint32_t rorgn_lock = 0;
307
308 assert(amcc_base);
309 #if defined(KERNEL_INTEGRITY_KTRR)
310 rorgn_lock = rRORGNLOCK;
311 ktrr_lock = __builtin_arm_rsr64(ARM64_REG_KTRR_LOCK_EL1);
312 #else
313 #error KERNEL_INTEGRITY config error
314 #endif /* defined(KERNEL_INTEGRITY_KTRR) */
315
316 assert(!ktrr_lock);
317 assert(!rorgn_lock);
318 }
319
320 static void lock_amcc() {
321 #if defined(KERNEL_INTEGRITY_KTRR)
322 rRORGNLOCK = 1;
323 __builtin_arm_isb(ISB_SY);
324 #else
325 #error KERNEL_INTEGRITY config error
326 #endif
327 }
328
329 static void lock_mmu(uint64_t begin, uint64_t end) {
330
331 #if defined(KERNEL_INTEGRITY_KTRR)
332
333 __builtin_arm_wsr64(ARM64_REG_KTRR_LOWER_EL1, begin);
334 __builtin_arm_wsr64(ARM64_REG_KTRR_UPPER_EL1, end);
335 __builtin_arm_wsr64(ARM64_REG_KTRR_LOCK_EL1, 1ULL);
336
337 /* flush TLB */
338
339 __builtin_arm_isb(ISB_SY);
340 flush_mmu_tlb();
341
342 #else
343 #error KERNEL_INTEGRITY config error
344 #endif
345
346 }
347
348 static void assert_amcc_cache_disabled() {
349 #if defined(KERNEL_INTEGRITY_KTRR)
350 assert((rMCCGEN & 1) == 0); /* assert M$ disabled or LLC clean will be unreliable */
351 #else
352 #error KERNEL_INTEGRITY config error
353 #endif
354 }
355
356 /*
357 * void rorgn_lockdown(void)
358 *
359 * Lock the MMU and AMCC RORegion within lower and upper boundaries if not already locked
360 *
361 * [ ] - ensure this is being called ASAP on secondary CPUs: KTRR programming and lockdown handled in
362 * start.s:start_cpu() for subsequent wake/resume of all cores
363 */
364 void rorgn_lockdown(void)
365 {
366 vm_offset_t ktrr_begin, ktrr_end;
367 unsigned long last_segsz;
368
369 #if DEVELOPMENT || DEBUG
370 boolean_t ktrr_disable = FALSE;
371
372 PE_parse_boot_argn("-unsafe_kernel_text", &ktrr_disable, sizeof(ktrr_disable));
373
374 if (ktrr_disable) {
375 /*
376 * take early out if boot arg present, since we may not have amcc DT entry present
377 * we can't assert that iboot hasn't programmed the RO region lockdown registers
378 */
379 goto out;
380 }
381 #endif /* DEVELOPMENT || DEBUG */
382
383 assert_unlocked();
384
385 /* [x] - Use final method of determining all kernel text range or expect crashes */
386 ktrr_begin = segEXTRADATA;
387 assert(ktrr_begin && gVirtBase && gPhysBase);
388
389 ktrr_begin = kvtophys(ktrr_begin);
390
391 ktrr_end = kvtophys(segLASTB);
392 last_segsz = segSizeLAST;
393 #if defined(KERNEL_INTEGRITY_KTRR)
394 /* __LAST is not part of the MMU KTRR region (it is however part of the AMCC KTRR region) */
395 ktrr_end = (ktrr_end - 1) & ~AMCC_PGMASK;
396 /* ensure that iboot and xnu agree on the ktrr range */
397 assert(rorgn_begin == ktrr_begin && rorgn_end == (ktrr_end + last_segsz));
398 /* assert that __LAST segment containing privileged insns is only a single page */
399 assert(last_segsz == PAGE_SIZE);
400 #endif
401
402
403 #if DEBUG || DEVELOPMENT
404 printf("KTRR Begin: %p End: %p, setting lockdown\n", (void *)ktrr_begin, (void *)ktrr_end);
405 #endif
406
407 /* [x] - ensure all in flight writes are flushed to AMCC before enabling RO Region Lock */
408
409 assert_amcc_cache_disabled();
410
411 CleanPoC_DcacheRegion_Force(phystokv(ktrr_begin),
412 (unsigned)((ktrr_end + last_segsz) - ktrr_begin + AMCC_PGMASK));
413
414 lock_amcc();
415
416 lock_mmu(ktrr_begin, ktrr_end);
417
418 #if DEVELOPMENT || DEBUG
419 out:
420 #endif
421
422 /* now we can run lockdown handler */
423 ml_lockdown_run_handler();
424 }
425
426 #endif /* defined(KERNEL_INTEGRITY_KTRR)*/
427
428 void
429 machine_startup(__unused boot_args * args)
430 {
431 int boot_arg;
432
433
434 PE_parse_boot_argn("assert", &mach_assert, sizeof (mach_assert));
435
436 if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) {
437 default_preemption_rate = boot_arg;
438 }
439 if (PE_parse_boot_argn("bg_preempt", &boot_arg, sizeof (boot_arg))) {
440 default_bg_preemption_rate = boot_arg;
441 }
442
443 PE_parse_boot_argn("yield_delay_us", &yield_delay_us, sizeof (yield_delay_us));
444
445 machine_conf();
446
447 /*
448 * Kick off the kernel bootstrap.
449 */
450 kernel_bootstrap();
451 /* NOTREACHED */
452 }
453
/*
 * Pre-lockdown hook: on KTRR kernel-integrity configurations, records the
 * read-only region bounds via rorgn_stash_range(). A no-op otherwise.
 */
void
machine_lockdown_preflight(void)
{
#if CONFIG_KERNEL_INTEGRITY && defined(KERNEL_INTEGRITY_KTRR)
	rorgn_stash_range();
#endif
}
464
/*
 * Engages whichever kernel-integrity mechanisms are configured: the
 * Watchtower monitor call (when KERNEL_INTEGRITY_WT and MONITOR are set)
 * followed by the KTRR region lock (when KERNEL_INTEGRITY_KTRR is defined).
 * Compiles to an empty function without CONFIG_KERNEL_INTEGRITY.
 */
void
machine_lockdown(void)
{
#if CONFIG_KERNEL_INTEGRITY

#if KERNEL_INTEGRITY_WT
	/*
	 * Watchtower
	 *
	 * Tell the monitor that early kernel bootstrap is complete. From
	 * here on it enforces the integrity of kernel text, rodata and page
	 * tables.
	 */
#ifdef MONITOR
	monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
#endif
#endif /* KERNEL_INTEGRITY_WT */

#if defined(KERNEL_INTEGRITY_KTRR)
	/*
	 * KTRR
	 *
	 * Lock the physical KTRR region. The region is read-only, and memory
	 * outside it is not executable at EL1.
	 */
	rorgn_lockdown();
#endif /* defined(KERNEL_INTEGRITY_KTRR)*/

#endif /* CONFIG_KERNEL_INTEGRITY */
}
495
496 char *
497 machine_boot_info(
498 __unused char *buf,
499 __unused vm_size_t size)
500 {
501 return (PE_boot_args());
502 }
503
504 void
505 machine_conf(void)
506 {
507 /*
508 * This is known to be inaccurate. mem_size should always be capped at 2 GB
509 */
510 machine_info.memory_size = (uint32_t)mem_size;
511 }
512
513 void
514 machine_init(void)
515 {
516 debug_log_init();
517 clock_config();
518 is_clock_configured = TRUE;
519 if (debug_enabled)
520 pmap_map_globals();
521 }
522
523 void
524 slave_machine_init(__unused void *param)
525 {
526 cpu_machine_init(); /* Initialize the processor */
527 clock_init(); /* Init the clock */
528 }
529
530 /*
531 * Routine: machine_processor_shutdown
532 * Function:
533 */
534 thread_t
535 machine_processor_shutdown(
536 __unused thread_t thread,
537 void (*doshutdown) (processor_t),
538 processor_t processor)
539 {
540 return (Shutdown_context(doshutdown, processor));
541 }
542
543 /*
544 * Routine: ml_init_max_cpus
545 * Function:
546 */
547 void
548 ml_init_max_cpus(unsigned int max_cpus)
549 {
550 boolean_t current_state;
551
552 current_state = ml_set_interrupts_enabled(FALSE);
553 if (max_cpus_initialized != MAX_CPUS_SET) {
554 machine_info.max_cpus = max_cpus;
555 machine_info.physical_cpu_max = max_cpus;
556 machine_info.logical_cpu_max = max_cpus;
557 if (max_cpus_initialized == MAX_CPUS_WAIT)
558 thread_wakeup((event_t) & max_cpus_initialized);
559 max_cpus_initialized = MAX_CPUS_SET;
560 }
561 (void) ml_set_interrupts_enabled(current_state);
562 }
563
564 /*
565 * Routine: ml_get_max_cpus
566 * Function:
567 */
568 unsigned int
569 ml_get_max_cpus(void)
570 {
571 boolean_t current_state;
572
573 current_state = ml_set_interrupts_enabled(FALSE);
574 if (max_cpus_initialized != MAX_CPUS_SET) {
575 max_cpus_initialized = MAX_CPUS_WAIT;
576 assert_wait((event_t) & max_cpus_initialized, THREAD_UNINT);
577 (void) thread_block(THREAD_CONTINUE_NULL);
578 }
579 (void) ml_set_interrupts_enabled(current_state);
580 return (machine_info.max_cpus);
581 }
582
583 /*
584 * Routine: ml_init_lock_timeout
585 * Function:
586 */
587 void
588 ml_init_lock_timeout(void)
589 {
590 uint64_t abstime;
591 uint64_t mtxspin;
592 uint64_t default_timeout_ns = NSEC_PER_SEC>>2;
593 uint32_t slto;
594
595 if (PE_parse_boot_argn("slto_us", &slto, sizeof (slto)))
596 default_timeout_ns = slto * NSEC_PER_USEC;
597
598 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
599 LockTimeOutUsec = (uint32_t)(abstime / NSEC_PER_USEC);
600 LockTimeOut = (uint32_t)abstime;
601
602 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
603 if (mtxspin > USEC_PER_SEC>>4)
604 mtxspin = USEC_PER_SEC>>4;
605 nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime);
606 } else {
607 nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime);
608 }
609 MutexSpin = abstime;
610 }
611
612 /*
613 * This is called from the machine-independent routine cpu_up()
614 * to perform machine-dependent info updates.
615 */
616 void
617 ml_cpu_up(void)
618 {
619 hw_atomic_add(&machine_info.physical_cpu, 1);
620 hw_atomic_add(&machine_info.logical_cpu, 1);
621 }
622
623 /*
624 * This is called from the machine-independent routine cpu_down()
625 * to perform machine-dependent info updates.
626 */
627 void
628 ml_cpu_down(void)
629 {
630 cpu_data_t *cpu_data_ptr;
631
632 hw_atomic_sub(&machine_info.physical_cpu, 1);
633 hw_atomic_sub(&machine_info.logical_cpu, 1);
634
635 /*
636 * If we want to deal with outstanding IPIs, we need to
637 * do relatively early in the processor_doshutdown path,
638 * as we pend decrementer interrupts using the IPI
639 * mechanism if we cannot immediately service them (if
640 * IRQ is masked). Do so now.
641 *
642 * We aren't on the interrupt stack here; would it make
643 * more sense to disable signaling and then enable
644 * interrupts? It might be a bit cleaner.
645 */
646 cpu_data_ptr = getCpuDatap();
647 cpu_data_ptr->cpu_running = FALSE;
648 cpu_signal_handler_internal(TRUE);
649 }
650
651 /*
652 * Routine: ml_cpu_get_info
653 * Function:
654 */
655 void
656 ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
657 {
658 cache_info_t *cpuid_cache_info;
659
660 cpuid_cache_info = cache_info();
661 ml_cpu_info->vector_unit = 0;
662 ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
663 ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
664 ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;
665
666 #if (__ARM_ARCH__ >= 7)
667 ml_cpu_info->l2_settings = 1;
668 ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
669 #else
670 ml_cpu_info->l2_settings = 0;
671 ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
672 #endif
673 ml_cpu_info->l3_settings = 0;
674 ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
675 }
676
677 unsigned int
678 ml_get_machine_mem(void)
679 {
680 return (machine_info.memory_size);
681 }
682
683 __attribute__((noreturn))
684 void
685 halt_all_cpus(boolean_t reboot)
686 {
687 if (reboot) {
688 printf("MACH Reboot\n");
689 PEHaltRestart(kPERestartCPU);
690 } else {
691 printf("CPU halted\n");
692 PEHaltRestart(kPEHaltCPU);
693 }
694 while (1);
695 }
696
697 __attribute__((noreturn))
698 void
699 halt_cpu(void)
700 {
701 halt_all_cpus(FALSE);
702 }
703
704 /*
705 * Routine: machine_signal_idle
706 * Function:
707 */
708 void
709 machine_signal_idle(
710 processor_t processor)
711 {
712 cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
713 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
714 }
715
716 void
717 machine_signal_idle_deferred(
718 processor_t processor)
719 {
720 cpu_signal_deferred(processor_to_cpu_datap(processor));
721 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
722 }
723
724 void
725 machine_signal_idle_cancel(
726 processor_t processor)
727 {
728 cpu_signal_cancel(processor_to_cpu_datap(processor));
729 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
730 }
731
732 /*
733 * Routine: ml_install_interrupt_handler
734 * Function: Initialize Interrupt Handler
735 */
736 void
737 ml_install_interrupt_handler(
738 void *nub,
739 int source,
740 void *target,
741 IOInterruptHandler handler,
742 void *refCon)
743 {
744 cpu_data_t *cpu_data_ptr;
745 boolean_t current_state;
746
747 current_state = ml_set_interrupts_enabled(FALSE);
748 cpu_data_ptr = getCpuDatap();
749
750 cpu_data_ptr->interrupt_nub = nub;
751 cpu_data_ptr->interrupt_source = source;
752 cpu_data_ptr->interrupt_target = target;
753 cpu_data_ptr->interrupt_handler = handler;
754 cpu_data_ptr->interrupt_refCon = refCon;
755
756 cpu_data_ptr->interrupts_enabled = TRUE;
757 (void) ml_set_interrupts_enabled(current_state);
758
759 initialize_screen(NULL, kPEAcquireScreen);
760 }
761
/*
 *	Routine:        ml_init_interrupt
 *	Function:       Interrupt initialisation hook; intentionally empty
 *			in this file.
 */
void
ml_init_interrupt(void)
{
	/* nothing to do */
}
770
771 /*
772 * Routine: ml_init_timebase
773 * Function: register and setup Timebase, Decremeter services
774 */
775 void ml_init_timebase(
776 void *args,
777 tbd_ops_t tbd_funcs,
778 vm_offset_t int_address,
779 vm_offset_t int_value __unused)
780 {
781 cpu_data_t *cpu_data_ptr;
782
783 cpu_data_ptr = (cpu_data_t *)args;
784
785 if ((cpu_data_ptr == &BootCpuData)
786 && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
787 rtclock_timebase_func = *tbd_funcs;
788 rtclock_timebase_addr = int_address;
789 }
790 }
791
792 void
793 ml_parse_cpu_topology(void)
794 {
795 DTEntry entry, child __unused;
796 OpaqueDTEntryIterator iter;
797 uint32_t cpu_boot_arg;
798 int err;
799
800 cpu_boot_arg = MAX_CPUS;
801
802 PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
803
804 err = DTLookupEntry(NULL, "/cpus", &entry);
805 assert(err == kSuccess);
806
807 err = DTInitEntryIterator(entry, &iter);
808 assert(err == kSuccess);
809
810 while (kSuccess == DTIterateEntries(&iter, &child)) {
811 unsigned int propSize;
812 void *prop = NULL;
813 int cpu_id = avail_cpus++;
814
815 if (kSuccess == DTGetProperty(child, "cpu-id", &prop, &propSize))
816 cpu_id = *((int32_t*)prop);
817
818 assert(cpu_id < MAX_CPUS);
819 assert(cpu_phys_ids[cpu_id] == (uint32_t)-1);
820
821 if (boot_cpu == -1) {
822 if (kSuccess != DTGetProperty(child, "state", &prop, &propSize))
823 panic("unable to retrieve state for cpu %d", cpu_id);
824
825 if (strncmp((char*)prop, "running", propSize) == 0) {
826 boot_cpu = cpu_id;
827 }
828 }
829 if (kSuccess != DTGetProperty(child, "reg", &prop, &propSize))
830 panic("unable to retrieve physical ID for cpu %d", cpu_id);
831
832 cpu_phys_ids[cpu_id] = *((uint32_t*)prop);
833
834 if ((cpu_id > max_cpu_number) && ((cpu_id == boot_cpu) || (avail_cpus <= cpu_boot_arg)))
835 max_cpu_number = cpu_id;
836 }
837
838 if (avail_cpus > cpu_boot_arg)
839 avail_cpus = cpu_boot_arg;
840
841 if (avail_cpus == 0)
842 panic("No cpus found!");
843
844 if (boot_cpu == -1)
845 panic("unable to determine boot cpu!");
846
847 /*
848 * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
849 * not be booting from cpu 0. Userspace will consume the current
850 * CPU number through this register. For non-boot cores, this is
851 * done in start.s (start_cpu) using the cpu_number field of the
852 * per-cpu data object.
853 */
854 assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
855 __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)boot_cpu);
856 }
857
858 unsigned int
859 ml_get_cpu_count(void)
860 {
861 return avail_cpus;
862 }
863
864 int
865 ml_get_boot_cpu_number(void)
866 {
867 return boot_cpu;
868 }
869
870 cluster_type_t
871 ml_get_boot_cluster(void)
872 {
873 return boot_cluster;
874 }
875
876 int
877 ml_get_cpu_number(uint32_t phys_id)
878 {
879 for (int log_id = 0; log_id <= ml_get_max_cpu_number(); ++log_id) {
880 if (cpu_phys_ids[log_id] == phys_id)
881 return log_id;
882 }
883 return -1;
884 }
885
886 int
887 ml_get_max_cpu_number(void)
888 {
889 return max_cpu_number;
890 }
891
892
893 void ml_lockdown_init() {
894 lockdown_handler_grp = lck_grp_alloc_init("lockdown_handler", NULL);
895 assert(lockdown_handler_grp != NULL);
896
897 lck_mtx_init(&lockdown_handler_lck, lockdown_handler_grp, NULL);
898
899 }
900
901 kern_return_t
902 ml_lockdown_handler_register(lockdown_handler_t f, void *this)
903 {
904 if (lockdown_handler || !f) {
905 return KERN_FAILURE;
906 }
907
908 lck_mtx_lock(&lockdown_handler_lck);
909 lockdown_handler = f;
910 lockdown_this = this;
911
912 #if !(defined(KERNEL_INTEGRITY_KTRR))
913 lockdown_done=1;
914 lockdown_handler(this);
915 #else
916 if (lockdown_done) {
917 lockdown_handler(this);
918 }
919 #endif
920 lck_mtx_unlock(&lockdown_handler_lck);
921
922 return KERN_SUCCESS;
923 }
924
925 void ml_lockdown_run_handler() {
926 lck_mtx_lock(&lockdown_handler_lck);
927 assert(!lockdown_done);
928
929 lockdown_done = 1;
930 if (lockdown_handler) {
931 lockdown_handler(lockdown_this);
932 }
933 lck_mtx_unlock(&lockdown_handler_lck);
934 }
935
936 kern_return_t
937 ml_processor_register(
938 ml_processor_info_t * in_processor_info,
939 processor_t * processor_out,
940 ipi_handler_t * ipi_handler)
941 {
942 cpu_data_t *this_cpu_datap;
943 processor_set_t pset;
944 boolean_t is_boot_cpu;
945 static unsigned int reg_cpu_count = 0;
946
947 if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number())
948 return KERN_FAILURE;
949
950 if ((unsigned int)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= avail_cpus)
951 return KERN_FAILURE;
952
953 if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
954 is_boot_cpu = FALSE;
955 this_cpu_datap = cpu_data_alloc(FALSE);
956 cpu_data_init(this_cpu_datap);
957 } else {
958 this_cpu_datap = &BootCpuData;
959 is_boot_cpu = TRUE;
960 }
961
962 assert(in_processor_info->log_id < MAX_CPUS);
963
964 this_cpu_datap->cpu_id = in_processor_info->cpu_id;
965
966 this_cpu_datap->cpu_console_buf = console_cpu_alloc(is_boot_cpu);
967 if (this_cpu_datap->cpu_console_buf == (void *)(NULL))
968 goto processor_register_error;
969
970 if (!is_boot_cpu) {
971 this_cpu_datap->cpu_number = in_processor_info->log_id;
972
973 if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS)
974 goto processor_register_error;
975 }
976
977 this_cpu_datap->cpu_idle_notify = (void *) in_processor_info->processor_idle;
978 this_cpu_datap->cpu_cache_dispatch = in_processor_info->platform_cache_dispatch;
979 nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
980 this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
981
982 this_cpu_datap->idle_timer_notify = (void *) in_processor_info->idle_timer;
983 this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
984
985 this_cpu_datap->platform_error_handler = (void *) in_processor_info->platform_error_handler;
986 this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
987 this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
988 this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
989
990 this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
991 this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
992 this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
993 this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
994 this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
995 this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
996
997 this_cpu_datap->cluster_master = is_boot_cpu;
998
999 pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
1000 assert(pset != NULL);
1001 kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
1002
1003 if (!is_boot_cpu) {
1004 processor_init((struct processor *)this_cpu_datap->cpu_processor,
1005 this_cpu_datap->cpu_number, pset);
1006
1007 if (this_cpu_datap->cpu_l2_access_penalty) {
1008 /*
1009 * Cores that have a non-zero L2 access penalty compared
1010 * to the boot processor should be de-prioritized by the
1011 * scheduler, so that threads use the cores with better L2
1012 * preferentially.
1013 */
1014 processor_set_primary(this_cpu_datap->cpu_processor,
1015 master_processor);
1016 }
1017 }
1018
1019 *processor_out = this_cpu_datap->cpu_processor;
1020 *ipi_handler = cpu_signal_handler;
1021 if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL)
1022 *in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
1023
1024 #if KPC
1025 if (kpc_register_cpu(this_cpu_datap) != TRUE)
1026 goto processor_register_error;
1027 #endif
1028
1029 if (!is_boot_cpu) {
1030 early_random_cpu_init(this_cpu_datap->cpu_number);
1031 // now let next CPU register itself
1032 OSIncrementAtomic((SInt32*)&real_ncpus);
1033 }
1034
1035 return KERN_SUCCESS;
1036
1037 processor_register_error:
1038 #if KPC
1039 kpc_unregister_cpu(this_cpu_datap);
1040 #endif
1041 if (!is_boot_cpu)
1042 cpu_data_free(this_cpu_datap);
1043
1044 return KERN_FAILURE;
1045 }
1046
1047 void
1048 ml_init_arm_debug_interface(
1049 void * in_cpu_datap,
1050 vm_offset_t virt_address)
1051 {
1052 ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1053 do_debugid();
1054 }
1055
1056 /*
1057 * Routine: init_ast_check
1058 * Function:
1059 */
1060 void
1061 init_ast_check(
1062 __unused processor_t processor)
1063 {
1064 }
1065
1066 /*
1067 * Routine: cause_ast_check
1068 * Function:
1069 */
1070 void
1071 cause_ast_check(
1072 processor_t processor)
1073 {
1074 if (current_processor() != processor) {
1075 cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1076 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1077 }
1078 }
1079
1080 extern uint32_t cpu_idle_count;
1081
1082 void ml_get_power_state(boolean_t *icp, boolean_t *pidlep) {
1083 *icp = ml_at_interrupt_context();
1084 *pidlep = (cpu_idle_count == real_ncpus);
1085 }
1086
/*
 *	Routine:        ml_cause_interrupt
 *	Function:       Generate a fake interrupt. Not implemented: the body
 *			is empty.
 */
void
ml_cause_interrupt(void)
{
	/* BS_XXX */
}
1096
1097 /* Map memory map IO space */
1098 vm_offset_t
1099 ml_io_map(
1100 vm_offset_t phys_addr,
1101 vm_size_t size)
1102 {
1103 return (io_map(phys_addr, size, VM_WIMG_IO));
1104 }
1105
1106 vm_offset_t
1107 ml_io_map_wcomb(
1108 vm_offset_t phys_addr,
1109 vm_size_t size)
1110 {
1111 return (io_map(phys_addr, size, VM_WIMG_WCOMB));
1112 }
1113
1114 /* boot memory allocation */
1115 vm_offset_t
1116 ml_static_malloc(
1117 __unused vm_size_t size)
1118 {
1119 return ((vm_offset_t) NULL);
1120 }
1121
1122 vm_map_address_t
1123 ml_map_high_window(
1124 vm_offset_t phys_addr,
1125 vm_size_t len)
1126 {
1127 return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1128 }
1129
1130 vm_offset_t
1131 ml_static_ptovirt(
1132 vm_offset_t paddr)
1133 {
1134 return phystokv(paddr);
1135 }
1136
1137 vm_offset_t
1138 ml_static_slide(
1139 vm_offset_t vaddr)
1140 {
1141 return phystokv(vaddr + vm_kernel_slide - gVirtBase + gPhysBase);
1142 }
1143
1144 vm_offset_t
1145 ml_static_unslide(
1146 vm_offset_t vaddr)
1147 {
1148 return (ml_static_vtop(vaddr) - gPhysBase + gVirtBase - vm_kernel_slide) ;
1149 }
1150
1151 extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
1152
1153 kern_return_t
1154 ml_static_protect(
1155 vm_offset_t vaddr, /* kernel virtual address */
1156 vm_size_t size,
1157 vm_prot_t new_prot)
1158 {
1159 pt_entry_t arm_prot = 0;
1160 pt_entry_t arm_block_prot = 0;
1161 vm_offset_t vaddr_cur;
1162 ppnum_t ppn;
1163 kern_return_t result = KERN_SUCCESS;
1164
1165 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1166 panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) VM_MIN_KERNEL_ADDRESS);
1167 return KERN_FAILURE;
1168 }
1169
1170 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1171
1172 if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
1173 panic("ml_static_protect(): WX request on %p", (void *) vaddr);
1174 }
1175
1176 /* Set up the protection bits, and block bits so we can validate block mappings. */
1177 if (new_prot & VM_PROT_WRITE) {
1178 arm_prot |= ARM_PTE_AP(AP_RWNA);
1179 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
1180 } else {
1181 arm_prot |= ARM_PTE_AP(AP_RONA);
1182 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
1183 }
1184
1185 arm_prot |= ARM_PTE_NX;
1186 arm_block_prot |= ARM_TTE_BLOCK_NX;
1187
1188 if (!(new_prot & VM_PROT_EXECUTE)) {
1189 arm_prot |= ARM_PTE_PNX;
1190 arm_block_prot |= ARM_TTE_BLOCK_PNX;
1191 }
1192
1193 for (vaddr_cur = vaddr;
1194 vaddr_cur < trunc_page_64(vaddr + size);
1195 vaddr_cur += PAGE_SIZE) {
1196 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1197 if (ppn != (vm_offset_t) NULL) {
1198 tt_entry_t *tte2;
1199 pt_entry_t *pte_p;
1200 pt_entry_t ptmp;
1201
1202
1203 tte2 = arm_kva_to_tte(vaddr_cur);
1204
1205 if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1206 if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
1207 ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
1208 /*
1209 * We can support ml_static_protect on a block mapping if the mapping already has
1210 * the desired protections. We still want to run checks on a per-page basis.
1211 */
1212 continue;
1213 }
1214
1215 result = KERN_FAILURE;
1216 break;
1217 }
1218
1219 pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
1220 ptmp = *pte_p;
1221
1222 if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
1223 /*
1224 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
1225 * protections do not match the desired protections, then we will fail (as we cannot update
1226 * this mapping without updating other mappings as well).
1227 */
1228 result = KERN_FAILURE;
1229 break;
1230 }
1231
1232 __unreachable_ok_push
1233 if (TEST_PAGE_RATIO_4) {
1234 {
1235 unsigned int i;
1236 pt_entry_t *ptep_iter;
1237
1238 ptep_iter = pte_p;
1239 for (i=0; i<4; i++, ptep_iter++) {
1240 /* Note that there is a hole in the HINT sanity checking here. */
1241 ptmp = *ptep_iter;
1242
1243 /* We only need to update the page tables if the protections do not match. */
1244 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1245 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1246 *ptep_iter = ptmp;
1247 }
1248 }
1249 }
1250 #ifndef __ARM_L1_PTW__
1251 FlushPoC_DcacheRegion( trunc_page_32(pte_p), 4*sizeof(*pte_p));
1252 #endif
1253 } else {
1254 ptmp = *pte_p;
1255
1256 /* We only need to update the page tables if the protections do not match. */
1257 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1258 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1259 *pte_p = ptmp;
1260 }
1261
1262 #ifndef __ARM_L1_PTW__
1263 FlushPoC_DcacheRegion( trunc_page_32(pte_p), sizeof(*pte_p));
1264 #endif
1265 }
1266 __unreachable_ok_pop
1267 }
1268 }
1269
1270 if (vaddr_cur > vaddr) {
1271 assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
1272 flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
1273 }
1274
1275
1276 return result;
1277 }
1278
1279 /*
1280 * Routine: ml_static_mfree
1281 * Function:
1282 */
1283 void
1284 ml_static_mfree(
1285 vm_offset_t vaddr,
1286 vm_size_t size)
1287 {
1288 vm_offset_t vaddr_cur;
1289 ppnum_t ppn;
1290 uint32_t freed_pages = 0;
1291
1292 /* It is acceptable (if bad) to fail to free. */
1293 if (vaddr < VM_MIN_KERNEL_ADDRESS)
1294 return;
1295
1296 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1297
1298 for (vaddr_cur = vaddr;
1299 vaddr_cur < trunc_page_64(vaddr + size);
1300 vaddr_cur += PAGE_SIZE) {
1301
1302 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1303 if (ppn != (vm_offset_t) NULL) {
1304 /*
1305 * It is not acceptable to fail to update the protections on a page
1306 * we will release to the VM. We need to either panic or continue.
1307 * For now, we'll panic (to help flag if there is memory we can
1308 * reclaim).
1309 */
1310 if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
1311 panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
1312 }
1313
1314 #if 0
1315 /*
1316 * Must NOT tear down the "V==P" mapping for vaddr_cur as the zone alias scheme
1317 * relies on the persistence of these mappings for all time.
1318 */
1319 // pmap_remove(kernel_pmap, (addr64_t) vaddr_cur, (addr64_t) (vaddr_cur + PAGE_SIZE));
1320 #endif
1321
1322 vm_page_create(ppn, (ppn + 1));
1323 freed_pages++;
1324 }
1325 }
1326 vm_page_lockspin_queues();
1327 vm_page_wire_count -= freed_pages;
1328 vm_page_wire_count_initial -= freed_pages;
1329 vm_page_unlock_queues();
1330 #if DEBUG
1331 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
1332 #endif
1333 }
1334
1335
1336 /* virtual to physical on wired pages */
1337 vm_offset_t
1338 ml_vtophys(vm_offset_t vaddr)
1339 {
1340 return kvtophys(vaddr);
1341 }
1342
1343 /*
1344 * Routine: ml_nofault_copy
1345 * Function: Perform a physical mode copy if the source and destination have
1346 * valid translations in the kernel pmap. If translations are present, they are
1347 * assumed to be wired; e.g., no attempt is made to guarantee that the
1348 * translations obtained remain valid for the duration of the copy process.
1349 */
1350 vm_size_t
1351 ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
1352 {
1353 addr64_t cur_phys_dst, cur_phys_src;
1354 vm_size_t count, nbytes = 0;
1355
1356 while (size > 0) {
1357 if (!(cur_phys_src = kvtophys(virtsrc)))
1358 break;
1359 if (!(cur_phys_dst = kvtophys(virtdst)))
1360 break;
1361 if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
1362 !pmap_valid_address(trunc_page_64(cur_phys_src)))
1363 break;
1364 count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
1365 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK)))
1366 count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
1367 if (count > size)
1368 count = size;
1369
1370 bcopy_phys(cur_phys_src, cur_phys_dst, count);
1371
1372 nbytes += count;
1373 virtsrc += count;
1374 virtdst += count;
1375 size -= count;
1376 }
1377
1378 return nbytes;
1379 }
1380
1381 /*
1382 * Routine: ml_validate_nofault
1383 * Function: Validate that ths address range has a valid translations
1384 * in the kernel pmap. If translations are present, they are
1385 * assumed to be wired; i.e. no attempt is made to guarantee
1386 * that the translation persist after the check.
1387 * Returns: TRUE if the range is mapped and will not cause a fault,
1388 * FALSE otherwise.
1389 */
1390
1391 boolean_t ml_validate_nofault(
1392 vm_offset_t virtsrc, vm_size_t size)
1393 {
1394 addr64_t cur_phys_src;
1395 uint32_t count;
1396
1397 while (size > 0) {
1398 if (!(cur_phys_src = kvtophys(virtsrc)))
1399 return FALSE;
1400 if (!pmap_valid_address(trunc_page_64(cur_phys_src)))
1401 return FALSE;
1402 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
1403 if (count > size)
1404 count = (uint32_t)size;
1405
1406 virtsrc += count;
1407 size -= count;
1408 }
1409
1410 return TRUE;
1411 }
1412
1413 void
1414 ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
1415 {
1416 *phys_addr = 0;
1417 *size = 0;
1418 }
1419
1420 void
1421 active_rt_threads(__unused boolean_t active)
1422 {
1423 }
1424
1425 static void cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2) {
1426 return;
1427 }
1428
1429 cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
1430
1431 void cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb) {
1432 if (cpu_qos_cb != NULL) {
1433 cpu_qos_update = cpu_qos_cb;
1434 } else {
1435 cpu_qos_update = cpu_qos_cb_default;
1436 }
1437 }
1438
1439 void
1440 thread_tell_urgency(int urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
1441 {
1442 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
1443
1444 cpu_qos_update(urgency, rt_period, rt_deadline);
1445
1446 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
1447 }
1448
1449 void
1450 machine_run_count(__unused uint32_t count)
1451 {
1452 }
1453
1454 processor_t
1455 machine_choose_processor(__unused processor_set_t pset, processor_t processor)
1456 {
1457 return (processor);
1458 }
1459
1460 #if KASAN
1461 vm_offset_t ml_stack_base(void);
1462 vm_size_t ml_stack_size(void);
1463
1464 vm_offset_t
1465 ml_stack_base(void)
1466 {
1467 uintptr_t local = (uintptr_t) &local;
1468 vm_offset_t intstack_top_ptr;
1469
1470 intstack_top_ptr = getCpuDatap()->intstack_top;
1471 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1472 return intstack_top_ptr - INTSTACK_SIZE;
1473 } else {
1474 return current_thread()->kernel_stack;
1475 }
1476 }
1477 vm_size_t
1478 ml_stack_size(void)
1479 {
1480 uintptr_t local = (uintptr_t) &local;
1481 vm_offset_t intstack_top_ptr;
1482
1483 intstack_top_ptr = getCpuDatap()->intstack_top;
1484 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1485 return INTSTACK_SIZE;
1486 } else {
1487 return kernel_stack_size;
1488 }
1489 }
1490 #endif
1491
1492 boolean_t machine_timeout_suspended(void) {
1493 return FALSE;
1494 }
1495
1496 kern_return_t
1497 ml_interrupt_prewarm(__unused uint64_t deadline)
1498 {
1499 return KERN_FAILURE;
1500 }
1501
1502 /*
1503 * Assumes fiq, irq disabled.
1504 */
1505 void
1506 ml_set_decrementer(uint32_t dec_value)
1507 {
1508 cpu_data_t *cdp = getCpuDatap();
1509
1510 assert(ml_get_interrupts_enabled() == FALSE);
1511 cdp->cpu_decrementer = dec_value;
1512
1513 if (cdp->cpu_set_decrementer_func) {
1514 ((void (*)(uint32_t))cdp->cpu_set_decrementer_func)(dec_value);
1515 } else {
1516 __asm__ volatile("msr CNTP_TVAL_EL0, %0" : : "r"((uint64_t)dec_value));
1517 }
1518 }
1519
/*
 * Read the CNTPCT_EL0 counter.
 *
 * An ISB is issued first.  It is required by ARMV7C.b section B8.1.2 &
 * ARMv8 section D6.1.2: "Reads of CNTPCT[_EL0] can occur speculatively and
 * out of order relative to other instructions executed on the same
 * processor."
 */
uint64_t ml_get_hwclock()
{
	uint64_t counter;

	__asm__ volatile("isb\n"
			 "mrs %0, CNTPCT_EL0"
			 : "=r"(counter));

	return counter;
}
1533
1534 uint64_t
1535 ml_get_timebase()
1536 {
1537 return (ml_get_hwclock() + getCpuDatap()->cpu_base_timebase);
1538 }
1539
1540 uint32_t
1541 ml_get_decrementer()
1542 {
1543 cpu_data_t *cdp = getCpuDatap();
1544 uint32_t dec;
1545
1546 assert(ml_get_interrupts_enabled() == FALSE);
1547
1548 if (cdp->cpu_get_decrementer_func) {
1549 dec = ((uint32_t (*)(void))cdp->cpu_get_decrementer_func)();
1550 } else {
1551 uint64_t wide_val;
1552
1553 __asm__ volatile("mrs %0, CNTP_TVAL_EL0" : "=r"(wide_val));
1554 dec = (uint32_t)wide_val;
1555 assert(wide_val == (uint64_t)dec);
1556 }
1557
1558 return dec;
1559 }
1560
1561 boolean_t
1562 ml_get_timer_pending()
1563 {
1564 uint64_t cntp_ctl;
1565
1566 __asm__ volatile("mrs %0, CNTP_CTL_EL0" : "=r"(cntp_ctl));
1567 return ((cntp_ctl & CNTP_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
1568 }
1569
1570 boolean_t
1571 ml_wants_panic_trap_to_debugger(void)
1572 {
1573 boolean_t result = FALSE;
1574 return result;
1575 }
1576
1577 static void
1578 cache_trap_error(thread_t thread, vm_map_address_t fault_addr)
1579 {
1580 mach_exception_data_type_t exc_data[2];
1581 arm_saved_state_t *regs = get_user_regs(thread);
1582
1583 set_saved_state_far(regs, fault_addr);
1584
1585 exc_data[0] = KERN_INVALID_ADDRESS;
1586 exc_data[1] = fault_addr;
1587
1588 exception_triage(EXC_BAD_ACCESS, exc_data, 2);
1589 }
1590
1591 static void
1592 cache_trap_recover()
1593 {
1594 vm_map_address_t fault_addr;
1595
1596 __asm__ volatile("mrs %0, FAR_EL1" : "=r"(fault_addr));
1597
1598 cache_trap_error(current_thread(), fault_addr);
1599 }
1600
1601 static void
1602 dcache_flush_trap(vm_map_address_t start, vm_map_size_t size)
1603 {
1604 vm_map_address_t end = start + size;
1605 thread_t thread = current_thread();
1606 vm_offset_t old_recover = thread->recover;
1607
1608 /* Check bounds */
1609 if (task_has_64Bit_addr(current_task())) {
1610 if (end > MACH_VM_MAX_ADDRESS) {
1611 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1612 }
1613 } else {
1614 if (end > VM_MAX_ADDRESS) {
1615 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1616 }
1617 }
1618
1619 if (start > end) {
1620 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1621 }
1622
1623 /* Set recovery function */
1624 thread->recover = (vm_address_t)cache_trap_recover;
1625
1626 /*
1627 * We're coherent on Apple ARM64 CPUs, so this could be a nop. However,
1628 * if the region given us is bad, it would be good to catch it and
1629 * crash, ergo we still do the flush.
1630 */
1631 FlushPoC_DcacheRegion(start, (uint32_t)size);
1632
1633 /* Restore recovery function */
1634 thread->recover = old_recover;
1635
1636 /* Return (caller does exception return) */
1637 }
1638
1639 static void
1640 icache_invalidate_trap(vm_map_address_t start, vm_map_size_t size)
1641 {
1642 vm_map_address_t end = start + size;
1643 thread_t thread = current_thread();
1644 vm_offset_t old_recover = thread->recover;
1645
1646 /* Check bounds */
1647 if (task_has_64Bit_addr(current_task())) {
1648 if (end > MACH_VM_MAX_ADDRESS) {
1649 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1650 }
1651 } else {
1652 if (end > VM_MAX_ADDRESS) {
1653 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1654 }
1655 }
1656
1657 if (start > end) {
1658 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1659 }
1660
1661 /* Set recovery function */
1662 thread->recover = (vm_address_t)cache_trap_recover;
1663
1664 CleanPoU_DcacheRegion(start, (uint32_t) size);
1665
1666 /* Invalidate iCache to point of unification */
1667 #if __ARM_IC_NOALIAS_ICACHE__
1668 InvalidatePoU_IcacheRegion(start, (uint32_t)size);
1669 #else
1670 InvalidatePoU_Icache();
1671 #endif
1672
1673 /* Restore recovery function */
1674 thread->recover = old_recover;
1675
1676 /* Return (caller does exception return) */
1677 }
1678
1679 __attribute__((noreturn))
1680 void
1681 platform_syscall(arm_saved_state_t *state)
1682 {
1683 uint32_t code;
1684
1685 #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
1686
1687 code = (uint32_t)get_saved_state_reg(state, 3);
1688 switch (code) {
1689 case 0:
1690 /* I-Cache flush */
1691 platform_syscall_kprintf("icache flush requested.\n");
1692 icache_invalidate_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
1693 break;
1694 case 1:
1695 /* D-Cache flush */
1696 platform_syscall_kprintf("dcache flush requested.\n");
1697 dcache_flush_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
1698 break;
1699 case 2:
1700 /* set cthread */
1701 platform_syscall_kprintf("set cthread self.\n");
1702 thread_set_cthread_self(get_saved_state_reg(state, 0));
1703 break;
1704 case 3:
1705 /* get cthread */
1706 platform_syscall_kprintf("get cthread self.\n");
1707 set_saved_state_reg(state, 0, thread_get_cthread_self());
1708 break;
1709 default:
1710 platform_syscall_kprintf("unknown: %d\n", code);
1711 break;
1712 }
1713
1714 thread_exception_return();
1715 }
1716
1717 static void
1718 _enable_timebase_event_stream(uint32_t bit_index)
1719 {
1720 uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */
1721
1722 if (bit_index >= 64) {
1723 panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
1724 }
1725
1726 __asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl));
1727
1728 cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
1729 cntkctl |= CNTKCTL_EL1_EVNTEN;
1730 cntkctl |= CNTKCTL_EL1_EVENTDIR; /* 1->0; why not? */
1731
1732 /*
1733 * If the SOC supports it (and it isn't broken), enable
1734 * EL0 access to the physical timebase register.
1735 */
1736 if (user_timebase_allowed()) {
1737 cntkctl |= CNTKCTL_EL1_PL0PCTEN;
1738 }
1739
1740 __asm__ volatile ("msr CNTKCTL_EL1, %0" : : "r"(cntkctl));
1741 }
1742
1743 /*
1744 * Turn timer on, unmask that interrupt.
1745 */
1746 static void
1747 _enable_virtual_timer(void)
1748 {
1749 uint64_t cntvctl = CNTP_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */
1750
1751 __asm__ volatile ("msr CNTP_CTL_EL0, %0" : : "r"(cntvctl));
1752 }
1753
1754 void
1755 fiq_context_init(boolean_t enable_fiq __unused)
1756 {
1757 _enable_timebase_event_stream(fiq_eventi);
1758
1759 /* Interrupts still disabled. */
1760 assert(ml_get_interrupts_enabled() == FALSE);
1761 _enable_virtual_timer();
1762 }
1763
1764 void
1765 fiq_context_bootstrap(boolean_t enable_fiq)
1766 {
1767 #if defined(APPLE_ARM64_ARCH_FAMILY) || defined(BCM2837)
1768 /* Could fill in our own ops here, if we needed them */
1769 uint64_t ticks_per_sec, ticks_per_event, events_per_sec;
1770 uint32_t bit_index;
1771
1772 ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
1773 #if defined(ARM_BOARD_WFE_TIMEOUT_NS)
1774 events_per_sec = 1000000000 / ARM_BOARD_WFE_TIMEOUT_NS;
1775 #else
1776 /* Default to 1usec (or as close as we can get) */
1777 events_per_sec = 1000000;
1778 #endif
1779 ticks_per_event = ticks_per_sec / events_per_sec;
1780 bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */
1781
1782 /* Round up to power of two */
1783 if ((ticks_per_event & ((1 << bit_index) - 1)) != 0)
1784 bit_index++;
1785
1786 /*
1787 * The timer can only trigger on rising or falling edge,
1788 * not both; we don't care which we trigger on, but we
1789 * do need to adjust which bit we are interested in to
1790 * account for this.
1791 */
1792 if (bit_index != 0)
1793 bit_index--;
1794
1795 fiq_eventi = bit_index;
1796 #else
1797 #error Need a board configuration.
1798 #endif
1799 fiq_context_init(enable_fiq);
1800 }
1801
1802 boolean_t
1803 ml_delay_should_spin(uint64_t interval)
1804 {
1805 cpu_data_t *cdp = getCpuDatap();
1806
1807 if (cdp->cpu_idle_latency) {
1808 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
1809 } else {
1810 /*
1811 * Early boot, latency is unknown. Err on the side of blocking,
1812 * which should always be safe, even if slow
1813 */
1814 return FALSE;
1815 }
1816 }
1817
/* Nothing to do here. */
void
ml_delay_on_yield(void)
{
	/* intentionally empty */
}
1822
1823 boolean_t ml_thread_is64bit(thread_t thread) {
1824 return (thread_is_64bit_addr(thread));
1825 }
1826
/* Nothing to do here. */
void
ml_timer_evaluate(void)
{
	/* intentionally empty */
}
1829
1830 boolean_t
1831 ml_timer_forced_evaluation(void) {
1832 return FALSE;
1833 }
1834
1835 uint64_t
1836 ml_energy_stat(thread_t t) {
1837 return t->machine.energy_estimate_nj;
1838 }
1839
1840
1841 void
1842 ml_gpu_stat_update(__unused uint64_t gpu_ns_delta) {
1843 #if CONFIG_EMBEDDED
1844 /*
1845 * For now: update the resource coalition stats of the
1846 * current thread's coalition
1847 */
1848 task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
1849 #endif
1850 }
1851
1852 uint64_t
1853 ml_gpu_stat(__unused thread_t t) {
1854 return 0;
1855 }
1856
1857 #if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME
1858 static void
1859 timer_state_event(boolean_t switch_to_kernel)
1860 {
1861 thread_t thread = current_thread();
1862 if (!thread->precise_user_kernel_time) return;
1863
1864 processor_data_t *pd = &getCpuDatap()->cpu_processor->processor_data;
1865 uint64_t now = ml_get_timebase();
1866
1867 timer_stop(pd->current_state, now);
1868 pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
1869 timer_start(pd->current_state, now);
1870
1871 timer_stop(pd->thread_timer, now);
1872 pd->thread_timer = (switch_to_kernel) ? &thread->system_timer : &thread->user_timer;
1873 timer_start(pd->thread_timer, now);
1874 }
1875
1876 void
1877 timer_state_event_user_to_kernel(void)
1878 {
1879 timer_state_event(TRUE);
1880 }
1881
1882 void
1883 timer_state_event_kernel_to_user(void)
1884 {
1885 timer_state_event(FALSE);
1886 }
1887 #endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME */
1888
1889 /*
1890 * The following are required for parts of the kernel
1891 * that cannot resolve these functions as inlines:
1892 */
1893 extern thread_t current_act(void);
1894 thread_t
1895 current_act(void)
1896 {
1897 return current_thread_fast();
1898 }
1899
1900 #undef current_thread
1901 extern thread_t current_thread(void);
1902 thread_t
1903 current_thread(void)
1904 {
1905 return current_thread_fast();
1906 }
1907
1908 typedef struct
1909 {
1910 ex_cb_t cb;
1911 void *refcon;
1912 }
1913 ex_cb_info_t;
1914
1915 ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];
1916
1917 /*
1918 * Callback registration
1919 * Currently we support only one registered callback per class but
1920 * it should be possible to support more callbacks
1921 */
1922 kern_return_t ex_cb_register(
1923 ex_cb_class_t cb_class,
1924 ex_cb_t cb,
1925 void *refcon)
1926 {
1927 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
1928
1929 if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX))
1930 {
1931 return KERN_INVALID_VALUE;
1932 }
1933
1934 if (NULL == pInfo->cb)
1935 {
1936 pInfo->cb = cb;
1937 pInfo->refcon = refcon;
1938 return KERN_SUCCESS;
1939 }
1940 return KERN_FAILURE;
1941 }
1942
1943 /*
1944 * Called internally by platform kernel to invoke the registered callback for class
1945 */
1946 ex_cb_action_t ex_cb_invoke(
1947 ex_cb_class_t cb_class,
1948 vm_offset_t far)
1949 {
1950 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
1951 ex_cb_state_t state = {far};
1952
1953 if (cb_class >= EXCB_CLASS_MAX)
1954 {
1955 panic("Invalid exception callback class 0x%x\n", cb_class);
1956 }
1957
1958 if (pInfo->cb)
1959 {
1960 return pInfo->cb(cb_class, pInfo->refcon, &state);
1961 }
1962 return EXCB_ACTION_NONE;
1963 }
1964