[apple/xnu.git] / osfmk / arm64 / machine_routines.c (xnu-6153.141.1)
1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm64/proc_reg.h>
30 #include <arm/machine_cpu.h>
31 #include <arm/cpu_internal.h>
32 #include <arm/cpuid.h>
33 #include <arm/io_map_entries.h>
34 #include <arm/cpu_data.h>
35 #include <arm/cpu_data_internal.h>
36 #include <arm/caches_internal.h>
37 #include <arm/misc_protos.h>
38 #include <arm/machdep_call.h>
39 #include <arm/machine_routines.h>
40 #include <arm/rtclock.h>
41 #include <arm/cpuid_internal.h>
42 #include <arm/cpu_capabilities.h>
43 #include <console/serial_protos.h>
44 #include <kern/machine.h>
45 #include <prng/random.h>
46 #include <kern/startup.h>
47 #include <kern/thread.h>
48 #include <kern/timer_queue.h>
49 #include <mach/machine.h>
50 #include <machine/atomic.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_page.h>
53 #include <sys/kdebug.h>
54 #include <kern/coalition.h>
55 #include <pexpert/device_tree.h>
56
57 #include <IOKit/IOPlatformExpert.h>
58
59 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
60 #include <libkern/kernel_mach_header.h>
61 #endif
62
63 #include <libkern/section_keywords.h>
64
65 #if KPC
66 #include <kern/kpc.h>
67 #endif
68
69 #if HAS_CLUSTER
70 static uint8_t cluster_initialized = 0;
71 #endif
72
73
74 static int max_cpus_initialized = 0;
75 #define MAX_CPUS_SET 0x1
76 #define MAX_CPUS_WAIT 0x2
77
78 uint32_t LockTimeOut;
79 uint32_t LockTimeOutUsec;
80 uint64_t TLockTimeOut;
81 uint64_t MutexSpin;
82 uint64_t low_MutexSpin;
83 int64_t high_MutexSpin;
84
85 boolean_t is_clock_configured = FALSE;
86
87 uint32_t yield_delay_us = 0; /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
88
89 #if CONFIG_NONFATAL_ASSERTS
90 extern int mach_assert;
91 #endif
92 extern volatile uint32_t debug_enabled;
93
94 extern vm_offset_t segLOWEST;
95 extern vm_offset_t segLOWESTTEXT;
96 extern vm_offset_t segLASTB;
97 extern unsigned long segSizeLAST;
98
99 #if defined(HAS_IPI)
100 unsigned int gFastIPI = 1;
101 #define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
102 static uint64_t deferred_ipi_timer_ns = kDeferredIPITimerDefault;
103 #endif /* defined(HAS_IPI) */
104
105 void machine_conf(void);
106
107 thread_t Idle_context(void);
108
109 SECURITY_READ_ONLY_LATE(static uint32_t) cpu_phys_ids[MAX_CPUS] = {[0 ... MAX_CPUS - 1] = (uint32_t)-1};
110 SECURITY_READ_ONLY_LATE(static unsigned int) avail_cpus = 0;
111 SECURITY_READ_ONLY_LATE(static int) boot_cpu = -1;
112 SECURITY_READ_ONLY_LATE(static int) max_cpu_number = 0;
113 SECURITY_READ_ONLY_LATE(cluster_type_t) boot_cluster = CLUSTER_TYPE_SMP;
114
115 SECURITY_READ_ONLY_LATE(static uint32_t) fiq_eventi = UINT32_MAX;
116
117 lockdown_handler_t lockdown_handler;
118 void *lockdown_this;
119 lck_mtx_t lockdown_handler_lck;
120 lck_grp_t *lockdown_handler_grp;
121 uint32_t lockdown_done;
122
123 void ml_lockdown_init(void);
124 void ml_lockdown_run_handler(void);
125 uint32_t get_arm_cpu_version(void);
126
127 #if defined(HAS_IPI)
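/*
 * ml_cpu_signal_type: issue a fast IPI to the core identified by cpu_mpidr.
 * MPIDR Aff0 selects the core within a cluster; on HAS_CLUSTER configurations
 * Aff1 selects the cluster, and the local vs. global IPI request register is
 * chosen based on whether the target shares the caller's cluster.
 */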
128 static inline void
129 ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
130 {
131 #if HAS_CLUSTER
132 uint64_t local_mpidr;
133 /* NOTE: this logic expects that we are called in a non-preemptible
134 * context, or at least one in which the calling thread is bound
135 * to a single CPU. Otherwise we may migrate between choosing which
136 * IPI mechanism to use and issuing the IPI. */
137 MRS(local_mpidr, "MPIDR_EL1");
138 if ((local_mpidr & MPIDR_AFF1_MASK) == (cpu_mpidr & MPIDR_AFF1_MASK)) {
139 uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
140 MSR(ARM64_REG_IPI_RR_LOCAL, x);
141 } else {
142 #define IPI_RR_TARGET_CLUSTER_SHIFT 16
143 uint64_t x = type | ((cpu_mpidr & MPIDR_AFF1_MASK) << (IPI_RR_TARGET_CLUSTER_SHIFT - MPIDR_AFF1_SHIFT)) | (cpu_mpidr & MPIDR_AFF0_MASK);
144 MSR(ARM64_REG_IPI_RR_GLOBAL, x);
145 }
146 #else
147 uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
148 MSR(ARM64_REG_IPI_RR, x);
149 #endif
150 }
151 #endif
152
153 #if !defined(HAS_IPI)
154 __dead2
155 #endif
156 void
157 ml_cpu_signal(unsigned int cpu_mpidr __unused)
158 {
159 #if defined(HAS_IPI)
160 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
161 #else
162 panic("Platform does not support ACC Fast IPI");
163 #endif
164 }
165
166 #if !defined(HAS_IPI)
167 __dead2
168 #endif
169 void
170 ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
171 {
172 #if defined(HAS_IPI)
173 /* Adjust the IPI_CR timer countdown value for a deferred IPI:
174 * accepts input in nanoseconds, converts it to absolutetime (REFCLK ticks),
175 * and clamps the result to 0xFFFF REFCLK ticks (a 16-bit field).
176 *
177 * global register, should only require a single write to update all
178 * CPU cores: from Skye ACC user spec section 5.7.3.3
179 *
180 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
181 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
182 */
183 uint64_t abstime;
184
185 nanoseconds_to_absolutetime(nanosecs, &abstime);
186
187 abstime = MIN(abstime, 0xFFFF);
188
189 /* update deferred_ipi_timer_ns with the new clamped value */
190 absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
191
192 MSR(ARM64_REG_IPI_CR, abstime);
193 #else
194 (void)nanosecs;
195 panic("Platform does not support ACC Fast IPI");
196 #endif
197 }
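/*
 * Illustrative arithmetic for the clamp above (assuming a 24 MHz REFCLK,
 * which is typical for Apple SoCs but not guaranteed by this code): the
 * 64 us default converts to roughly 1536 ticks, well inside the 0xFFFF
 * limit; a request beyond about 2.7 ms would be clamped, and
 * deferred_ipi_timer_ns updated to the clamped value.
 */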
198
199 uint64_t
200 ml_cpu_signal_deferred_get_timer()
201 {
202 #if defined(HAS_IPI)
203 return deferred_ipi_timer_ns;
204 #else
205 return 0;
206 #endif
207 }
208
209 #if !defined(HAS_IPI)
210 __dead2
211 #endif
212 void
213 ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
214 {
215 #if defined(HAS_IPI)
216 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
217 #else
218 panic("Platform does not support ACC Fast IPI deferral");
219 #endif
220 }
221
222 #if !defined(HAS_IPI)
223 __dead2
224 #endif
225 void
226 ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
227 {
228 #if defined(HAS_IPI)
229 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
230 #else
231 panic("Platform does not support ACC Fast IPI retraction");
232 #endif
233 }
234
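/* Mask IRQ/FIQ, hand off to the idle context, and unmask on wakeup. */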
235 void
236 machine_idle(void)
237 {
238 __builtin_arm_wsr("DAIFSet", (DAIFSC_IRQF | DAIFSC_FIQF));
239 Idle_context();
240 __builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
241 }
242
243 void
244 init_vfp(void)
245 {
246 return;
247 }
248
249 boolean_t
250 get_vfp_enabled(void)
251 {
252 return TRUE;
253 }
254
255 void
256 OSSynchronizeIO(void)
257 {
258 __builtin_arm_dsb(DSB_SY);
259 }
260
261 uint64_t
262 get_aux_control(void)
263 {
264 uint64_t value;
265
266 MRS(value, "ACTLR_EL1");
267 return value;
268 }
269
270 uint64_t
271 get_mmu_control(void)
272 {
273 uint64_t value;
274
275 MRS(value, "SCTLR_EL1");
276 return value;
277 }
278
279 uint64_t
280 get_tcr(void)
281 {
282 uint64_t value;
283
284 MRS(value, "TCR_EL1");
285 return value;
286 }
287
288 boolean_t
289 ml_get_interrupts_enabled(void)
290 {
291 uint64_t value;
292
293 MRS(value, "DAIF");
294 if (value & DAIF_IRQF) {
295 return FALSE;
296 }
297 return TRUE;
298 }
299
300 pmap_paddr_t
301 get_mmu_ttb(void)
302 {
303 pmap_paddr_t value;
304
305 MRS(value, "TTBR0_EL1");
306 return value;
307 }
308
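/*
 * get_arm_cpu_version: pack the MIDR variant into bits [7:4] and the
 * revision into bits [3:0]; e.g. variant 0x2, revision 0x1 yields 0x21.
 */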
309 uint32_t
310 get_arm_cpu_version(void)
311 {
312 uint32_t value = machine_read_midr();
313
314 /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
315 return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
316 }
317
318 /*
319 * user_cont_hwclock_allowed()
320 *
321 * Indicates whether we allow EL0 to read the physical timebase (CNTPCT_EL0)
322 * as a continuous time source (e.g. from mach_continuous_time)
323 */
324 boolean_t
325 user_cont_hwclock_allowed(void)
326 {
327 #if HAS_CONTINUOUS_HWCLOCK
328 return TRUE;
329 #else
330 return FALSE;
331 #endif
332 }
333
334
335 uint8_t
336 user_timebase_type(void)
337 {
338 return USER_TIMEBASE_SPEC;
339 }
340
341 boolean_t
342 arm64_wfe_allowed(void)
343 {
344 return TRUE;
345 }
346
347 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
348
349 uint64_t rorgn_begin __attribute__((section("__DATA, __const"))) = 0;
350 uint64_t rorgn_end __attribute__((section("__DATA, __const"))) = 0;
351 vm_offset_t amcc_base;
352
353 static void assert_unlocked(void);
354 static void assert_amcc_cache_disabled(void);
355 static void lock_amcc(void);
356 static void lock_mmu(uint64_t begin, uint64_t end);
357
358 void
359 rorgn_stash_range(void)
360 {
361 #if DEVELOPMENT || DEBUG
362 boolean_t rorgn_disable = FALSE;
363
364 PE_parse_boot_argn("-unsafe_kernel_text", &rorgn_disable, sizeof(rorgn_disable));
365
366 if (rorgn_disable) {
367 /* take early out if boot arg present, don't query any machine registers to avoid
368 * dependency on amcc DT entry
369 */
370 return;
371 }
372 #endif
373
374 /* Get the AMCC values and stash them into rorgn_begin and rorgn_end.
375 * gPhysBase is the base of DRAM managed by xnu. We need DRAM_BASE because
376 * the AMCC RO region begin/end registers are in units of 16KB page
377 * numbers from DRAM_BASE, so we truncate gPhysBase down to a 512MB boundary
378 * and assert that the result is the canonical DRAM_BASE PA of 0x8_0000_0000 for arm64.
379 */
380
381 uint64_t dram_base = gPhysBase & ~0x1FFFFFFFULL; /* 512MB */
382 assert(dram_base == 0x800000000ULL);
383
384 #if defined(KERNEL_INTEGRITY_KTRR)
385 uint64_t soc_base = 0;
386 DTEntry entryP = NULL;
387 uintptr_t *reg_prop = NULL;
388 uint32_t prop_size = 0;
389 int rc;
390
391 soc_base = pe_arm_get_soc_base_phys();
392 rc = DTFindEntry("name", "mcc", &entryP);
393 assert(rc == kSuccess);
394 rc = DTGetProperty(entryP, "reg", (void **)&reg_prop, &prop_size);
395 assert(rc == kSuccess);
396 amcc_base = ml_io_map(soc_base + *reg_prop, *(reg_prop + 1));
397 #elif defined(KERNEL_INTEGRITY_CTRR)
398 /* TODO: t8020 mcc entry not in device tree yet; we'll do it LIVE */
399 #define TEMP_AMCC_BASE_PA 0x200000000ULL
400 #define TEMP_AMCC_SZ 0x100000
401 amcc_base = ml_io_map(TEMP_AMCC_BASE_PA, TEMP_AMCC_SZ);
402 #else
403 #error "KERNEL_INTEGRITY config error"
404 #endif
405
406 #if defined(KERNEL_INTEGRITY_KTRR)
407 assert(rRORGNENDADDR > rRORGNBASEADDR);
408 rorgn_begin = (rRORGNBASEADDR << AMCC_PGSHIFT) + dram_base;
409 rorgn_end = (rRORGNENDADDR << AMCC_PGSHIFT) + dram_base;
410 #elif defined(KERNEL_INTEGRITY_CTRR)
411 rorgn_begin = rCTRR_AMCC_PLANE_REG(0, CTRR_A_BASEADDR);
412 rorgn_end = rCTRR_AMCC_PLANE_REG(0, CTRR_A_ENDADDR);
413 assert(rorgn_end > rorgn_begin);
414
415 for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
416 uint32_t begin = rCTRR_AMCC_PLANE_REG(i, CTRR_A_BASEADDR);
417 uint32_t end = rCTRR_AMCC_PLANE_REG(i, CTRR_A_ENDADDR);
418 if (!(begin == rorgn_begin && end == rorgn_end)) {
419 #if DEVELOPMENT || DEBUG
420 panic("iboot programmed CTRR bounds are inconsistent");
421 #else
422 panic("Inconsistent memory configuration");
423 #endif
424 }
425 }
426
427 // convert from page numbers relative to DRAM base to physical addresses
428 rorgn_begin = (rorgn_begin << AMCC_PGSHIFT) + dram_base;
429 rorgn_end = (rorgn_end << AMCC_PGSHIFT) + dram_base;
430
431 #else
432 #error KERNEL_INTEGRITY config error
433 #endif /* defined (KERNEL_INTEGRITY_KTRR) */
434 }
435
436 static void
437 assert_unlocked()
438 {
439 uint64_t ktrr_lock = 0;
440 uint32_t rorgn_lock = 0;
441
442 assert(amcc_base);
443 #if defined(KERNEL_INTEGRITY_KTRR)
444 rorgn_lock = rRORGNLOCK;
445 ktrr_lock = __builtin_arm_rsr64(ARM64_REG_KTRR_LOCK_EL1);
446 #elif defined(KERNEL_INTEGRITY_CTRR)
447 for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
448 rorgn_lock |= rCTRR_AMCC_PLANE_REG(i, CTRR_A_LOCK);
449 }
450 ktrr_lock = __builtin_arm_rsr64(ARM64_REG_CTRR_LOCK_EL1);
451 #else
452 #error KERNEL_INTEGRITY config error
453 #endif /* defined(KERNEL_INTEGRITY_KTRR) */
454
455 assert(!ktrr_lock);
456 assert(!rorgn_lock);
457 }
458
459 static void
460 lock_amcc()
461 {
462 #if defined(KERNEL_INTEGRITY_KTRR)
463 rRORGNLOCK = 1;
464 __builtin_arm_isb(ISB_SY);
465 #elif defined(KERNEL_INTEGRITY_CTRR)
466 /* lockdown planes in reverse order as plane 0 should be locked last */
467 for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
468 rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_ENABLE) = 1;
469 rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_LOCK) = 1;
470 __builtin_arm_isb(ISB_SY);
471 }
472 #else
473 #error KERNEL_INTEGRITY config error
474 #endif
475 }
476
477 static void
478 lock_mmu(uint64_t begin, uint64_t end)
479 {
480 #if defined(KERNEL_INTEGRITY_KTRR)
481
482 __builtin_arm_wsr64(ARM64_REG_KTRR_LOWER_EL1, begin);
483 __builtin_arm_wsr64(ARM64_REG_KTRR_UPPER_EL1, end);
484 __builtin_arm_wsr64(ARM64_REG_KTRR_LOCK_EL1, 1ULL);
485
486 /* flush TLB */
487
488 __builtin_arm_isb(ISB_SY);
489 flush_mmu_tlb();
490
491 #elif defined (KERNEL_INTEGRITY_CTRR)
492 /* This will lock the entire bootstrap cluster. Non-bootstrap clusters
493 * will be locked by their respective cluster masters in start.s */
494
495 __builtin_arm_wsr64(ARM64_REG_CTRR_A_LWR_EL1, begin);
496 __builtin_arm_wsr64(ARM64_REG_CTRR_A_UPR_EL1, end);
497
498 #if !defined(APPLEVORTEX)
499 /* H12 changed sequence, must invalidate TLB immediately after setting CTRR bounds */
500 __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
501 flush_mmu_tlb();
502 #endif /* !defined(APPLEVORTEX) */
503
504 __builtin_arm_wsr64(ARM64_REG_CTRR_CTL_EL1, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
505 __builtin_arm_wsr64(ARM64_REG_CTRR_LOCK_EL1, 1ULL);
506
507 uint64_t current_el = __builtin_arm_rsr64("CurrentEL");
508 if (current_el == PSR64_MODE_EL2) {
509 // CTRR v2 has explicit registers for cluster config. they can only be written in EL2
510
511 __builtin_arm_wsr64(ACC_CTRR_A_LWR_EL2, begin);
512 __builtin_arm_wsr64(ACC_CTRR_A_UPR_EL2, end);
513 __builtin_arm_wsr64(ACC_CTRR_CTL_EL2, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
514 __builtin_arm_wsr64(ACC_CTRR_LOCK_EL2, 1ULL);
515 }
516
517 __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
518 #if defined(APPLEVORTEX)
519 flush_mmu_tlb();
520 #endif /* defined(APPLEVORTEX) */
521
522 #else /* defined(KERNEL_INTEGRITY_KTRR) */
523 #error KERNEL_INTEGRITY config error
524 #endif /* defined(KERNEL_INTEGRITY_KTRR) */
525 }
526
527 static void
528 assert_amcc_cache_disabled()
529 {
530 #if defined(KERNEL_INTEGRITY_KTRR)
531 assert((rMCCGEN & 1) == 0); /* assert M$ disabled or LLC clean will be unreliable */
532 #elif defined(KERNEL_INTEGRITY_CTRR) && (defined(ARM64_BOARD_CONFIG_T8006))
533 /*
534 * T8006 differentiates between data and tag ways being powered up, so
535 * make sure to check that both are zero on its single memory plane.
536 */
537 assert((rCTRR_AMCC_PLANE_REG(0, CTRR_AMCC_PWRONWAYCNTSTATUS) &
538 (AMCC_CURTAGWAYCNT_MASK | AMCC_CURDATWAYCNT_MASK)) == 0);
539 #elif defined (KERNEL_INTEGRITY_CTRR)
540 for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
541 assert(rCTRR_AMCC_PLANE_REG(i, CTRR_AMCC_WAYONCNT) == 0);
542 }
543 #else
544 #error KERNEL_INTEGRITY config error
545 #endif
546 }
547
548 /*
549 * void rorgn_lockdown(void)
550 *
551 * Lock the MMU and AMCC RORegion within lower and upper boundaries if not already locked
552 *
553 * [ ] - ensure this is being called ASAP on secondary CPUs: KTRR programming and lockdown handled in
554 * start.s:start_cpu() for subsequent wake/resume of all cores
555 */
556 void
557 rorgn_lockdown(void)
558 {
559 vm_offset_t ktrr_begin, ktrr_end;
560 unsigned long last_segsz;
561
562 #if DEVELOPMENT || DEBUG
563 boolean_t ktrr_disable = FALSE;
564
565 PE_parse_boot_argn("-unsafe_kernel_text", &ktrr_disable, sizeof(ktrr_disable));
566
567 if (ktrr_disable) {
568 /*
569 * Take the early out if the boot-arg is present; since the amcc DT entry may be
570 * absent, we can't assert that iboot hasn't programmed the RO region lockdown registers.
571 */
572 goto out;
573 }
574 #endif /* DEVELOPMENT || DEBUG */
575
576 assert_unlocked();
577
578 /* [x] - Use final method of determining all kernel text range or expect crashes */
579 ktrr_begin = segLOWEST;
580 assert(ktrr_begin && gVirtBase && gPhysBase);
581
582 ktrr_begin = kvtophys(ktrr_begin);
583
584 ktrr_end = kvtophys(segLASTB);
585 last_segsz = segSizeLAST;
586 #if defined(KERNEL_INTEGRITY_KTRR)
587 /* __LAST is not part of the MMU KTRR region (it is however part of the AMCC KTRR region) */
588 ktrr_end = (ktrr_end - 1) & ~AMCC_PGMASK;
589 /* ensure that iboot and xnu agree on the ktrr range */
590 assert(rorgn_begin == ktrr_begin && rorgn_end == (ktrr_end + last_segsz));
591 /* assert that __LAST segment containing privileged insns is only a single page */
592 assert(last_segsz == PAGE_SIZE);
593 #elif defined(KERNEL_INTEGRITY_CTRR)
594 ktrr_end = (ktrr_end + last_segsz - 1) & ~AMCC_PGMASK;
595 /* __LAST is part of MMU CTRR region. Can't use the KTRR style method of making
596 * __pinst no execute because PXN applies with MMU off in CTRR. */
597 assert(rorgn_begin == ktrr_begin && rorgn_end == ktrr_end);
598 #endif
599
600
601 #if DEBUG || DEVELOPMENT
602 printf("KTRR Begin: %p End: %p, setting lockdown\n", (void *)ktrr_begin, (void *)ktrr_end);
603 #endif
604
605 /* [x] - ensure all in flight writes are flushed to AMCC before enabling RO Region Lock */
606
607 assert_amcc_cache_disabled();
608
609 CleanPoC_DcacheRegion_Force(phystokv(ktrr_begin),
610 (unsigned)((ktrr_end + last_segsz) - ktrr_begin + AMCC_PGMASK));
611
612 lock_amcc();
613
614 lock_mmu(ktrr_begin, ktrr_end);
615
616 #if DEVELOPMENT || DEBUG
617 out:
618 #endif
619
620 #if defined(KERNEL_INTEGRITY_CTRR)
621 {
622 /* wake any threads blocked on cluster master lockdown */
623 cpu_data_t *cdp;
624 uint64_t mpidr_el1_value;
625
626 cdp = getCpuDatap();
627 MRS(mpidr_el1_value, "MPIDR_EL1");
628 cdp->cpu_cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
629 assert(cdp->cpu_cluster_id < __ARM_CLUSTER_COUNT__);
630 ctrr_cluster_locked[cdp->cpu_cluster_id] = 1;
631 thread_wakeup(&ctrr_cluster_locked[cdp->cpu_cluster_id]);
632 }
633 #endif
634 /* now we can run lockdown handler */
635 ml_lockdown_run_handler();
636 }
637
638 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
639
640 void
641 machine_startup(__unused boot_args * args)
642 {
643 int boot_arg;
644
645 #if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
646 if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
647 gFastIPI = 1;
648 }
649
650 PE_parse_boot_argn("fastipitimeout", &deferred_ipi_timer_ns, sizeof(deferred_ipi_timer_ns));
651 #endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
652
653 #if CONFIG_NONFATAL_ASSERTS
654 PE_parse_boot_argn("assert", &mach_assert, sizeof(mach_assert));
655 #endif
656
657 if (PE_parse_boot_argn("preempt", &boot_arg, sizeof(boot_arg))) {
658 default_preemption_rate = boot_arg;
659 }
660 if (PE_parse_boot_argn("bg_preempt", &boot_arg, sizeof(boot_arg))) {
661 default_bg_preemption_rate = boot_arg;
662 }
663
664 PE_parse_boot_argn("yield_delay_us", &yield_delay_us, sizeof(yield_delay_us));
665
666 machine_conf();
667
668 /*
669 * Kick off the kernel bootstrap.
670 */
671 kernel_bootstrap();
672 /* NOTREACHED */
673 }
674
675 void
676 machine_lockdown_preflight(void)
677 {
678 #if CONFIG_KERNEL_INTEGRITY
679
680 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
681 rorgn_stash_range();
682 #endif
683
684 #endif
685 }
686
687 void
688 machine_lockdown(void)
689 {
690 #if CONFIG_KERNEL_INTEGRITY
691 #if KERNEL_INTEGRITY_WT
692 /* Watchtower
693 *
694 * Notify the monitor about the completion of early kernel bootstrap.
695 * From this point forward it will enforce the integrity of kernel text,
696 * rodata and page tables.
697 */
698
699 #ifdef MONITOR
700 monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
701 #endif
702 #endif /* KERNEL_INTEGRITY_WT */
703
704 #if XNU_MONITOR
705 pmap_lockdown_ppl();
706 #endif
707
708 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
709 /* KTRR
710 *
711 * Lock physical KTRR region. KTRR region is read-only. Memory outside
712 * the region is not executable at EL1.
713 */
714
715 rorgn_lockdown();
716 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
717
718
719 #endif /* CONFIG_KERNEL_INTEGRITY */
720 }
721
722 char *
723 machine_boot_info(
724 __unused char *buf,
725 __unused vm_size_t size)
726 {
727 return PE_boot_args();
728 }
729
730 void
731 machine_conf(void)
732 {
733 /*
734 * This is known to be inaccurate. mem_size should always be capped at 2 GB
735 */
736 machine_info.memory_size = (uint32_t)mem_size;
737 }
738
739 void
740 machine_init(void)
741 {
742 debug_log_init();
743 clock_config();
744 is_clock_configured = TRUE;
745 if (debug_enabled) {
746 pmap_map_globals();
747 }
748 }
749
750 void
751 slave_machine_init(__unused void *param)
752 {
753 cpu_machine_init(); /* Initialize the processor */
754 clock_init(); /* Init the clock */
755 }
756
757 /*
758 * Routine: machine_processor_shutdown
759 * Function:
760 */
761 thread_t
762 machine_processor_shutdown(
763 __unused thread_t thread,
764 void (*doshutdown)(processor_t),
765 processor_t processor)
766 {
767 return Shutdown_context(doshutdown, processor);
768 }
769
770 /*
771 * Routine: ml_init_max_cpus
772 * Function:
773 */
774 void
775 ml_init_max_cpus(unsigned int max_cpus)
776 {
777 boolean_t current_state;
778
779 current_state = ml_set_interrupts_enabled(FALSE);
780 if (max_cpus_initialized != MAX_CPUS_SET) {
781 machine_info.max_cpus = max_cpus;
782 machine_info.physical_cpu_max = max_cpus;
783 machine_info.logical_cpu_max = max_cpus;
784 if (max_cpus_initialized == MAX_CPUS_WAIT) {
785 thread_wakeup((event_t) &max_cpus_initialized);
786 }
787 max_cpus_initialized = MAX_CPUS_SET;
788 }
789 (void) ml_set_interrupts_enabled(current_state);
790 }
791
792 /*
793 * Routine: ml_get_max_cpus
794 * Function:
795 */
796 unsigned int
797 ml_get_max_cpus(void)
798 {
799 boolean_t current_state;
800
801 current_state = ml_set_interrupts_enabled(FALSE);
802 if (max_cpus_initialized != MAX_CPUS_SET) {
803 max_cpus_initialized = MAX_CPUS_WAIT;
804 assert_wait((event_t) &max_cpus_initialized, THREAD_UNINT);
805 (void) thread_block(THREAD_CONTINUE_NULL);
806 }
807 (void) ml_set_interrupts_enabled(current_state);
808 return machine_info.max_cpus;
809 }
810
811 /*
812 * Routine: ml_init_lock_timeout
813 * Function:
814 */
815 void
816 ml_init_lock_timeout(void)
817 {
818 uint64_t abstime;
819 uint64_t mtxspin;
820 uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
821 uint32_t slto;
822
823 if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
824 default_timeout_ns = slto * NSEC_PER_USEC;
825 }
826
827 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
828 LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
829 LockTimeOut = (uint32_t)abstime;
830
831 if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
832 nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &abstime);
833 TLockTimeOut = abstime;
834 } else {
835 TLockTimeOut = LockTimeOut >> 1;
836 }
837
838 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
839 if (mtxspin > USEC_PER_SEC >> 4) {
840 mtxspin = USEC_PER_SEC >> 4;
841 }
842 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
843 } else {
844 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
845 }
846 MutexSpin = abstime;
847 low_MutexSpin = MutexSpin;
848 /*
849 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
850 * real_ncpus is not set at this time
851 *
852 * NOTE: active spinning is disabled in arm. It can be activated
853 * by setting high_MutexSpin through the sysctl.
854 */
855 high_MutexSpin = low_MutexSpin;
856 }
857
858 /*
859 * This is called from the machine-independent routine cpu_up()
860 * to perform machine-dependent info updates.
861 */
862 void
863 ml_cpu_up(void)
864 {
865 os_atomic_inc(&machine_info.physical_cpu, relaxed);
866 os_atomic_inc(&machine_info.logical_cpu, relaxed);
867 }
868
869 /*
870 * This is called from the machine-independent routine cpu_down()
871 * to perform machine-dependent info updates.
872 */
873 void
874 ml_cpu_down(void)
875 {
876 cpu_data_t *cpu_data_ptr;
877
878 os_atomic_dec(&machine_info.physical_cpu, relaxed);
879 os_atomic_dec(&machine_info.logical_cpu, relaxed);
880
881 /*
882 * If we want to deal with outstanding IPIs, we need to
883 * do so relatively early in the processor_doshutdown path,
884 * as we pend decrementer interrupts using the IPI
885 * mechanism if we cannot immediately service them (if
886 * IRQ is masked). Do so now.
887 *
888 * We aren't on the interrupt stack here; would it make
889 * more sense to disable signaling and then enable
890 * interrupts? It might be a bit cleaner.
891 */
892 cpu_data_ptr = getCpuDatap();
893 cpu_data_ptr->cpu_running = FALSE;
894
895 if (cpu_data_ptr != &BootCpuData) {
896 /*
897 * Move all of this cpu's timers to the master/boot cpu,
898 * and poke it in case there's a sooner deadline for it to schedule.
899 */
900 timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
901 cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, NULL);
902 }
903
904 cpu_signal_handler_internal(TRUE);
905 }
906
907 /*
908 * Routine: ml_cpu_get_info
909 * Function:
910 */
911 void
912 ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
913 {
914 cache_info_t *cpuid_cache_info;
915
916 cpuid_cache_info = cache_info();
917 ml_cpu_info->vector_unit = 0;
918 ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
919 ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
920 ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;
921
922 #if (__ARM_ARCH__ >= 7)
923 ml_cpu_info->l2_settings = 1;
924 ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
925 #else
926 ml_cpu_info->l2_settings = 0;
927 ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
928 #endif
929 ml_cpu_info->l3_settings = 0;
930 ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
931 }
932
933 unsigned int
934 ml_get_machine_mem(void)
935 {
936 return machine_info.memory_size;
937 }
938
939 __attribute__((noreturn))
940 void
941 halt_all_cpus(boolean_t reboot)
942 {
943 if (reboot) {
944 printf("MACH Reboot\n");
945 PEHaltRestart(kPERestartCPU);
946 } else {
947 printf("CPU halted\n");
948 PEHaltRestart(kPEHaltCPU);
949 }
950 while (1) {
951 ;
952 }
953 }
954
955 __attribute__((noreturn))
956 void
957 halt_cpu(void)
958 {
959 halt_all_cpus(FALSE);
960 }
961
962 /*
963 * Routine: machine_signal_idle
964 * Function:
965 */
966 void
967 machine_signal_idle(
968 processor_t processor)
969 {
970 cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
971 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
972 }
973
974 void
975 machine_signal_idle_deferred(
976 processor_t processor)
977 {
978 cpu_signal_deferred(processor_to_cpu_datap(processor));
979 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
980 }
981
982 void
983 machine_signal_idle_cancel(
984 processor_t processor)
985 {
986 cpu_signal_cancel(processor_to_cpu_datap(processor));
987 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
988 }
989
990 /*
991 * Routine: ml_install_interrupt_handler
992 * Function: Initialize Interrupt Handler
993 */
994 void
995 ml_install_interrupt_handler(
996 void *nub,
997 int source,
998 void *target,
999 IOInterruptHandler handler,
1000 void *refCon)
1001 {
1002 cpu_data_t *cpu_data_ptr;
1003 boolean_t current_state;
1004
1005 current_state = ml_set_interrupts_enabled(FALSE);
1006 cpu_data_ptr = getCpuDatap();
1007
1008 cpu_data_ptr->interrupt_nub = nub;
1009 cpu_data_ptr->interrupt_source = source;
1010 cpu_data_ptr->interrupt_target = target;
1011 cpu_data_ptr->interrupt_handler = handler;
1012 cpu_data_ptr->interrupt_refCon = refCon;
1013
1014 cpu_data_ptr->interrupts_enabled = TRUE;
1015 (void) ml_set_interrupts_enabled(current_state);
1016
1017 initialize_screen(NULL, kPEAcquireScreen);
1018 }
1019
1020 /*
1021 * Routine: ml_init_interrupt
1022 * Function: Initialize Interrupts
1023 */
1024 void
1025 ml_init_interrupt(void)
1026 {
1027 #if defined(HAS_IPI)
1028 /*
1029 * ml_init_interrupt will get called once for each CPU, but this is redundant
1030 * because there is only one global copy of the register for Skye. Do it only
1031 * on the bootstrap CPU.
1032 */
1033 if (getCpuDatap()->cluster_master) {
1034 ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
1035 }
1036 #endif
1037 }
1038
1039 /*
1040 * Routine: ml_init_timebase
1041 * Function: register and set up Timebase, Decrementer services
1042 */
1043 void
1044 ml_init_timebase(
1045 void *args,
1046 tbd_ops_t tbd_funcs,
1047 vm_offset_t int_address,
1048 vm_offset_t int_value __unused)
1049 {
1050 cpu_data_t *cpu_data_ptr;
1051
1052 cpu_data_ptr = (cpu_data_t *)args;
1053
1054 if ((cpu_data_ptr == &BootCpuData)
1055 && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
1056 rtclock_timebase_func = *tbd_funcs;
1057 rtclock_timebase_addr = int_address;
1058 }
1059 }
1060
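/*
 * ml_parse_cpu_topology: walk the /cpus device-tree node to discover CPUs.
 * Records each CPU's physical ID from its "reg" property, identifies the
 * boot CPU (the entry whose "state" is "running"), caps the count with the
 * "cpus" boot-arg, and publishes the boot CPU number to userspace via
 * TPIDRRO_EL0.
 */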
1061 void
1062 ml_parse_cpu_topology(void)
1063 {
1064 DTEntry entry, child __unused;
1065 OpaqueDTEntryIterator iter;
1066 uint32_t cpu_boot_arg;
1067 int err;
1068
1069 cpu_boot_arg = MAX_CPUS;
1070
1071 PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
1072
1073 err = DTLookupEntry(NULL, "/cpus", &entry);
1074 assert(err == kSuccess);
1075
1076 err = DTInitEntryIterator(entry, &iter);
1077 assert(err == kSuccess);
1078
1079 while (kSuccess == DTIterateEntries(&iter, &child)) {
1080 unsigned int propSize;
1081 void *prop = NULL;
1082 int cpu_id = avail_cpus++;
1083
1084 if (kSuccess == DTGetProperty(child, "cpu-id", &prop, &propSize)) {
1085 cpu_id = *((int32_t*)prop);
1086 }
1087
1088 assert(cpu_id < MAX_CPUS);
1089 assert(cpu_phys_ids[cpu_id] == (uint32_t)-1);
1090
1091 if (boot_cpu == -1) {
1092 if (kSuccess != DTGetProperty(child, "state", &prop, &propSize)) {
1093 panic("unable to retrieve state for cpu %d", cpu_id);
1094 }
1095
1096 if (strncmp((char*)prop, "running", propSize) == 0) {
1097 boot_cpu = cpu_id;
1098 }
1099 }
1100 if (kSuccess != DTGetProperty(child, "reg", &prop, &propSize)) {
1101 panic("unable to retrieve physical ID for cpu %d", cpu_id);
1102 }
1103
1104 cpu_phys_ids[cpu_id] = *((uint32_t*)prop);
1105
1106 if ((cpu_id > max_cpu_number) && ((cpu_id == boot_cpu) || (avail_cpus <= cpu_boot_arg))) {
1107 max_cpu_number = cpu_id;
1108 }
1109 }
1110
1111 if (avail_cpus > cpu_boot_arg) {
1112 avail_cpus = cpu_boot_arg;
1113 }
1114
1115 if (avail_cpus == 0) {
1116 panic("No cpus found!");
1117 }
1118
1119 if (boot_cpu == -1) {
1120 panic("unable to determine boot cpu!");
1121 }
1122
1123 /*
1124 * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
1125 * not be booting from cpu 0. Userspace will consume the current
1126 * CPU number through this register. For non-boot cores, this is
1127 * done in start.s (start_cpu) using the cpu_number field of the
1128 * per-cpu data object.
1129 */
1130 assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
1131 __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)boot_cpu);
1132 }
1133
1134 unsigned int
1135 ml_get_cpu_count(void)
1136 {
1137 return avail_cpus;
1138 }
1139
1140 int
1141 ml_get_boot_cpu_number(void)
1142 {
1143 return boot_cpu;
1144 }
1145
1146 cluster_type_t
1147 ml_get_boot_cluster(void)
1148 {
1149 return boot_cluster;
1150 }
1151
1152 int
1153 ml_get_cpu_number(uint32_t phys_id)
1154 {
1155 for (int log_id = 0; log_id <= ml_get_max_cpu_number(); ++log_id) {
1156 if (cpu_phys_ids[log_id] == phys_id) {
1157 return log_id;
1158 }
1159 }
1160 return -1;
1161 }
1162
1163 int
1164 ml_get_max_cpu_number(void)
1165 {
1166 return max_cpu_number;
1167 }
1168
1169
1170 void
1171 ml_lockdown_init()
1172 {
1173 lockdown_handler_grp = lck_grp_alloc_init("lockdown_handler", NULL);
1174 assert(lockdown_handler_grp != NULL);
1175
1176 lck_mtx_init(&lockdown_handler_lck, lockdown_handler_grp, NULL);
1177
1178 #if defined(KERNEL_INTEGRITY_CTRR)
1179 init_ctrr_cpu_start_lock();
1180 #endif
1181 }
1182
1183 kern_return_t
1184 ml_lockdown_handler_register(lockdown_handler_t f, void *this)
1185 {
1186 if (lockdown_handler || !f) {
1187 return KERN_FAILURE;
1188 }
1189
1190 lck_mtx_lock(&lockdown_handler_lck);
1191 lockdown_handler = f;
1192 lockdown_this = this;
1193
1194 #if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
1195 lockdown_done = 1;
1196 lockdown_handler(this);
1197 #else
1198 if (lockdown_done) {
1199 lockdown_handler(this);
1200 }
1201 #endif
1202 lck_mtx_unlock(&lockdown_handler_lck);
1203
1204 return KERN_SUCCESS;
1205 }
1206
1207 void
1208 ml_lockdown_run_handler()
1209 {
1210 lck_mtx_lock(&lockdown_handler_lck);
1211 assert(!lockdown_done);
1212
1213 lockdown_done = 1;
1214 if (lockdown_handler) {
1215 lockdown_handler(lockdown_this);
1216 }
1217 lck_mtx_unlock(&lockdown_handler_lck);
1218 }
1219
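/*
 * ml_processor_register: per-CPU registration entry point for the platform
 * expert. Allocates and initializes cpu_data for non-boot CPUs, records the
 * idle/cache/error callbacks and cache topology, initializes non-boot
 * processors in their cluster's pset, and hands back the processor, IPI
 * handler, and PMI handler to the caller.
 */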
1220 kern_return_t
1221 ml_processor_register(ml_processor_info_t *in_processor_info,
1222 processor_t *processor_out, ipi_handler_t *ipi_handler_out,
1223 perfmon_interrupt_handler_func *pmi_handler_out)
1224 {
1225 cpu_data_t *this_cpu_datap;
1226 processor_set_t pset;
1227 boolean_t is_boot_cpu;
1228 static unsigned int reg_cpu_count = 0;
1229
1230 if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
1231 return KERN_FAILURE;
1232 }
1233
1234 if ((unsigned int)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= avail_cpus) {
1235 return KERN_FAILURE;
1236 }
1237
1238 if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
1239 is_boot_cpu = FALSE;
1240 this_cpu_datap = cpu_data_alloc(FALSE);
1241 cpu_data_init(this_cpu_datap);
1242 } else {
1243 this_cpu_datap = &BootCpuData;
1244 is_boot_cpu = TRUE;
1245 }
1246
1247 assert(in_processor_info->log_id < MAX_CPUS);
1248
1249 this_cpu_datap->cpu_id = in_processor_info->cpu_id;
1250
1251 this_cpu_datap->cpu_console_buf = console_cpu_alloc(is_boot_cpu);
1252 if (this_cpu_datap->cpu_console_buf == (void *)(NULL)) {
1253 goto processor_register_error;
1254 }
1255
1256 if (!is_boot_cpu) {
1257 this_cpu_datap->cpu_number = in_processor_info->log_id;
1258
1259 if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
1260 goto processor_register_error;
1261 }
1262 }
1263
1264 this_cpu_datap->cpu_idle_notify = (void *) in_processor_info->processor_idle;
1265 this_cpu_datap->cpu_cache_dispatch = in_processor_info->platform_cache_dispatch;
1266 nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
1267 this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
1268
1269 this_cpu_datap->idle_timer_notify = (void *) in_processor_info->idle_timer;
1270 this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
1271
1272 this_cpu_datap->platform_error_handler = (void *) in_processor_info->platform_error_handler;
1273 this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
1274 this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
1275 this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
1276
1277 this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
1278 this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
1279 this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
1280 this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
1281 this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
1282 this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
1283
1284 #if HAS_CLUSTER
1285 this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
1286 #else /* HAS_CLUSTER */
1287 this_cpu_datap->cluster_master = is_boot_cpu;
1288 #endif /* HAS_CLUSTER */
1289
1290 pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
1291 assert(pset != NULL);
1292 kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
1293
1294 if (!is_boot_cpu) {
1295 processor_init((struct processor *)this_cpu_datap->cpu_processor,
1296 this_cpu_datap->cpu_number, pset);
1297
1298 if (this_cpu_datap->cpu_l2_access_penalty) {
1299 /*
1300 * Cores that have a non-zero L2 access penalty compared
1301 * to the boot processor should be de-prioritized by the
1302 * scheduler, so that threads use the cores with better L2
1303 * preferentially.
1304 */
1305 processor_set_primary(this_cpu_datap->cpu_processor,
1306 master_processor);
1307 }
1308 }
1309
1310 *processor_out = this_cpu_datap->cpu_processor;
1311 *ipi_handler_out = cpu_signal_handler;
1312 #if CPMU_AIC_PMI && MONOTONIC
1313 *pmi_handler_out = mt_cpmu_aic_pmi;
1314 #else
1315 *pmi_handler_out = NULL;
1316 #endif /* CPMU_AIC_PMI && MONOTONIC */
1317 if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
1318 *in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
1319 }
1320
1321 #if KPC
1322 if (kpc_register_cpu(this_cpu_datap) != TRUE) {
1323 goto processor_register_error;
1324 }
1325 #endif /* KPC */
1326
1327 if (!is_boot_cpu) {
1328 random_cpu_init(this_cpu_datap->cpu_number);
1329 // now let next CPU register itself
1330 OSIncrementAtomic((SInt32*)&real_ncpus);
1331 }
1332
1333 return KERN_SUCCESS;
1334
1335 processor_register_error:
1336 #if KPC
1337 kpc_unregister_cpu(this_cpu_datap);
1338 #endif /* KPC */
1339 if (!is_boot_cpu) {
1340 cpu_data_free(this_cpu_datap);
1341 }
1342
1343 return KERN_FAILURE;
1344 }
1345
1346 void
1347 ml_init_arm_debug_interface(
1348 void * in_cpu_datap,
1349 vm_offset_t virt_address)
1350 {
1351 ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1352 do_debugid();
1353 }
1354
1355 /*
1356 * Routine: init_ast_check
1357 * Function:
1358 */
1359 void
1360 init_ast_check(
1361 __unused processor_t processor)
1362 {
1363 }
1364
1365 /*
1366 * Routine: cause_ast_check
1367 * Function:
1368 */
1369 void
1370 cause_ast_check(
1371 processor_t processor)
1372 {
1373 if (current_processor() != processor) {
1374 cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1375 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1376 }
1377 }
1378
1379 extern uint32_t cpu_idle_count;
1380
1381 void
1382 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
1383 {
1384 *icp = ml_at_interrupt_context();
1385 *pidlep = (cpu_idle_count == real_ncpus);
1386 }
1387
1388 /*
1389 * Routine: ml_cause_interrupt
1390 * Function: Generate a fake interrupt
1391 */
1392 void
1393 ml_cause_interrupt(void)
1394 {
1395 return; /* BS_XXX */
1396 }
1397
1398 /* Map memory-mapped IO space */
1399 vm_offset_t
1400 ml_io_map(
1401 vm_offset_t phys_addr,
1402 vm_size_t size)
1403 {
1404 return io_map(phys_addr, size, VM_WIMG_IO);
1405 }
1406
1407 /* Map memory-mapped IO space (with protections specified) */
1408 vm_offset_t
1409 ml_io_map_with_prot(
1410 vm_offset_t phys_addr,
1411 vm_size_t size,
1412 vm_prot_t prot)
1413 {
1414 return io_map_with_prot(phys_addr, size, VM_WIMG_IO, prot);
1415 }
1416
1417 vm_offset_t
1418 ml_io_map_wcomb(
1419 vm_offset_t phys_addr,
1420 vm_size_t size)
1421 {
1422 return io_map(phys_addr, size, VM_WIMG_WCOMB);
1423 }
1424
1425 /* boot memory allocation */
1426 vm_offset_t
1427 ml_static_malloc(
1428 __unused vm_size_t size)
1429 {
1430 return (vm_offset_t) NULL;
1431 }
1432
1433 vm_map_address_t
1434 ml_map_high_window(
1435 vm_offset_t phys_addr,
1436 vm_size_t len)
1437 {
1438 return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1439 }
1440
1441 vm_offset_t
1442 ml_static_ptovirt(
1443 vm_offset_t paddr)
1444 {
1445 return phystokv(paddr);
1446 }
1447
1448 vm_offset_t
1449 ml_static_slide(
1450 vm_offset_t vaddr)
1451 {
1452 return phystokv(vaddr + vm_kernel_slide - gVirtBase + gPhysBase);
1453 }
1454
1455 vm_offset_t
1456 ml_static_unslide(
1457 vm_offset_t vaddr)
1458 {
1459 return ml_static_vtop(vaddr) - gPhysBase + gVirtBase - vm_kernel_slide;
1460 }
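/*
 * The two routines above are intended to be inverses for addresses within
 * the static kernel mappings: unsliding a slid kernel virtual address (or
 * vice versa) recovers the original address.
 */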
1461
1462 extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
1463
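/*
 * ml_static_protect: update kernel page-table permissions for a page-aligned
 * static region. W+X requests panic; block and contiguous-hint mappings are
 * accepted only if they already carry the requested protections. The TLB is
 * flushed for the range that was processed.
 */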
1464 kern_return_t
1465 ml_static_protect(
1466 vm_offset_t vaddr, /* kernel virtual address */
1467 vm_size_t size,
1468 vm_prot_t new_prot)
1469 {
1470 pt_entry_t arm_prot = 0;
1471 pt_entry_t arm_block_prot = 0;
1472 vm_offset_t vaddr_cur;
1473 ppnum_t ppn;
1474 kern_return_t result = KERN_SUCCESS;
1475
1476 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1477 panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) VM_MIN_KERNEL_ADDRESS);
1478 return KERN_FAILURE;
1479 }
1480
1481 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1482
1483 if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
1484 panic("ml_static_protect(): WX request on %p", (void *) vaddr);
1485 }
1486
1487 /* Set up the protection bits, and block bits so we can validate block mappings. */
1488 if (new_prot & VM_PROT_WRITE) {
1489 arm_prot |= ARM_PTE_AP(AP_RWNA);
1490 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
1491 } else {
1492 arm_prot |= ARM_PTE_AP(AP_RONA);
1493 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
1494 }
1495
1496 arm_prot |= ARM_PTE_NX;
1497 arm_block_prot |= ARM_TTE_BLOCK_NX;
1498
1499 if (!(new_prot & VM_PROT_EXECUTE)) {
1500 arm_prot |= ARM_PTE_PNX;
1501 arm_block_prot |= ARM_TTE_BLOCK_PNX;
1502 }
1503
1504 for (vaddr_cur = vaddr;
1505 vaddr_cur < trunc_page_64(vaddr + size);
1506 vaddr_cur += PAGE_SIZE) {
1507 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1508 if (ppn != (vm_offset_t) NULL) {
1509 tt_entry_t *tte2;
1510 pt_entry_t *pte_p;
1511 pt_entry_t ptmp;
1512
1513 #if XNU_MONITOR
1514 assert(!TEST_PAGE_RATIO_4);
1515 assert(!pmap_is_monitor(ppn));
1516 #endif
1517
1518 tte2 = arm_kva_to_tte(vaddr_cur);
1519
1520 if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1521 if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
1522 ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
1523 /*
1524 * We can support ml_static_protect on a block mapping if the mapping already has
1525 * the desired protections. We still want to run checks on a per-page basis.
1526 */
1527 continue;
1528 }
1529
1530 result = KERN_FAILURE;
1531 break;
1532 }
1533
1534 pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
1535 ptmp = *pte_p;
1536
1537 if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
1538 /*
1539 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
1540 * protections do not match the desired protections, then we will fail (as we cannot update
1541 * this mapping without updating other mappings as well).
1542 */
1543 result = KERN_FAILURE;
1544 break;
1545 }
1546
1547 __unreachable_ok_push
1548 if (TEST_PAGE_RATIO_4) {
1549 {
1550 unsigned int i;
1551 pt_entry_t *ptep_iter;
1552
1553 ptep_iter = pte_p;
1554 for (i = 0; i < 4; i++, ptep_iter++) {
1555 /* Note that there is a hole in the HINT sanity checking here. */
1556 ptmp = *ptep_iter;
1557
1558 /* We only need to update the page tables if the protections do not match. */
1559 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1560 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1561 *ptep_iter = ptmp;
1562 }
1563 }
1564 }
1565 } else {
1566 ptmp = *pte_p;
1567
1568 /* We only need to update the page tables if the protections do not match. */
1569 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1570 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1571 *pte_p = ptmp;
1572 }
1573 }
1574 __unreachable_ok_pop
1575 }
1576 }
1577
1578 if (vaddr_cur > vaddr) {
1579 assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
1580 flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
1581 }
1582
1583
1584 return result;
1585 }
1586
1587 /*
1588 * Routine: ml_static_mfree
1589 * Function:
1590 */
1591 void
1592 ml_static_mfree(
1593 vm_offset_t vaddr,
1594 vm_size_t size)
1595 {
1596 vm_offset_t vaddr_cur;
1597 ppnum_t ppn;
1598 uint32_t freed_pages = 0;
1599
1600 /* It is acceptable (if bad) to fail to free. */
1601 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1602 return;
1603 }
1604
1605 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1606
1607 for (vaddr_cur = vaddr;
1608 vaddr_cur < trunc_page_64(vaddr + size);
1609 vaddr_cur += PAGE_SIZE) {
1610 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1611 if (ppn != (vm_offset_t) NULL) {
1612 /*
1613 * It is not acceptable to fail to update the protections on a page
1614 * we will release to the VM. We need to either panic or continue.
1615 * For now, we'll panic (to help flag if there is memory we can
1616 * reclaim).
1617 */
1618 if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
1619 panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
1620 }
1621
1622 #if 0
1623 /*
1624 * Must NOT tear down the "V==P" mapping for vaddr_cur as the zone alias scheme
1625 * relies on the persistence of these mappings for all time.
1626 */
1627 // pmap_remove(kernel_pmap, (addr64_t) vaddr_cur, (addr64_t) (vaddr_cur + PAGE_SIZE));
1628 #endif
1629
1630 vm_page_create(ppn, (ppn + 1));
1631 freed_pages++;
1632 }
1633 }
1634 vm_page_lockspin_queues();
1635 vm_page_wire_count -= freed_pages;
1636 vm_page_wire_count_initial -= freed_pages;
1637 vm_page_unlock_queues();
1638 #if DEBUG
1639 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
1640 #endif
1641 }
1642
1643
1644 /* virtual to physical on wired pages */
1645 vm_offset_t
1646 ml_vtophys(vm_offset_t vaddr)
1647 {
1648 return kvtophys(vaddr);
1649 }
1650
1651 /*
1652 * Routine: ml_nofault_copy
1653 * Function: Perform a physical mode copy if the source and destination have
1654 * valid translations in the kernel pmap. If translations are present, they are
1655 * assumed to be wired; i.e., no attempt is made to guarantee that the
1656 * translations obtained remain valid for the duration of the copy process.
1657 */
1658 vm_size_t
1659 ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
1660 {
1661 addr64_t cur_phys_dst, cur_phys_src;
1662 vm_size_t count, nbytes = 0;
1663
1664 while (size > 0) {
1665 if (!(cur_phys_src = kvtophys(virtsrc))) {
1666 break;
1667 }
1668 if (!(cur_phys_dst = kvtophys(virtdst))) {
1669 break;
1670 }
1671 if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
1672 !pmap_valid_address(trunc_page_64(cur_phys_src))) {
1673 break;
1674 }
1675 count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
1676 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
1677 count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
1678 }
1679 if (count > size) {
1680 count = size;
1681 }
1682
1683 bcopy_phys(cur_phys_src, cur_phys_dst, count);
1684
1685 nbytes += count;
1686 virtsrc += count;
1687 virtdst += count;
1688 size -= count;
1689 }
1690
1691 return nbytes;
1692 }
1693
1694 /*
1695 * Routine: ml_validate_nofault
1696 * Function: Validate that this address range has valid translations
1697 * in the kernel pmap. If translations are present, they are
1698 * assumed to be wired; i.e. no attempt is made to guarantee
1699 * that the translations persist after the check.
1700 * Returns: TRUE if the range is mapped and will not cause a fault,
1701 * FALSE otherwise.
1702 */
1703
1704 boolean_t
1705 ml_validate_nofault(
1706 vm_offset_t virtsrc, vm_size_t size)
1707 {
1708 addr64_t cur_phys_src;
1709 uint32_t count;
1710
1711 while (size > 0) {
1712 if (!(cur_phys_src = kvtophys(virtsrc))) {
1713 return FALSE;
1714 }
1715 if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
1716 return FALSE;
1717 }
1718 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
1719 if (count > size) {
1720 count = (uint32_t)size;
1721 }
1722
1723 virtsrc += count;
1724 size -= count;
1725 }
1726
1727 return TRUE;
1728 }
1729
1730 void
1731 ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
1732 {
1733 *phys_addr = 0;
1734 *size = 0;
1735 }
1736
1737 void
1738 active_rt_threads(__unused boolean_t active)
1739 {
1740 }
1741
1742 static void
1743 cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
1744 {
1745 return;
1746 }
1747
1748 cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
1749
1750 void
1751 cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
1752 {
1753 if (cpu_qos_cb != NULL) {
1754 cpu_qos_update = cpu_qos_cb;
1755 } else {
1756 cpu_qos_update = cpu_qos_cb_default;
1757 }
1758 }
1759
1760 void
1761 thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
1762 {
1763 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
1764
1765 cpu_qos_update((int)urgency, rt_period, rt_deadline);
1766
1767 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
1768 }
1769
1770 void
1771 machine_run_count(__unused uint32_t count)
1772 {
1773 }
1774
1775 processor_t
1776 machine_choose_processor(__unused processor_set_t pset, processor_t processor)
1777 {
1778 return processor;
1779 }
1780
1781 #if KASAN
1782 vm_offset_t ml_stack_base(void);
1783 vm_size_t ml_stack_size(void);
1784
1785 vm_offset_t
1786 ml_stack_base(void)
1787 {
1788 uintptr_t local = (uintptr_t) &local;
1789 vm_offset_t intstack_top_ptr;
1790
1791 intstack_top_ptr = getCpuDatap()->intstack_top;
1792 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1793 return intstack_top_ptr - INTSTACK_SIZE;
1794 } else {
1795 return current_thread()->kernel_stack;
1796 }
1797 }
1798 vm_size_t
1799 ml_stack_size(void)
1800 {
1801 uintptr_t local = (uintptr_t) &local;
1802 vm_offset_t intstack_top_ptr;
1803
1804 intstack_top_ptr = getCpuDatap()->intstack_top;
1805 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1806 return INTSTACK_SIZE;
1807 } else {
1808 return kernel_stack_size;
1809 }
1810 }
1811 #endif
1812
1813 boolean_t
1814 machine_timeout_suspended(void)
1815 {
1816 return FALSE;
1817 }
1818
1819 kern_return_t
1820 ml_interrupt_prewarm(__unused uint64_t deadline)
1821 {
1822 return KERN_FAILURE;
1823 }
1824
1825 /*
1826 * Assumes fiq, irq disabled.
1827 */
1828 void
1829 ml_set_decrementer(uint32_t dec_value)
1830 {
1831 cpu_data_t *cdp = getCpuDatap();
1832
1833 assert(ml_get_interrupts_enabled() == FALSE);
1834 cdp->cpu_decrementer = dec_value;
1835
1836 if (cdp->cpu_set_decrementer_func) {
1837 ((void (*)(uint32_t))cdp->cpu_set_decrementer_func)(dec_value);
1838 } else {
1839 __asm__ volatile ("msr CNTP_TVAL_EL0, %0" : : "r"((uint64_t)dec_value));
1840 }
1841 }
1842
1843 uint64_t
1844 ml_get_hwclock()
1845 {
1846 uint64_t timebase;
1847
1848 // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
1849 // "Reads of CNTPCT[_EL0] can occur speculatively and out of order relative
1850 // to other instructions executed on the same processor."
1851 __builtin_arm_isb(ISB_SY);
1852 timebase = __builtin_arm_rsr64("CNTPCT_EL0");
1853
1854 return timebase;
1855 }
1856
1857 uint64_t
1858 ml_get_timebase()
1859 {
1860 return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase;
1861 }
1862
1863 uint32_t
1864 ml_get_decrementer()
1865 {
1866 cpu_data_t *cdp = getCpuDatap();
1867 uint32_t dec;
1868
1869 assert(ml_get_interrupts_enabled() == FALSE);
1870
1871 if (cdp->cpu_get_decrementer_func) {
1872 dec = ((uint32_t (*)(void))cdp->cpu_get_decrementer_func)();
1873 } else {
1874 uint64_t wide_val;
1875
1876 __asm__ volatile ("mrs %0, CNTP_TVAL_EL0" : "=r"(wide_val));
1877 dec = (uint32_t)wide_val;
1878 assert(wide_val == (uint64_t)dec);
1879 }
1880
1881 return dec;
1882 }
1883
1884 boolean_t
1885 ml_get_timer_pending()
1886 {
1887 uint64_t cntp_ctl;
1888
1889 __asm__ volatile ("mrs %0, CNTP_CTL_EL0" : "=r"(cntp_ctl));
1890 return ((cntp_ctl & CNTP_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
1891 }
1892
1893 boolean_t
1894 ml_wants_panic_trap_to_debugger(void)
1895 {
1896 boolean_t result = FALSE;
1897 #if XNU_MONITOR
1898 /*
1899 * This looks racy, but if we are in the PPL, preemption will be
1900 * disabled.
1901 */
1902 result = ((pmap_get_cpu_data()->ppl_state == PPL_STATE_DISPATCH) && pmap_ppl_locked_down);
1903 #endif
1904 return result;
1905 }
1906
1907 static void
1908 cache_trap_error(thread_t thread, vm_map_address_t fault_addr)
1909 {
1910 mach_exception_data_type_t exc_data[2];
1911 arm_saved_state_t *regs = get_user_regs(thread);
1912
1913 set_saved_state_far(regs, fault_addr);
1914
1915 exc_data[0] = KERN_INVALID_ADDRESS;
1916 exc_data[1] = fault_addr;
1917
1918 exception_triage(EXC_BAD_ACCESS, exc_data, 2);
1919 }
1920
1921 static void
1922 cache_trap_recover()
1923 {
1924 vm_map_address_t fault_addr;
1925
1926 __asm__ volatile ("mrs %0, FAR_EL1" : "=r"(fault_addr));
1927
1928 cache_trap_error(current_thread(), fault_addr);
1929 }
1930
1931 static void
1932 set_cache_trap_recover(thread_t thread)
1933 {
1934 #if defined(HAS_APPLE_PAC)
1935 thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover,
1936 ptrauth_key_function_pointer, 0,
1937 ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
1938 #else /* defined(HAS_APPLE_PAC) */
1939 thread->recover = (vm_address_t)cache_trap_recover;
1940 #endif /* defined(HAS_APPLE_PAC) */
1941 }
1942
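/*
 * dcache_flush_trap / icache_invalidate_trap: back the EL0 cache-maintenance
 * platform syscalls (codes 1 and 0 in platform_syscall below). Each one
 * bounds-checks the user range against the task's address-space limit,
 * installs cache_trap_recover so a faulting user address is reported as
 * EXC_BAD_ACCESS rather than taking down the kernel, and then performs the
 * requested maintenance operation.
 */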
1943 static void
1944 dcache_flush_trap(vm_map_address_t start, vm_map_size_t size)
1945 {
1946 vm_map_address_t end = start + size;
1947 thread_t thread = current_thread();
1948 vm_offset_t old_recover = thread->recover;
1949
1950 /* Check bounds */
1951 if (task_has_64Bit_addr(current_task())) {
1952 if (end > MACH_VM_MAX_ADDRESS) {
1953 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1954 }
1955 } else {
1956 if (end > VM_MAX_ADDRESS) {
1957 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1958 }
1959 }
1960
1961 if (start > end) {
1962 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1963 }
1964
1965 set_cache_trap_recover(thread);
1966
1967 /*
1968 * We're coherent on Apple ARM64 CPUs, so this could be a nop. However,
1969 * if the region given us is bad, it would be good to catch it and
1970 * crash, ergo we still do the flush.
1971 */
1972 FlushPoC_DcacheRegion(start, (uint32_t)size);
1973
1974 /* Restore recovery function */
1975 thread->recover = old_recover;
1976
1977 /* Return (caller does exception return) */
1978 }
1979
1980 static void
1981 icache_invalidate_trap(vm_map_address_t start, vm_map_size_t size)
1982 {
1983 vm_map_address_t end = start + size;
1984 thread_t thread = current_thread();
1985 vm_offset_t old_recover = thread->recover;
1986
1987 /* Check bounds */
1988 if (task_has_64Bit_addr(current_task())) {
1989 if (end > MACH_VM_MAX_ADDRESS) {
1990 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1991 }
1992 } else {
1993 if (end > VM_MAX_ADDRESS) {
1994 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1995 }
1996 }
1997
1998 if (start > end) {
1999 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
2000 }
2001
2002 set_cache_trap_recover(thread);
2003
2004 /* Invalidate iCache to point of unification */
2005 InvalidatePoU_IcacheRegion(start, (uint32_t)size);
2006
2007 /* Restore recovery function */
2008 thread->recover = old_recover;
2009
2010 /* Return (caller does exception return) */
2011 }
2012
2013 __attribute__((noreturn))
2014 void
2015 platform_syscall(arm_saved_state_t *state)
2016 {
2017 uint32_t code;
2018
2019 #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
2020
2021 code = (uint32_t)get_saved_state_reg(state, 3);
2022 switch (code) {
2023 case 0:
2024 /* I-Cache flush */
2025 platform_syscall_kprintf("icache flush requested.\n");
2026 icache_invalidate_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
2027 break;
2028 case 1:
2029 /* D-Cache flush */
2030 platform_syscall_kprintf("dcache flush requested.\n");
2031 dcache_flush_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
2032 break;
2033 case 2:
2034 /* set cthread */
2035 platform_syscall_kprintf("set cthread self.\n");
2036 thread_set_cthread_self(get_saved_state_reg(state, 0));
2037 break;
2038 case 3:
2039 /* get cthread */
2040 platform_syscall_kprintf("get cthread self.\n");
2041 set_saved_state_reg(state, 0, thread_get_cthread_self());
2042 break;
2043 default:
2044 platform_syscall_kprintf("unknown: %d\n", code);
2045 break;
2046 }
2047
2048 thread_exception_return();
2049 }
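/*
 * Illustrative sketch of how user space reaches this dispatcher: the
 * selector travels in x3 and the arguments in x0/x1, and the SVC handler
 * routes the trap here based on the syscall number in x16 (assumed here to
 * be 0x80000000, using the usual "svc #0x80" arm64 Darwin convention; the
 * helper below is hypothetical and only makes the register contract
 * concrete).
 *
 *     // hypothetical user-space helper: request an I-cache invalidation
 *     static void
 *     example_icache_invalidate(void *start, unsigned long size)
 *     {
 *             register unsigned long x0 __asm__("x0") = (unsigned long)start;
 *             register unsigned long x1 __asm__("x1") = size;
 *             register unsigned long x3 __asm__("x3") = 0;            // code 0: I-cache flush
 *             register unsigned long x16 __asm__("x16") = 0x80000000; // platform syscall (assumed)
 *
 *             __asm__ volatile ("svc #0x80"
 *                 : "+r"(x0)
 *                 : "r"(x1), "r"(x3), "r"(x16)
 *                 : "memory");
 *     }
 *
 * In practice user code should go through the public sys_icache_invalidate()
 * wrapper rather than issuing the trap directly.
 */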
2050
2051 static void
2052 _enable_timebase_event_stream(uint32_t bit_index)
2053 {
2054 uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */
2055
2056 if (bit_index >= 64) {
2057 panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
2058 }
2059
2060 __asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl));
2061
2062 cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
2063 cntkctl |= CNTKCTL_EL1_EVNTEN;
2064 cntkctl |= CNTKCTL_EL1_EVENTDIR; /* trigger on the 1->0 transition; either edge would do */
2065
2066 /*
2067 * If the SOC supports it (and it isn't broken), enable
2068 * EL0 access to the physical timebase register.
2069 */
2070 if (user_timebase_type() != USER_TIMEBASE_NONE) {
2071 cntkctl |= CNTKCTL_EL1_PL0PCTEN;
2072 }
2073
2074 __asm__ volatile ("msr CNTKCTL_EL1, %0" : : "r"(cntkctl));
2075 }
2076
2077 /*
2078 * Turn timer on, unmask that interrupt.
2079 */
2080 static void
2081 _enable_virtual_timer(void)
2082 {
2083 uint64_t cntvctl = CNTP_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "msr" prefers it this way */
2084
2085 __asm__ volatile ("msr CNTP_CTL_EL0, %0" : : "r"(cntvctl));
2086 }
2087
2088 uint64_t events_per_sec = 0;
2089
2090 void
2091 fiq_context_init(boolean_t enable_fiq __unused)
2092 {
2093 _enable_timebase_event_stream(fiq_eventi);
2094
2095 /* Interrupts still disabled. */
2096 assert(ml_get_interrupts_enabled() == FALSE);
2097 _enable_virtual_timer();
2098 }
2099
2100 void
2101 fiq_context_bootstrap(boolean_t enable_fiq)
2102 {
2103 #if defined(APPLE_ARM64_ARCH_FAMILY) || defined(BCM2837)
2104 /* Could fill in our own ops here, if we needed them */
2105 uint64_t ticks_per_sec, ticks_per_event;
2106 uint32_t bit_index;
2107
2108 ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2109 ticks_per_event = ticks_per_sec / events_per_sec;
2110 bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */
2111
2112 /* Round up to power of two */
2113 if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
2114 bit_index++;
2115 }
2116
2117 /*
2118 * The timer can only trigger on rising or falling edge,
2119 * not both; we don't care which we trigger on, but we
2120 * do need to adjust which bit we are interested in to
2121 * account for this.
2122 */
2123 if (bit_index != 0) {
2124 bit_index--;
2125 }
2126
2127 fiq_eventi = bit_index;
2128 #else
2129 #error Need a board configuration.
2130 #endif
2131 fiq_context_init(enable_fiq);
2132 }
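/*
 * Worked example of the computation above (numbers are illustrative and
 * assume a 24 MHz timebase with events_per_sec = 1000): ticks_per_event is
 * 24000, whose highest set bit is bit 14 (flsll() returns 15). 24000 is not
 * a power of two, so bit_index is rounded up to 15, and the edge adjustment
 * then brings it back to 14: bit 14 of the counter makes a 1->0 transition
 * once every 2^15 = 32768 ticks, i.e. roughly every 1.37 ms, the nearest
 * power-of-two period at or above the requested 1 ms.
 */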
2133
2134 boolean_t
2135 ml_delay_should_spin(uint64_t interval)
2136 {
2137 cpu_data_t *cdp = getCpuDatap();
2138
2139 if (cdp->cpu_idle_latency) {
2140 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2141 } else {
2142 /*
2143 * Early boot: the idle-exit latency is not yet known. Err on the side
2144 * of blocking, which should always be safe, even if slow.
2145 */
2146 return FALSE;
2147 }
2148 }
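/*
 * Sketch of the intended usage pattern (the helper below is hypothetical and
 * only illustrates the contract): callers compare the requested delay against
 * the measured idle-exit latency and spin only when waiting is shorter than
 * an idle entry/exit round trip.
 *
 *     static void
 *     example_delay(uint64_t interval_abs)
 *     {
 *             uint64_t deadline = mach_absolute_time() + interval_abs;
 *
 *             if (ml_delay_should_spin(interval_abs)) {
 *                     while (mach_absolute_time() < deadline) {
 *                             // busy-wait; cheaper than idling for short waits
 *                     }
 *             } else {
 *                     // long delay: block or let the CPU idle instead
 *             }
 *     }
 */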
2149
2150 boolean_t
2151 ml_thread_is64bit(thread_t thread)
2152 {
2153 return thread_is_64bit_addr(thread);
2154 }
2155
2156 void
2157 ml_delay_on_yield(void)
2158 {
2159 #if DEVELOPMENT || DEBUG
2160 if (yield_delay_us) {
2161 delay(yield_delay_us);
2162 }
2163 #endif
2164 }
2165
2166 void
2167 ml_timer_evaluate(void)
2168 {
2169 }
2170
2171 boolean_t
2172 ml_timer_forced_evaluation(void)
2173 {
2174 return FALSE;
2175 }
2176
2177 uint64_t
2178 ml_energy_stat(thread_t t)
2179 {
2180 return t->machine.energy_estimate_nj;
2181 }
2182
2183
2184 void
2185 ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
2186 {
2187 #if CONFIG_EMBEDDED
2188 /*
2189 * For now: update the resource coalition stats of the
2190 * current thread's coalition
2191 */
2192 task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2193 #endif
2194 }
2195
2196 uint64_t
2197 ml_gpu_stat(__unused thread_t t)
2198 {
2199 return 0;
2200 }
2201
2202 #if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME
2203 static void
2204 timer_state_event(boolean_t switch_to_kernel)
2205 {
2206 thread_t thread = current_thread();
2207 if (!thread->precise_user_kernel_time) {
2208 return;
2209 }
2210
2211 processor_data_t *pd = &getCpuDatap()->cpu_processor->processor_data;
2212 uint64_t now = ml_get_timebase();
2213
2214 timer_stop(pd->current_state, now);
2215 pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
2216 timer_start(pd->current_state, now);
2217
2218 timer_stop(pd->thread_timer, now);
2219 pd->thread_timer = (switch_to_kernel) ? &thread->system_timer : &thread->user_timer;
2220 timer_start(pd->thread_timer, now);
2221 }
2222
2223 void
2224 timer_state_event_user_to_kernel(void)
2225 {
2226 timer_state_event(TRUE);
2227 }
2228
2229 void
2230 timer_state_event_kernel_to_user(void)
2231 {
2232 timer_state_event(FALSE);
2233 }
2234 #endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME */
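/*
 * The expected pairing at the user/kernel boundary (the sequence below is
 * illustrative; the actual call sites live in the low-level exception entry
 * and return paths):
 *
 *     // on entry from EL0:
 *     timer_state_event_user_to_kernel();
 *     // ... handle the exception or syscall ...
 *     // just before returning to EL0:
 *     timer_state_event_kernel_to_user();
 */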
2235
2236 /*
2237 * The following are required for parts of the kernel
2238 * that cannot resolve these functions as inlines:
2239 */
2240 extern thread_t current_act(void) __attribute__((const));
2241 thread_t
2242 current_act(void)
2243 {
2244 return current_thread_fast();
2245 }
2246
2247 #undef current_thread
2248 extern thread_t current_thread(void) __attribute__((const));
2249 thread_t
2250 current_thread(void)
2251 {
2252 return current_thread_fast();
2253 }
2254
2255 typedef struct {
2256 ex_cb_t cb;
2257 void *refcon;
2258 } ex_cb_info_t;
2259
2260
2261 ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];
2262
2263 /*
2264 * Callback registration
2265 * Currently we support only one registered callback per class, but
2266 * it should be possible to support more callbacks.
2267 */
2268 kern_return_t
2269 ex_cb_register(
2270 ex_cb_class_t cb_class,
2271 ex_cb_t cb,
2272 void *refcon)
2273 {
2274 if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX)) {
2275 return KERN_INVALID_VALUE;
2276 }
2277
2278 ex_cb_info_t *pInfo = &ex_cb_info[cb_class]; /* index only after validating cb_class */
2279
2280 if (NULL == pInfo->cb) {
2281 pInfo->cb = cb;
2282 pInfo->refcon = refcon;
2283 return KERN_SUCCESS;
2284 }
2285 return KERN_FAILURE;
2286 }
2287
2288 /*
2289 * Called internally by platform kernel to invoke the registered callback for class
2290 */
2291 ex_cb_action_t
2292 ex_cb_invoke(
2293 ex_cb_class_t cb_class,
2294 vm_offset_t far)
2295 {
2296 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2297 ex_cb_state_t state = {far};
2298
2299 if (cb_class >= EXCB_CLASS_MAX) {
2300 panic("Invalid exception callback class 0x%x\n", cb_class);
2301 }
2302
2303 if (pInfo->cb) {
2304 return pInfo->cb(cb_class, pInfo->refcon, &state);
2305 }
2306 return EXCB_ACTION_NONE;
2307 }
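/*
 * Sketch of how a client would use the registration/invoke pair above. The
 * handler name is hypothetical, and the callback signature plus the
 * EXCB_CLASS_ILLEGAL_INSTR_SET / EXCB_ACTION_NONE constants are assumed to
 * match their declarations in machine_routines.h:
 *
 *     static ex_cb_action_t
 *     example_illegal_instr_set_cb(ex_cb_class_t cb_class, void *refcon,
 *         const ex_cb_state_t *state)
 *     {
 *             // state->far holds the faulting address supplied by the caller
 *             return EXCB_ACTION_NONE;
 *     }
 *
 *     // Fails with KERN_FAILURE if a callback for this class is already
 *     // installed:
 *     kern_return_t kr = ex_cb_register(EXCB_CLASS_ILLEGAL_INSTR_SET,
 *         example_illegal_instr_set_cb, NULL);
 *
 * The exception handler later calls ex_cb_invoke() with the faulting address
 * and acts on the returned ex_cb_action_t.
 */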
2308
2309 #if defined(HAS_APPLE_PAC)
2310 void
2311 ml_task_set_disable_user_jop(task_t task, boolean_t disable_user_jop)
2312 {
2313 assert(task);
2314 task->disable_user_jop = disable_user_jop;
2315 }
2316
2317 void
2318 ml_thread_set_disable_user_jop(thread_t thread, boolean_t disable_user_jop)
2319 {
2320 assert(thread);
2321 thread->machine.disable_user_jop = disable_user_jop;
2322 }
2323
2324 void
2325 ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2326 {
2327 if (inherit) {
2328 task->rop_pid = parent_task->rop_pid;
2329 } else {
2330 task->rop_pid = early_random();
2331 }
2332 }
2333 #endif /* defined(HAS_APPLE_PAC) */
2334
2335
2336 #if defined(HAS_APPLE_PAC)
2337
2338 /*
2339 * ml_auth_ptr_unchecked: call this instead of the ptrauth_auth_data
2340 * intrinsic when you don't want to trap on an authentication failure.
2341 *
2342 */
2343
2344 void *
2345 ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
2346 {
2347 switch (key & 0x3) {
2348 case ptrauth_key_asia:
2349 asm volatile ("autia %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
2350 break;
2351 case ptrauth_key_asib:
2352 asm volatile ("autib %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
2353 break;
2354 case ptrauth_key_asda:
2355 asm volatile ("autda %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
2356 break;
2357 case ptrauth_key_asdb:
2358 asm volatile ("autdb %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
2359 break;
2360 }
2361
2362 return ptr;
2363 }
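/*
 * Sketch of one way a caller might use this (the helper and its validity
 * check are hypothetical, shown only to illustrate the "no trap on failure"
 * contract): a failed authentication corrupts the pointer's extension bits
 * instead of raising an exception, so the result can be range-checked.
 *
 *     static void *
 *     example_auth_user_ptr(void *signed_ptr, uint64_t modifier)
 *     {
 *             void *p = ml_auth_ptr_unchecked(signed_ptr, ptrauth_key_asia, modifier);
 *
 *             // On failure the result is no longer a canonical user address;
 *             // reject it rather than dereferencing.
 *             if ((uintptr_t)p > MACH_VM_MAX_ADDRESS) {
 *                     return NULL;
 *             }
 *             return p;
 *     }
 */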
2364 #endif /* defined(HAS_APPLE_PAC) */