osfmk/arm64/machine_routines.c

   1 /*
   2  * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <arm64/proc_reg.h>
  30 #include <arm/machine_cpu.h>
  31 #include <arm/cpu_internal.h>
  32 #include <arm/cpuid.h>
  33 #include <arm/io_map_entries.h>
  34 #include <arm/cpu_data.h>
  35 #include <arm/cpu_data_internal.h>
  36 #include <arm/caches_internal.h>
  37 #include <arm/misc_protos.h>
  38 #include <arm/machdep_call.h>
  39 #include <arm/machine_routines.h>
  40 #include <arm/rtclock.h>
  41 #include <arm/cpuid_internal.h>
  42 #include <arm/cpu_capabilities.h>
  43 #include <console/serial_protos.h>
  44 #include <kern/machine.h>
  45 #include <prng/random.h>
  46 #include <kern/startup.h>
  47 #include <kern/thread.h>
  48 #include <kern/timer_queue.h>
  49 #include <mach/machine.h>
  50 #include <machine/atomic.h>
  51 #include <vm/pmap.h>
  52 #include <vm/vm_page.h>
  53 #include <sys/kdebug.h>
  54 #include <kern/coalition.h>
  55 #include <pexpert/device_tree.h>
  56
  57 #include <IOKit/IOPlatformExpert.h>
  58
  59 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
  60 #include <libkern/kernel_mach_header.h>
  61 #endif
  62
  63 #include <libkern/section_keywords.h>
  64
  65 #if KPC
  66 #include <kern/kpc.h>
  67 #endif
  68
  69 #if HAS_CLUSTER
  70 static uint8_t cluster_initialized = 0;
  71 #endif
  72
  73
  74 static int max_cpus_initialized = 0;
  75 #define MAX_CPUS_SET    0x1
  76 #define MAX_CPUS_WAIT   0x2
  77
  78 uint32_t LockTimeOut;
  79 uint32_t LockTimeOutUsec;
  80 uint64_t TLockTimeOut;
  81 uint64_t MutexSpin;
  82 boolean_t is_clock_configured = FALSE;
  83
  84 uint32_t yield_delay_us = 0; /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
  85
  86 #if CONFIG_NONFATAL_ASSERTS
  87 extern int mach_assert;
  88 #endif
  89 extern volatile uint32_t debug_enabled;
  90
  91 extern vm_offset_t   segLOWEST;
  92 extern vm_offset_t   segLOWESTTEXT;
  93 extern vm_offset_t   segLASTB;
  94 extern unsigned long segSizeLAST;
  95
  96 #if defined(HAS_IPI)
  97 unsigned int gFastIPI = 1;
  98 #define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
  99 static uint64_t deferred_ipi_timer_ns = kDeferredIPITimerDefault;
 100 #endif /* defined(HAS_IPI) */
 101
 102 void machine_conf(void);
 103
 104 thread_t Idle_context(void);
 105
 106 SECURITY_READ_ONLY_LATE(static uint32_t) cpu_phys_ids[MAX_CPUS] = {[0 ... MAX_CPUS - 1] = (uint32_t)-1};
 107 SECURITY_READ_ONLY_LATE(static unsigned int) avail_cpus = 0;
 108 SECURITY_READ_ONLY_LATE(static int) boot_cpu = -1;
 109 SECURITY_READ_ONLY_LATE(static int) max_cpu_number = 0;
 110 SECURITY_READ_ONLY_LATE(cluster_type_t) boot_cluster = CLUSTER_TYPE_SMP;
 111
 112 SECURITY_READ_ONLY_LATE(static uint32_t) fiq_eventi = UINT32_MAX;
 113
 114 lockdown_handler_t lockdown_handler;
 115 void *lockdown_this;
 116 lck_mtx_t lockdown_handler_lck;
 117 lck_grp_t *lockdown_handler_grp;
 118 int lockdown_done;
 119
 120 void ml_lockdown_init(void);
 121 void ml_lockdown_run_handler(void);
 122 uint32_t get_arm_cpu_version(void);
 123
 124 #if defined(HAS_IPI)
 125 static inline void
 126 ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
 127 {
 128 #if HAS_CLUSTER
 129         uint64_t local_mpidr;
 130         /* NOTE: this logic expects that we are called in a non-preemptible
 131          * context, or at least one in which the calling thread is bound
 132          * to a single CPU.  Otherwise we may migrate between choosing which
 133          * IPI mechanism to use and issuing the IPI. */
 134         MRS(local_mpidr, "MPIDR_EL1");
 135         if ((local_mpidr & MPIDR_AFF1_MASK) == (cpu_mpidr & MPIDR_AFF1_MASK)) {
 136                 uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
 137                 MSR(ARM64_REG_IPI_RR_LOCAL, x);
 138         } else {
 139                 #define IPI_RR_TARGET_CLUSTER_SHIFT 16
 140                 uint64_t x = type | ((cpu_mpidr & MPIDR_AFF1_MASK) << (IPI_RR_TARGET_CLUSTER_SHIFT - MPIDR_AFF1_SHIFT)) | (cpu_mpidr & MPIDR_AFF0_MASK);
 141                 MSR(ARM64_REG_IPI_RR_GLOBAL, x);
 142         }
 143 #else
 144         uint64_t x = type | (cpu_mpidr & MPIDR_AFF0_MASK);
 145         MSR(ARM64_REG_IPI_RR, x);
 146 #endif
 147 }
 148 #endif
 149
 150 #if !defined(HAS_IPI)
 151 __dead2
 152 #endif
 153 void
 154 ml_cpu_signal(unsigned int cpu_mpidr __unused)
 155 {
 156 #if defined(HAS_IPI)
 157         ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
 158 #else
 159         panic("Platform does not support ACC Fast IPI");
 160 #endif
 161 }
 162
 163 #if !defined(HAS_IPI)
 164 __dead2
 165 #endif
 166 void
 167 ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
 168 {
 169 #if defined(HAS_IPI)
 170         /* adjust IPI_CR timer countdown value for deferred IPI
 171          * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
 172          * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
 173          *
 174          * global register, should only require a single write to update all
 175          * CPU cores: from Skye ACC user spec section 5.7.3.3
 176          *
 177          * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
 178          * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
 179          */
 180         uint64_t abstime;
 181
 182         nanoseconds_to_absolutetime(nanosecs, &abstime);
 183
 184         abstime = MIN(abstime, 0xFFFF);
 185
 186         /* update deferred_ipi_timer_ns with the new clamped value */
 187         absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
 188
 189         MSR(ARM64_REG_IPI_CR, abstime);
 190 #else
 191         (void)nanosecs;
 192         panic("Platform does not support ACC Fast IPI");
 193 #endif
 194 }
 195
 196 uint64_t
 197 ml_cpu_signal_deferred_get_timer()
 198 {
 199 #if defined(HAS_IPI)
 200         return deferred_ipi_timer_ns;
 201 #else
 202         return 0;
 203 #endif
 204 }
 205
 206 #if !defined(HAS_IPI)
 207 __dead2
 208 #endif
 209 void
 210 ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
 211 {
 212 #if defined(HAS_IPI)
 213         ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
 214 #else
 215         panic("Platform does not support ACC Fast IPI deferral");
 216 #endif
 217 }
 218
 219 #if !defined(HAS_IPI)
 220 __dead2
 221 #endif
 222 void
 223 ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
 224 {
 225 #if defined(HAS_IPI)
 226         ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
 227 #else
 228         panic("Platform does not support ACC Fast IPI retraction");
 229 #endif
 230 }
 231
 232 void
 233 machine_idle(void)
 234 {
 235         __builtin_arm_wsr("DAIFSet", (DAIFSC_IRQF | DAIFSC_FIQF));
 236         Idle_context();
 237         __builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
 238 }
 239
 240 void
 241 init_vfp(void)
 242 {
 243         return;
 244 }
 245
 246 boolean_t
 247 get_vfp_enabled(void)
 248 {
 249         return TRUE;
 250 }
 251
 252 void
 253 OSSynchronizeIO(void)
 254 {
 255         __builtin_arm_dsb(DSB_SY);
 256 }
 257
 258 uint64_t
 259 get_aux_control(void)
 260 {
 261         uint64_t        value;
 262
 263         MRS(value, "ACTLR_EL1");
 264         return value;
 265 }
 266
 267 uint64_t
 268 get_mmu_control(void)
 269 {
 270         uint64_t        value;
 271
 272         MRS(value, "SCTLR_EL1");
 273         return value;
 274 }
 275
 276 uint64_t
 277 get_tcr(void)
 278 {
 279         uint64_t        value;
 280
 281         MRS(value, "TCR_EL1");
 282         return value;
 283 }
 284
 285 boolean_t
 286 ml_get_interrupts_enabled(void)
 287 {
 288         uint64_t        value;
 289
 290         MRS(value, "DAIF");
 291         if (value & DAIF_IRQF) {
 292                 return FALSE;
 293         }
 294         return TRUE;
 295 }
 296
 297 pmap_paddr_t
 298 get_mmu_ttb(void)
 299 {
 300         pmap_paddr_t    value;
 301
 302         MRS(value, "TTBR0_EL1");
 303         return value;
 304 }
 305
 306 uint32_t
 307 get_arm_cpu_version(void)
 308 {
 309         uint32_t value = machine_read_midr();
 310
 311         /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
 312         return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
 313 }
 314
 315 /*
 316  * user_cont_hwclock_allowed()
 317  *
 318  * Indicates whether we allow EL0 to read the physical timebase (CNTPCT_EL0)
 319  * as a continuous time source (e.g. from mach_continuous_time)
 320  */
 321 boolean_t
 322 user_cont_hwclock_allowed(void)
 323 {
 324 #if HAS_CONTINUOUS_HWCLOCK
 325         return TRUE;
 326 #else
 327         return FALSE;
 328 #endif
 329 }
 330
 331
 332 uint8_t
 333 user_timebase_type(void)
 334 {
 335         return USER_TIMEBASE_SPEC;
 336 }
 337
 338 boolean_t
 339 arm64_wfe_allowed(void)
 340 {
 341         return TRUE;
 342 }
 343
 344 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
 345
 346 uint64_t rorgn_begin __attribute__((section("__DATA, __const"))) = 0;
 347 uint64_t rorgn_end   __attribute__((section("__DATA, __const"))) = 0;
 348 vm_offset_t amcc_base;
 349
 350 static void assert_unlocked(void);
 351 static void assert_amcc_cache_disabled(void);
 352 static void lock_amcc(void);
 353 static void lock_mmu(uint64_t begin, uint64_t end);
 354
 355 void
 356 rorgn_stash_range(void)
 357 {
 358 #if DEVELOPMENT || DEBUG
 359         boolean_t rorgn_disable = FALSE;
 360
 361         PE_parse_boot_argn("-unsafe_kernel_text", &rorgn_disable, sizeof(rorgn_disable));
 362
 363         if (rorgn_disable) {
 364                 /* take early out if boot arg present, don't query any machine registers to avoid
 365                  * dependency on amcc DT entry
 366                  */
 367                 return;
 368         }
 369 #endif
 370
 371         /* Get the AMC values, and stash them into rorgn_begin, rorgn_end.
 372          * gPhysBase is the base of DRAM managed by xnu. we need DRAM_BASE as
 373          * the AMCC RO region begin/end registers are in units of 16KB page
 374          * numbers from DRAM_BASE so we'll truncate gPhysBase at 512MB granule
 375          * and assert the value is the canonical DRAM_BASE PA of 0x8_0000_0000 for arm64.
 376          */
 377
 378         uint64_t dram_base = gPhysBase & ~0x1FFFFFFFULL;  /* 512MB */
 379         assert(dram_base == 0x800000000ULL);
 380
 381 #if defined(KERNEL_INTEGRITY_KTRR)
 382         uint64_t soc_base = 0;
 383         DTEntry entryP = NULL;
 384         uintptr_t *reg_prop = NULL;
 385         uint32_t prop_size = 0;
 386         int rc;
 387
 388         soc_base = pe_arm_get_soc_base_phys();
 389         rc = DTFindEntry("name", "mcc", &entryP);
 390         assert(rc == kSuccess);
 391         rc = DTGetProperty(entryP, "reg", (void **)&reg_prop, &prop_size);
 392         assert(rc == kSuccess);
 393         amcc_base = ml_io_map(soc_base + *reg_prop, *(reg_prop + 1));
 394 #elif defined(KERNEL_INTEGRITY_CTRR)
 395         /* TODO: t8020 mcc entry not in device tree yet; we'll do it LIVE */
 396 #define TEMP_AMCC_BASE_PA 0x200000000ULL
 397 #define TEMP_AMCC_SZ      0x100000
 398         amcc_base = ml_io_map(TEMP_AMCC_BASE_PA, TEMP_AMCC_SZ);
 399 #else
 400 #error "KERNEL_INTEGRITY config error"
 401 #endif
 402
 403 #if defined(KERNEL_INTEGRITY_KTRR)
 404         assert(rRORGNENDADDR > rRORGNBASEADDR);
 405         rorgn_begin = (rRORGNBASEADDR << AMCC_PGSHIFT) + dram_base;
 406         rorgn_end   = (rRORGNENDADDR << AMCC_PGSHIFT) + dram_base;
 407 #elif defined(KERNEL_INTEGRITY_CTRR)
 408         rorgn_begin = rCTRR_AMCC_PLANE_REG(0, CTRR_A_BASEADDR);
 409         rorgn_end   = rCTRR_AMCC_PLANE_REG(0, CTRR_A_ENDADDR);
 410         assert(rorgn_end > rorgn_begin);
 411
 412         for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
 413                 uint32_t begin = rCTRR_AMCC_PLANE_REG(i, CTRR_A_BASEADDR);
 414                 uint32_t end = rCTRR_AMCC_PLANE_REG(i, CTRR_A_ENDADDR);
 415                 if (!(begin == rorgn_begin && end == rorgn_end)) {
 416 #if DEVELOPMENT || DEBUG
 417                         panic("iboot programmed CTRR bounds are inconsistent");
 418 #else
 419                         panic("Inconsistent memory configuration");
 420 #endif
 421                 }
 422         }
 423
 424         // convert from page number from DRAM base to PA
 425         rorgn_begin = (rorgn_begin << AMCC_PGSHIFT) + dram_base;
 426         rorgn_end   = (rorgn_end << AMCC_PGSHIFT) + dram_base;
 427
 428 #else
 429 #error KERNEL_INTEGRITY config error
 430 #endif /* defined (KERNEL_INTEGRITY_KTRR) */
 431 }
 432
 433 static void
 434 assert_unlocked()
 435 {
 436         uint64_t ktrr_lock = 0;
 437         uint32_t rorgn_lock = 0;
 438
 439         assert(amcc_base);
 440 #if defined(KERNEL_INTEGRITY_KTRR)
 441         rorgn_lock = rRORGNLOCK;
 442         ktrr_lock = __builtin_arm_rsr64(ARM64_REG_KTRR_LOCK_EL1);
 443 #elif defined(KERNEL_INTEGRITY_CTRR)
 444         for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
 445                 rorgn_lock |= rCTRR_AMCC_PLANE_REG(i, CTRR_A_LOCK);
 446         }
 447         ktrr_lock = __builtin_arm_rsr64(ARM64_REG_CTRR_LOCK_EL1);
 448 #else
 449 #error KERNEL_INTEGRITY config error
 450 #endif /* defined(KERNEL_INTEGRITY_KTRR) */
 451
 452         assert(!ktrr_lock);
 453         assert(!rorgn_lock);
 454 }
 455
 456 static void
 457 lock_amcc()
 458 {
 459 #if defined(KERNEL_INTEGRITY_KTRR)
 460         rRORGNLOCK = 1;
 461         __builtin_arm_isb(ISB_SY);
 462 #elif defined(KERNEL_INTEGRITY_CTRR)
 463         /* lockdown planes in reverse order as plane 0 should be locked last */
 464         for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
 465                 rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_ENABLE) = 1;
 466                 rCTRR_AMCC_PLANE_REG(CTRR_AMCC_MAX_PLANES - i - 1, CTRR_A_LOCK) = 1;
 467                 __builtin_arm_isb(ISB_SY);
 468         }
 469 #else
 470 #error KERNEL_INTEGRITY config error
 471 #endif
 472 }
 473
 474 static void
 475 lock_mmu(uint64_t begin, uint64_t end)
 476 {
 477 #if defined(KERNEL_INTEGRITY_KTRR)
 478
 479         __builtin_arm_wsr64(ARM64_REG_KTRR_LOWER_EL1, begin);
 480         __builtin_arm_wsr64(ARM64_REG_KTRR_UPPER_EL1, end);
 481         __builtin_arm_wsr64(ARM64_REG_KTRR_LOCK_EL1, 1ULL);
 482
 483         /* flush TLB */
 484
 485         __builtin_arm_isb(ISB_SY);
 486         flush_mmu_tlb();
 487
 488 #elif defined (KERNEL_INTEGRITY_CTRR)
 489         /* this will lock the entire bootstrap cluster. non bootstrap clusters
 490          * will be locked by respective cluster master in start.s */
 491
 492         __builtin_arm_wsr64(ARM64_REG_CTRR_A_LWR_EL1, begin);
 493         __builtin_arm_wsr64(ARM64_REG_CTRR_A_UPR_EL1, end);
 494
 495 #if !defined(APPLEVORTEX)
 496         /* H12 changed sequence, must invalidate TLB immediately after setting CTRR bounds */
 497         __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
 498         flush_mmu_tlb();
 499 #endif /* !defined(APPLEVORTEX) */
 500
 501         __builtin_arm_wsr64(ARM64_REG_CTRR_CTL_EL1, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
 502         __builtin_arm_wsr64(ARM64_REG_CTRR_LOCK_EL1, 1ULL);
 503
 504         uint64_t current_el = __builtin_arm_rsr64("CurrentEL");
 505         if (current_el == PSR64_MODE_EL2) {
 506                 // CTRR v2 has explicit registers for cluster config. they can only be written in EL2
 507
 508                 __builtin_arm_wsr64(ACC_CTRR_A_LWR_EL2, begin);
 509                 __builtin_arm_wsr64(ACC_CTRR_A_UPR_EL2, end);
 510                 __builtin_arm_wsr64(ACC_CTRR_CTL_EL2, CTRR_CTL_EL1_A_PXN | CTRR_CTL_EL1_A_MMUON_WRPROTECT);
 511                 __builtin_arm_wsr64(ACC_CTRR_LOCK_EL2, 1ULL);
 512         }
 513
 514         __builtin_arm_isb(ISB_SY); /* ensure all prior MSRs are complete */
 515 #if defined(APPLEVORTEX)
 516         flush_mmu_tlb();
 517 #endif /* defined(APPLEVORTEX) */
 518
 519 #else /* defined(KERNEL_INTEGRITY_KTRR) */
 520 #error KERNEL_INTEGRITY config error
 521 #endif /* defined(KERNEL_INTEGRITY_KTRR) */
 522 }
 523
 524 static void
 525 assert_amcc_cache_disabled()
 526 {
 527 #if defined(KERNEL_INTEGRITY_KTRR)
 528         assert((rMCCGEN & 1) == 0); /* assert M$ disabled or LLC clean will be unreliable */
 529 #elif defined(KERNEL_INTEGRITY_CTRR) && (defined(ARM64_BOARD_CONFIG_T8006))
 530         /*
 531          * T8006 differentiates between data and tag ways being powered up, so
 532          * make sure to check that both are zero on its single memory plane.
 533          */
 534         assert((rCTRR_AMCC_PLANE_REG(0, CTRR_AMCC_PWRONWAYCNTSTATUS) &
 535             (AMCC_CURTAGWAYCNT_MASK | AMCC_CURDATWAYCNT_MASK)) == 0);
 536 #elif defined (KERNEL_INTEGRITY_CTRR)
 537         for (int i = 0; i < CTRR_AMCC_MAX_PLANES; ++i) {
 538                 assert(rCTRR_AMCC_PLANE_REG(i, CTRR_AMCC_WAYONCNT) == 0);
 539         }
 540 #else
 541 #error KERNEL_INTEGRITY config error
 542 #endif
 543 }
 544
 545 /*
 546  * void rorgn_lockdown(void)
 547  *
 548  * Lock the MMU and AMCC RORegion within lower and upper boundaries if not already locked
 549  *
 550  * [ ] - ensure this is being called ASAP on secondary CPUs: KTRR programming and lockdown handled in
 551  *       start.s:start_cpu() for subsequent wake/resume of all cores
 552  */
 553 void
 554 rorgn_lockdown(void)
 555 {
 556         vm_offset_t ktrr_begin, ktrr_end;
 557         unsigned long last_segsz;
 558
 559 #if DEVELOPMENT || DEBUG
 560         boolean_t ktrr_disable = FALSE;
 561
 562         PE_parse_boot_argn("-unsafe_kernel_text", &ktrr_disable, sizeof(ktrr_disable));
 563
 564         if (ktrr_disable) {
 565                 /*
 566                  * take early out if boot arg present, since we may not have amcc DT entry present
 567                  * we can't assert that iboot hasn't programmed the RO region lockdown registers
 568                  */
 569                 goto out;
 570         }
 571 #endif /* DEVELOPMENT || DEBUG */
 572
 573         assert_unlocked();
 574
 575         /* [x] - Use final method of determining all kernel text range or expect crashes */
 576         ktrr_begin = segLOWEST;
 577         assert(ktrr_begin && gVirtBase && gPhysBase);
 578
 579         ktrr_begin = kvtophys(ktrr_begin);
 580
 581         ktrr_end   = kvtophys(segLASTB);
 582         last_segsz = segSizeLAST;
 583 #if defined(KERNEL_INTEGRITY_KTRR)
 584         /* __LAST is not part of the MMU KTRR region (it is however part of the AMCC KTRR region) */
 585         ktrr_end = (ktrr_end - 1) & ~AMCC_PGMASK;
 586         /* ensure that iboot and xnu agree on the ktrr range */
 587         assert(rorgn_begin == ktrr_begin && rorgn_end == (ktrr_end + last_segsz));
 588         /* assert that __LAST segment containing privileged insns is only a single page */
 589         assert(last_segsz == PAGE_SIZE);
 590 #elif defined(KERNEL_INTEGRITY_CTRR)
 591         ktrr_end = (ktrr_end + last_segsz - 1) & ~AMCC_PGMASK;
 592         /* __LAST is part of MMU CTRR region. Can't use the KTRR style method of making
 593          * __pinst no execute because PXN applies with MMU off in CTRR. */
 594         assert(rorgn_begin == ktrr_begin && rorgn_end == ktrr_end);
 595 #endif
 596
 597
 598 #if DEBUG || DEVELOPMENT
 599         printf("KTRR Begin: %p End: %p, setting lockdown\n", (void *)ktrr_begin, (void *)ktrr_end);
 600 #endif
 601
 602         /* [x] - ensure all in flight writes are flushed to AMCC before enabling RO Region Lock */
 603
 604         assert_amcc_cache_disabled();
 605
 606         CleanPoC_DcacheRegion_Force(phystokv(ktrr_begin),
 607             (unsigned)((ktrr_end + last_segsz) - ktrr_begin + AMCC_PGMASK));
 608
 609         lock_amcc();
 610
 611         lock_mmu(ktrr_begin, ktrr_end);
 612
 613 #if DEVELOPMENT || DEBUG
 614 out:
 615 #endif
 616
 617 #if defined(KERNEL_INTEGRITY_CTRR)
 618         {
 619                 /* wake any threads blocked on cluster master lockdown */
 620                 cpu_data_t *cdp;
 621                 uint64_t mpidr_el1_value;
 622
 623                 cdp = getCpuDatap();
 624                 MRS(mpidr_el1_value, "MPIDR_EL1");
 625                 cdp->cpu_cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
 626                 assert(cdp->cpu_cluster_id < __ARM_CLUSTER_COUNT__);
 627                 ctrr_cluster_locked[cdp->cpu_cluster_id] = 1;
 628                 thread_wakeup(&ctrr_cluster_locked[cdp->cpu_cluster_id]);
 629         }
 630 #endif
 631         /* now we can run lockdown handler */
 632         ml_lockdown_run_handler();
 633 }
 634
 635 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
 636
 637 void
 638 machine_startup(__unused boot_args * args)
 639 {
 640         int boot_arg;
 641
 642 #if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
 643         if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
 644                 gFastIPI = 1;
 645         }
 646
 647         PE_parse_boot_argn("fastipitimeout", &deferred_ipi_timer_ns, sizeof(deferred_ipi_timer_ns));
 648 #endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
 649
 650 #if CONFIG_NONFATAL_ASSERTS
 651         PE_parse_boot_argn("assert", &mach_assert, sizeof(mach_assert));
 652 #endif
 653
 654         if (PE_parse_boot_argn("preempt", &boot_arg, sizeof(boot_arg))) {
 655                 default_preemption_rate = boot_arg;
 656         }
 657         if (PE_parse_boot_argn("bg_preempt", &boot_arg, sizeof(boot_arg))) {
 658                 default_bg_preemption_rate = boot_arg;
 659         }
 660
 661         PE_parse_boot_argn("yield_delay_us", &yield_delay_us, sizeof(yield_delay_us));
 662
 663         machine_conf();
 664
 665         /*
 666          * Kick off the kernel bootstrap.
 667          */
 668         kernel_bootstrap();
 669         /* NOTREACHED */
 670 }
 671
 672 void
 673 machine_lockdown_preflight(void)
 674 {
 675 #if CONFIG_KERNEL_INTEGRITY
 676
 677 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
 678         rorgn_stash_range();
 679 #endif
 680
 681 #endif
 682 }
 683
 684 void
 685 machine_lockdown(void)
 686 {
 687 #if CONFIG_KERNEL_INTEGRITY
 688 #if KERNEL_INTEGRITY_WT
 689         /* Watchtower
 690          *
 691          * Notify the monitor about the completion of early kernel bootstrap.
 692          * From this point forward it will enforce the integrity of kernel text,
 693          * rodata and page tables.
 694          */
 695
 696 #ifdef MONITOR
 697         monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
 698 #endif
 699 #endif /* KERNEL_INTEGRITY_WT */
 700
 701 #if XNU_MONITOR
 702         pmap_lockdown_ppl();
 703 #endif
 704
 705 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
 706         /* KTRR
 707          *
 708          * Lock physical KTRR region. KTRR region is read-only. Memory outside
 709          * the region is not executable at EL1.
 710          */
 711
 712         rorgn_lockdown();
 713 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
 714
 715
 716 #endif /* CONFIG_KERNEL_INTEGRITY */
 717 }
 718
 719 char           *
 720 machine_boot_info(
 721         __unused char *buf,
 722         __unused vm_size_t size)
 723 {
 724         return PE_boot_args();
 725 }
 726
 727 void
 728 machine_conf(void)
 729 {
 730         /*
 731          * This is known to be inaccurate. mem_size should always be capped at 2 GB
 732          */
 733         machine_info.memory_size = (uint32_t)mem_size;
 734 }
 735
 736 void
 737 machine_init(void)
 738 {
 739         debug_log_init();
 740         clock_config();
 741         is_clock_configured = TRUE;
 742         if (debug_enabled) {
 743                 pmap_map_globals();
 744         }
 745 }
 746
 747 void
 748 slave_machine_init(__unused void *param)
 749 {
 750         cpu_machine_init();     /* Initialize the processor */
 751         clock_init();           /* Init the clock */
 752 }
 753
 754 /*
 755  *      Routine:        machine_processor_shutdown
 756  *      Function:
 757  */
 758 thread_t
 759 machine_processor_shutdown(
 760         __unused thread_t thread,
 761         void (*doshutdown)(processor_t),
 762         processor_t processor)
 763 {
 764         return Shutdown_context(doshutdown, processor);
 765 }
 766
 767 /*
 768  *      Routine:        ml_init_max_cpus
 769  *      Function:
 770  */
 771 void
 772 ml_init_max_cpus(unsigned int max_cpus)
 773 {
 774         boolean_t       current_state;
 775
 776         current_state = ml_set_interrupts_enabled(FALSE);
 777         if (max_cpus_initialized != MAX_CPUS_SET) {
 778                 machine_info.max_cpus = max_cpus;
 779                 machine_info.physical_cpu_max = max_cpus;
 780                 machine_info.logical_cpu_max = max_cpus;
 781                 if (max_cpus_initialized == MAX_CPUS_WAIT) {
 782                         thread_wakeup((event_t) &max_cpus_initialized);
 783                 }
 784                 max_cpus_initialized = MAX_CPUS_SET;
 785         }
 786         (void) ml_set_interrupts_enabled(current_state);
 787 }
 788
 789 /*
 790  *      Routine:        ml_get_max_cpus
 791  *      Function:
 792  */
 793 unsigned int
 794 ml_get_max_cpus(void)
 795 {
 796         boolean_t       current_state;
 797
 798         current_state = ml_set_interrupts_enabled(FALSE);
 799         if (max_cpus_initialized != MAX_CPUS_SET) {
 800                 max_cpus_initialized = MAX_CPUS_WAIT;
 801                 assert_wait((event_t) &max_cpus_initialized, THREAD_UNINT);
 802                 (void) thread_block(THREAD_CONTINUE_NULL);
 803         }
 804         (void) ml_set_interrupts_enabled(current_state);
 805         return machine_info.max_cpus;
 806 }
 807
 808 /*
 809  *      Routine:        ml_init_lock_timeout
 810  *      Function:
 811  */
 812 void
 813 ml_init_lock_timeout(void)
 814 {
 815         uint64_t        abstime;
 816         uint64_t        mtxspin;
 817         uint64_t        default_timeout_ns = NSEC_PER_SEC >> 2;
 818         uint32_t        slto;
 819
 820         if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
 821                 default_timeout_ns = slto * NSEC_PER_USEC;
 822         }
 823
 824         nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
 825         LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
 826         LockTimeOut = (uint32_t)abstime;
 827
 828         if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
 829                 nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &abstime);
 830                 TLockTimeOut = abstime;
 831         } else {
 832                 TLockTimeOut = LockTimeOut >> 1;
 833         }
 834
 835         if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
 836                 if (mtxspin > USEC_PER_SEC >> 4) {
 837                         mtxspin =  USEC_PER_SEC >> 4;
 838                 }
 839                 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
 840         } else {
 841                 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
 842         }
 843         MutexSpin = abstime;
 844 }
 845
 846 /*
 847  * This is called from the machine-independent routine cpu_up()
 848  * to perform machine-dependent info updates.
 849  */
 850 void
 851 ml_cpu_up(void)
 852 {
 853         os_atomic_inc(&machine_info.physical_cpu, relaxed);
 854         os_atomic_inc(&machine_info.logical_cpu, relaxed);
 855 }
 856
 857 /*
 858  * This is called from the machine-independent routine cpu_down()
 859  * to perform machine-dependent info updates.
 860  */
 861 void
 862 ml_cpu_down(void)
 863 {
 864         cpu_data_t      *cpu_data_ptr;
 865
 866         os_atomic_dec(&machine_info.physical_cpu, relaxed);
 867         os_atomic_dec(&machine_info.logical_cpu, relaxed);
 868
 869         /*
 870          * If we want to deal with outstanding IPIs, we need to
 871          * do relatively early in the processor_doshutdown path,
 872          * as we pend decrementer interrupts using the IPI
 873          * mechanism if we cannot immediately service them (if
 874          * IRQ is masked).  Do so now.
 875          *
 876          * We aren't on the interrupt stack here; would it make
 877          * more sense to disable signaling and then enable
 878          * interrupts?  It might be a bit cleaner.
 879          */
 880         cpu_data_ptr = getCpuDatap();
 881         cpu_data_ptr->cpu_running = FALSE;
 882
 883         if (cpu_data_ptr != &BootCpuData) {
 884                 /*
 885                  * Move all of this cpu's timers to the master/boot cpu,
 886                  * and poke it in case there's a sooner deadline for it to schedule.
 887                  */
 888                 timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
 889                 cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, NULL);
 890         }
 891
 892         cpu_signal_handler_internal(TRUE);
 893 }
 894
 895 /*
 896  *      Routine:        ml_cpu_get_info
 897  *      Function:
 898  */
 899 void
 900 ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
 901 {
 902         cache_info_t   *cpuid_cache_info;
 903
 904         cpuid_cache_info = cache_info();
 905         ml_cpu_info->vector_unit = 0;
 906         ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
 907         ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
 908         ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;
 909
 910 #if (__ARM_ARCH__ >= 7)
 911         ml_cpu_info->l2_settings = 1;
 912         ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
 913 #else
 914         ml_cpu_info->l2_settings = 0;
 915         ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
 916 #endif
 917         ml_cpu_info->l3_settings = 0;
 918         ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
 919 }
 920
 921 unsigned int
 922 ml_get_machine_mem(void)
 923 {
 924         return machine_info.memory_size;
 925 }
 926
 927 __attribute__((noreturn))
 928 void
 929 halt_all_cpus(boolean_t reboot)
 930 {
 931         if (reboot) {
 932                 printf("MACH Reboot\n");
 933                 PEHaltRestart(kPERestartCPU);
 934         } else {
 935                 printf("CPU halted\n");
 936                 PEHaltRestart(kPEHaltCPU);
 937         }
 938         while (1) {
 939                 ;
 940         }
 941 }
 942
 943 __attribute__((noreturn))
 944 void
 945 halt_cpu(void)
 946 {
 947         halt_all_cpus(FALSE);
 948 }
 949
 950 /*
 951  *      Routine:        machine_signal_idle
 952  *      Function:
 953  */
 954 void
 955 machine_signal_idle(
 956         processor_t processor)
 957 {
 958         cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
 959         KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
 960 }
 961
 962 void
 963 machine_signal_idle_deferred(
 964         processor_t processor)
 965 {
 966         cpu_signal_deferred(processor_to_cpu_datap(processor));
 967         KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
 968 }
 969
 970 void
 971 machine_signal_idle_cancel(
 972         processor_t processor)
 973 {
 974         cpu_signal_cancel(processor_to_cpu_datap(processor));
 975         KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
 976 }
 977
 978 /*
 979  *      Routine:        ml_install_interrupt_handler
 980  *      Function:       Initialize Interrupt Handler
 981  */
 982 void
 983 ml_install_interrupt_handler(
 984         void *nub,
 985         int source,
 986         void *target,
 987         IOInterruptHandler handler,
 988         void *refCon)
 989 {
 990         cpu_data_t     *cpu_data_ptr;
 991         boolean_t       current_state;
 992
 993         current_state = ml_set_interrupts_enabled(FALSE);
 994         cpu_data_ptr = getCpuDatap();
 995
 996         cpu_data_ptr->interrupt_nub = nub;
 997         cpu_data_ptr->interrupt_source = source;
 998         cpu_data_ptr->interrupt_target = target;
 999         cpu_data_ptr->interrupt_handler = handler;
1000         cpu_data_ptr->interrupt_refCon = refCon;
1001
1002         cpu_data_ptr->interrupts_enabled = TRUE;
1003         (void) ml_set_interrupts_enabled(current_state);
1004
1005         initialize_screen(NULL, kPEAcquireScreen);
1006 }
1007
1008 /*
1009  *      Routine:        ml_init_interrupt
1010  *      Function:       Initialize Interrupts
1011  */
1012 void
1013 ml_init_interrupt(void)
1014 {
1015 #if defined(HAS_IPI)
1016         /*
1017          * ml_init_interrupt will get called once for each CPU, but this is redundant
1018          * because there is only one global copy of the register for skye. do it only
1019          * on the bootstrap cpu
1020          */
1021         if (getCpuDatap()->cluster_master) {
1022                 ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
1023         }
1024 #endif
1025 }
1026
1027 /*
1028  *      Routine:        ml_init_timebase
1029  *      Function:       register and setup Timebase, Decremeter services
1030  */
1031 void
1032 ml_init_timebase(
1033         void            *args,
1034         tbd_ops_t       tbd_funcs,
1035         vm_offset_t     int_address,
1036         vm_offset_t     int_value __unused)
1037 {
1038         cpu_data_t     *cpu_data_ptr;
1039
1040         cpu_data_ptr = (cpu_data_t *)args;
1041
1042         if ((cpu_data_ptr == &BootCpuData)
1043             && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
1044                 rtclock_timebase_func = *tbd_funcs;
1045                 rtclock_timebase_addr = int_address;
1046         }
1047 }
1048
1049 void
1050 ml_parse_cpu_topology(void)
1051 {
1052         DTEntry entry, child __unused;
1053         OpaqueDTEntryIterator iter;
1054         uint32_t cpu_boot_arg;
1055         int err;
1056
1057         cpu_boot_arg = MAX_CPUS;
1058
1059         PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
1060
1061         err = DTLookupEntry(NULL, "/cpus", &entry);
1062         assert(err == kSuccess);
1063
1064         err = DTInitEntryIterator(entry, &iter);
1065         assert(err == kSuccess);
1066
1067         while (kSuccess == DTIterateEntries(&iter, &child)) {
1068                 unsigned int propSize;
1069                 void *prop = NULL;
1070                 int cpu_id = avail_cpus++;
1071
1072                 if (kSuccess == DTGetProperty(child, "cpu-id", &prop, &propSize)) {
1073                         cpu_id = *((int32_t*)prop);
1074                 }
1075
1076                 assert(cpu_id < MAX_CPUS);
1077                 assert(cpu_phys_ids[cpu_id] == (uint32_t)-1);
1078
1079                 if (boot_cpu == -1) {
1080                         if (kSuccess != DTGetProperty(child, "state", &prop, &propSize)) {
1081                                 panic("unable to retrieve state for cpu %d", cpu_id);
1082                         }
1083
1084                         if (strncmp((char*)prop, "running", propSize) == 0) {
1085                                 boot_cpu = cpu_id;
1086                         }
1087                 }
1088                 if (kSuccess != DTGetProperty(child, "reg", &prop, &propSize)) {
1089                         panic("unable to retrieve physical ID for cpu %d", cpu_id);
1090                 }
1091
1092                 cpu_phys_ids[cpu_id] = *((uint32_t*)prop);
1093
1094                 if ((cpu_id > max_cpu_number) && ((cpu_id == boot_cpu) || (avail_cpus <= cpu_boot_arg))) {
1095                         max_cpu_number = cpu_id;
1096                 }
1097         }
1098
1099         if (avail_cpus > cpu_boot_arg) {
1100                 avail_cpus = cpu_boot_arg;
1101         }
1102
1103         if (avail_cpus == 0) {
1104                 panic("No cpus found!");
1105         }
1106
1107         if (boot_cpu == -1) {
1108                 panic("unable to determine boot cpu!");
1109         }
1110
1111         /*
1112          * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
1113          * not be booting from cpu 0.  Userspace will consume the current
1114          * CPU number through this register.  For non-boot cores, this is
1115          * done in start.s (start_cpu) using the cpu_number field of the
1116          * per-cpu data object.
1117          */
1118         assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
1119         __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)boot_cpu);
1120 }
1121
1122 unsigned int
1123 ml_get_cpu_count(void)
1124 {
1125         return avail_cpus;
1126 }
1127
1128 int
1129 ml_get_boot_cpu_number(void)
1130 {
1131         return boot_cpu;
1132 }
1133
1134 cluster_type_t
1135 ml_get_boot_cluster(void)
1136 {
1137         return boot_cluster;
1138 }
1139
1140 int
1141 ml_get_cpu_number(uint32_t phys_id)
1142 {
1143         for (int log_id = 0; log_id <= ml_get_max_cpu_number(); ++log_id) {
1144                 if (cpu_phys_ids[log_id] == phys_id) {
1145                         return log_id;
1146                 }
1147         }
1148         return -1;
1149 }
1150
1151 int
1152 ml_get_max_cpu_number(void)
1153 {
1154         return max_cpu_number;
1155 }
1156
1157
1158 void
1159 ml_lockdown_init()
1160 {
1161         lockdown_handler_grp = lck_grp_alloc_init("lockdown_handler", NULL);
1162         assert(lockdown_handler_grp != NULL);
1163
1164         lck_mtx_init(&lockdown_handler_lck, lockdown_handler_grp, NULL);
1165
1166 #if defined(KERNEL_INTEGRITY_CTRR)
1167         init_ctrr_cpu_start_lock();
1168 #endif
1169 }
1170
1171 kern_return_t
1172 ml_lockdown_handler_register(lockdown_handler_t f, void *this)
1173 {
1174         if (lockdown_handler || !f) {
1175                 return KERN_FAILURE;
1176         }
1177
1178         lck_mtx_lock(&lockdown_handler_lck);
1179         lockdown_handler = f;
1180         lockdown_this = this;
1181
1182 #if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
1183         lockdown_done = 1;
1184         lockdown_handler(this);
1185 #else
1186         if (lockdown_done) {
1187                 lockdown_handler(this);
1188         }
1189 #endif
1190         lck_mtx_unlock(&lockdown_handler_lck);
1191
1192         return KERN_SUCCESS;
1193 }
1194
1195 void
1196 ml_lockdown_run_handler()
1197 {
1198         lck_mtx_lock(&lockdown_handler_lck);
1199         assert(!lockdown_done);
1200
1201         lockdown_done = 1;
1202         if (lockdown_handler) {
1203                 lockdown_handler(lockdown_this);
1204         }
1205         lck_mtx_unlock(&lockdown_handler_lck);
1206 }
1207
1208 kern_return_t
1209 ml_processor_register(ml_processor_info_t *in_processor_info,
1210     processor_t *processor_out, ipi_handler_t *ipi_handler_out,
1211     perfmon_interrupt_handler_func *pmi_handler_out)
1212 {
1213         cpu_data_t *this_cpu_datap;
1214         processor_set_t pset;
1215         boolean_t  is_boot_cpu;
1216         static unsigned int reg_cpu_count = 0;
1217
1218         if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
1219                 return KERN_FAILURE;
1220         }
1221
1222         if ((unsigned int)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= avail_cpus) {
1223                 return KERN_FAILURE;
1224         }
1225
1226         if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
1227                 is_boot_cpu = FALSE;
1228                 this_cpu_datap = cpu_data_alloc(FALSE);
1229                 cpu_data_init(this_cpu_datap);
1230         } else {
1231                 this_cpu_datap = &BootCpuData;
1232                 is_boot_cpu = TRUE;
1233         }
1234
1235         assert(in_processor_info->log_id < MAX_CPUS);
1236
1237         this_cpu_datap->cpu_id = in_processor_info->cpu_id;
1238
1239         this_cpu_datap->cpu_console_buf = console_cpu_alloc(is_boot_cpu);
1240         if (this_cpu_datap->cpu_console_buf == (void *)(NULL)) {
1241                 goto processor_register_error;
1242         }
1243
1244         if (!is_boot_cpu) {
1245                 this_cpu_datap->cpu_number = in_processor_info->log_id;
1246
1247                 if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
1248                         goto processor_register_error;
1249                 }
1250         }
1251
1252         this_cpu_datap->cpu_idle_notify = (void *) in_processor_info->processor_idle;
1253         this_cpu_datap->cpu_cache_dispatch = in_processor_info->platform_cache_dispatch;
1254         nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
1255         this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
1256
1257         this_cpu_datap->idle_timer_notify = (void *) in_processor_info->idle_timer;
1258         this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
1259
1260         this_cpu_datap->platform_error_handler = (void *) in_processor_info->platform_error_handler;
1261         this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
1262         this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
1263         this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
1264
1265         this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
1266         this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
1267         this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
1268         this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
1269         this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
1270         this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
1271
1272 #if HAS_CLUSTER
1273         this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
1274 #else /* HAS_CLUSTER */
1275         this_cpu_datap->cluster_master = is_boot_cpu;
1276 #endif /* HAS_CLUSTER */
1277
1278         pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
1279         assert(pset != NULL);
1280         kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
1281
1282         if (!is_boot_cpu) {
1283                 processor_init((struct processor *)this_cpu_datap->cpu_processor,
1284                     this_cpu_datap->cpu_number, pset);
1285
1286                 if (this_cpu_datap->cpu_l2_access_penalty) {
1287                         /*
1288                          * Cores that have a non-zero L2 access penalty compared
1289                          * to the boot processor should be de-prioritized by the
1290                          * scheduler, so that threads use the cores with better L2
1291                          * preferentially.
1292                          */
1293                         processor_set_primary(this_cpu_datap->cpu_processor,
1294                             master_processor);
1295                 }
1296         }
1297
1298         *processor_out = this_cpu_datap->cpu_processor;
1299         *ipi_handler_out = cpu_signal_handler;
1300 #if CPMU_AIC_PMI && MONOTONIC
1301         *pmi_handler_out = mt_cpmu_aic_pmi;
1302 #else
1303         *pmi_handler_out = NULL;
1304 #endif /* CPMU_AIC_PMI && MONOTONIC */
1305         if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
1306                 *in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
1307         }
1308
1309 #if KPC
1310         if (kpc_register_cpu(this_cpu_datap) != TRUE) {
1311                 goto processor_register_error;
1312         }
1313 #endif /* KPC */
1314
1315         if (!is_boot_cpu) {
1316                 random_cpu_init(this_cpu_datap->cpu_number);
1317                 // now let next CPU register itself
1318                 OSIncrementAtomic((SInt32*)&real_ncpus);
1319         }
1320
1321         return KERN_SUCCESS;
1322
1323 processor_register_error:
1324 #if KPC
1325         kpc_unregister_cpu(this_cpu_datap);
1326 #endif /* KPC */
1327         if (!is_boot_cpu) {
1328                 cpu_data_free(this_cpu_datap);
1329         }
1330
1331         return KERN_FAILURE;
1332 }
1333
1334 void
1335 ml_init_arm_debug_interface(
1336         void * in_cpu_datap,
1337         vm_offset_t virt_address)
1338 {
1339         ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1340         do_debugid();
1341 }
1342
1343 /*
1344  *      Routine:        init_ast_check
1345  *      Function:
1346  */
1347 void
1348 init_ast_check(
1349         __unused processor_t processor)
1350 {
1351 }
1352
1353 /*
1354  *      Routine:        cause_ast_check
1355  *      Function:
1356  */
1357 void
1358 cause_ast_check(
1359         processor_t processor)
1360 {
1361         if (current_processor() != processor) {
1362                 cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1363                 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1364         }
1365 }
1366
1367 extern uint32_t cpu_idle_count;
1368
1369 void
1370 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
1371 {
1372         *icp = ml_at_interrupt_context();
1373         *pidlep = (cpu_idle_count == real_ncpus);
1374 }
1375
1376 /*
1377  *      Routine:        ml_cause_interrupt
1378  *      Function:       Generate a fake interrupt
1379  */
1380 void
1381 ml_cause_interrupt(void)
1382 {
1383         return;                 /* BS_XXX */
1384 }
1385
1386 /* Map memory map IO space */
1387 vm_offset_t
1388 ml_io_map(
1389         vm_offset_t phys_addr,
1390         vm_size_t size)
1391 {
1392         return io_map(phys_addr, size, VM_WIMG_IO);
1393 }
1394
1395 /* Map memory map IO space (with protections specified) */
1396 vm_offset_t
1397 ml_io_map_with_prot(
1398         vm_offset_t phys_addr,
1399         vm_size_t size,
1400         vm_prot_t prot)
1401 {
1402         return io_map_with_prot(phys_addr, size, VM_WIMG_IO, prot);
1403 }
1404
1405 vm_offset_t
1406 ml_io_map_wcomb(
1407         vm_offset_t phys_addr,
1408         vm_size_t size)
1409 {
1410         return io_map(phys_addr, size, VM_WIMG_WCOMB);
1411 }
1412
1413 /* boot memory allocation */
1414 vm_offset_t
1415 ml_static_malloc(
1416         __unused vm_size_t size)
1417 {
1418         return (vm_offset_t) NULL;
1419 }
1420
1421 vm_map_address_t
1422 ml_map_high_window(
1423         vm_offset_t     phys_addr,
1424         vm_size_t       len)
1425 {
1426         return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1427 }
1428
1429 vm_offset_t
1430 ml_static_ptovirt(
1431         vm_offset_t paddr)
1432 {
1433         return phystokv(paddr);
1434 }
1435
1436 vm_offset_t
1437 ml_static_slide(
1438         vm_offset_t vaddr)
1439 {
1440         return phystokv(vaddr + vm_kernel_slide - gVirtBase + gPhysBase);
1441 }
1442
1443 vm_offset_t
1444 ml_static_unslide(
1445         vm_offset_t vaddr)
1446 {
1447         return ml_static_vtop(vaddr) - gPhysBase + gVirtBase - vm_kernel_slide;
1448 }
1449
1450 extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
1451
1452 kern_return_t
1453 ml_static_protect(
1454         vm_offset_t vaddr, /* kernel virtual address */
1455         vm_size_t size,
1456         vm_prot_t new_prot)
1457 {
1458         pt_entry_t    arm_prot = 0;
1459         pt_entry_t    arm_block_prot = 0;
1460         vm_offset_t   vaddr_cur;
1461         ppnum_t       ppn;
1462         kern_return_t result = KERN_SUCCESS;
1463
1464         if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1465                 panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) VM_MIN_KERNEL_ADDRESS);
1466                 return KERN_FAILURE;
1467         }
1468
1469         assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1470
1471         if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
1472                 panic("ml_static_protect(): WX request on %p", (void *) vaddr);
1473         }
1474
1475         /* Set up the protection bits, and block bits so we can validate block mappings. */
1476         if (new_prot & VM_PROT_WRITE) {
1477                 arm_prot |= ARM_PTE_AP(AP_RWNA);
1478                 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
1479         } else {
1480                 arm_prot |= ARM_PTE_AP(AP_RONA);
1481                 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
1482         }
1483
1484         arm_prot |= ARM_PTE_NX;
1485         arm_block_prot |= ARM_TTE_BLOCK_NX;
1486
1487         if (!(new_prot & VM_PROT_EXECUTE)) {
1488                 arm_prot |= ARM_PTE_PNX;
1489                 arm_block_prot |= ARM_TTE_BLOCK_PNX;
1490         }
1491
1492         for (vaddr_cur = vaddr;
1493             vaddr_cur < trunc_page_64(vaddr + size);
1494             vaddr_cur += PAGE_SIZE) {
1495                 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1496                 if (ppn != (vm_offset_t) NULL) {
1497                         tt_entry_t      *tte2;
1498                         pt_entry_t      *pte_p;
1499                         pt_entry_t      ptmp;
1500
1501 #if XNU_MONITOR
1502                         assert(!TEST_PAGE_RATIO_4);
1503                         assert(!pmap_is_monitor(ppn));
1504 #endif
1505
1506                         tte2 = arm_kva_to_tte(vaddr_cur);
1507
1508                         if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1509                                 if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
1510                                     ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
1511                                         /*
1512                                          * We can support ml_static_protect on a block mapping if the mapping already has
1513                                          * the desired protections.  We still want to run checks on a per-page basis.
1514                                          */
1515                                         continue;
1516                                 }
1517
1518                                 result = KERN_FAILURE;
1519                                 break;
1520                         }
1521
1522                         pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
1523                         ptmp = *pte_p;
1524
1525                         if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
1526                                 /*
1527                                  * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
1528                                  * protections do not match the desired protections, then we will fail (as we cannot update
1529                                  * this mapping without updating other mappings as well).
1530                                  */
1531                                 result = KERN_FAILURE;
1532                                 break;
1533                         }
1534
1535                         __unreachable_ok_push
1536                         if (TEST_PAGE_RATIO_4) {
1537                                 {
1538                                         unsigned int    i;
1539                                         pt_entry_t      *ptep_iter;
1540
1541                                         ptep_iter = pte_p;
1542                                         for (i = 0; i < 4; i++, ptep_iter++) {
1543                                                 /* Note that there is a hole in the HINT sanity checking here. */
1544                                                 ptmp = *ptep_iter;
1545
1546                                                 /* We only need to update the page tables if the protections do not match. */
1547                                                 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1548                                                         ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1549                                                         *ptep_iter = ptmp;
1550                                                 }
1551                                         }
1552                                 }
1553                         } else {
1554                                 ptmp = *pte_p;
1555
1556                                 /* We only need to update the page tables if the protections do not match. */
1557                                 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1558                                         ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1559                                         *pte_p = ptmp;
1560                                 }
1561                         }
1562                         __unreachable_ok_pop
1563                 }
1564         }
1565
1566         if (vaddr_cur > vaddr) {
1567                 assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
1568                 flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
1569         }
1570
1571
1572         return result;
1573 }
1574
1575 /*
1576  *      Routine:        ml_static_mfree
1577  *      Function:
1578  */
1579 void
1580 ml_static_mfree(
1581         vm_offset_t vaddr,
1582         vm_size_t size)
1583 {
1584         vm_offset_t     vaddr_cur;
1585         ppnum_t         ppn;
1586         uint32_t freed_pages = 0;
1587
1588         /* It is acceptable (if bad) to fail to free. */
1589         if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1590                 return;
1591         }
1592
1593         assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1594
1595         for (vaddr_cur = vaddr;
1596             vaddr_cur < trunc_page_64(vaddr + size);
1597             vaddr_cur += PAGE_SIZE) {
1598                 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1599                 if (ppn != (vm_offset_t) NULL) {
1600                         /*
1601                          * It is not acceptable to fail to update the protections on a page
1602                          * we will release to the VM.  We need to either panic or continue.
1603                          * For now, we'll panic (to help flag if there is memory we can
1604                          * reclaim).
1605                          */
1606                         if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
1607                                 panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
1608                         }
1609
1610 #if 0
1611                         /*
1612                          * Must NOT tear down the "V==P" mapping for vaddr_cur as the zone alias scheme
1613                          * relies on the persistence of these mappings for all time.
1614                          */
1615                         // pmap_remove(kernel_pmap, (addr64_t) vaddr_cur, (addr64_t) (vaddr_cur + PAGE_SIZE));
1616 #endif
1617
1618                         vm_page_create(ppn, (ppn + 1));
1619                         freed_pages++;
1620                 }
1621         }
1622         vm_page_lockspin_queues();
1623         vm_page_wire_count -= freed_pages;
1624         vm_page_wire_count_initial -= freed_pages;
1625         vm_page_unlock_queues();
1626 #if     DEBUG
1627         kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
1628 #endif
1629 }
1630
1631
1632 /* virtual to physical on wired pages */
1633 vm_offset_t
1634 ml_vtophys(vm_offset_t vaddr)
1635 {
1636         return kvtophys(vaddr);
1637 }
1638
1639 /*
1640  * Routine: ml_nofault_copy
1641  * Function: Perform a physical mode copy if the source and destination have
1642  * valid translations in the kernel pmap. If translations are present, they are
1643  * assumed to be wired; e.g., no attempt is made to guarantee that the
1644  * translations obtained remain valid for the duration of the copy process.
1645  */
1646 vm_size_t
1647 ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
1648 {
1649         addr64_t        cur_phys_dst, cur_phys_src;
1650         vm_size_t       count, nbytes = 0;
1651
1652         while (size > 0) {
1653                 if (!(cur_phys_src = kvtophys(virtsrc))) {
1654                         break;
1655                 }
1656                 if (!(cur_phys_dst = kvtophys(virtdst))) {
1657                         break;
1658                 }
1659                 if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
1660                     !pmap_valid_address(trunc_page_64(cur_phys_src))) {
1661                         break;
1662                 }
1663                 count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
1664                 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
1665                         count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
1666                 }
1667                 if (count > size) {
1668                         count = size;
1669                 }
1670
1671                 bcopy_phys(cur_phys_src, cur_phys_dst, count);
1672
1673                 nbytes += count;
1674                 virtsrc += count;
1675                 virtdst += count;
1676                 size -= count;
1677         }
1678
1679         return nbytes;
1680 }
1681
1682 /*
1683  *      Routine:        ml_validate_nofault
1684  *      Function: Validate that ths address range has a valid translations
1685  *                      in the kernel pmap.  If translations are present, they are
1686  *                      assumed to be wired; i.e. no attempt is made to guarantee
1687  *                      that the translation persist after the check.
1688  *  Returns: TRUE if the range is mapped and will not cause a fault,
1689  *                      FALSE otherwise.
1690  */
1691
1692 boolean_t
1693 ml_validate_nofault(
1694         vm_offset_t virtsrc, vm_size_t size)
1695 {
1696         addr64_t cur_phys_src;
1697         uint32_t count;
1698
1699         while (size > 0) {
1700                 if (!(cur_phys_src = kvtophys(virtsrc))) {
1701                         return FALSE;
1702                 }
1703                 if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
1704                         return FALSE;
1705                 }
1706                 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
1707                 if (count > size) {
1708                         count = (uint32_t)size;
1709                 }
1710
1711                 virtsrc += count;
1712                 size -= count;
1713         }
1714
1715         return TRUE;
1716 }
1717
1718 void
1719 ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
1720 {
1721         *phys_addr = 0;
1722         *size = 0;
1723 }
1724
1725 void
1726 active_rt_threads(__unused boolean_t active)
1727 {
1728 }
1729
1730 static void
1731 cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
1732 {
1733         return;
1734 }
1735
1736 cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
1737
1738 void
1739 cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
1740 {
1741         if (cpu_qos_cb != NULL) {
1742                 cpu_qos_update = cpu_qos_cb;
1743         } else {
1744                 cpu_qos_update = cpu_qos_cb_default;
1745         }
1746 }
1747
1748 void
1749 thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
1750 {
1751         SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
1752
1753         cpu_qos_update((int)urgency, rt_period, rt_deadline);
1754
1755         SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
1756 }
1757
1758 void
1759 machine_run_count(__unused uint32_t count)
1760 {
1761 }
1762
1763 processor_t
1764 machine_choose_processor(__unused processor_set_t pset, processor_t processor)
1765 {
1766         return processor;
1767 }
1768
1769 #if KASAN
1770 vm_offset_t ml_stack_base(void);
1771 vm_size_t ml_stack_size(void);
1772
1773 vm_offset_t
1774 ml_stack_base(void)
1775 {
1776         uintptr_t local = (uintptr_t) &local;
1777         vm_offset_t     intstack_top_ptr;
1778
1779         intstack_top_ptr = getCpuDatap()->intstack_top;
1780         if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1781                 return intstack_top_ptr - INTSTACK_SIZE;
1782         } else {
1783                 return current_thread()->kernel_stack;
1784         }
1785 }
1786 vm_size_t
1787 ml_stack_size(void)
1788 {
1789         uintptr_t local = (uintptr_t) &local;
1790         vm_offset_t     intstack_top_ptr;
1791
1792         intstack_top_ptr = getCpuDatap()->intstack_top;
1793         if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1794                 return INTSTACK_SIZE;
1795         } else {
1796                 return kernel_stack_size;
1797         }
1798 }
1799 #endif
1800
1801 boolean_t
1802 machine_timeout_suspended(void)
1803 {
1804         return FALSE;
1805 }
1806
1807 kern_return_t
1808 ml_interrupt_prewarm(__unused uint64_t deadline)
1809 {
1810         return KERN_FAILURE;
1811 }
1812
1813 /*
1814  * Assumes fiq, irq disabled.
1815  */
1816 void
1817 ml_set_decrementer(uint32_t dec_value)
1818 {
1819         cpu_data_t      *cdp = getCpuDatap();
1820
1821         assert(ml_get_interrupts_enabled() == FALSE);
1822         cdp->cpu_decrementer = dec_value;
1823
1824         if (cdp->cpu_set_decrementer_func) {
1825                 ((void (*)(uint32_t))cdp->cpu_set_decrementer_func)(dec_value);
1826         } else {
1827                 __asm__ volatile ("msr CNTP_TVAL_EL0, %0" : : "r"((uint64_t)dec_value));
1828         }
1829 }
1830
1831 uint64_t
1832 ml_get_hwclock()
1833 {
1834         uint64_t timebase;
1835
1836         // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
1837         // "Reads of CNTPCT[_EL0] can occur speculatively and out of order relative
1838         // to other instructions executed on the same processor."
1839         __builtin_arm_isb(ISB_SY);
1840         timebase = __builtin_arm_rsr64("CNTPCT_EL0");
1841
1842         return timebase;
1843 }
1844
1845 uint64_t
1846 ml_get_timebase()
1847 {
1848         return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase;
1849 }
1850
1851 uint32_t
1852 ml_get_decrementer()
1853 {
1854         cpu_data_t *cdp = getCpuDatap();
1855         uint32_t dec;
1856
1857         assert(ml_get_interrupts_enabled() == FALSE);
1858
1859         if (cdp->cpu_get_decrementer_func) {
1860                 dec = ((uint32_t (*)(void))cdp->cpu_get_decrementer_func)();
1861         } else {
1862                 uint64_t wide_val;
1863
1864                 __asm__ volatile ("mrs %0, CNTP_TVAL_EL0" : "=r"(wide_val));
1865                 dec = (uint32_t)wide_val;
1866                 assert(wide_val == (uint64_t)dec);
1867         }
1868
1869         return dec;
1870 }
1871
1872 boolean_t
1873 ml_get_timer_pending()
1874 {
1875         uint64_t cntp_ctl;
1876
1877         __asm__ volatile ("mrs %0, CNTP_CTL_EL0" : "=r"(cntp_ctl));
1878         return ((cntp_ctl & CNTP_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
1879 }
1880
1881 boolean_t
1882 ml_wants_panic_trap_to_debugger(void)
1883 {
1884         boolean_t result = FALSE;
1885 #if XNU_MONITOR
1886         /*
1887          * This looks racey, but if we are in the PPL, preemption will be
1888          * disabled.
1889          */
1890         result = ((pmap_get_cpu_data()->ppl_state == PPL_STATE_DISPATCH) && pmap_ppl_locked_down);
1891 #endif
1892         return result;
1893 }
1894
1895 static void
1896 cache_trap_error(thread_t thread, vm_map_address_t fault_addr)
1897 {
1898         mach_exception_data_type_t exc_data[2];
1899         arm_saved_state_t *regs = get_user_regs(thread);
1900
1901         set_saved_state_far(regs, fault_addr);
1902
1903         exc_data[0] = KERN_INVALID_ADDRESS;
1904         exc_data[1] = fault_addr;
1905
1906         exception_triage(EXC_BAD_ACCESS, exc_data, 2);
1907 }
1908
1909 static void
1910 cache_trap_recover()
1911 {
1912         vm_map_address_t fault_addr;
1913
1914         __asm__ volatile ("mrs %0, FAR_EL1" : "=r"(fault_addr));
1915
1916         cache_trap_error(current_thread(), fault_addr);
1917 }
1918
1919 static void
1920 set_cache_trap_recover(thread_t thread)
1921 {
1922 #if defined(HAS_APPLE_PAC)
1923         thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover,
1924             ptrauth_key_function_pointer, 0,
1925             ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
1926 #else /* defined(HAS_APPLE_PAC) */
1927         thread->recover = (vm_address_t)cache_trap_recover;
1928 #endif /* defined(HAS_APPLE_PAC) */
1929 }
1930
1931 static void
1932 dcache_flush_trap(vm_map_address_t start, vm_map_size_t size)
1933 {
1934         vm_map_address_t end = start + size;
1935         thread_t thread = current_thread();
1936         vm_offset_t old_recover = thread->recover;
1937
1938         /* Check bounds */
1939         if (task_has_64Bit_addr(current_task())) {
1940                 if (end > MACH_VM_MAX_ADDRESS) {
1941                         cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1942                 }
1943         } else {
1944                 if (end > VM_MAX_ADDRESS) {
1945                         cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1946                 }
1947         }
1948
1949         if (start > end) {
1950                 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1951         }
1952
1953         set_cache_trap_recover(thread);
1954
1955         /*
1956          * We're coherent on Apple ARM64 CPUs, so this could be a nop.  However,
1957          * if the region given us is bad, it would be good to catch it and
1958          * crash, ergo we still do the flush.
1959          */
1960         FlushPoC_DcacheRegion(start, (uint32_t)size);
1961
1962         /* Restore recovery function */
1963         thread->recover = old_recover;
1964
1965         /* Return (caller does exception return) */
1966 }
1967
1968 static void
1969 icache_invalidate_trap(vm_map_address_t start, vm_map_size_t size)
1970 {
1971         vm_map_address_t end = start + size;
1972         thread_t thread = current_thread();
1973         vm_offset_t old_recover = thread->recover;
1974
1975         /* Check bounds */
1976         if (task_has_64Bit_addr(current_task())) {
1977                 if (end > MACH_VM_MAX_ADDRESS) {
1978                         cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1979                 }
1980         } else {
1981                 if (end > VM_MAX_ADDRESS) {
1982                         cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1983                 }
1984         }
1985
1986         if (start > end) {
1987                 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1988         }
1989
1990         set_cache_trap_recover(thread);
1991
1992         /* Invalidate iCache to point of unification */
1993         InvalidatePoU_IcacheRegion(start, (uint32_t)size);
1994
1995         /* Restore recovery function */
1996         thread->recover = old_recover;
1997
1998         /* Return (caller does exception return) */
1999 }
2000
2001 __attribute__((noreturn))
2002 void
2003 platform_syscall(arm_saved_state_t *state)
2004 {
2005         uint32_t code;
2006
2007 #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
2008
2009         code = (uint32_t)get_saved_state_reg(state, 3);
2010         switch (code) {
2011         case 0:
2012                 /* I-Cache flush */
2013                 platform_syscall_kprintf("icache flush requested.\n");
2014                 icache_invalidate_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
2015                 break;
2016         case 1:
2017                 /* D-Cache flush */
2018                 platform_syscall_kprintf("dcache flush requested.\n");
2019                 dcache_flush_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
2020                 break;
2021         case 2:
2022                 /* set cthread */
2023                 platform_syscall_kprintf("set cthread self.\n");
2024                 thread_set_cthread_self(get_saved_state_reg(state, 0));
2025                 break;
2026         case 3:
2027                 /* get cthread */
2028                 platform_syscall_kprintf("get cthread self.\n");
2029                 set_saved_state_reg(state, 0, thread_get_cthread_self());
2030                 break;
2031         default:
2032                 platform_syscall_kprintf("unknown: %d\n", code);
2033                 break;
2034         }
2035
2036         thread_exception_return();
2037 }
2038
2039 static void
2040 _enable_timebase_event_stream(uint32_t bit_index)
2041 {
2042         uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */
2043
2044         if (bit_index >= 64) {
2045                 panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
2046         }
2047
2048         __asm__ volatile ("mrs  %0, CNTKCTL_EL1" : "=r"(cntkctl));
2049
2050         cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
2051         cntkctl |= CNTKCTL_EL1_EVNTEN;
2052         cntkctl |= CNTKCTL_EL1_EVENTDIR; /* 1->0; why not? */
2053
2054         /*
2055          * If the SOC supports it (and it isn't broken), enable
2056          * EL0 access to the physical timebase register.
2057          */
2058         if (user_timebase_type() != USER_TIMEBASE_NONE) {
2059                 cntkctl |= CNTKCTL_EL1_PL0PCTEN;
2060         }
2061
2062         __asm__ volatile ("msr  CNTKCTL_EL1, %0" : : "r"(cntkctl));
2063 }
2064
2065 /*
2066  * Turn timer on, unmask that interrupt.
2067  */
2068 static void
2069 _enable_virtual_timer(void)
2070 {
2071         uint64_t cntvctl = CNTP_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */
2072
2073         __asm__ volatile ("msr CNTP_CTL_EL0, %0" : : "r"(cntvctl));
2074 }
2075
2076 uint64_t events_per_sec = 0;
2077
2078 void
2079 fiq_context_init(boolean_t enable_fiq __unused)
2080 {
2081         _enable_timebase_event_stream(fiq_eventi);
2082
2083         /* Interrupts still disabled. */
2084         assert(ml_get_interrupts_enabled() == FALSE);
2085         _enable_virtual_timer();
2086 }
2087
2088 void
2089 fiq_context_bootstrap(boolean_t enable_fiq)
2090 {
2091 #if defined(APPLE_ARM64_ARCH_FAMILY) || defined(BCM2837)
2092         /* Could fill in our own ops here, if we needed them */
2093         uint64_t        ticks_per_sec, ticks_per_event;
2094         uint32_t        bit_index;
2095
2096         ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2097         ticks_per_event = ticks_per_sec / events_per_sec;
2098         bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */
2099
2100         /* Round up to power of two */
2101         if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
2102                 bit_index++;
2103         }
2104
2105         /*
2106          * The timer can only trigger on rising or falling edge,
2107          * not both; we don't care which we trigger on, but we
2108          * do need to adjust which bit we are interested in to
2109          * account for this.
2110          */
2111         if (bit_index != 0) {
2112                 bit_index--;
2113         }
2114
2115         fiq_eventi = bit_index;
2116 #else
2117 #error Need a board configuration.
2118 #endif
2119         fiq_context_init(enable_fiq);
2120 }
2121
2122 boolean_t
2123 ml_delay_should_spin(uint64_t interval)
2124 {
2125         cpu_data_t     *cdp = getCpuDatap();
2126
2127         if (cdp->cpu_idle_latency) {
2128                 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2129         } else {
2130                 /*
2131                  * Early boot, latency is unknown. Err on the side of blocking,
2132                  * which should always be safe, even if slow
2133                  */
2134                 return FALSE;
2135         }
2136 }
2137
2138 boolean_t
2139 ml_thread_is64bit(thread_t thread)
2140 {
2141         return thread_is_64bit_addr(thread);
2142 }
2143
2144 void
2145 ml_delay_on_yield(void)
2146 {
2147 #if DEVELOPMENT || DEBUG
2148         if (yield_delay_us) {
2149                 delay(yield_delay_us);
2150         }
2151 #endif
2152 }
2153
2154 void
2155 ml_timer_evaluate(void)
2156 {
2157 }
2158
2159 boolean_t
2160 ml_timer_forced_evaluation(void)
2161 {
2162         return FALSE;
2163 }
2164
2165 uint64_t
2166 ml_energy_stat(thread_t t)
2167 {
2168         return t->machine.energy_estimate_nj;
2169 }
2170
2171
2172 void
2173 ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
2174 {
2175 #if CONFIG_EMBEDDED
2176         /*
2177          * For now: update the resource coalition stats of the
2178          * current thread's coalition
2179          */
2180         task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2181 #endif
2182 }
2183
2184 uint64_t
2185 ml_gpu_stat(__unused thread_t t)
2186 {
2187         return 0;
2188 }
2189
2190 #if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME
2191 static void
2192 timer_state_event(boolean_t switch_to_kernel)
2193 {
2194         thread_t thread = current_thread();
2195         if (!thread->precise_user_kernel_time) {
2196                 return;
2197         }
2198
2199         processor_data_t *pd = &getCpuDatap()->cpu_processor->processor_data;
2200         uint64_t now = ml_get_timebase();
2201
2202         timer_stop(pd->current_state, now);
2203         pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
2204         timer_start(pd->current_state, now);
2205
2206         timer_stop(pd->thread_timer, now);
2207         pd->thread_timer = (switch_to_kernel) ? &thread->system_timer : &thread->user_timer;
2208         timer_start(pd->thread_timer, now);
2209 }
2210
2211 void
2212 timer_state_event_user_to_kernel(void)
2213 {
2214         timer_state_event(TRUE);
2215 }
2216
2217 void
2218 timer_state_event_kernel_to_user(void)
2219 {
2220         timer_state_event(FALSE);
2221 }
2222 #endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME */
2223
2224 /*
2225  * The following are required for parts of the kernel
2226  * that cannot resolve these functions as inlines:
2227  */
2228 extern thread_t current_act(void) __attribute__((const));
2229 thread_t
2230 current_act(void)
2231 {
2232         return current_thread_fast();
2233 }
2234
2235 #undef current_thread
2236 extern thread_t current_thread(void) __attribute__((const));
2237 thread_t
2238 current_thread(void)
2239 {
2240         return current_thread_fast();
2241 }
2242
2243 typedef struct{
2244         ex_cb_t         cb;
2245         void            *refcon;
2246 }
2247 ex_cb_info_t;
2248
2249 ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];
2250
2251 /*
2252  * Callback registration
2253  * Currently we support only one registered callback per class but
2254  * it should be possible to support more callbacks
2255  */
2256 kern_return_t
2257 ex_cb_register(
2258         ex_cb_class_t   cb_class,
2259         ex_cb_t                 cb,
2260         void                    *refcon)
2261 {
2262         ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2263
2264         if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX)) {
2265                 return KERN_INVALID_VALUE;
2266         }
2267
2268         if (NULL == pInfo->cb) {
2269                 pInfo->cb = cb;
2270                 pInfo->refcon = refcon;
2271                 return KERN_SUCCESS;
2272         }
2273         return KERN_FAILURE;
2274 }
2275
2276 /*
2277  * Called internally by platform kernel to invoke the registered callback for class
2278  */
2279 ex_cb_action_t
2280 ex_cb_invoke(
2281         ex_cb_class_t   cb_class,
2282         vm_offset_t             far)
2283 {
2284         ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2285         ex_cb_state_t state = {far};
2286
2287         if (cb_class >= EXCB_CLASS_MAX) {
2288                 panic("Invalid exception callback class 0x%x\n", cb_class);
2289         }
2290
2291         if (pInfo->cb) {
2292                 return pInfo->cb(cb_class, pInfo->refcon, &state);
2293         }
2294         return EXCB_ACTION_NONE;
2295 }
2296
2297 #if defined(HAS_APPLE_PAC)
2298 void
2299 ml_task_set_disable_user_jop(task_t task, boolean_t disable_user_jop)
2300 {
2301         assert(task);
2302         task->disable_user_jop = disable_user_jop;
2303 }
2304
2305 void
2306 ml_thread_set_disable_user_jop(thread_t thread, boolean_t disable_user_jop)
2307 {
2308         assert(thread);
2309         thread->machine.disable_user_jop = disable_user_jop;
2310 }
2311
2312 void
2313 ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2314 {
2315         if (inherit) {
2316                 task->rop_pid = parent_task->rop_pid;
2317         } else {
2318                 task->rop_pid = early_random();
2319         }
2320 }
2321 #endif /* defined(HAS_APPLE_PAC) */
2322
2323
2324 #if defined(HAS_APPLE_PAC)
2325
2326 /*
2327  * ml_auth_ptr_unchecked: call this instead of ptrauth_auth_data
2328  * instrinsic when you don't want to trap on auth fail.
2329  *
2330  */
2331
2332 void *
2333 ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
2334 {
2335         switch (key & 0x3) {
2336         case ptrauth_key_asia:
2337                 asm volatile ("autia %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
2338                 break;
2339         case ptrauth_key_asib:
2340                 asm volatile ("autib %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
2341                 break;
2342         case ptrauth_key_asda:
2343                 asm volatile ("autda %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
2344                 break;
2345         case ptrauth_key_asdb:
2346                 asm volatile ("autdb %[ptr], %[modifier]" : [ptr] "+r"(ptr) : [modifier] "r"(modifier));
2347                 break;
2348         }
2349
2350         return ptr;
2351 }
2352 #endif /* defined(HAS_APPLE_PAC) */