1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm64/proc_reg.h>
30 #include <arm/machine_cpu.h>
31 #include <arm/cpu_internal.h>
32 #include <arm/cpuid.h>
33 #include <arm/io_map_entries.h>
34 #include <arm/cpu_data.h>
35 #include <arm/cpu_data_internal.h>
36 #include <arm/caches_internal.h>
37 #include <arm/misc_protos.h>
38 #include <arm/machdep_call.h>
39 #include <arm/machine_routines.h>
40 #include <arm/rtclock.h>
41 #include <arm/cpuid_internal.h>
42 #include <arm/cpu_capabilities.h>
43 #include <console/serial_protos.h>
44 #include <kern/machine.h>
45 #include <kern/misc_protos.h>
46 #include <prng/random.h>
47 #include <kern/startup.h>
48 #include <kern/thread.h>
49 #include <kern/timer_queue.h>
50 #include <mach/machine.h>
51 #include <machine/atomic.h>
52 #include <machine/config.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_shared_region.h>
56 #include <vm/vm_map.h>
57 #include <sys/codesign.h>
58 #include <sys/kdebug.h>
59 #include <kern/coalition.h>
60 #include <pexpert/device_tree.h>
61
62 #include <IOKit/IOPlatformExpert.h>
63 #if HIBERNATION
64 #include <IOKit/IOHibernatePrivate.h>
65 #include <arm64/hibernate_ppl_hmac.h>
66 #include <arm64/ppl/ppl_hib.h>
67 #endif /* HIBERNATION */
68
69 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
70 #include <arm64/amcc_rorgn.h>
71 #endif
72
73 #include <libkern/section_keywords.h>
74
75 /**
76 * On supported hardware, debuggable builds make the HID bits read-only
77 * without locking them. This lets people manually modify HID bits while
78 * debugging, since they can use a debugging tool to first reset the HID
79 * bits back to read/write. However, it will still catch xnu changes that
80 * accidentally write to HID bits after they've been made read-only.
81 */
82 #if HAS_TWO_STAGE_SPR_LOCK && !(DEVELOPMENT || DEBUG)
83 #define USE_TWO_STAGE_SPR_LOCK
84 #endif
85
86 #if KPC
87 #include <kern/kpc.h>
88 #endif
89
90 #define MPIDR_CPU_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
91 #define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)
92
93 #if HAS_CLUSTER
94 static uint8_t cluster_initialized = 0;
95 #endif
96
97 uint32_t LockTimeOut;
98 uint32_t LockTimeOutUsec;
99 uint64_t TLockTimeOut;
100 uint64_t MutexSpin;
101 uint64_t low_MutexSpin;
102 int64_t high_MutexSpin;
103
104 static uint64_t ml_wfe_hint_max_interval;
105 #define MAX_WFE_HINT_INTERVAL_US (500ULL)
106
107 /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
108 TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);
109
110 extern vm_offset_t segLOWEST;
111 extern vm_offset_t segLOWESTTEXT;
112 extern vm_offset_t segLASTB;
113 extern unsigned long segSizeLAST;
114
115 /* ARM64 specific bounds; used to test for presence in the kernelcache. */
116 extern vm_offset_t vm_kernelcache_base;
117 extern vm_offset_t vm_kernelcache_top;
118
119 #if defined(HAS_IPI)
120 unsigned int gFastIPI = 1;
121 #define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
122 static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
123 kDeferredIPITimerDefault);
124 #endif /* defined(HAS_IPI) */
125
126 thread_t Idle_context(void);
127
128 SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
129 SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
130 SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
131 .version = CPU_TOPOLOGY_VERSION,
132 .cpus = topology_cpu_array,
133 .clusters = topology_cluster_array,
134 };
135 /**
136 * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
137 * entries of an arbitrary data type. This is intended for use by specialized consumers
138 * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
139 * as follows:
140 * hypothetical_array[cluster_offsets[AFF1] + AFF0]
141 * Most consumers should instead use general-purpose facilities such as PERCPU or
142 * ml_get_cpu_number().
143 */
144 SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];
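/*
 * Illustrative sketch (kept in a comment, not compiled): how a specialized
 * consumer might index a per-CPU array by MPIDR_EL1 alone, using the
 * MPIDR_* macros above. The my_percpu_data array is hypothetical.
 *
 *      static uint64_t my_percpu_data[MAX_CPUS];
 *
 *      static inline uint64_t *
 *      my_percpu_slot(uint64_t mpidr_el1_val)
 *      {
 *              return &my_percpu_data[cluster_offsets[MPIDR_CLUSTER_ID(mpidr_el1_val)] +
 *                  MPIDR_CPU_ID(mpidr_el1_val)];
 *      }
 */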
145
146 SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;
147
148 extern uint32_t lockdown_done;
149
150 /**
151 * Represents regions of virtual address space that should be reserved
152 * (pre-mapped) in each user address space.
153 */
154 SECURITY_READ_ONLY_LATE(static struct vm_reserved_region) vm_reserved_regions[] = {
155 /*
156 * Reserve the virtual memory space representing the commpage nesting region
157 * to prevent user processes from allocating memory within it. The actual
158 * page table entries for the commpage are inserted by vm_commpage_enter().
159 * This vm_map_enter() just prevents userspace from allocating/deallocating
160 * anything within the entire commpage nested region.
161 */
162 {
163 .vmrr_name = "commpage nesting",
164 .vmrr_addr = _COMM_PAGE64_NESTING_START,
165 .vmrr_size = _COMM_PAGE64_NESTING_SIZE
166 }
167 };
168
169 uint32_t get_arm_cpu_version(void);
170
171 #if defined(HAS_IPI)
172 static inline void
173 ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
174 {
175 #if HAS_CLUSTER
176 uint64_t local_mpidr;
177 /* NOTE: this logic expects that we are called in a non-preemptible
178 * context, or at least one in which the calling thread is bound
179 * to a single CPU. Otherwise we may migrate between choosing which
180 * IPI mechanism to use and issuing the IPI. */
181 MRS(local_mpidr, "MPIDR_EL1");
182 if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
183 uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
184 MSR(ARM64_REG_IPI_RR_LOCAL, x);
185 } else {
186 #define IPI_RR_TARGET_CLUSTER_SHIFT 16
187 uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
188 MSR(ARM64_REG_IPI_RR_GLOBAL, x);
189 }
190 #else
191 uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
192 MSR(ARM64_REG_IPI_RR, x);
193 #endif
194 }
195 #endif
196
197 #if !defined(HAS_IPI)
198 __dead2
199 #endif
200 void
201 ml_cpu_signal(unsigned int cpu_mpidr __unused)
202 {
203 #if defined(HAS_IPI)
204 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
205 #else
206 panic("Platform does not support ACC Fast IPI");
207 #endif
208 }
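/*
 * Usage sketch (not compiled): callers should be non-preemptible (or bound to
 * a single CPU) so they cannot migrate mid-signal; see the note in
 * ml_cpu_signal_type(). target_mpidr is a hypothetical MPIDR_EL1 value.
 *
 *      disable_preemption();
 *      ml_cpu_signal(target_mpidr);
 *      enable_preemption();
 */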
209
210 #if !defined(HAS_IPI)
211 __dead2
212 #endif
213 void
214 ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
215 {
216 #if defined(HAS_IPI)
217 /* Adjust the IPI_CR timer countdown value for deferred IPIs.
218 * Accepts input in nanoseconds, converts to absolutetime (REFCLK ticks),
219 * and clamps the result to 0xFFFF REFCLK ticks (a 16-bit field).
220 *
221 * IPI_CR is a global register, so a single write should update all
222 * CPU cores; see the Skye ACC user spec, section 5.7.3.3.
223 *
224 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
225 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
226 */
227 uint64_t abstime;
228
229 nanoseconds_to_absolutetime(nanosecs, &abstime);
230
231 abstime = MIN(abstime, 0xFFFF);
232
233 /* update deferred_ipi_timer_ns with the new clamped value */
234 absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
235
236 MSR(ARM64_REG_IPI_CR, abstime);
237 #else
238 (void)nanosecs;
239 panic("Platform does not support ACC Fast IPI");
240 #endif
241 }
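/*
 * Worked example, assuming a 24 MHz REFCLK: the default 64 us deferral
 * (kDeferredIPITimerDefault) converts to roughly 1536 ticks, well under the
 * 0xFFFF clamp; a request above roughly 2.7 ms (65535 ticks) is clamped, and
 * deferred_ipi_timer_ns is rewritten to the clamped value.
 */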
242
243 uint64_t
244 ml_cpu_signal_deferred_get_timer()
245 {
246 #if defined(HAS_IPI)
247 return deferred_ipi_timer_ns;
248 #else
249 return 0;
250 #endif
251 }
252
253 #if !defined(HAS_IPI)
254 __dead2
255 #endif
256 void
257 ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
258 {
259 #if defined(HAS_IPI)
260 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
261 #else
262 panic("Platform does not support ACC Fast IPI deferral");
263 #endif
264 }
265
266 #if !defined(HAS_IPI)
267 __dead2
268 #endif
269 void
270 ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
271 {
272 #if defined(HAS_IPI)
273 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
274 #else
275 panic("Platform does not support ACC Fast IPI retraction");
276 #endif
277 }
278
279 void
280 machine_idle(void)
281 {
282 /* Interrupts are expected to be masked on entry or re-entry via
283 * Idle_load_context()
284 */
285 assert((__builtin_arm_rsr("DAIF") & DAIF_IRQF) == DAIF_IRQF);
286 Idle_context();
287 __builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
288 }
289
290 void
291 OSSynchronizeIO(void)
292 {
293 __builtin_arm_dsb(DSB_SY);
294 }
295
296 uint64_t
297 get_aux_control(void)
298 {
299 uint64_t value;
300
301 MRS(value, "ACTLR_EL1");
302 return value;
303 }
304
305 uint64_t
306 get_mmu_control(void)
307 {
308 uint64_t value;
309
310 MRS(value, "SCTLR_EL1");
311 return value;
312 }
313
314 uint64_t
315 get_tcr(void)
316 {
317 uint64_t value;
318
319 MRS(value, "TCR_EL1");
320 return value;
321 }
322
323 boolean_t
324 ml_get_interrupts_enabled(void)
325 {
326 uint64_t value;
327
328 MRS(value, "DAIF");
329 if (value & DAIF_IRQF) {
330 return FALSE;
331 }
332 return TRUE;
333 }
334
335 pmap_paddr_t
336 get_mmu_ttb(void)
337 {
338 pmap_paddr_t value;
339
340 MRS(value, "TTBR0_EL1");
341 return value;
342 }
343
344 uint32_t
345 get_arm_cpu_version(void)
346 {
347 uint32_t value = machine_read_midr();
348
349 /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
350 return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
351 }
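/*
 * Worked example: a MIDR_EL1 value with variant 0x1 and revision 0x2
 * (i.e. an r1p2 part) returns 0x12.
 */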
352
353 bool
354 ml_feature_supported(uint32_t feature_bit)
355 {
356 uint64_t aidr_el1_value = 0;
357
358 MRS(aidr_el1_value, "AIDR_EL1");
359
360
361 return aidr_el1_value & feature_bit;
362 }
363
364 /*
365 * user_cont_hwclock_allowed()
366 *
367 * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
368 * as a continuous time source (e.g. from mach_continuous_time)
369 */
370 boolean_t
371 user_cont_hwclock_allowed(void)
372 {
373 #if HAS_CONTINUOUS_HWCLOCK
374 return TRUE;
375 #else
376 return FALSE;
377 #endif
378 }
379
380
381 uint8_t
382 user_timebase_type(void)
383 {
384 return USER_TIMEBASE_SPEC;
385 }
386
387 void
388 machine_startup(__unused boot_args * args)
389 {
390 #if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
391 if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
392 gFastIPI = 1;
393 }
394 #endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
395
396 machine_conf();
397
398 /*
399 * Kick off the kernel bootstrap.
400 */
401 kernel_bootstrap();
402 /* NOTREACHED */
403 }
404
405
406 void
407 machine_lockdown(void)
408 {
409 arm_vm_prot_finalize(PE_state.bootArgs);
410
411 #if CONFIG_KERNEL_INTEGRITY
412 #if KERNEL_INTEGRITY_WT
413 /* Watchtower
414 *
415 * Notify the monitor about the completion of early kernel bootstrap.
416 * From this point forward it will enforce the integrity of kernel text,
417 * rodata and page tables.
418 */
419
420 #ifdef MONITOR
421 monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
422 #endif
423 #endif /* KERNEL_INTEGRITY_WT */
424
425 #if XNU_MONITOR
426 pmap_lockdown_ppl();
427 #endif
428
429 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
430 /* KTRR
431 *
432 * Lock physical KTRR region. KTRR region is read-only. Memory outside
433 * the region is not executable at EL1.
434 */
435
436 rorgn_lockdown();
437 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
438
439 #if HIBERNATION
440 /* sign the kernel read-only region */
441 if (ppl_hmac_init() == KERN_SUCCESS) {
442 ppl_hmac_compute_rorgn_hmac();
443 }
444 #endif /* HIBERNATION */
445
446 #endif /* CONFIG_KERNEL_INTEGRITY */
447
448 #if HIBERNATION
449 /* Avoid configuration security issues by panicking if hibernation is
450 * supported but we don't know how to invalidate SIO HMAC keys; see
451 * below. */
452 if (ppl_hib_hibernation_supported() &&
453 NULL == invalidate_hmac_function) {
454 panic("Invalidate HMAC function wasn't set when needed");
455 }
456 #endif /* HIBERNATION */
457
458
459 lockdown_done = 1;
460 }
461
462
463 char *
464 machine_boot_info(
465 __unused char *buf,
466 __unused vm_size_t size)
467 {
468 return PE_boot_args();
469 }
470
471 void
472 slave_machine_init(__unused void *param)
473 {
474 cpu_machine_init(); /* Initialize the processor */
475 clock_init(); /* Init the clock */
476 }
477
478 /*
479 * Routine: machine_processor_shutdown
480 * Function:
481 */
482 thread_t
483 machine_processor_shutdown(
484 __unused thread_t thread,
485 void (*doshutdown)(processor_t),
486 processor_t processor)
487 {
488 return Shutdown_context(doshutdown, processor);
489 }
490
491 /*
492 * Routine: ml_init_lock_timeout
493 * Function:
494 */
495 void
496 ml_init_lock_timeout(void)
497 {
498 uint64_t abstime;
499 uint64_t mtxspin;
500 uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
501 uint32_t slto;
502
503 if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
504 default_timeout_ns = slto * NSEC_PER_USEC;
505 }
506
507 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
508 LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
509 LockTimeOut = (uint32_t)abstime;
510
511 if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
512 nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &abstime);
513 TLockTimeOut = abstime;
514 } else {
515 TLockTimeOut = LockTimeOut >> 1;
516 }
517
518 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
519 if (mtxspin > USEC_PER_SEC >> 4) {
520 mtxspin = USEC_PER_SEC >> 4;
521 }
522 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
523 } else {
524 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
525 }
526 MutexSpin = abstime;
527 low_MutexSpin = MutexSpin;
528 /*
529 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
530 * real_ncpus is not set at this time
531 *
532 * NOTE: active spinning is disabled on ARM. It can be activated
533 * by setting high_MutexSpin through the sysctl.
534 */
535 high_MutexSpin = low_MutexSpin;
536
537 nanoseconds_to_absolutetime(MAX_WFE_HINT_INTERVAL_US * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
538 }
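/*
 * Illustrative boot-args sketch: "slto_us=500000" raises the spinlock timeout
 * to 500 ms (LockTimeOutUsec = 500000), and "mtxspin=100" spins mutexes for
 * 100 us (values above USEC_PER_SEC >> 4 are clamped). Without these
 * boot-args, the defaults above are 250 ms and 10 us respectively.
 */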
539
540 /*
541 * This is called from the machine-independent routine cpu_up()
542 * to perform machine-dependent info updates.
543 */
544 void
545 ml_cpu_up(void)
546 {
547 os_atomic_inc(&machine_info.physical_cpu, relaxed);
548 os_atomic_inc(&machine_info.logical_cpu, relaxed);
549 }
550
551 /*
552 * This is called from the machine-independent routine cpu_down()
553 * to perform machine-dependent info updates.
554 */
555 void
556 ml_cpu_down(void)
557 {
558 cpu_data_t *cpu_data_ptr;
559
560 os_atomic_dec(&machine_info.physical_cpu, relaxed);
561 os_atomic_dec(&machine_info.logical_cpu, relaxed);
562
563 /*
564 * If we want to deal with outstanding IPIs, we need to
565 * do so relatively early in the processor_doshutdown path,
566 * as we pend decrementer interrupts using the IPI
567 * mechanism if we cannot immediately service them (if
568 * IRQ is masked). Do so now.
569 *
570 * We aren't on the interrupt stack here; would it make
571 * more sense to disable signaling and then enable
572 * interrupts? It might be a bit cleaner.
573 */
574 cpu_data_ptr = getCpuDatap();
575 cpu_data_ptr->cpu_running = FALSE;
576
577 if (cpu_data_ptr != &BootCpuData) {
578 /*
579 * Move all of this cpu's timers to the master/boot cpu,
580 * and poke it in case there's a sooner deadline for it to schedule.
581 */
582 timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
583 cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, NULL);
584 }
585
586 cpu_signal_handler_internal(TRUE);
587 }
588
589 /*
590 * Routine: ml_cpu_get_info
591 * Function:
592 */
593 void
594 ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
595 {
596 cache_info_t *cpuid_cache_info;
597
598 cpuid_cache_info = cache_info();
599 ml_cpu_info->vector_unit = 0;
600 ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
601 ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
602 ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;
603
604 #if (__ARM_ARCH__ >= 7)
605 ml_cpu_info->l2_settings = 1;
606 ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
607 #else
608 ml_cpu_info->l2_settings = 0;
609 ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
610 #endif
611 ml_cpu_info->l3_settings = 0;
612 ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
613 }
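/*
 * Usage sketch (not compiled): querying the current CPU's cache geometry.
 *
 *      ml_cpu_info_t info;
 *      ml_cpu_get_info(&info);
 *      kprintf("L1D %lu bytes, line size %lu\n",
 *          (unsigned long)info.l1_dcache_size,
 *          (unsigned long)info.cache_line_size);
 */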
614
615 unsigned int
616 ml_get_machine_mem(void)
617 {
618 return machine_info.memory_size;
619 }
620
621 __attribute__((noreturn))
622 void
623 halt_all_cpus(boolean_t reboot)
624 {
625 if (reboot) {
626 printf("MACH Reboot\n");
627 PEHaltRestart(kPERestartCPU);
628 } else {
629 printf("CPU halted\n");
630 PEHaltRestart(kPEHaltCPU);
631 }
632 while (1) {
633 ;
634 }
635 }
636
637 __attribute__((noreturn))
638 void
639 halt_cpu(void)
640 {
641 halt_all_cpus(FALSE);
642 }
643
644 /*
645 * Routine: machine_signal_idle
646 * Function:
647 */
648 void
649 machine_signal_idle(
650 processor_t processor)
651 {
652 cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
653 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
654 }
655
656 void
657 machine_signal_idle_deferred(
658 processor_t processor)
659 {
660 cpu_signal_deferred(processor_to_cpu_datap(processor));
661 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
662 }
663
664 void
665 machine_signal_idle_cancel(
666 processor_t processor)
667 {
668 cpu_signal_cancel(processor_to_cpu_datap(processor));
669 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
670 }
671
672 /*
673 * Routine: ml_install_interrupt_handler
674 * Function: Initialize Interrupt Handler
675 */
676 void
677 ml_install_interrupt_handler(
678 void *nub,
679 int source,
680 void *target,
681 IOInterruptHandler handler,
682 void *refCon)
683 {
684 cpu_data_t *cpu_data_ptr;
685 boolean_t current_state;
686
687 current_state = ml_set_interrupts_enabled(FALSE);
688 cpu_data_ptr = getCpuDatap();
689
690 cpu_data_ptr->interrupt_nub = nub;
691 cpu_data_ptr->interrupt_source = source;
692 cpu_data_ptr->interrupt_target = target;
693 cpu_data_ptr->interrupt_handler = handler;
694 cpu_data_ptr->interrupt_refCon = refCon;
695
696 (void) ml_set_interrupts_enabled(current_state);
697 }
698
699 /*
700 * Routine: ml_init_interrupt
701 * Function: Initialize Interrupts
702 */
703 void
704 ml_init_interrupt(void)
705 {
706 #if defined(HAS_IPI)
707 /*
708 * ml_init_interrupt will get called once for each CPU, but this is redundant
709 * because there is only one global copy of the register for Skye. Do it only
710 * on the bootstrap CPU.
711 */
712 if (getCpuDatap()->cluster_master) {
713 ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
714 }
715 #endif
716 }
717
718 /*
719 * Routine: ml_init_timebase
720 * Function: register and set up the Timebase and Decrementer services
721 */
722 void
723 ml_init_timebase(
724 void *args,
725 tbd_ops_t tbd_funcs,
726 vm_offset_t int_address,
727 vm_offset_t int_value __unused)
728 {
729 cpu_data_t *cpu_data_ptr;
730
731 cpu_data_ptr = (cpu_data_t *)args;
732
733 if ((cpu_data_ptr == &BootCpuData)
734 && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
735 rtclock_timebase_func = *tbd_funcs;
736 rtclock_timebase_addr = int_address;
737 }
738 }
739
740 #define ML_READPROP_MANDATORY UINT64_MAX
741
742 static uint64_t
743 ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
744 {
745 void const *prop;
746 unsigned int propSize;
747
748 if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
749 if (propSize == sizeof(uint8_t)) {
750 return *((uint8_t const *)prop);
751 } else if (propSize == sizeof(uint16_t)) {
752 return *((uint16_t const *)prop);
753 } else if (propSize == sizeof(uint32_t)) {
754 return *((uint32_t const *)prop);
755 } else if (propSize == sizeof(uint64_t)) {
756 return *((uint64_t const *)prop);
757 } else {
758 panic("CPU property '%s' has bad size %u", propertyName, propSize);
759 }
760 } else {
761 if (default_value == ML_READPROP_MANDATORY) {
762 panic("Missing mandatory property '%s'", propertyName);
763 }
764 return default_value;
765 }
766 }
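/*
 * Usage sketch (not compiled), mirroring ml_parse_cpu_topology() below:
 * "reg" must exist, so it is read with ML_READPROP_MANDATORY, while "die-id"
 * silently falls back to 0 when the property is absent.
 *
 *      uint32_t phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
 *      unsigned int die = (unsigned int)ml_readprop(child, "die-id", 0);
 */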
767
768 static boolean_t
769 ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr)
770 {
771 uint64_t const *prop;
772 unsigned int propSize;
773
774 if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
775 return FALSE;
776 }
777
778 if (propSize != sizeof(uint64_t) * 2) {
779 panic("Wrong property size for %s", propertyName);
780 }
781
782 *pa_ptr = prop[0];
783 *len_ptr = prop[1];
784 return TRUE;
785 }
786
787 static boolean_t
788 ml_is_boot_cpu(const DTEntry entry)
789 {
790 void const *prop;
791 unsigned int propSize;
792
793 if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) {
794 panic("unable to retrieve state for cpu");
795 }
796
797 if (strncmp((char const *)prop, "running", propSize) == 0) {
798 return TRUE;
799 } else {
800 return FALSE;
801 }
802 }
803
804 static void
805 ml_read_chip_revision(unsigned int *rev __unused)
806 {
807 // The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
808 #ifdef APPLE_ARM64_ARCH_FAMILY
809 DTEntry entryP;
810
811 if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
812 *rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
813 } else {
814 *rev = CPU_VERSION_UNKNOWN;
815 }
816 #endif
817 }
818
819 static boolean_t
820 ml_parse_interrupt_prop(const DTEntry entry, ml_topology_cpu_t *cpu)
821 {
822 uint32_t const *prop;
823 unsigned int propSize;
824
825 if (SecureDTGetProperty(entry, "interrupts", (void const **)&prop, &propSize) != kSuccess) {
826 return FALSE;
827 }
828
829 if (propSize == sizeof(uint32_t) * 1) {
830 cpu->pmi_irq = prop[0];
831 return TRUE;
832 } else if (propSize == sizeof(uint32_t) * 3) {
833 cpu->self_ipi_irq = prop[0];
834 cpu->pmi_irq = prop[1];
835 cpu->other_ipi_irq = prop[2];
836 return TRUE;
837 } else {
838 return FALSE;
839 }
840 }
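/*
 * Illustrative sketch of the two "interrupts" encodings accepted above (the
 * IRQ numbers are hypothetical): a one-cell property such as <0x12> sets only
 * pmi_irq = 0x12, while a three-cell property such as <0x10 0x12 0x11> sets
 * self_ipi_irq = 0x10, pmi_irq = 0x12 and other_ipi_irq = 0x11.
 */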
841
842 void
843 ml_parse_cpu_topology(void)
844 {
845 DTEntry entry, child __unused;
846 OpaqueDTEntryIterator iter;
847 uint32_t cpu_boot_arg;
848 int err;
849
850 int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
851 int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
852 cpu_boot_arg = MAX_CPUS;
853 PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
854
855 err = SecureDTLookupEntry(NULL, "/cpus", &entry);
856 assert(err == kSuccess);
857
858 err = SecureDTInitEntryIterator(entry, &iter);
859 assert(err == kSuccess);
860
861 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
862 cluster_offsets[i] = -1;
863 cluster_phys_to_logical[i] = -1;
864 cluster_max_cpu_phys_id[i] = 0;
865 }
866
867 while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
868 boolean_t is_boot_cpu = ml_is_boot_cpu(child);
869
870 // If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
871 // been added to the topology struct yet, and we only have one slot left, then skip
872 // every other non-boot CPU in order to leave room for the boot CPU.
873 //
874 // e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
875 // array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted.
876 if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
877 continue;
878 }
879 if (topology_info.num_cpus >= cpu_boot_arg) {
880 break;
881 }
882
883 ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus];
884
885 cpu->cpu_id = topology_info.num_cpus++;
886 assert(cpu->cpu_id < MAX_CPUS);
887 topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id);
888
889 cpu->die_id = (int)ml_readprop(child, "die-id", 0);
890 topology_info.max_die_id = MAX(topology_info.max_die_id, cpu->die_id);
891
892 cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
893
894 cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
895 cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
896 cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
897 cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
898 cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);
899
900 ml_parse_interrupt_prop(child, cpu);
901 ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
902 ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
903 ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
904 cpu->cluster_type = CLUSTER_TYPE_SMP;
905
906
907 /*
908 * Since we want to keep a linear cluster ID space, we cannot just rely
909 * on the value provided by EDT. Instead, use the MPIDR value to see if we have
910 * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
911 */
912 #if HAS_CLUSTER
913 uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
914 #else
915 uint32_t phys_cluster_id = 0;
916 #endif
917 assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
918 cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
919 topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);
920
921 assert(cpu->cluster_id < MAX_CPU_CLUSTERS);
922
923 ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id];
924 if (cluster->num_cpus == 0) {
925 assert(topology_info.num_clusters < MAX_CPU_CLUSTERS);
926
927 topology_info.num_clusters++;
928 topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id);
929
930 cluster->cluster_id = cpu->cluster_id;
931 cluster->cluster_type = cpu->cluster_type;
932 cluster->first_cpu_id = cpu->cpu_id;
933 assert(cluster_phys_to_logical[phys_cluster_id] == -1);
934 cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id;
935
936 // Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
937 // If we wind up with a bunch of these, we might want to create separate per-cluster
938 // EDT nodes and have the CPU nodes reference them through a phandle.
939 ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
940 ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
941 }
942
943 #if HAS_CLUSTER
944 if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) {
945 cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id);
946 }
947 #endif
948
949 cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
950 cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));
951
952 cluster->num_cpus++;
953 cluster->cpu_mask |= 1ULL << cpu->cpu_id;
954
955 if (is_boot_cpu) {
956 assert(topology_info.boot_cpu == NULL);
957 topology_info.boot_cpu = cpu;
958 topology_info.boot_cluster = cluster;
959 }
960 }
961
962 #if HAS_CLUSTER
963 /*
964 * Build the cluster offset array, ensuring that the region reserved
965 * for each physical cluster contains enough entries to be indexed
966 * by the maximum physical CPU ID (AFF0) within the cluster.
967 */
968 unsigned int cur_cluster_offset = 0;
969 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
970 if (cluster_phys_to_logical[i] != -1) {
971 cluster_offsets[i] = cur_cluster_offset;
972 cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
973 }
974 }
975 assert(cur_cluster_offset <= MAX_CPUS);
976 #else
977 /*
978 * For H10, there are really 2 physical clusters, but they are not separated
979 * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering
980 * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing,
981 * treat H10 and earlier devices as though they contain a single cluster.
982 */
983 cluster_offsets[0] = 0;
984 #endif
985 assert(topology_info.boot_cpu != NULL);
986 ml_read_chip_revision(&topology_info.chip_revision);
987
988 /*
989 * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
990 * not be booting from cpu 0. Userspace will consume the current
991 * CPU number through this register. For non-boot cores, this is
992 * done in start.s (start_cpu) using the cpu_number field of the
993 * per-cpu data object.
994 */
995 assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
996 __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)topology_info.boot_cpu->cpu_id);
997 }
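/*
 * Worked example for the HAS_CLUSTER offset construction above, using a
 * hypothetical topology: cluster AFF1 = 0 holds CPUs with AFF0 0..1 and
 * cluster AFF1 = 1 holds CPUs with AFF0 0..3. The loop then yields
 * cluster_offsets[0] = 0 and cluster_offsets[1] = 2, so
 * hypothetical_array[cluster_offsets[AFF1] + AFF0] addresses six
 * non-overlapping slots.
 */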
998
999 const ml_topology_info_t *
1000 ml_get_topology_info(void)
1001 {
1002 return &topology_info;
1003 }
1004
1005 void
1006 ml_map_cpu_pio(void)
1007 {
1008 unsigned int i;
1009
1010 for (i = 0; i < topology_info.num_cpus; i++) {
1011 ml_topology_cpu_t *cpu = &topology_info.cpus[i];
1012 if (cpu->cpu_IMPL_pa) {
1013 cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
1014 cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
1015 }
1016 if (cpu->cpu_UTTDBG_pa) {
1017 cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
1018 }
1019 }
1020
1021 for (i = 0; i < topology_info.num_clusters; i++) {
1022 ml_topology_cluster_t *cluster = &topology_info.clusters[i];
1023 if (cluster->acc_IMPL_pa) {
1024 cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
1025 }
1026 if (cluster->cpm_IMPL_pa) {
1027 cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
1028 }
1029 }
1030 }
1031
1032 unsigned int
1033 ml_get_cpu_count(void)
1034 {
1035 return topology_info.num_cpus;
1036 }
1037
1038 unsigned int
1039 ml_get_cluster_count(void)
1040 {
1041 return topology_info.num_clusters;
1042 }
1043
1044 int
1045 ml_get_boot_cpu_number(void)
1046 {
1047 return topology_info.boot_cpu->cpu_id;
1048 }
1049
1050 cluster_type_t
1051 ml_get_boot_cluster(void)
1052 {
1053 return topology_info.boot_cluster->cluster_type;
1054 }
1055
1056 int
1057 ml_get_cpu_number(uint32_t phys_id)
1058 {
1059 phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK;
1060
1061 for (unsigned i = 0; i < topology_info.num_cpus; i++) {
1062 if (topology_info.cpus[i].phys_id == phys_id) {
1063 return i;
1064 }
1065 }
1066
1067 return -1;
1068 }
1069
1070 int
1071 ml_get_cluster_number(uint32_t phys_id)
1072 {
1073 int cpu_id = ml_get_cpu_number(phys_id);
1074 if (cpu_id < 0) {
1075 return -1;
1076 }
1077
1078 ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id];
1079
1080 return cpu->cluster_id;
1081 }
1082
1083 unsigned int
1084 ml_get_cpu_number_local(void)
1085 {
1086 uint64_t mpidr_el1_value = 0;
1087 unsigned cpu_id;
1088
1089 /* We identify the CPU based on the constant bits of MPIDR_EL1. */
1090 MRS(mpidr_el1_value, "MPIDR_EL1");
1091 cpu_id = ml_get_cpu_number((uint32_t)mpidr_el1_value);
1092
1093 assert(cpu_id <= (unsigned int)ml_get_max_cpu_number());
1094
1095 return cpu_id;
1096 }
1097
1098 int
1099 ml_get_cluster_number_local()
1100 {
1101 uint64_t mpidr_el1_value = 0;
1102 unsigned cluster_id;
1103
1104 /* We identify the cluster based on the constant bits of MPIDR_EL1. */
1105 MRS(mpidr_el1_value, "MPIDR_EL1");
1106 cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value);
1107
1108 assert(cluster_id <= (unsigned int)ml_get_max_cluster_number());
1109
1110 return cluster_id;
1111 }
1112
1113 int
1114 ml_get_max_cpu_number(void)
1115 {
1116 return topology_info.max_cpu_id;
1117 }
1118
1119 int
1120 ml_get_max_cluster_number(void)
1121 {
1122 return topology_info.max_cluster_id;
1123 }
1124
1125 unsigned int
1126 ml_get_first_cpu_id(unsigned int cluster_id)
1127 {
1128 return topology_info.clusters[cluster_id].first_cpu_id;
1129 }
1130
1131 void
1132 ml_lockdown_init()
1133 {
1134 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1135 rorgn_stash_range();
1136 #endif
1137 }
1138
1139 kern_return_t
1140 ml_lockdown_handler_register(lockdown_handler_t f, void *this)
1141 {
1142 if (!f) {
1143 return KERN_FAILURE;
1144 }
1145
1146 assert(lockdown_done);
1147 f(this); // XXX: f this whole function
1148
1149 return KERN_SUCCESS;
1150 }
1151
1152 kern_return_t
1153 ml_processor_register(ml_processor_info_t *in_processor_info,
1154 processor_t *processor_out, ipi_handler_t *ipi_handler_out,
1155 perfmon_interrupt_handler_func *pmi_handler_out)
1156 {
1157 cpu_data_t *this_cpu_datap;
1158 processor_set_t pset;
1159 boolean_t is_boot_cpu;
1160 static unsigned int reg_cpu_count = 0;
1161
1162 if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
1163 return KERN_FAILURE;
1164 }
1165
1166 if ((unsigned)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= topology_info.num_cpus) {
1167 return KERN_FAILURE;
1168 }
1169
1170 if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
1171 is_boot_cpu = FALSE;
1172 this_cpu_datap = cpu_data_alloc(FALSE);
1173 cpu_data_init(this_cpu_datap);
1174 } else {
1175 this_cpu_datap = &BootCpuData;
1176 is_boot_cpu = TRUE;
1177 }
1178
1179 assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number());
1180
1181 this_cpu_datap->cpu_id = in_processor_info->cpu_id;
1182
1183 this_cpu_datap->cpu_console_buf = console_cpu_alloc(is_boot_cpu);
1184 if (this_cpu_datap->cpu_console_buf == (void *)(NULL)) {
1185 goto processor_register_error;
1186 }
1187
1188 if (!is_boot_cpu) {
1189 this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id);
1190
1191 if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
1192 goto processor_register_error;
1193 }
1194 }
1195
1196 this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle;
1197 this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch;
1198 nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
1199 this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
1200
1201 this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer;
1202 this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
1203
1204 this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler;
1205 this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
1206 this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
1207 this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
1208
1209 this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
1210 this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
1211 this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
1212 this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
1213 this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
1214 this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
1215
1216 #if HAS_CLUSTER
1217 this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
1218 #else /* HAS_CLUSTER */
1219 this_cpu_datap->cluster_master = is_boot_cpu;
1220 #endif /* HAS_CLUSTER */
1221
1222 pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
1223
1224 assert(pset != NULL);
1225 kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
1226
1227 processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap);
1228 if (!is_boot_cpu) {
1229 processor_init(processor, this_cpu_datap->cpu_number, pset);
1230
1231 if (this_cpu_datap->cpu_l2_access_penalty) {
1232 /*
1233 * Cores that have a non-zero L2 access penalty compared
1234 * to the boot processor should be de-prioritized by the
1235 * scheduler, so that threads use the cores with better L2
1236 * preferentially.
1237 */
1238 processor_set_primary(processor, master_processor);
1239 }
1240 }
1241
1242 *processor_out = processor;
1243 *ipi_handler_out = cpu_signal_handler;
1244 #if CPMU_AIC_PMI && MONOTONIC
1245 *pmi_handler_out = mt_cpmu_aic_pmi;
1246 #else
1247 *pmi_handler_out = NULL;
1248 #endif /* CPMU_AIC_PMI && MONOTONIC */
1249 if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
1250 *in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
1251 }
1252
1253 #if KPC
1254 if (kpc_register_cpu(this_cpu_datap) != TRUE) {
1255 goto processor_register_error;
1256 }
1257 #endif /* KPC */
1258
1259 if (!is_boot_cpu) {
1260 random_cpu_init(this_cpu_datap->cpu_number);
1261 // now let next CPU register itself
1262 OSIncrementAtomic((SInt32*)&real_ncpus);
1263 }
1264
1265 return KERN_SUCCESS;
1266
1267 processor_register_error:
1268 #if KPC
1269 kpc_unregister_cpu(this_cpu_datap);
1270 #endif /* KPC */
1271 if (!is_boot_cpu) {
1272 cpu_data_free(this_cpu_datap);
1273 }
1274
1275 return KERN_FAILURE;
1276 }
1277
1278 void
1279 ml_init_arm_debug_interface(
1280 void * in_cpu_datap,
1281 vm_offset_t virt_address)
1282 {
1283 ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1284 do_debugid();
1285 }
1286
1287 /*
1288 * Routine: init_ast_check
1289 * Function:
1290 */
1291 void
1292 init_ast_check(
1293 __unused processor_t processor)
1294 {
1295 }
1296
1297 /*
1298 * Routine: cause_ast_check
1299 * Function:
1300 */
1301 void
1302 cause_ast_check(
1303 processor_t processor)
1304 {
1305 if (current_processor() != processor) {
1306 cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1307 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1308 }
1309 }
1310
1311 extern uint32_t cpu_idle_count;
1312
1313 void
1314 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
1315 {
1316 *icp = ml_at_interrupt_context();
1317 *pidlep = (cpu_idle_count == real_ncpus);
1318 }
1319
1320 /*
1321 * Routine: ml_cause_interrupt
1322 * Function: Generate a fake interrupt
1323 */
1324 void
1325 ml_cause_interrupt(void)
1326 {
1327 return; /* BS_XXX */
1328 }
1329
1330 /* Map memory map IO space */
1331 vm_offset_t
1332 ml_io_map(
1333 vm_offset_t phys_addr,
1334 vm_size_t size)
1335 {
1336 return io_map(phys_addr, size, VM_WIMG_IO);
1337 }
1338
1339 /* Map memory map IO space (with protections specified) */
1340 vm_offset_t
1341 ml_io_map_with_prot(
1342 vm_offset_t phys_addr,
1343 vm_size_t size,
1344 vm_prot_t prot)
1345 {
1346 return io_map_with_prot(phys_addr, size, VM_WIMG_IO, prot);
1347 }
1348
1349 vm_offset_t
1350 ml_io_map_wcomb(
1351 vm_offset_t phys_addr,
1352 vm_size_t size)
1353 {
1354 return io_map(phys_addr, size, VM_WIMG_WCOMB);
1355 }
1356
1357 void
1358 ml_io_unmap(vm_offset_t addr, vm_size_t sz)
1359 {
1360 pmap_remove(kernel_pmap, addr, addr + sz);
1361 kmem_free(kernel_map, addr, sz);
1362 }
1363
1364 /* boot memory allocation */
1365 vm_offset_t
1366 ml_static_malloc(
1367 __unused vm_size_t size)
1368 {
1369 return (vm_offset_t) NULL;
1370 }
1371
1372 vm_map_address_t
1373 ml_map_high_window(
1374 vm_offset_t phys_addr,
1375 vm_size_t len)
1376 {
1377 return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1378 }
1379
1380 vm_offset_t
1381 ml_static_ptovirt(
1382 vm_offset_t paddr)
1383 {
1384 return phystokv(paddr);
1385 }
1386
1387 vm_offset_t
1388 ml_static_slide(
1389 vm_offset_t vaddr)
1390 {
1391 vm_offset_t slid_vaddr = vaddr + vm_kernel_slide;
1392
1393 if ((slid_vaddr < vm_kernelcache_base) || (slid_vaddr >= vm_kernelcache_top)) {
1394 /* This is only intended for use on kernelcache addresses. */
1395 return 0;
1396 }
1397
1398 /*
1399 * Because the address is in the kernelcache, we can do a simple
1400 * slide calculation.
1401 */
1402 return slid_vaddr;
1403 }
1404
1405 vm_offset_t
1406 ml_static_unslide(
1407 vm_offset_t vaddr)
1408 {
1409 if ((vaddr < vm_kernelcache_base) || (vaddr >= vm_kernelcache_top)) {
1410 /* This is only intended for use on kernelcache addresses. */
1411 return 0;
1412 }
1413
1414 return vaddr - vm_kernel_slide;
1415 }
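/*
 * Illustrative sketch (not compiled): for kernelcache addresses the two
 * routines are inverses; anything outside [vm_kernelcache_base,
 * vm_kernelcache_top) yields 0. unslid_addr is hypothetical.
 *
 *      vm_offset_t slid = ml_static_slide(unslid_addr);
 *      assert(slid == 0 || ml_static_unslide(slid) == unslid_addr);
 */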
1416
1417 extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
1418
1419 kern_return_t
1420 ml_static_protect(
1421 vm_offset_t vaddr, /* kernel virtual address */
1422 vm_size_t size,
1423 vm_prot_t new_prot)
1424 {
1425 pt_entry_t arm_prot = 0;
1426 pt_entry_t arm_block_prot = 0;
1427 vm_offset_t vaddr_cur;
1428 ppnum_t ppn;
1429 kern_return_t result = KERN_SUCCESS;
1430
1431 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1432 panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) VM_MIN_KERNEL_ADDRESS);
1433 return KERN_FAILURE;
1434 }
1435
1436 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1437
1438 if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
1439 panic("ml_static_protect(): WX request on %p", (void *) vaddr);
1440 }
1441 if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) {
1442 panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr);
1443 }
1444
1445 /* Set up the protection bits, and block bits so we can validate block mappings. */
1446 if (new_prot & VM_PROT_WRITE) {
1447 arm_prot |= ARM_PTE_AP(AP_RWNA);
1448 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
1449 } else {
1450 arm_prot |= ARM_PTE_AP(AP_RONA);
1451 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
1452 }
1453
1454 arm_prot |= ARM_PTE_NX;
1455 arm_block_prot |= ARM_TTE_BLOCK_NX;
1456
1457 if (!(new_prot & VM_PROT_EXECUTE)) {
1458 arm_prot |= ARM_PTE_PNX;
1459 arm_block_prot |= ARM_TTE_BLOCK_PNX;
1460 }
1461
1462 for (vaddr_cur = vaddr;
1463 vaddr_cur < trunc_page_64(vaddr + size);
1464 vaddr_cur += PAGE_SIZE) {
1465 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1466 if (ppn != (vm_offset_t) NULL) {
1467 tt_entry_t *tte2;
1468 pt_entry_t *pte_p;
1469 pt_entry_t ptmp;
1470
1471 #if XNU_MONITOR
1472 assert(!pmap_is_monitor(ppn));
1473 assert(!TEST_PAGE_RATIO_4);
1474 #endif
1475
1476 tte2 = arm_kva_to_tte(vaddr_cur);
1477
1478 if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1479 if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
1480 ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
1481 /*
1482 * We can support ml_static_protect on a block mapping if the mapping already has
1483 * the desired protections. We still want to run checks on a per-page basis.
1484 */
1485 continue;
1486 }
1487
1488 result = KERN_FAILURE;
1489 break;
1490 }
1491
1492 pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
1493 ptmp = *pte_p;
1494
1495 if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
1496 /*
1497 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
1498 * protections do not match the desired protections, then we will fail (as we cannot update
1499 * this mapping without updating other mappings as well).
1500 */
1501 result = KERN_FAILURE;
1502 break;
1503 }
1504
1505 __unreachable_ok_push
1506 if (TEST_PAGE_RATIO_4) {
1507 {
1508 unsigned int i;
1509 pt_entry_t *ptep_iter;
1510
1511 ptep_iter = pte_p;
1512 for (i = 0; i < 4; i++, ptep_iter++) {
1513 /* Note that there is a hole in the HINT sanity checking here. */
1514 ptmp = *ptep_iter;
1515
1516 /* We only need to update the page tables if the protections do not match. */
1517 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1518 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1519 *ptep_iter = ptmp;
1520 }
1521 }
1522 }
1523 } else {
1524 ptmp = *pte_p;
1525 /* We only need to update the page tables if the protections do not match. */
1526 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1527 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1528 *pte_p = ptmp;
1529 }
1530 }
1531 __unreachable_ok_pop
1532 }
1533 }
1534
1535 if (vaddr_cur > vaddr) {
1536 assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
1537 flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
1538 }
1539
1540
1541 return result;
1542 }
1543
1544 /*
1545 * Routine: ml_static_mfree
1546 * Function:
1547 */
1548 void
1549 ml_static_mfree(
1550 vm_offset_t vaddr,
1551 vm_size_t size)
1552 {
1553 vm_offset_t vaddr_cur;
1554 ppnum_t ppn;
1555 uint32_t freed_pages = 0;
1556 uint32_t freed_kernelcache_pages = 0;
1557
1558 /* It is acceptable (if bad) to fail to free. */
1559 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1560 return;
1561 }
1562
1563 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1564
1565 for (vaddr_cur = vaddr;
1566 vaddr_cur < trunc_page_64(vaddr + size);
1567 vaddr_cur += PAGE_SIZE) {
1568 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1569 if (ppn != (vm_offset_t) NULL) {
1570 /*
1571 * It is not acceptable to fail to update the protections on a page
1572 * we will release to the VM. We need to either panic or continue.
1573 * For now, we'll panic (to help flag if there is memory we can
1574 * reclaim).
1575 */
1576 if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
1577 panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
1578 }
1579
1580 vm_page_create(ppn, (ppn + 1));
1581 freed_pages++;
1582 if (vaddr_cur >= segLOWEST && vaddr_cur < end_kern) {
1583 freed_kernelcache_pages++;
1584 }
1585 }
1586 }
1587 vm_page_lockspin_queues();
1588 vm_page_wire_count -= freed_pages;
1589 vm_page_wire_count_initial -= freed_pages;
1590 vm_page_kernelcache_count -= freed_kernelcache_pages;
1591 vm_page_unlock_queues();
1592 #if DEBUG
1593 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
1594 #endif
1595 }
1596
1597
1598 /* virtual to physical on wired pages */
1599 vm_offset_t
1600 ml_vtophys(vm_offset_t vaddr)
1601 {
1602 return kvtophys(vaddr);
1603 }
1604
1605 /*
1606 * Routine: ml_nofault_copy
1607 * Function: Perform a physical mode copy if the source and destination have
1608 * valid translations in the kernel pmap. If translations are present, they are
1609 * assumed to be wired; i.e., no attempt is made to guarantee that the
1610 * translations obtained remain valid for the duration of the copy process.
1611 */
1612 vm_size_t
1613 ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
1614 {
1615 addr64_t cur_phys_dst, cur_phys_src;
1616 vm_size_t count, nbytes = 0;
1617
1618 while (size > 0) {
1619 if (!(cur_phys_src = kvtophys(virtsrc))) {
1620 break;
1621 }
1622 if (!(cur_phys_dst = kvtophys(virtdst))) {
1623 break;
1624 }
1625 if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
1626 !pmap_valid_address(trunc_page_64(cur_phys_src))) {
1627 break;
1628 }
1629 count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
1630 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
1631 count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
1632 }
1633 if (count > size) {
1634 count = size;
1635 }
1636
1637 bcopy_phys(cur_phys_src, cur_phys_dst, count);
1638
1639 nbytes += count;
1640 virtsrc += count;
1641 virtdst += count;
1642 size -= count;
1643 }
1644
1645 return nbytes;
1646 }
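/*
 * Usage sketch (not compiled): copying between two wired kernel mappings
 * without taking faults. src, dst and len are hypothetical; a short return
 * value means the copy stopped at an unmapped page.
 *
 *      vm_size_t done = ml_nofault_copy((vm_offset_t)src, (vm_offset_t)dst, len);
 *      if (done != len) {
 *              // a translation was missing somewhere in the range
 *      }
 */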
1647
1648 /*
1649 * Routine: ml_validate_nofault
1650 * Function: Validate that this address range has valid translations
1651 * in the kernel pmap. If translations are present, they are
1652 * assumed to be wired; i.e. no attempt is made to guarantee
1653 * that the translations persist after the check.
1654 * Returns: TRUE if the range is mapped and will not cause a fault,
1655 * FALSE otherwise.
1656 */
1657
1658 boolean_t
1659 ml_validate_nofault(
1660 vm_offset_t virtsrc, vm_size_t size)
1661 {
1662 addr64_t cur_phys_src;
1663 uint32_t count;
1664
1665 while (size > 0) {
1666 if (!(cur_phys_src = kvtophys(virtsrc))) {
1667 return FALSE;
1668 }
1669 if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
1670 return FALSE;
1671 }
1672 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
1673 if (count > size) {
1674 count = (uint32_t)size;
1675 }
1676
1677 virtsrc += count;
1678 size -= count;
1679 }
1680
1681 return TRUE;
1682 }
1683
1684 void
1685 ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
1686 {
1687 *phys_addr = 0;
1688 *size = 0;
1689 }
1690
1691 void
1692 active_rt_threads(__unused boolean_t active)
1693 {
1694 }
1695
1696 static void
1697 cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
1698 {
1699 return;
1700 }
1701
1702 cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
1703
1704 void
1705 cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
1706 {
1707 if (cpu_qos_cb != NULL) {
1708 cpu_qos_update = cpu_qos_cb;
1709 } else {
1710 cpu_qos_update = cpu_qos_cb_default;
1711 }
1712 }
1713
1714 void
1715 thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
1716 {
1717 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
1718
1719 cpu_qos_update((int)urgency, rt_period, rt_deadline);
1720
1721 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
1722 }
1723
1724 void
1725 machine_run_count(__unused uint32_t count)
1726 {
1727 }
1728
1729 processor_t
1730 machine_choose_processor(__unused processor_set_t pset, processor_t processor)
1731 {
1732 return processor;
1733 }
1734
1735 #if KASAN
1736 vm_offset_t ml_stack_base(void);
1737 vm_size_t ml_stack_size(void);
1738
1739 vm_offset_t
1740 ml_stack_base(void)
1741 {
1742 uintptr_t local = (uintptr_t) &local;
1743 vm_offset_t intstack_top_ptr;
1744
1745 intstack_top_ptr = getCpuDatap()->intstack_top;
1746 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1747 return intstack_top_ptr - INTSTACK_SIZE;
1748 } else {
1749 return current_thread()->kernel_stack;
1750 }
1751 }
1752 vm_size_t
1753 ml_stack_size(void)
1754 {
1755 uintptr_t local = (uintptr_t) &local;
1756 vm_offset_t intstack_top_ptr;
1757
1758 intstack_top_ptr = getCpuDatap()->intstack_top;
1759 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1760 return INTSTACK_SIZE;
1761 } else {
1762 return kernel_stack_size;
1763 }
1764 }
1765 #endif
1766
1767 boolean_t
1768 machine_timeout_suspended(void)
1769 {
1770 return FALSE;
1771 }
1772
1773 kern_return_t
1774 ml_interrupt_prewarm(__unused uint64_t deadline)
1775 {
1776 return KERN_FAILURE;
1777 }
1778
1779 /*
1780 * Assumes fiq, irq disabled.
1781 */
1782 void
1783 ml_set_decrementer(uint32_t dec_value)
1784 {
1785 cpu_data_t *cdp = getCpuDatap();
1786
1787 assert(ml_get_interrupts_enabled() == FALSE);
1788 cdp->cpu_decrementer = dec_value;
1789
1790 if (cdp->cpu_set_decrementer_func) {
1791 cdp->cpu_set_decrementer_func(dec_value);
1792 } else {
1793 __builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value);
1794 }
1795 }
1796
1797 uint64_t
1798 ml_get_hwclock()
1799 {
1800 uint64_t timebase;
1801
1802 // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
1803 // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
1804 // to other instructions executed on the same processor."
1805 __builtin_arm_isb(ISB_SY);
1806 timebase = __builtin_arm_rsr64("CNTVCT_EL0");
1807
1808 return timebase;
1809 }
1810
1811 uint64_t
1812 ml_get_timebase()
1813 {
1814 return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase;
1815 }
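/*
 * Usage sketch (not compiled): timing a section of code against the
 * monotonic timebase; absolutetime_to_nanoseconds() is the inverse of the
 * nanoseconds_to_absolutetime() conversion used elsewhere in this file.
 *
 *      uint64_t t0 = ml_get_timebase();
 *      ... work ...
 *      uint64_t elapsed_ns;
 *      absolutetime_to_nanoseconds(ml_get_timebase() - t0, &elapsed_ns);
 */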
1816
1817 /*
1818 * Get the speculative timebase without an ISB.
1819 */
1820 __attribute__((unused))
1821 static uint64_t
1822 ml_get_speculative_timebase()
1823 {
1824 uint64_t timebase;
1825
1826 timebase = __builtin_arm_rsr64("CNTVCT_EL0");
1827
1828 return timebase + getCpuDatap()->cpu_base_timebase;
1829 }
1830
1831 uint32_t
1832 ml_get_decrementer()
1833 {
1834 cpu_data_t *cdp = getCpuDatap();
1835 uint32_t dec;
1836
1837 assert(ml_get_interrupts_enabled() == FALSE);
1838
1839 if (cdp->cpu_get_decrementer_func) {
1840 dec = cdp->cpu_get_decrementer_func();
1841 } else {
1842 uint64_t wide_val;
1843
1844 wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0");
1845 dec = (uint32_t)wide_val;
1846 assert(wide_val == (uint64_t)dec);
1847 }
1848
1849 return dec;
1850 }
1851
1852 boolean_t
1853 ml_get_timer_pending()
1854 {
1855 uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0");
1856 return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
1857 }
1858
1859 static void
1860 cache_trap_error(thread_t thread, vm_map_address_t fault_addr)
1861 {
1862 mach_exception_data_type_t exc_data[2];
1863 arm_saved_state_t *regs = get_user_regs(thread);
1864
1865 set_saved_state_far(regs, fault_addr);
1866
1867 exc_data[0] = KERN_INVALID_ADDRESS;
1868 exc_data[1] = fault_addr;
1869
1870 exception_triage(EXC_BAD_ACCESS, exc_data, 2);
1871 }
1872
1873 static void
1874 cache_trap_recover()
1875 {
1876 vm_map_address_t fault_addr;
1877
1878 __asm__ volatile ("mrs %0, FAR_EL1" : "=r"(fault_addr));
1879
1880 cache_trap_error(current_thread(), fault_addr);
1881 }
1882
1883 static void
1884 set_cache_trap_recover(thread_t thread)
1885 {
1886 #if defined(HAS_APPLE_PAC)
1887 thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover,
1888 ptrauth_key_function_pointer, 0,
1889 ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
1890 #else /* defined(HAS_APPLE_PAC) */
1891 thread->recover = (vm_address_t)cache_trap_recover;
1892 #endif /* defined(HAS_APPLE_PAC) */
1893 }
1894
1895 static void
1896 dcache_flush_trap(vm_map_address_t start, vm_map_size_t size)
1897 {
1898 vm_map_address_t end = start + size;
1899 thread_t thread = current_thread();
1900 vm_offset_t old_recover = thread->recover;
1901
1902 /* Check bounds */
1903 if (task_has_64Bit_addr(current_task())) {
1904 if (end > MACH_VM_MAX_ADDRESS) {
1905 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1906 }
1907 } else {
1908 if (end > VM_MAX_ADDRESS) {
1909 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1910 }
1911 }
1912
1913 if (start > end) {
1914 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1915 }
1916
1917 set_cache_trap_recover(thread);
1918
1919 /*
1920 * We're coherent on Apple ARM64 CPUs, so this could be a nop. However,
1921 * if the region given us is bad, it would be good to catch it and
1922 * crash, ergo we still do the flush.
1923 */
1924 FlushPoC_DcacheRegion(start, (uint32_t)size);
1925
1926 /* Restore recovery function */
1927 thread->recover = old_recover;
1928
1929 /* Return (caller does exception return) */
1930 }
1931
1932 static void
1933 icache_invalidate_trap(vm_map_address_t start, vm_map_size_t size)
1934 {
1935 vm_map_address_t end = start + size;
1936 thread_t thread = current_thread();
1937 vm_offset_t old_recover = thread->recover;
1938
1939 /* Check bounds */
1940 if (task_has_64Bit_addr(current_task())) {
1941 if (end > MACH_VM_MAX_ADDRESS) {
1942 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1943 }
1944 } else {
1945 if (end > VM_MAX_ADDRESS) {
1946 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1947 }
1948 }
1949
1950 if (start > end) {
1951 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1952 }
1953
1954 set_cache_trap_recover(thread);
1955
1956 /* Invalidate iCache to point of unification */
1957 InvalidatePoU_IcacheRegion(start, (uint32_t)size);
1958
1959 /* Restore recovery function */
1960 thread->recover = old_recover;
1961
1962 /* Return (caller does exception return) */
1963 }
1964
1965 __attribute__((noreturn))
1966 void
1967 platform_syscall(arm_saved_state_t *state)
1968 {
1969 uint32_t code;
1970
1971 #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
1972
1973 code = (uint32_t)get_saved_state_reg(state, 3);
1974 switch (code) {
1975 case 0:
1976 /* I-Cache flush */
1977 platform_syscall_kprintf("icache flush requested.\n");
1978 icache_invalidate_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
1979 break;
1980 case 1:
1981 /* D-Cache flush */
1982 platform_syscall_kprintf("dcache flush requested.\n");
1983 dcache_flush_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
1984 break;
1985 case 2:
1986 /* set cthread */
1987 platform_syscall_kprintf("set cthread self.\n");
1988 thread_set_cthread_self(get_saved_state_reg(state, 0));
1989 break;
1990 case 3:
1991 /* get cthread */
1992 platform_syscall_kprintf("get cthread self.\n");
1993 set_saved_state_reg(state, 0, thread_get_cthread_self());
1994 break;
1995 default:
1996 platform_syscall_kprintf("unknown: %d\n", code);
1997 break;
1998 }
1999
2000 thread_exception_return();
2001 }
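/*
 * Register convention observed by the dispatcher above, as derived from the
 * get_saved_state_reg()/set_saved_state_reg() calls: the selector arrives in
 * x3, arguments in x0/x1, and the "get cthread" case returns its result in
 * x0.  How user space actually raises the trap is defined by the exception
 * path and libsyscall, not in this file.
 */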
2002
2003 static void
2004 _enable_timebase_event_stream(uint32_t bit_index)
2005 {
2006 uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */
2007
2008 if (bit_index >= 64) {
2009 panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
2010 }
2011
2012 __asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl));
2013
2014 cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
2015 cntkctl |= CNTKCTL_EL1_EVNTEN;
2016 cntkctl |= CNTKCTL_EL1_EVENTDIR; /* event on the 1->0 transition; either edge works for our purposes */
2017
2018 /*
2019 * If the SOC supports it (and it isn't broken), enable
2020 * EL0 access to the timebase registers.
2021 */
2022 if (user_timebase_type() != USER_TIMEBASE_NONE) {
2023 cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
2024 }
2025
2026 __builtin_arm_wsr64("CNTKCTL_EL1", cntkctl);
2027 }
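/*
 * The event stream programmed above is what bounds how long a core can sit
 * in WFE: wfe_timeout_configure() below chooses the counter bit so that the
 * resulting event period approximates the requested events-per-second rate.
 */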
2028
2029 /*
2030 * Turn timer on, unmask that interrupt.
2031 */
2032 static void
2033 _enable_virtual_timer(void)
2034 {
2035 uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "msr" prefers it this way */
2036
2037 __builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl);
2038 /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
2039 __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
2040 }
2041
2042 void
2043 fiq_context_init(boolean_t enable_fiq __unused)
2044 {
2045 /* Interrupts still disabled. */
2046 assert(ml_get_interrupts_enabled() == FALSE);
2047 _enable_virtual_timer();
2048 }
2049
2050 void
2051 wfe_timeout_init(void)
2052 {
2053 _enable_timebase_event_stream(arm64_eventi);
2054 }
2055
2056 void
2057 wfe_timeout_configure(void)
2058 {
2059 /* Could fill in our own ops here, if we needed them */
2060 uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0;
2061 uint32_t bit_index;
2062
2063 if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
2064 if (events_per_sec <= 0) {
2065 events_per_sec = 1;
2066 } else if (events_per_sec > USEC_PER_SEC) {
2067 events_per_sec = USEC_PER_SEC;
2068 }
2069 } else {
2070 #if defined(ARM_BOARD_WFE_TIMEOUT_NS)
2071 events_per_sec = NSEC_PER_SEC / ARM_BOARD_WFE_TIMEOUT_NS;
2072 #else /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2073 /* Default to 1usec (or as close as we can get) */
2074 events_per_sec = USEC_PER_SEC;
2075 #endif /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2076 }
2077 ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2078 ticks_per_event = ticks_per_sec / events_per_sec;
2079 bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */
2080
2081 /* Round up to power of two */
2082 if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
2083 bit_index++;
2084 }
2085
2086 /*
2087 * The timer can only trigger on rising or falling edge,
2088 * not both; we don't care which we trigger on, but we
2089 * do need to adjust which bit we are interested in to
2090 * account for this.
2091 */
2092 if (bit_index != 0) {
2093 bit_index--;
2094 }
2095
2096 arm64_eventi = bit_index;
2097 wfe_timeout_init();
2098 }
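/*
 * Worked example (assuming a 24 MHz timebase): the default of USEC_PER_SEC
 * events per second gives ticks_per_event = 24.  flsll(24) - 1 = 4; since 24
 * is not a power of two, bit_index is rounded up to 5, and the edge
 * adjustment then brings it back to 4.  The event stream therefore fires on
 * each 1->0 transition of CNTVCT bit 4, i.e. every 2^5 = 32 ticks, or about
 * 1.3us -- the closest power-of-two approximation of the requested 1us.
 */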
2099
2100 boolean_t
2101 ml_delay_should_spin(uint64_t interval)
2102 {
2103 cpu_data_t *cdp = getCpuDatap();
2104
2105 if (cdp->cpu_idle_latency) {
2106 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2107 } else {
2108 /*
2109 * Early boot, latency is unknown. Err on the side of blocking,
2110 * which should always be safe, even if slow
2111 */
2112 return FALSE;
2113 }
2114 }
2115
2116 boolean_t
2117 ml_thread_is64bit(thread_t thread)
2118 {
2119 return thread_is_64bit_addr(thread);
2120 }
2121
2122 void
2123 ml_delay_on_yield(void)
2124 {
2125 #if DEVELOPMENT || DEBUG
2126 if (yield_delay_us) {
2127 delay(yield_delay_us);
2128 }
2129 #endif
2130 }
2131
2132 void
2133 ml_timer_evaluate(void)
2134 {
2135 }
2136
2137 boolean_t
2138 ml_timer_forced_evaluation(void)
2139 {
2140 return FALSE;
2141 }
2142
2143 uint64_t
2144 ml_energy_stat(thread_t t)
2145 {
2146 return t->machine.energy_estimate_nj;
2147 }
2148
2149
2150 void
2151 ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
2152 {
2153 /*
2154 * For now: update the resource coalition stats of the
2155 * current thread's coalition
2156 */
2157 task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2158 }
2159
2160 uint64_t
2161 ml_gpu_stat(__unused thread_t t)
2162 {
2163 return 0;
2164 }
2165
2166 #if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT
2167
2168 static void
2169 timer_state_event(boolean_t switch_to_kernel)
2170 {
2171 thread_t thread = current_thread();
2172 if (!thread->precise_user_kernel_time) {
2173 return;
2174 }
2175
2176 processor_t pd = current_processor();
2177 uint64_t now = ml_get_speculative_timebase();
2178
2179 timer_stop(pd->current_state, now);
2180 pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
2181 timer_start(pd->current_state, now);
2182
2183 timer_stop(pd->thread_timer, now);
2184 pd->thread_timer = (switch_to_kernel) ? &thread->system_timer : &thread->user_timer;
2185 timer_start(pd->thread_timer, now);
2186 }
2187
2188 void
2189 timer_state_event_user_to_kernel(void)
2190 {
2191 timer_state_event(TRUE);
2192 }
2193
2194 void
2195 timer_state_event_kernel_to_user(void)
2196 {
2197 timer_state_event(FALSE);
2198 }
2199 #endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT */
2200
2201 /*
2202 * The following are required for parts of the kernel
2203 * that cannot resolve these functions as inlines:
2204 */
2205 extern thread_t current_act(void) __attribute__((const));
2206 thread_t
2207 current_act(void)
2208 {
2209 return current_thread_fast();
2210 }
2211
2212 #undef current_thread
2213 extern thread_t current_thread(void) __attribute__((const));
2214 thread_t
2215 current_thread(void)
2216 {
2217 return current_thread_fast();
2218 }
2219
2220 typedef struct {
2221 ex_cb_t cb;
2222 void *refcon;
2223 } ex_cb_info_t;
2225
2226 ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];
2227
2228 /*
2229 * Callback registration
2230 * Currently we support only one registered callback per class, but
2231 * it should be possible to support more callbacks.
2232 */
2233 kern_return_t
2234 ex_cb_register(
2235 ex_cb_class_t cb_class,
2236 ex_cb_t cb,
2237 void *refcon)
2238 {
2239 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2240
2241 if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX)) {
2242 return KERN_INVALID_VALUE;
2243 }
2244
2245 if (NULL == pInfo->cb) {
2246 pInfo->cb = cb;
2247 pInfo->refcon = refcon;
2248 return KERN_SUCCESS;
2249 }
2250 return KERN_FAILURE;
2251 }
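#if 0
/*
 * Illustrative sketch only (not part of the build): registering an exception
 * callback.  The callback and class names here are hypothetical; the
 * parameter list mirrors the invocation made by ex_cb_invoke() below, so
 * consult the ex_cb_t typedef for the exact prototype.
 */
static ex_cb_action_t
example_ex_cb(ex_cb_class_t cb_class, void *refcon, ex_cb_state_t *state)
{
    /* the state argument carries the fault address captured by ex_cb_invoke() */
    (void)cb_class;
    (void)refcon;
    (void)state;
    return EXCB_ACTION_NONE;
}

static void
example_register_ex_cb(void)
{
    /* EXCB_CLASS_EXAMPLE stands in for a real ex_cb_class_t value */
    if (ex_cb_register(EXCB_CLASS_EXAMPLE, example_ex_cb, NULL) != KERN_SUCCESS) {
        /* the class already has a callback, or the arguments were invalid */
    }
}
#endif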
2252
2253 /*
2254 * Called internally by platform kernel to invoke the registered callback for class
2255 */
2256 ex_cb_action_t
2257 ex_cb_invoke(
2258 ex_cb_class_t cb_class,
2259 vm_offset_t far)
2260 {
2261 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2262 ex_cb_state_t state = {far};
2263
2264 if (cb_class >= EXCB_CLASS_MAX) {
2265 panic("Invalid exception callback class 0x%x\n", cb_class);
2266 }
2267
2268 if (pInfo->cb) {
2269 return pInfo->cb(cb_class, pInfo->refcon, &state);
2270 }
2271 return EXCB_ACTION_NONE;
2272 }
2273
2274 #if defined(HAS_APPLE_PAC)
2275 static inline bool
2276 cpu_supports_userkeyen()
2277 {
2278 #if HAS_APCTL_EL1_USERKEYEN
2279 return true;
2280 #else
2281 return false;
2282 #endif
2283 }
2284
2285 /**
2286 * Returns the default JOP key. Depending on how the CPU diversifies userspace
2287 * JOP keys, this value may reflect either KERNKeyLo or APIAKeyLo.
2288 */
2289 uint64_t
2290 ml_default_jop_pid(void)
2291 {
2292 if (cpu_supports_userkeyen()) {
2293 return KERNEL_KERNKEY_ID;
2294 } else {
2295 return KERNEL_JOP_ID;
2296 }
2297 }
2298
2299 void
2300 ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop)
2301 {
2302 assert(task);
2303 task->disable_user_jop = disable_user_jop;
2304 }
2305
2306 void
2307 ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop)
2308 {
2309 assert(thread);
2310 thread->machine.disable_user_jop = disable_user_jop;
2311 }
2312
2313 void
2314 ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2315 {
2316 if (inherit) {
2317 task->rop_pid = parent_task->rop_pid;
2318 } else {
2319 task->rop_pid = early_random();
2320 }
2321 }
2322
2323 /**
2324 * jop_pid may be inherited from the parent task or generated inside the shared
2325 * region. Unfortunately these two parameters are available at very different
2326 * times during task creation, so we need to split this into two steps.
2327 */
2328 void
2329 ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit)
2330 {
2331 if (inherit) {
2332 task->jop_pid = parent_task->jop_pid;
2333 } else {
2334 task->jop_pid = ml_default_jop_pid();
2335 }
2336 }
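/*
 * Assumed ordering during task creation (the actual call sites live in the
 * task and shared-region code, not in this file):
 *
 *   ml_task_set_jop_pid(task, parent, inherit);      // at task creation
 *   ml_task_set_jop_pid_from_shared_region(task);    // once the region is known
 *   ml_thread_set_jop_pid(thread, task);             // copied into each thread
 */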
2337
2338 void
2339 ml_task_set_jop_pid_from_shared_region(task_t task)
2340 {
2341 vm_shared_region_t sr = vm_shared_region_get(task);
2342 /*
2343 * If there's no shared region, we can assign the key arbitrarily. This
2344 * typically happens when Mach-O image activation failed part of the way
2345 * through, and this task is in the middle of dying with SIGKILL anyway.
2346 */
2347 if (__improbable(!sr)) {
2348 task->jop_pid = early_random();
2349 return;
2350 }
2351 vm_shared_region_deallocate(sr);
2352
2353 /*
2354 * Similarly we have to worry about jetsam having killed the task and
2355 * already cleared the shared_region_id.
2356 */
2357 task_lock(task);
2358 if (task->shared_region_id != NULL) {
2359 task->jop_pid = shared_region_find_key(task->shared_region_id);
2360 } else {
2361 task->jop_pid = early_random();
2362 }
2363 task_unlock(task);
2364 }
2365
2366 void
2367 ml_thread_set_jop_pid(thread_t thread, task_t task)
2368 {
2369 thread->machine.jop_pid = task->jop_pid;
2370 }
2371 #endif /* defined(HAS_APPLE_PAC) */
2372
2373
2374 #if defined(HAS_APPLE_PAC)
2375 #define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
2376 asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier));
2377
2378 /*
2379 * ml_auth_ptr_unchecked: call this instead of ptrauth_auth_data
2380 * intrinsic when you don't want to trap on auth fail.
2381 *
2382 */
2383 void *
2384 ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
2385 {
2386 switch (key & 0x3) {
2387 case ptrauth_key_asia:
2388 _ml_auth_ptr_unchecked(ptr, ia, modifier);
2389 break;
2390 case ptrauth_key_asib:
2391 _ml_auth_ptr_unchecked(ptr, ib, modifier);
2392 break;
2393 case ptrauth_key_asda:
2394 _ml_auth_ptr_unchecked(ptr, da, modifier);
2395 break;
2396 case ptrauth_key_asdb:
2397 _ml_auth_ptr_unchecked(ptr, db, modifier);
2398 break;
2399 }
2400
2401 return ptr;
2402 }
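#if 0
/*
 * Illustrative sketch only (not part of the build): authenticating a signed
 * data pointer without arranging a trap on failure.  Per the comment above,
 * a failed authentication does not kill the caller; the pointer comes back
 * invalid and must be validated by the caller.  The function and parameter
 * names are hypothetical.
 */
static void *
example_auth_no_trap(void *signed_ptr, uint64_t discriminator)
{
    return ml_auth_ptr_unchecked(signed_ptr, ptrauth_key_asda, discriminator);
}
#endif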
2403 #endif /* defined(HAS_APPLE_PAC) */
2404
2405 #ifdef CONFIG_XNUPOST
2406 void
2407 ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
2408 {
2409 thread_t thread = current_thread();
2410 thread->machine.expected_fault_handler = expected_fault_handler;
2411 thread->machine.expected_fault_addr = expected_fault_addr;
2412 }
2413
2414 void
2415 ml_expect_fault_end(void)
2416 {
2417 thread_t thread = current_thread();
2418 thread->machine.expected_fault_handler = NULL;
2419 thread->machine.expected_fault_addr = 0;
2420 }
2421 #endif /* CONFIG_XNUPOST */
2422
2423 void
2424 ml_hibernate_active_pre(void)
2425 {
2426 #if HIBERNATION
2427 if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
2428 /* validate rorgn hmac */
2429 ppl_hmac_compute_rorgn_hmac();
2430
2431 hibernate_rebuild_vm_structs();
2432 }
2433 #endif /* HIBERNATION */
2434 }
2435
2436 void
2437 ml_hibernate_active_post(void)
2438 {
2439 #if HIBERNATION
2440 if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
2441 hibernate_machine_init();
2442 hibernate_vm_lock_end();
2443 current_cpu_datap()->cpu_hibernate = 0;
2444 }
2445 #endif /* HIBERNATION */
2446 }
2447
2448 /**
2449 * Return a machine-dependent array of address space regions that should be
2450 * reserved by the VM (pre-mapped in the address space). This will prevent user
2451 * processes from allocating or deallocating from within these regions.
2452 *
2453 * @param vm_is64bit True if the process has a 64-bit address space.
2454 * @param regions An out parameter representing an array of regions to reserve.
2455 *
2456 * @return The number of reserved regions returned through `regions`.
2457 */
2458 size_t
2459 ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
2460 {
2461 assert(regions != NULL);
2462
2463 /**
2464 * Reserved regions only apply to 64-bit address spaces. This is because
2465 * we only expect to grow the maximum user VA address on 64-bit address spaces
2466 * (we've essentially already reached the max for 32-bit spaces). The reserved
2467 * regions should safely fall outside of the max user VA for 32-bit processes.
2468 */
2469 if (vm_is64bit) {
2470 *regions = vm_reserved_regions;
2471 return ARRAY_COUNT(vm_reserved_regions);
2472 } else {
2473 /* Don't reserve any VA regions on arm64_32 processes. */
2474 *regions = NULL;
2475 return 0;
2476 }
2477 }
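#if 0
/*
 * Illustrative sketch only (not part of the build): how a VM-layer caller
 * might consume the reserved-region list.  Only the pointer/count contract
 * documented above is assumed; struct vm_reserved_region's fields are
 * defined elsewhere, so the loop body is left as a comment.
 */
static void
example_reserve_regions(bool is64bit)
{
    struct vm_reserved_region *regions = NULL;
    size_t count = ml_get_vm_reserved_regions(is64bit, &regions);

    for (size_t i = 0; i < count; i++) {
        /* pre-map regions[i] so user space cannot allocate over it */
    }
}
#endif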
2478 /* These WFE recommendations are expected to be updated on a relatively
2479 * infrequent cadence, possibly from a different cluster; hence
2480 * false cacheline sharing isn't expected to be material.
2481 */
2482 static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];
2483
2484 uint32_t
2485 ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
2486 {
2487 assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
2488 assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
2489 os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
2490 return 0; /* Success */
2491 }
2492
2493 uint64_t
2494 ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
2495 {
2496 /* This and its consumer do not synchronize vis-a-vis updates
2497 * of the recommendation; races are acceptable.
2498 */
2499 uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
2500 return wfet;
2501 }
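#if 0
/*
 * Illustrative sketch only (not part of the build): a hypothetical platform
 * or power-management layer publishing a per-cluster WFE timeout and a wait
 * path consuming it.  The names and the treatment of a zero recommendation
 * are assumptions, not taken from this file.
 */
static void
example_wfe_recommendation(uint32_t cluster_id, uint64_t timeout_abs)
{
    /* Publish; inputs are validated by the asserts in the update routine. */
    (void)ml_update_cluster_wfe_recommendation(cluster_id, timeout_abs, 0);

    /* Consume; zero is assumed here to mean "no recommendation". */
    uint64_t wfe_timeout = ml_cluster_wfe_timeout(cluster_id);
    if (wfe_timeout != 0) {
        /* bound the WFE wait by wfe_timeout absolute-time ticks */
    }
}
#endif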