1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm64/proc_reg.h>
30 #include <arm/machine_cpu.h>
31 #include <arm/cpu_internal.h>
32 #include <arm/cpuid.h>
33 #include <arm/io_map_entries.h>
34 #include <arm/cpu_data.h>
35 #include <arm/cpu_data_internal.h>
36 #include <arm/caches_internal.h>
37 #include <arm/misc_protos.h>
38 #include <arm/machdep_call.h>
39 #include <arm/machine_routines.h>
40 #include <arm/rtclock.h>
41 #include <arm/cpuid_internal.h>
42 #include <arm/cpu_capabilities.h>
43 #include <console/serial_protos.h>
44 #include <kern/machine.h>
45 #include <kern/misc_protos.h>
46 #include <prng/random.h>
47 #include <kern/startup.h>
48 #include <kern/thread.h>
49 #include <kern/timer_queue.h>
50 #include <mach/machine.h>
51 #include <machine/atomic.h>
52 #include <machine/config.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_shared_region.h>
56 #include <vm/vm_map.h>
57 #include <sys/codesign.h>
58 #include <sys/kdebug.h>
59 #include <kern/coalition.h>
60 #include <pexpert/device_tree.h>
61
62 #include <IOKit/IOPlatformExpert.h>
63 #if HIBERNATION
64 #include <IOKit/IOHibernatePrivate.h>
65 #endif /* HIBERNATION */
66
67 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
68 #include <arm64/amcc_rorgn.h>
69 #endif
70
71 #include <libkern/section_keywords.h>
72
73 /**
74 * On supported hardware, debuggable builds make the HID bits read-only
75 * without locking them. This lets people manually modify HID bits while
76 * debugging, since they can use a debugging tool to first reset the HID
77 * bits back to read/write. However, it will still catch xnu changes that
78 * accidentally write to HID bits after they've been made read-only.
79 */
80 #if HAS_TWO_STAGE_SPR_LOCK && !(DEVELOPMENT || DEBUG)
81 #define USE_TWO_STAGE_SPR_LOCK
82 #endif
83
84 #if KPC
85 #include <kern/kpc.h>
86 #endif
87
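/*
 * MPIDR_EL1 affinity fields as used here: AFF0 identifies the core within
 * its cluster, AFF1 identifies the cluster.
 */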
88 #define MPIDR_CPU_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
89 #define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)
90
91 #if HAS_CLUSTER
92 static uint8_t cluster_initialized = 0;
93 #endif
94
95 uint32_t LockTimeOut;
96 uint32_t LockTimeOutUsec;
97 uint64_t TLockTimeOut;
98 uint64_t MutexSpin;
99 uint64_t low_MutexSpin;
100 int64_t high_MutexSpin;
101
102 static uint64_t ml_wfe_hint_max_interval;
103 #define MAX_WFE_HINT_INTERVAL_US (500ULL)
104
105 /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
106 TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);
107
108 extern vm_offset_t segLOWEST;
109 extern vm_offset_t segLOWESTTEXT;
110 extern vm_offset_t segLASTB;
111 extern unsigned long segSizeLAST;
112
113 /* ARM64 specific bounds; used to test for presence in the kernelcache. */
114 extern vm_offset_t vm_kernelcache_base;
115 extern vm_offset_t vm_kernelcache_top;
116
117 #if defined(HAS_IPI)
118 unsigned int gFastIPI = 1;
119 #define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
120 static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
121 kDeferredIPITimerDefault);
122 #endif /* defined(HAS_IPI) */
123
124 thread_t Idle_context(void);
125
126 SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
127 SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
128 SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
129 .version = CPU_TOPOLOGY_VERSION,
130 .cpus = topology_cpu_array,
131 .clusters = topology_cluster_array,
132 };
133 /**
134 * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
135 * entries of an arbitrary data type. This is intended for use by specialized consumers
136 * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
137 * as follows:
138 * hypothetical_array[cluster_offsets[AFF1] + AFF0]
139 * Most consumers should instead use general-purpose facilities such as PERCPU or
140 * ml_get_cpu_number().
141 */
142 SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];
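/*
 * Illustrative (hypothetical) use, given an MPIDR_EL1 value in hand:
 *
 *     uint64_t mpidr;
 *     MRS(mpidr, "MPIDR_EL1");
 *     entry_t *e = &hypothetical_array[cluster_offsets[MPIDR_CLUSTER_ID(mpidr)]
 *         + MPIDR_CPU_ID(mpidr)];
 *
 * where hypothetical_array and entry_t stand in for a consumer's own
 * MAX_CPUS-sized per-CPU storage.
 */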
143
144 SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;
145
146 extern uint32_t lockdown_done;
147
148 /**
149 * Represents regions of virtual address space that should be reserved
150 * (pre-mapped) in each user address space.
151 */
152 SECURITY_READ_ONLY_LATE(static struct vm_reserved_region) vm_reserved_regions[] = {
153 {
154 .vmrr_name = "GPU Carveout",
155 .vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS,
156 .vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS)
157 },
158 /*
159 * Reserve the virtual memory space representing the commpage nesting region
160 * to prevent user processes from allocating memory within it. The actual
161 * page table entries for the commpage are inserted by vm_commpage_enter().
162 * This vm_map_enter() just prevents userspace from allocating/deallocating
163 * anything within the entire commpage nested region.
164 */
165 {
166 .vmrr_name = "commpage nesting",
167 .vmrr_addr = _COMM_PAGE64_NESTING_START,
168 .vmrr_size = _COMM_PAGE64_NESTING_SIZE
169 }
170 };
171
172 uint32_t get_arm_cpu_version(void);
173
174 #if defined(HAS_IPI)
175 static inline void
176 ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
177 {
178 #if HAS_CLUSTER
179 uint64_t local_mpidr;
180 /* NOTE: this logic expects that we are called in a non-preemptible
181 * context, or at least one in which the calling thread is bound
182 * to a single CPU. Otherwise we may migrate between choosing which
183 * IPI mechanism to use and issuing the IPI. */
184 MRS(local_mpidr, "MPIDR_EL1");
185 if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
186 uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
187 MSR(ARM64_REG_IPI_RR_LOCAL, x);
188 } else {
189 #define IPI_RR_TARGET_CLUSTER_SHIFT 16
190 uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
191 MSR(ARM64_REG_IPI_RR_GLOBAL, x);
192 }
193 #else
194 uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
195 MSR(ARM64_REG_IPI_RR, x);
196 #endif
197 }
198 #endif
199
200 #if !defined(HAS_IPI)
201 __dead2
202 #endif
203 void
204 ml_cpu_signal(unsigned int cpu_mpidr __unused)
205 {
206 #if defined(HAS_IPI)
207 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
208 #else
209 panic("Platform does not support ACC Fast IPI");
210 #endif
211 }
212
213 #if !defined(HAS_IPI)
214 __dead2
215 #endif
216 void
217 ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
218 {
219 #if defined(HAS_IPI)
220 /* adjust IPI_CR timer countdown value for deferred IPI
221 * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
222 * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
223 *
224 * global register, should only require a single write to update all
225 * CPU cores: from Skye ACC user spec section 5.7.3.3
226 *
227 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
228 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
229 */
230 uint64_t abstime;
231
232 nanoseconds_to_absolutetime(nanosecs, &abstime);
233
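/*
 * Clamp to the 16-bit IPI_CR field. At a 24 MHz reference clock (typical
 * for Apple SoCs), 0xFFFF ticks is roughly 2.7 ms of maximum deferral.
 */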
234 abstime = MIN(abstime, 0xFFFF);
235
236 /* update deferred_ipi_timer_ns with the new clamped value */
237 absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
238
239 MSR(ARM64_REG_IPI_CR, abstime);
240 #else
241 (void)nanosecs;
242 panic("Platform does not support ACC Fast IPI");
243 #endif
244 }
245
246 uint64_t
247 ml_cpu_signal_deferred_get_timer()
248 {
249 #if defined(HAS_IPI)
250 return deferred_ipi_timer_ns;
251 #else
252 return 0;
253 #endif
254 }
255
256 #if !defined(HAS_IPI)
257 __dead2
258 #endif
259 void
260 ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
261 {
262 #if defined(HAS_IPI)
263 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
264 #else
265 panic("Platform does not support ACC Fast IPI deferral");
266 #endif
267 }
268
269 #if !defined(HAS_IPI)
270 __dead2
271 #endif
272 void
273 ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
274 {
275 #if defined(HAS_IPI)
276 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
277 #else
278 panic("Platform does not support ACC Fast IPI retraction");
279 #endif
280 }
281
282 void
283 machine_idle(void)
284 {
285 /* Interrupts are expected to be masked on entry or re-entry via
286 * Idle_load_context()
287 */
288 assert((__builtin_arm_rsr("DAIF") & DAIF_IRQF) == DAIF_IRQF);
289 Idle_context();
290 __builtin_arm_wsr("DAIFClr", (DAIFSC_IRQF | DAIFSC_FIQF));
291 }
292
293 void
294 OSSynchronizeIO(void)
295 {
296 __builtin_arm_dsb(DSB_SY);
297 }
298
299 uint64_t
300 get_aux_control(void)
301 {
302 uint64_t value;
303
304 MRS(value, "ACTLR_EL1");
305 return value;
306 }
307
308 uint64_t
309 get_mmu_control(void)
310 {
311 uint64_t value;
312
313 MRS(value, "SCTLR_EL1");
314 return value;
315 }
316
317 uint64_t
318 get_tcr(void)
319 {
320 uint64_t value;
321
322 MRS(value, "TCR_EL1");
323 return value;
324 }
325
326 boolean_t
327 ml_get_interrupts_enabled(void)
328 {
329 uint64_t value;
330
331 MRS(value, "DAIF");
332 if (value & DAIF_IRQF) {
333 return FALSE;
334 }
335 return TRUE;
336 }
337
338 pmap_paddr_t
339 get_mmu_ttb(void)
340 {
341 pmap_paddr_t value;
342
343 MRS(value, "TTBR0_EL1");
344 return value;
345 }
346
347 uint32_t
348 get_arm_cpu_version(void)
349 {
350 uint32_t value = machine_read_midr();
351
352 /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
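/* For example, a core reporting variant 0x2 and revision 0x1 yields 0x21. */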
353 return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
354 }
355
356 bool
357 ml_feature_supported(uint32_t feature_bit)
358 {
359 uint64_t aidr_el1_value = 0;
360
361 MRS(aidr_el1_value, "AIDR_EL1");
362
363
364 return aidr_el1_value & feature_bit;
365 }
366
367 /*
368 * user_cont_hwclock_allowed()
369 *
370 * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
371 * as a continuous time source (e.g. from mach_continuous_time)
372 */
373 boolean_t
374 user_cont_hwclock_allowed(void)
375 {
376 #if HAS_CONTINUOUS_HWCLOCK
377 return TRUE;
378 #else
379 return FALSE;
380 #endif
381 }
382
383
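/*
 * Report the flavor of userspace timebase access the commpage should
 * advertise. arm64 returns USER_TIMEBASE_SPEC: EL0 is expected to read the
 * counter directly, though such reads may be speculative (see ml_get_hwclock
 * below).
 */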
384 uint8_t
385 user_timebase_type(void)
386 {
387 return USER_TIMEBASE_SPEC;
388 }
389
390 void
391 machine_startup(__unused boot_args * args)
392 {
393 #if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
394 if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
395 gFastIPI = 1;
396 }
397 #endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
398
399 machine_conf();
400
401 /*
402 * Kick off the kernel bootstrap.
403 */
404 kernel_bootstrap();
405 /* NOTREACHED */
406 }
407
408 typedef void (*invalidate_fn_t)(void);
409
410 static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL;
411
412 void set_invalidate_hmac_function(invalidate_fn_t fn);
413
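/*
 * Register the callback that machine_lockdown() will invoke to invalidate
 * previously computed HMAC state. The callback may be registered at most
 * once; a second registration panics.
 */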
414 void
415 set_invalidate_hmac_function(invalidate_fn_t fn)
416 {
417 if (NULL != invalidate_hmac_function) {
418 panic("Invalidate HMAC function already set");
419 }
420
421 invalidate_hmac_function = fn;
422 }
423
424 void
425 machine_lockdown(void)
426 {
427 arm_vm_prot_finalize(PE_state.bootArgs);
428
429 #if CONFIG_KERNEL_INTEGRITY
430 #if KERNEL_INTEGRITY_WT
431 /* Watchtower
432 *
433 * Notify the monitor about the completion of early kernel bootstrap.
434 * From this point forward it will enforce the integrity of kernel text,
435 * rodata and page tables.
436 */
437
438 #ifdef MONITOR
439 monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
440 #endif
441 #endif /* KERNEL_INTEGRITY_WT */
442
443 #if XNU_MONITOR
444 pmap_lockdown_ppl();
445 #endif
446
447 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
448 /* KTRR
449 *
450 * Lock physical KTRR region. KTRR region is read-only. Memory outside
451 * the region is not executable at EL1.
452 */
453
454 rorgn_lockdown();
455 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
456
457
458 #endif /* CONFIG_KERNEL_INTEGRITY */
459
460
461 if (NULL != invalidate_hmac_function) {
462 invalidate_hmac_function();
463 }
464
465 lockdown_done = 1;
466 }
467
468
469 char *
470 machine_boot_info(
471 __unused char *buf,
472 __unused vm_size_t size)
473 {
474 return PE_boot_args();
475 }
476
477 void
478 slave_machine_init(__unused void *param)
479 {
480 cpu_machine_init(); /* Initialize the processor */
481 clock_init(); /* Init the clock */
482 }
483
484 /*
485 * Routine: machine_processor_shutdown
486 * Function:
487 */
488 thread_t
489 machine_processor_shutdown(
490 __unused thread_t thread,
491 void (*doshutdown)(processor_t),
492 processor_t processor)
493 {
494 return Shutdown_context(doshutdown, processor);
495 }
496
497 /*
498 * Routine: ml_init_lock_timeout
499 * Function:
500 */
501 void
502 ml_init_lock_timeout(void)
503 {
504 uint64_t abstime;
505 uint64_t mtxspin;
506 uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
507 uint32_t slto;
508
509 if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
510 default_timeout_ns = slto * NSEC_PER_USEC;
511 }
512
513 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
514 LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
515 LockTimeOut = (uint32_t)abstime;
516
517 if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
518 nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, &abstime);
519 TLockTimeOut = abstime;
520 } else {
521 TLockTimeOut = LockTimeOut >> 1;
522 }
523
524 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
525 if (mtxspin > USEC_PER_SEC >> 4) {
526 mtxspin = USEC_PER_SEC >> 4;
527 }
528 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
529 } else {
530 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
531 }
532 MutexSpin = abstime;
533 low_MutexSpin = MutexSpin;
534 /*
535 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
536 * real_ncpus is not set at this time.
537 *
538 * NOTE: active spinning is disabled on ARM. It can be activated
539 * by setting high_MutexSpin through the sysctl.
540 */
541 high_MutexSpin = low_MutexSpin;
542
543 nanoseconds_to_absolutetime(MAX_WFE_HINT_INTERVAL_US * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
544 }
545
546 /*
547 * This is called from the machine-independent routine cpu_up()
548 * to perform machine-dependent info updates.
549 */
550 void
551 ml_cpu_up(void)
552 {
553 os_atomic_inc(&machine_info.physical_cpu, relaxed);
554 os_atomic_inc(&machine_info.logical_cpu, relaxed);
555 }
556
557 /*
558 * This is called from the machine-independent routine cpu_down()
559 * to perform machine-dependent info updates.
560 */
561 void
562 ml_cpu_down(void)
563 {
564 cpu_data_t *cpu_data_ptr;
565
566 os_atomic_dec(&machine_info.physical_cpu, relaxed);
567 os_atomic_dec(&machine_info.logical_cpu, relaxed);
568
569 /*
570 * If we want to deal with outstanding IPIs, we need to
571 * do so relatively early in the processor_doshutdown path,
572 * as we pend decrementer interrupts using the IPI
573 * mechanism if we cannot immediately service them (if
574 * IRQ is masked). Do so now.
575 *
576 * We aren't on the interrupt stack here; would it make
577 * more sense to disable signaling and then enable
578 * interrupts? It might be a bit cleaner.
579 */
580 cpu_data_ptr = getCpuDatap();
581 cpu_data_ptr->cpu_running = FALSE;
582
583 if (cpu_data_ptr != &BootCpuData) {
584 /*
585 * Move all of this cpu's timers to the master/boot cpu,
586 * and poke it in case there's a sooner deadline for it to schedule.
587 */
588 timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
589 cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, NULL);
590 }
591
592 cpu_signal_handler_internal(TRUE);
593 }
594
595 /*
596 * Routine: ml_cpu_get_info
597 * Function:
598 */
599 void
600 ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
601 {
602 cache_info_t *cpuid_cache_info;
603
604 cpuid_cache_info = cache_info();
605 ml_cpu_info->vector_unit = 0;
606 ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
607 ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
608 ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;
609
610 #if (__ARM_ARCH__ >= 7)
611 ml_cpu_info->l2_settings = 1;
612 ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
613 #else
614 ml_cpu_info->l2_settings = 0;
615 ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
616 #endif
617 ml_cpu_info->l3_settings = 0;
618 ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
619 }
620
621 unsigned int
622 ml_get_machine_mem(void)
623 {
624 return machine_info.memory_size;
625 }
626
627 __attribute__((noreturn))
628 void
629 halt_all_cpus(boolean_t reboot)
630 {
631 if (reboot) {
632 printf("MACH Reboot\n");
633 PEHaltRestart(kPERestartCPU);
634 } else {
635 printf("CPU halted\n");
636 PEHaltRestart(kPEHaltCPU);
637 }
638 while (1) {
639 ;
640 }
641 }
642
643 __attribute__((noreturn))
644 void
645 halt_cpu(void)
646 {
647 halt_all_cpus(FALSE);
648 }
649
650 /*
651 * Routine: machine_signal_idle
652 * Function:
653 */
654 void
655 machine_signal_idle(
656 processor_t processor)
657 {
658 cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
659 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
660 }
661
662 void
663 machine_signal_idle_deferred(
664 processor_t processor)
665 {
666 cpu_signal_deferred(processor_to_cpu_datap(processor));
667 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
668 }
669
670 void
671 machine_signal_idle_cancel(
672 processor_t processor)
673 {
674 cpu_signal_cancel(processor_to_cpu_datap(processor));
675 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
676 }
677
678 /*
679 * Routine: ml_install_interrupt_handler
680 * Function: Initialize Interrupt Handler
681 */
682 void
683 ml_install_interrupt_handler(
684 void *nub,
685 int source,
686 void *target,
687 IOInterruptHandler handler,
688 void *refCon)
689 {
690 cpu_data_t *cpu_data_ptr;
691 boolean_t current_state;
692
693 current_state = ml_set_interrupts_enabled(FALSE);
694 cpu_data_ptr = getCpuDatap();
695
696 cpu_data_ptr->interrupt_nub = nub;
697 cpu_data_ptr->interrupt_source = source;
698 cpu_data_ptr->interrupt_target = target;
699 cpu_data_ptr->interrupt_handler = handler;
700 cpu_data_ptr->interrupt_refCon = refCon;
701
702 (void) ml_set_interrupts_enabled(current_state);
703 }
704
705 /*
706 * Routine: ml_init_interrupt
707 * Function: Initialize Interrupts
708 */
709 void
710 ml_init_interrupt(void)
711 {
712 #if defined(HAS_IPI)
713 /*
714 * ml_init_interrupt will get called once for each CPU, but this is redundant
715 * because there is only one global copy of the register for Skye. Do it only
716 * on the bootstrap CPU.
717 */
718 if (getCpuDatap()->cluster_master) {
719 ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
720 }
721 #endif
722 }
723
724 /*
725 * Routine: ml_init_timebase
726 * Function: register and set up Timebase and Decrementer services
727 */
728 void
729 ml_init_timebase(
730 void *args,
731 tbd_ops_t tbd_funcs,
732 vm_offset_t int_address,
733 vm_offset_t int_value __unused)
734 {
735 cpu_data_t *cpu_data_ptr;
736
737 cpu_data_ptr = (cpu_data_t *)args;
738
739 if ((cpu_data_ptr == &BootCpuData)
740 && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
741 rtclock_timebase_func = *tbd_funcs;
742 rtclock_timebase_addr = int_address;
743 }
744 }
745
746 #define ML_READPROP_MANDATORY UINT64_MAX
747
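/*
 * Read a scalar device-tree property of 1, 2, 4 or 8 bytes, returning
 * default_value if the property is absent. Passing ML_READPROP_MANDATORY as
 * the default makes a missing property fatal.
 */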
748 static uint64_t
749 ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
750 {
751 void const *prop;
752 unsigned int propSize;
753
754 if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
755 if (propSize == sizeof(uint8_t)) {
756 return *((uint8_t const *)prop);
757 } else if (propSize == sizeof(uint16_t)) {
758 return *((uint16_t const *)prop);
759 } else if (propSize == sizeof(uint32_t)) {
760 return *((uint32_t const *)prop);
761 } else if (propSize == sizeof(uint64_t)) {
762 return *((uint64_t const *)prop);
763 } else {
764 panic("CPU property '%s' has bad size %u", propertyName, propSize);
765 }
766 } else {
767 if (default_value == ML_READPROP_MANDATORY) {
768 panic("Missing mandatory property '%s'", propertyName);
769 }
770 return default_value;
771 }
772 }
773
774 static boolean_t
775 ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr)
776 {
777 uint64_t const *prop;
778 unsigned int propSize;
779
780 if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
781 return FALSE;
782 }
783
784 if (propSize != sizeof(uint64_t) * 2) {
785 panic("Wrong property size for %s", propertyName);
786 }
787
788 *pa_ptr = prop[0];
789 *len_ptr = prop[1];
790 return TRUE;
791 }
792
793 static boolean_t
794 ml_is_boot_cpu(const DTEntry entry)
795 {
796 void const *prop;
797 unsigned int propSize;
798
799 if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) {
800 panic("unable to retrieve state for cpu");
801 }
802
803 if (strncmp((char const *)prop, "running", propSize) == 0) {
804 return TRUE;
805 } else {
806 return FALSE;
807 }
808 }
809
810 static void
811 ml_read_chip_revision(unsigned int *rev __unused)
812 {
813 // The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
814 #ifdef APPLE_ARM64_ARCH_FAMILY
815 DTEntry entryP;
816
817 if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
818 *rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
819 } else {
820 *rev = CPU_VERSION_UNKNOWN;
821 }
822 #endif
823 }
824
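/*
 * Parse the per-CPU "interrupts" EDT property. A single cell names only the
 * PMI vector; three cells name the self-IPI, PMI, and other-IPI vectors, in
 * that order.
 */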
825 static boolean_t
826 ml_parse_interrupt_prop(const DTEntry entry, ml_topology_cpu_t *cpu)
827 {
828 uint32_t const *prop;
829 unsigned int propSize;
830
831 if (SecureDTGetProperty(entry, "interrupts", (void const **)&prop, &propSize) != kSuccess) {
832 return FALSE;
833 }
834
835 if (propSize == sizeof(uint32_t) * 1) {
836 cpu->pmi_irq = prop[0];
837 return TRUE;
838 } else if (propSize == sizeof(uint32_t) * 3) {
839 cpu->self_ipi_irq = prop[0];
840 cpu->pmi_irq = prop[1];
841 cpu->other_ipi_irq = prop[2];
842 return TRUE;
843 } else {
844 return FALSE;
845 }
846 }
847
848 void
849 ml_parse_cpu_topology(void)
850 {
851 DTEntry entry, child __unused;
852 OpaqueDTEntryIterator iter;
853 uint32_t cpu_boot_arg;
854 int err;
855
856 int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
857 int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
858 cpu_boot_arg = MAX_CPUS;
859 PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
860
861 err = SecureDTLookupEntry(NULL, "/cpus", &entry);
862 assert(err == kSuccess);
863
864 err = SecureDTInitEntryIterator(entry, &iter);
865 assert(err == kSuccess);
866
867 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
868 cluster_offsets[i] = -1;
869 cluster_phys_to_logical[i] = -1;
870 cluster_max_cpu_phys_id[i] = 0;
871 }
872
873 while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
874 boolean_t is_boot_cpu = ml_is_boot_cpu(child);
875
876 // If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
877 // been added to the topology struct yet, and we only have one slot left, then skip
878 // every other non-boot CPU in order to leave room for the boot CPU.
879 //
880 // e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
881 // array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted.
882 if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
883 continue;
884 }
885 if (topology_info.num_cpus >= cpu_boot_arg) {
886 break;
887 }
888
889 ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus];
890
891 cpu->cpu_id = topology_info.num_cpus++;
892 assert(cpu->cpu_id < MAX_CPUS);
893 topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id);
894
895 cpu->die_id = (int)ml_readprop(child, "die-id", 0);
896 topology_info.max_die_id = MAX(topology_info.max_die_id, cpu->die_id);
897
898 cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
899
900 cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
901 cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
902 cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
903 cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
904 cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);
905
906 ml_parse_interrupt_prop(child, cpu);
907 ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
908 ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
909 ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
910 cpu->cluster_type = CLUSTER_TYPE_SMP;
911
912 int cluster_type = (int)ml_readprop(child, "cluster-type", 0);
913 if (cluster_type == 'E') {
914 cpu->cluster_type = CLUSTER_TYPE_E;
915 } else if (cluster_type == 'P') {
916 cpu->cluster_type = CLUSTER_TYPE_P;
917 }
918
919 /*
920 * Since we want to keep a linear cluster ID space, we cannot just rely
921 * on the value provided by EDT. Instead, use the MPIDR value to see if we have
922 * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
923 */
924 #if HAS_CLUSTER
925 uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
926 #else
927 uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P);
928 #endif
929 assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
930 cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
931 topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);
932
933 assert(cpu->cluster_id < MAX_CPU_CLUSTERS);
934
935 ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id];
936 if (cluster->num_cpus == 0) {
937 assert(topology_info.num_clusters < MAX_CPU_CLUSTERS);
938
939 topology_info.num_clusters++;
940 topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id);
941
942 cluster->cluster_id = cpu->cluster_id;
943 cluster->cluster_type = cpu->cluster_type;
944 cluster->first_cpu_id = cpu->cpu_id;
945 assert(cluster_phys_to_logical[phys_cluster_id] == -1);
946 cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id;
947
948 // Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
949 // If we wind up with a bunch of these, we might want to create separate per-cluster
950 // EDT nodes and have the CPU nodes reference them through a phandle.
951 ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
952 ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
953 }
954
955 #if HAS_CLUSTER
956 if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) {
957 cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id);
958 }
959 #endif
960
961 cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
962 cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));
963
964 cluster->num_cpus++;
965 cluster->cpu_mask |= 1ULL << cpu->cpu_id;
966
967 if (is_boot_cpu) {
968 assert(topology_info.boot_cpu == NULL);
969 topology_info.boot_cpu = cpu;
970 topology_info.boot_cluster = cluster;
971 }
972 }
973
974 #if HAS_CLUSTER
975 /*
976 * Build the cluster offset array, ensuring that the region reserved
977 * for each physical cluster contains enough entries to be indexed
978 * by the maximum physical CPU ID (AFF0) within the cluster.
979 */
980 unsigned int cur_cluster_offset = 0;
981 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
982 if (cluster_phys_to_logical[i] != -1) {
983 cluster_offsets[i] = cur_cluster_offset;
984 cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
985 }
986 }
987 assert(cur_cluster_offset <= MAX_CPUS);
988 #else
989 /*
990 * For H10, there are really 2 physical clusters, but they are not separated
991 * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering
992 * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing,
993 * treat H10 and earlier devices as though they contain a single cluster.
994 */
995 cluster_offsets[0] = 0;
996 #endif
997 assert(topology_info.boot_cpu != NULL);
998 ml_read_chip_revision(&topology_info.chip_revision);
999
1000 /*
1001 * Set TPIDRRO_EL0 to indicate the correct cpu number, as we may
1002 * not be booting from cpu 0. Userspace will consume the current
1003 * CPU number through this register. For non-boot cores, this is
1004 * done in start.s (start_cpu) using the cpu_number field of the
1005 * per-cpu data object.
1006 */
1007 assert(__builtin_arm_rsr64("TPIDRRO_EL0") == 0);
1008 __builtin_arm_wsr64("TPIDRRO_EL0", (uint64_t)topology_info.boot_cpu->cpu_id);
1009 }
1010
1011 const ml_topology_info_t *
1012 ml_get_topology_info(void)
1013 {
1014 return &topology_info;
1015 }
1016
1017 void
1018 ml_map_cpu_pio(void)
1019 {
1020 unsigned int i;
1021
1022 for (i = 0; i < topology_info.num_cpus; i++) {
1023 ml_topology_cpu_t *cpu = &topology_info.cpus[i];
1024 if (cpu->cpu_IMPL_pa) {
1025 cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
1026 cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
1027 }
1028 if (cpu->cpu_UTTDBG_pa) {
1029 cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
1030 }
1031 }
1032
1033 for (i = 0; i < topology_info.num_clusters; i++) {
1034 ml_topology_cluster_t *cluster = &topology_info.clusters[i];
1035 if (cluster->acc_IMPL_pa) {
1036 cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
1037 }
1038 if (cluster->cpm_IMPL_pa) {
1039 cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
1040 }
1041 }
1042 }
1043
1044 unsigned int
1045 ml_get_cpu_count(void)
1046 {
1047 return topology_info.num_cpus;
1048 }
1049
1050 unsigned int
1051 ml_get_cluster_count(void)
1052 {
1053 return topology_info.num_clusters;
1054 }
1055
1056 int
1057 ml_get_boot_cpu_number(void)
1058 {
1059 return topology_info.boot_cpu->cpu_id;
1060 }
1061
1062 cluster_type_t
1063 ml_get_boot_cluster(void)
1064 {
1065 return topology_info.boot_cluster->cluster_type;
1066 }
1067
1068 int
1069 ml_get_cpu_number(uint32_t phys_id)
1070 {
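/* Strip everything but AFF1|AFF0 so callers may pass a raw MPIDR_EL1 value. */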
1071 phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK;
1072
1073 for (unsigned i = 0; i < topology_info.num_cpus; i++) {
1074 if (topology_info.cpus[i].phys_id == phys_id) {
1075 return i;
1076 }
1077 }
1078
1079 return -1;
1080 }
1081
1082 int
1083 ml_get_cluster_number(uint32_t phys_id)
1084 {
1085 int cpu_id = ml_get_cpu_number(phys_id);
1086 if (cpu_id < 0) {
1087 return -1;
1088 }
1089
1090 ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id];
1091
1092 return cpu->cluster_id;
1093 }
1094
1095 unsigned int
1096 ml_get_cpu_number_local(void)
1097 {
1098 uint64_t mpidr_el1_value = 0;
1099 unsigned cpu_id;
1100
1101 /* We identify the CPU based on the constant bits of MPIDR_EL1. */
1102 MRS(mpidr_el1_value, "MPIDR_EL1");
1103 cpu_id = ml_get_cpu_number((uint32_t)mpidr_el1_value);
1104
1105 assert(cpu_id <= (unsigned int)ml_get_max_cpu_number());
1106
1107 return cpu_id;
1108 }
1109
1110 int
1111 ml_get_cluster_number_local()
1112 {
1113 uint64_t mpidr_el1_value = 0;
1114 unsigned cluster_id;
1115
1116 /* We identify the cluster based on the constant bits of MPIDR_EL1. */
1117 MRS(mpidr_el1_value, "MPIDR_EL1");
1118 cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value);
1119
1120 assert(cluster_id <= (unsigned int)ml_get_max_cluster_number());
1121
1122 return cluster_id;
1123 }
1124
1125 int
1126 ml_get_max_cpu_number(void)
1127 {
1128 return topology_info.max_cpu_id;
1129 }
1130
1131 int
1132 ml_get_max_cluster_number(void)
1133 {
1134 return topology_info.max_cluster_id;
1135 }
1136
1137 unsigned int
1138 ml_get_first_cpu_id(unsigned int cluster_id)
1139 {
1140 return topology_info.clusters[cluster_id].first_cpu_id;
1141 }
1142
1143 void
1144 ml_lockdown_init()
1145 {
1146 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1147 rorgn_stash_range();
1148 #endif
1149 }
1150
1151 kern_return_t
1152 ml_lockdown_handler_register(lockdown_handler_t f, void *this)
1153 {
1154 if (!f) {
1155 return KERN_FAILURE;
1156 }
1157
1158 assert(lockdown_done);
1159 f(this); // XXX: f this whole function
1160
1161 return KERN_SUCCESS;
1162 }
1163
1164 kern_return_t
1165 ml_processor_register(ml_processor_info_t *in_processor_info,
1166 processor_t *processor_out, ipi_handler_t *ipi_handler_out,
1167 perfmon_interrupt_handler_func *pmi_handler_out)
1168 {
1169 cpu_data_t *this_cpu_datap;
1170 processor_set_t pset;
1171 boolean_t is_boot_cpu;
1172 static unsigned int reg_cpu_count = 0;
1173
1174 if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
1175 return KERN_FAILURE;
1176 }
1177
1178 if ((unsigned)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= topology_info.num_cpus) {
1179 return KERN_FAILURE;
1180 }
1181
1182 if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
1183 is_boot_cpu = FALSE;
1184 this_cpu_datap = cpu_data_alloc(FALSE);
1185 cpu_data_init(this_cpu_datap);
1186 } else {
1187 this_cpu_datap = &BootCpuData;
1188 is_boot_cpu = TRUE;
1189 }
1190
1191 assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number());
1192
1193 this_cpu_datap->cpu_id = in_processor_info->cpu_id;
1194
1195 this_cpu_datap->cpu_console_buf = console_cpu_alloc(is_boot_cpu);
1196 if (this_cpu_datap->cpu_console_buf == (void *)(NULL)) {
1197 goto processor_register_error;
1198 }
1199
1200 if (!is_boot_cpu) {
1201 this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id);
1202
1203 if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
1204 goto processor_register_error;
1205 }
1206 }
1207
1208 this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle;
1209 this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch;
1210 nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
1211 this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
1212
1213 this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer;
1214 this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
1215
1216 this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler;
1217 this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
1218 this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
1219 this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
1220
1221 this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
1222 this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
1223 this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
1224 this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
1225 this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
1226 this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
1227
1228 #if HAS_CLUSTER
1229 this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
1230 #else /* HAS_CLUSTER */
1231 this_cpu_datap->cluster_master = is_boot_cpu;
1232 #endif /* HAS_CLUSTER */
1233
1234 pset = pset_find(in_processor_info->cluster_id, processor_pset(master_processor));
1235
1236 assert(pset != NULL);
1237 kprintf("%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
1238
1239 processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap);
1240 if (!is_boot_cpu) {
1241 processor_init(processor, this_cpu_datap->cpu_number, pset);
1242
1243 if (this_cpu_datap->cpu_l2_access_penalty) {
1244 /*
1245 * Cores that have a non-zero L2 access penalty compared
1246 * to the boot processor should be de-prioritized by the
1247 * scheduler, so that threads use the cores with better L2
1248 * preferentially.
1249 */
1250 processor_set_primary(processor, master_processor);
1251 }
1252 }
1253
1254 *processor_out = processor;
1255 *ipi_handler_out = cpu_signal_handler;
1256 #if CPMU_AIC_PMI && MONOTONIC
1257 *pmi_handler_out = mt_cpmu_aic_pmi;
1258 #else
1259 *pmi_handler_out = NULL;
1260 #endif /* CPMU_AIC_PMI && MONOTONIC */
1261 if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
1262 *in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
1263 }
1264
1265 #if KPC
1266 if (kpc_register_cpu(this_cpu_datap) != TRUE) {
1267 goto processor_register_error;
1268 }
1269 #endif /* KPC */
1270
1271 if (!is_boot_cpu) {
1272 random_cpu_init(this_cpu_datap->cpu_number);
1273 // now let next CPU register itself
1274 OSIncrementAtomic((SInt32*)&real_ncpus);
1275 }
1276
1277 return KERN_SUCCESS;
1278
1279 processor_register_error:
1280 #if KPC
1281 kpc_unregister_cpu(this_cpu_datap);
1282 #endif /* KPC */
1283 if (!is_boot_cpu) {
1284 cpu_data_free(this_cpu_datap);
1285 }
1286
1287 return KERN_FAILURE;
1288 }
1289
1290 void
1291 ml_init_arm_debug_interface(
1292 void * in_cpu_datap,
1293 vm_offset_t virt_address)
1294 {
1295 ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1296 do_debugid();
1297 }
1298
1299 /*
1300 * Routine: init_ast_check
1301 * Function:
1302 */
1303 void
1304 init_ast_check(
1305 __unused processor_t processor)
1306 {
1307 }
1308
1309 /*
1310 * Routine: cause_ast_check
1311 * Function:
1312 */
1313 void
1314 cause_ast_check(
1315 processor_t processor)
1316 {
1317 if (current_processor() != processor) {
1318 cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1319 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1320 }
1321 }
1322
1323 extern uint32_t cpu_idle_count;
1324
1325 void
1326 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
1327 {
1328 *icp = ml_at_interrupt_context();
1329 *pidlep = (cpu_idle_count == real_ncpus);
1330 }
1331
1332 /*
1333 * Routine: ml_cause_interrupt
1334 * Function: Generate a fake interrupt
1335 */
1336 void
1337 ml_cause_interrupt(void)
1338 {
1339 return; /* BS_XXX */
1340 }
1341
1342 /* Map memory-mapped I/O space */
1343 vm_offset_t
1344 ml_io_map(
1345 vm_offset_t phys_addr,
1346 vm_size_t size)
1347 {
1348 return io_map(phys_addr, size, VM_WIMG_IO);
1349 }
1350
1351 /* Map memory-mapped I/O space (with protections specified) */
1352 vm_offset_t
1353 ml_io_map_with_prot(
1354 vm_offset_t phys_addr,
1355 vm_size_t size,
1356 vm_prot_t prot)
1357 {
1358 return io_map_with_prot(phys_addr, size, VM_WIMG_IO, prot);
1359 }
1360
1361 vm_offset_t
1362 ml_io_map_wcomb(
1363 vm_offset_t phys_addr,
1364 vm_size_t size)
1365 {
1366 return io_map(phys_addr, size, VM_WIMG_WCOMB);
1367 }
1368
1369 void
1370 ml_io_unmap(vm_offset_t addr, vm_size_t sz)
1371 {
1372 pmap_remove(kernel_pmap, addr, addr + sz);
1373 kmem_free(kernel_map, addr, sz);
1374 }
1375
1376 /* boot memory allocation */
1377 vm_offset_t
1378 ml_static_malloc(
1379 __unused vm_size_t size)
1380 {
1381 return (vm_offset_t) NULL;
1382 }
1383
1384 vm_map_address_t
1385 ml_map_high_window(
1386 vm_offset_t phys_addr,
1387 vm_size_t len)
1388 {
1389 return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1390 }
1391
1392 vm_offset_t
1393 ml_static_ptovirt(
1394 vm_offset_t paddr)
1395 {
1396 return phystokv(paddr);
1397 }
1398
1399 vm_offset_t
1400 ml_static_slide(
1401 vm_offset_t vaddr)
1402 {
1403 vm_offset_t slid_vaddr = vaddr + vm_kernel_slide;
1404
1405 if ((slid_vaddr < vm_kernelcache_base) || (slid_vaddr >= vm_kernelcache_top)) {
1406 /* This is only intended for use on kernelcache addresses. */
1407 return 0;
1408 }
1409
1410 /*
1411 * Because the address is in the kernelcache, we can do a simple
1412 * slide calculation.
1413 */
1414 return slid_vaddr;
1415 }
1416
1417 vm_offset_t
1418 ml_static_unslide(
1419 vm_offset_t vaddr)
1420 {
1421 if ((vaddr < vm_kernelcache_base) || (vaddr >= vm_kernelcache_top)) {
1422 /* This is only intended for use on kernelcache addresses. */
1423 return 0;
1424 }
1425
1426 return vaddr - vm_kernel_slide;
1427 }
1428
1429 extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);
1430
1431 kern_return_t
1432 ml_static_protect(
1433 vm_offset_t vaddr, /* kernel virtual address */
1434 vm_size_t size,
1435 vm_prot_t new_prot)
1436 {
1437 pt_entry_t arm_prot = 0;
1438 pt_entry_t arm_block_prot = 0;
1439 vm_offset_t vaddr_cur;
1440 ppnum_t ppn;
1441 kern_return_t result = KERN_SUCCESS;
1442
1443 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1444 panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) VM_MIN_KERNEL_ADDRESS);
1445 return KERN_FAILURE;
1446 }
1447
1448 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1449
1450 if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
1451 panic("ml_static_protect(): WX request on %p", (void *) vaddr);
1452 }
1453 if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) {
1454 panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr);
1455 }
1456
1457 /* Set up the protection bits, and block bits so we can validate block mappings. */
1458 if (new_prot & VM_PROT_WRITE) {
1459 arm_prot |= ARM_PTE_AP(AP_RWNA);
1460 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
1461 } else {
1462 arm_prot |= ARM_PTE_AP(AP_RONA);
1463 arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
1464 }
1465
1466 arm_prot |= ARM_PTE_NX;
1467 arm_block_prot |= ARM_TTE_BLOCK_NX;
1468
1469 if (!(new_prot & VM_PROT_EXECUTE)) {
1470 arm_prot |= ARM_PTE_PNX;
1471 arm_block_prot |= ARM_TTE_BLOCK_PNX;
1472 }
1473
1474 for (vaddr_cur = vaddr;
1475 vaddr_cur < trunc_page_64(vaddr + size);
1476 vaddr_cur += PAGE_SIZE) {
1477 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1478 if (ppn != (vm_offset_t) NULL) {
1479 tt_entry_t *tte2;
1480 pt_entry_t *pte_p;
1481 pt_entry_t ptmp;
1482
1483 #if XNU_MONITOR
1484 assert(!pmap_is_monitor(ppn));
1485 assert(!TEST_PAGE_RATIO_4);
1486 #endif
1487
1488 tte2 = arm_kva_to_tte(vaddr_cur);
1489
1490 if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
1491 if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
1492 ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
1493 /*
1494 * We can support ml_static_protect on a block mapping if the mapping already has
1495 * the desired protections. We still want to run checks on a per-page basis.
1496 */
1497 continue;
1498 }
1499
1500 result = KERN_FAILURE;
1501 break;
1502 }
1503
1504 pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
1505 ptmp = *pte_p;
1506
1507 if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
1508 /*
1509 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
1510 * protections do not match the desired protections, then we will fail (as we cannot update
1511 * this mapping without updating other mappings as well).
1512 */
1513 result = KERN_FAILURE;
1514 break;
1515 }
1516
1517 __unreachable_ok_push
1518 if (TEST_PAGE_RATIO_4) {
1519 {
1520 unsigned int i;
1521 pt_entry_t *ptep_iter;
1522
1523 ptep_iter = pte_p;
1524 for (i = 0; i < 4; i++, ptep_iter++) {
1525 /* Note that there is a hole in the HINT sanity checking here. */
1526 ptmp = *ptep_iter;
1527
1528 /* We only need to update the page tables if the protections do not match. */
1529 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1530 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1531 *ptep_iter = ptmp;
1532 }
1533 }
1534 }
1535 } else {
1536 ptmp = *pte_p;
1537 /* We only need to update the page tables if the protections do not match. */
1538 if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
1539 ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
1540 *pte_p = ptmp;
1541 }
1542 }
1543 __unreachable_ok_pop
1544 }
1545 }
1546
1547 if (vaddr_cur > vaddr) {
1548 assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
1549 flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
1550 }
1551
1552
1553 return result;
1554 }
1555
1556 /*
1557 * Routine: ml_static_mfree
1558 * Function:
1559 */
1560 void
1561 ml_static_mfree(
1562 vm_offset_t vaddr,
1563 vm_size_t size)
1564 {
1565 vm_offset_t vaddr_cur;
1566 ppnum_t ppn;
1567 uint32_t freed_pages = 0;
1568 uint32_t freed_kernelcache_pages = 0;
1569
1570 /* It is acceptable (if bad) to fail to free. */
1571 if (vaddr < VM_MIN_KERNEL_ADDRESS) {
1572 return;
1573 }
1574
1575 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
1576
1577 for (vaddr_cur = vaddr;
1578 vaddr_cur < trunc_page_64(vaddr + size);
1579 vaddr_cur += PAGE_SIZE) {
1580 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1581 if (ppn != (vm_offset_t) NULL) {
1582 /*
1583 * It is not acceptable to fail to update the protections on a page
1584 * we will release to the VM. We need to either panic or continue.
1585 * For now, we'll panic (to help flag if there is memory we can
1586 * reclaim).
1587 */
1588 if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
1589 panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
1590 }
1591
1592 vm_page_create(ppn, (ppn + 1));
1593 freed_pages++;
1594 if (vaddr_cur >= segLOWEST && vaddr_cur < end_kern) {
1595 freed_kernelcache_pages++;
1596 }
1597 }
1598 }
1599 vm_page_lockspin_queues();
1600 vm_page_wire_count -= freed_pages;
1601 vm_page_wire_count_initial -= freed_pages;
1602 vm_page_kernelcache_count -= freed_kernelcache_pages;
1603 vm_page_unlock_queues();
1604 #if DEBUG
1605 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
1606 #endif
1607 }
1608
1609
1610 /* virtual to physical on wired pages */
1611 vm_offset_t
1612 ml_vtophys(vm_offset_t vaddr)
1613 {
1614 return kvtophys(vaddr);
1615 }
1616
1617 /*
1618 * Routine: ml_nofault_copy
1619 * Function: Perform a physical mode copy if the source and destination have
1620 * valid translations in the kernel pmap. If translations are present, they are
1621 * assumed to be wired; i.e., no attempt is made to guarantee that the
1622 * translations obtained remain valid for the duration of the copy process.
1623 */
1624 vm_size_t
1625 ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
1626 {
1627 addr64_t cur_phys_dst, cur_phys_src;
1628 vm_size_t count, nbytes = 0;
1629
1630 while (size > 0) {
1631 if (!(cur_phys_src = kvtophys(virtsrc))) {
1632 break;
1633 }
1634 if (!(cur_phys_dst = kvtophys(virtdst))) {
1635 break;
1636 }
1637 if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
1638 !pmap_valid_address(trunc_page_64(cur_phys_src))) {
1639 break;
1640 }
1641 count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
1642 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
1643 count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
1644 }
1645 if (count > size) {
1646 count = size;
1647 }
1648
1649 bcopy_phys(cur_phys_src, cur_phys_dst, count);
1650
1651 nbytes += count;
1652 virtsrc += count;
1653 virtdst += count;
1654 size -= count;
1655 }
1656
1657 return nbytes;
1658 }
1659
1660 /*
1661 * Routine: ml_validate_nofault
1662 * Function: Validate that this address range has valid translations
1663 * in the kernel pmap. If translations are present, they are
1664 * assumed to be wired; i.e. no attempt is made to guarantee
1665 * that the translations persist after the check.
1666 * Returns: TRUE if the range is mapped and will not cause a fault,
1667 * FALSE otherwise.
1668 */
1669
1670 boolean_t
1671 ml_validate_nofault(
1672 vm_offset_t virtsrc, vm_size_t size)
1673 {
1674 addr64_t cur_phys_src;
1675 uint32_t count;
1676
1677 while (size > 0) {
1678 if (!(cur_phys_src = kvtophys(virtsrc))) {
1679 return FALSE;
1680 }
1681 if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
1682 return FALSE;
1683 }
1684 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
1685 if (count > size) {
1686 count = (uint32_t)size;
1687 }
1688
1689 virtsrc += count;
1690 size -= count;
1691 }
1692
1693 return TRUE;
1694 }
1695
1696 void
1697 ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
1698 {
1699 *phys_addr = 0;
1700 *size = 0;
1701 }
1702
1703 void
1704 active_rt_threads(__unused boolean_t active)
1705 {
1706 }
1707
1708 static void
1709 cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
1710 {
1711 return;
1712 }
1713
1714 cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
1715
1716 void
1717 cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
1718 {
1719 if (cpu_qos_cb != NULL) {
1720 cpu_qos_update = cpu_qos_cb;
1721 } else {
1722 cpu_qos_update = cpu_qos_cb_default;
1723 }
1724 }
1725
1726 void
1727 thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
1728 {
1729 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
1730
1731 cpu_qos_update((int)urgency, rt_period, rt_deadline);
1732
1733 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
1734 }
1735
1736 void
1737 machine_run_count(__unused uint32_t count)
1738 {
1739 }
1740
1741 processor_t
1742 machine_choose_processor(__unused processor_set_t pset, processor_t processor)
1743 {
1744 return processor;
1745 }
1746
1747 #if KASAN
1748 vm_offset_t ml_stack_base(void);
1749 vm_size_t ml_stack_size(void);
1750
1751 vm_offset_t
1752 ml_stack_base(void)
1753 {
1754 uintptr_t local = (uintptr_t) &local;
1755 vm_offset_t intstack_top_ptr;
1756
1757 intstack_top_ptr = getCpuDatap()->intstack_top;
1758 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1759 return intstack_top_ptr - INTSTACK_SIZE;
1760 } else {
1761 return current_thread()->kernel_stack;
1762 }
1763 }
1764 vm_size_t
1765 ml_stack_size(void)
1766 {
1767 uintptr_t local = (uintptr_t) &local;
1768 vm_offset_t intstack_top_ptr;
1769
1770 intstack_top_ptr = getCpuDatap()->intstack_top;
1771 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1772 return INTSTACK_SIZE;
1773 } else {
1774 return kernel_stack_size;
1775 }
1776 }
1777 #endif
1778
1779 boolean_t
1780 machine_timeout_suspended(void)
1781 {
1782 return FALSE;
1783 }
1784
1785 kern_return_t
1786 ml_interrupt_prewarm(__unused uint64_t deadline)
1787 {
1788 return KERN_FAILURE;
1789 }
1790
1791 /*
1792 * Assumes fiq, irq disabled.
1793 */
1794 void
1795 ml_set_decrementer(uint32_t dec_value)
1796 {
1797 cpu_data_t *cdp = getCpuDatap();
1798
1799 assert(ml_get_interrupts_enabled() == FALSE);
1800 cdp->cpu_decrementer = dec_value;
1801
1802 if (cdp->cpu_set_decrementer_func) {
1803 cdp->cpu_set_decrementer_func(dec_value);
1804 } else {
1805 __builtin_arm_wsr64("CNTV_TVAL_EL0", (uint64_t)dec_value);
1806 }
1807 }
1808
1809 uint64_t
1810 ml_get_hwclock()
1811 {
1812 uint64_t timebase;
1813
1814 // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
1815 // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
1816 // to other instructions executed on the same processor."
1817 __builtin_arm_isb(ISB_SY);
1818 timebase = __builtin_arm_rsr64("CNTVCT_EL0");
1819
1820 return timebase;
1821 }
1822
1823 uint64_t
1824 ml_get_timebase()
1825 {
1826 return ml_get_hwclock() + getCpuDatap()->cpu_base_timebase;
1827 }
1828
1829 /*
1830 * Get the speculative timebase without an ISB.
1831 */
1832 uint64_t
1833 ml_get_speculative_timebase()
1834 {
1835 uint64_t timebase;
1836
1837 timebase = __builtin_arm_rsr64("CNTVCT_EL0");
1838
1839 return timebase + getCpuDatap()->cpu_base_timebase;
1840 }
1841
1842 uint64_t
1843 ml_get_timebase_entropy(void)
1844 {
1845 return ml_get_speculative_timebase();
1846 }
1847
1848 uint32_t
1849 ml_get_decrementer()
1850 {
1851 cpu_data_t *cdp = getCpuDatap();
1852 uint32_t dec;
1853
1854 assert(ml_get_interrupts_enabled() == FALSE);
1855
1856 if (cdp->cpu_get_decrementer_func) {
1857 dec = cdp->cpu_get_decrementer_func();
1858 } else {
1859 uint64_t wide_val;
1860
1861 wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0");
1862 dec = (uint32_t)wide_val;
1863 assert(wide_val == (uint64_t)dec);
1864 }
1865
1866 return dec;
1867 }
1868
1869 boolean_t
1870 ml_get_timer_pending()
1871 {
1872 uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0");
1873 return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
1874 }
1875
1876 static void
1877 cache_trap_error(thread_t thread, vm_map_address_t fault_addr)
1878 {
1879 mach_exception_data_type_t exc_data[2];
1880 arm_saved_state_t *regs = get_user_regs(thread);
1881
1882 set_saved_state_far(regs, fault_addr);
1883
1884 exc_data[0] = KERN_INVALID_ADDRESS;
1885 exc_data[1] = fault_addr;
1886
1887 exception_triage(EXC_BAD_ACCESS, exc_data, 2);
1888 }
1889
1890 static void
1891 cache_trap_recover()
1892 {
1893 vm_map_address_t fault_addr;
1894
1895 __asm__ volatile ("mrs %0, FAR_EL1" : "=r"(fault_addr));
1896
1897 cache_trap_error(current_thread(), fault_addr);
1898 }
1899
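/*
 * Install cache_trap_recover as the thread's fault recovery handler. With
 * Apple PAC, the handler pointer is re-signed with a discriminator bound to
 * &thread->recover so a forged recovery pointer cannot be substituted.
 */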
1900 static void
1901 set_cache_trap_recover(thread_t thread)
1902 {
1903 #if defined(HAS_APPLE_PAC)
1904 thread->recover = (vm_address_t)ptrauth_auth_and_resign(&cache_trap_recover,
1905 ptrauth_key_function_pointer, 0,
1906 ptrauth_key_function_pointer, ptrauth_blend_discriminator(&thread->recover, PAC_DISCRIMINATOR_RECOVER));
1907 #else /* defined(HAS_APPLE_PAC) */
1908 thread->recover = (vm_address_t)cache_trap_recover;
1909 #endif /* defined(HAS_APPLE_PAC) */
1910 }
1911
1912 static void
1913 dcache_flush_trap(vm_map_address_t start, vm_map_size_t size)
1914 {
1915 vm_map_address_t end = start + size;
1916 thread_t thread = current_thread();
1917 vm_offset_t old_recover = thread->recover;
1918
1919 /* Check bounds */
1920 if (task_has_64Bit_addr(current_task())) {
1921 if (end > MACH_VM_MAX_ADDRESS) {
1922 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1923 }
1924 } else {
1925 if (end > VM_MAX_ADDRESS) {
1926 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1927 }
1928 }
1929
1930 if (start > end) {
1931 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1932 }
1933
1934 set_cache_trap_recover(thread);
1935
1936 /*
1937 * We're coherent on Apple ARM64 CPUs, so this could be a nop. However,
1938 * if the region given us is bad, it would be good to catch it and
1939 * crash, ergo we still do the flush.
1940 */
1941 FlushPoC_DcacheRegion(start, (uint32_t)size);
1942
1943 /* Restore recovery function */
1944 thread->recover = old_recover;
1945
1946 /* Return (caller does exception return) */
1947 }
1948
1949 static void
1950 icache_invalidate_trap(vm_map_address_t start, vm_map_size_t size)
1951 {
1952 vm_map_address_t end = start + size;
1953 thread_t thread = current_thread();
1954 vm_offset_t old_recover = thread->recover;
1955
1956 /* Check bounds */
1957 if (task_has_64Bit_addr(current_task())) {
1958 if (end > MACH_VM_MAX_ADDRESS) {
1959 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1960 }
1961 } else {
1962 if (end > VM_MAX_ADDRESS) {
1963 cache_trap_error(thread, end & ((1 << ARM64_CLINE_SHIFT) - 1));
1964 }
1965 }
1966
1967 if (start > end) {
1968 cache_trap_error(thread, start & ((1 << ARM64_CLINE_SHIFT) - 1));
1969 }
1970
1971 set_cache_trap_recover(thread);
1972
1973 /* Invalidate iCache to point of unification */
1974 InvalidatePoU_IcacheRegion(start, (uint32_t)size);
1975
1976 /* Restore recovery function */
1977 thread->recover = old_recover;
1978
1979 /* Return (caller does exception return) */
1980 }
1981
1982 __attribute__((noreturn))
1983 void
1984 platform_syscall(arm_saved_state_t *state)
1985 {
1986 uint32_t code;
1987
1988 #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
1989
1990 code = (uint32_t)get_saved_state_reg(state, 3);
1991 switch (code) {
1992 case 0:
1993 /* I-Cache flush */
1994 platform_syscall_kprintf("icache flush requested.\n");
1995 icache_invalidate_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
1996 break;
1997 case 1:
1998 /* D-Cache flush */
1999 platform_syscall_kprintf("dcache flush requested.\n");
2000 dcache_flush_trap(get_saved_state_reg(state, 0), get_saved_state_reg(state, 1));
2001 break;
2002 case 2:
2003 /* set cthread */
2004 platform_syscall_kprintf("set cthread self.\n");
2005 thread_set_cthread_self(get_saved_state_reg(state, 0));
2006 break;
2007 case 3:
2008 /* get cthread */
2009 platform_syscall_kprintf("get cthread self.\n");
2010 set_saved_state_reg(state, 0, thread_get_cthread_self());
2011 break;
2012 default:
2013 platform_syscall_kprintf("unknown: %d\n", code);
2014 break;
2015 }
2016
2017 thread_exception_return();
2018 }
2019
2020 static void
2021 _enable_timebase_event_stream(uint32_t bit_index)
2022 {
2023 uint64_t cntkctl; /* CNTKCTL_EL1 is architecturally 32 bits, but the 64-bit "mrs" below wants a 64-bit operand */
2024
2025 if (bit_index >= 64) {
2026 panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
2027 }
2028
2029 __asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r"(cntkctl));
2030
2031 cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
2032 cntkctl |= CNTKCTL_EL1_EVNTEN;
2033 cntkctl |= CNTKCTL_EL1_EVENTDIR; /* trigger on 1->0 transitions; the direction is arbitrary */
2034
2035 /*
2036 * If the SOC supports it (and it isn't broken), enable
2037 * EL0 access to the timebase registers.
2038 */
2039 if (user_timebase_type() != USER_TIMEBASE_NONE) {
2040 cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
2041 }
2042
2043 __builtin_arm_wsr64("CNTKCTL_EL1", cntkctl);
2044 }
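
/*
 * Illustrative sketch (not part of this file): with the event stream enabled
 * above, a WFE-based wait is woken at least once per event-stream period even
 * if no interrupt or explicit SEV arrives, which bounds its worst-case
 * latency. The deadline parameter is a hypothetical absolute-time value.
 */
#if 0   /* illustrative sketch; not compiled */
static void
example_bounded_wfe_wait(uint64_t deadline)
{
    while (ml_get_speculative_timebase() < deadline) {
        __builtin_arm_wfe();    /* periodically woken by the event stream */
    }
}
#endif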
2045
2046 /*
2047 * Turn the virtual timer on and leave its interrupt unmasked.
2048 */
2049 static void
2050 _enable_virtual_timer(void)
2051 {
2052 uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* CNTV_CTL_EL0 is architecturally 32 bits, but the 64-bit system-register write wants a 64-bit value */
2053
2054 __builtin_arm_wsr64("CNTV_CTL_EL0", cntvctl);
2055 /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
2056 __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
2057 }
2058
2059 void
2060 fiq_context_init(boolean_t enable_fiq __unused)
2061 {
2062 /* Interrupts still disabled. */
2063 assert(ml_get_interrupts_enabled() == FALSE);
2064 _enable_virtual_timer();
2065 }
2066
2067 void
2068 wfe_timeout_init(void)
2069 {
2070 _enable_timebase_event_stream(arm64_eventi);
2071 }
2072
2073 void
2074 wfe_timeout_configure(void)
2075 {
2076 /* Could fill in our own ops here, if we needed them */
2077 uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0;
2078 uint32_t bit_index;
2079
2080 if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
2081 if (events_per_sec <= 0) {
2082 events_per_sec = 1;
2083 } else if (events_per_sec > USEC_PER_SEC) {
2084 events_per_sec = USEC_PER_SEC;
2085 }
2086 } else {
2087 #if defined(ARM_BOARD_WFE_TIMEOUT_NS)
2088 events_per_sec = NSEC_PER_SEC / ARM_BOARD_WFE_TIMEOUT_NS;
2089 #else /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2090 /* Default to 1usec (or as close as we can get) */
2091 events_per_sec = USEC_PER_SEC;
2092 #endif /* !defined(ARM_BOARD_WFE_TIMEOUT_NS) */
2093 }
2094 ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2095 ticks_per_event = ticks_per_sec / events_per_sec;
2096 bit_index = flsll(ticks_per_event) - 1; /* Highest bit set */
2097
2098 /* Round up to power of two */
2099 if ((ticks_per_event & ((1ULL << bit_index) - 1)) != 0) {
2100 bit_index++;
2101 }
2102
2103 /*
2104 * The event stream can trigger on either a rising or a falling edge
2105 * of the selected bit, not both. We don't care which edge we use,
2106 * but a given edge of counter bit n occurs only every 2^(n+1) ticks,
2107 * so we drop down one bit to preserve the intended period.
2108 */
2109 if (bit_index != 0) {
2110 bit_index--;
2111 }
2112
2113 arm64_eventi = bit_index;
2114 wfe_timeout_init();
2115 }
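
/*
 * Worked example (assuming a 24 MHz timebase, a common Apple SoC value): with
 * the default events_per_sec = USEC_PER_SEC, ticks_per_event = 24.
 * flsll(24) - 1 = 4, and the bits of 24 below bit 4 are nonzero, so
 * bit_index rounds up to 5 (a 32-tick period). Because a given edge of
 * counter bit n occurs only every 2^(n+1) ticks, bit_index is then
 * decremented to 4, yielding one event roughly every 32 ticks (~1.33 usec).
 */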
2116
2117 boolean_t
2118 ml_delay_should_spin(uint64_t interval)
2119 {
2120 cpu_data_t *cdp = getCpuDatap();
2121
2122 if (cdp->cpu_idle_latency) {
2123 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2124 } else {
2125 /*
2126 * Early boot, latency is unknown. Err on the side of blocking,
2127 * which should always be safe, even if slow.
2128 */
2129 return FALSE;
2130 }
2131 }
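
/*
 * Illustrative sketch (not part of this file): a delay()-style caller would
 * use the result to choose between spinning and blocking. interval_abs (an
 * interval in absolute-time ticks) is a hypothetical variable.
 */
#if 0   /* illustrative sketch; not compiled */
    if (ml_delay_should_spin(interval_abs)) {
        /* short wait: spin and avoid the idle entry/exit latency */
    } else {
        /* long wait: block and let the core go idle */
    }
#endif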
2132
2133 boolean_t
2134 ml_thread_is64bit(thread_t thread)
2135 {
2136 return thread_is_64bit_addr(thread);
2137 }
2138
2139 void
2140 ml_delay_on_yield(void)
2141 {
2142 #if DEVELOPMENT || DEBUG
2143 if (yield_delay_us) {
2144 delay(yield_delay_us);
2145 }
2146 #endif
2147 }
2148
2149 void
2150 ml_timer_evaluate(void)
2151 {
2152 }
2153
2154 boolean_t
2155 ml_timer_forced_evaluation(void)
2156 {
2157 return FALSE;
2158 }
2159
2160 uint64_t
2161 ml_energy_stat(thread_t t)
2162 {
2163 return t->machine.energy_estimate_nj;
2164 }
2165
2166
2167 void
2168 ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
2169 {
2170 /*
2171 * For now: update the resource coalition stats of the
2172 * current thread's coalition
2173 */
2174 task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2175 }
2176
2177 uint64_t
2178 ml_gpu_stat(__unused thread_t t)
2179 {
2180 return 0;
2181 }
2182
2183 #if !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT
2184
2185 static void
2186 timer_state_event(boolean_t switch_to_kernel)
2187 {
2188 thread_t thread = current_thread();
2189 if (!thread->precise_user_kernel_time) {
2190 return;
2191 }
2192
2193 processor_t pd = current_processor();
2194 uint64_t now = ml_get_speculative_timebase();
2195
2196 timer_stop(pd->current_state, now);
2197 pd->current_state = (switch_to_kernel) ? &pd->system_state : &pd->user_state;
2198 timer_start(pd->current_state, now);
2199
2200 timer_stop(pd->thread_timer, now);
2201 pd->thread_timer = (switch_to_kernel) ? &thread->system_timer : &thread->user_timer;
2202 timer_start(pd->thread_timer, now);
2203 }
2204
2205 void
2206 timer_state_event_user_to_kernel(void)
2207 {
2208 timer_state_event(TRUE);
2209 }
2210
2211 void
2212 timer_state_event_kernel_to_user(void)
2213 {
2214 timer_state_event(FALSE);
2215 }
2216 #endif /* !CONFIG_SKIP_PRECISE_USER_KERNEL_TIME || HAS_FAST_CNTVCT */
2217
2218 /*
2219 * The following are required for parts of the kernel
2220 * that cannot resolve these functions as inlines:
2221 */
2222 extern thread_t current_act(void) __attribute__((const));
2223 thread_t
2224 current_act(void)
2225 {
2226 return current_thread_fast();
2227 }
2228
2229 #undef current_thread
2230 extern thread_t current_thread(void) __attribute__((const));
2231 thread_t
2232 current_thread(void)
2233 {
2234 return current_thread_fast();
2235 }
2236
2237 typedef struct {
2238 ex_cb_t cb;
2239 void *refcon;
2240 }
2241 ex_cb_info_t;
2242
2243 ex_cb_info_t ex_cb_info[EXCB_CLASS_MAX];
2244
2245 /*
2246 * Callback registration
2247 * Currently only one registered callback per class is supported, but the
2248 * scheme could be extended to allow multiple callbacks per class.
2249 */
2250 kern_return_t
2251 ex_cb_register(
2252 ex_cb_class_t cb_class,
2253 ex_cb_t cb,
2254 void *refcon)
2255 {
2256 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2257
2258 if ((NULL == cb) || (cb_class >= EXCB_CLASS_MAX)) {
2259 return KERN_INVALID_VALUE;
2260 }
2261
2262 if (NULL == pInfo->cb) {
2263 pInfo->cb = cb;
2264 pInfo->refcon = refcon;
2265 return KERN_SUCCESS;
2266 }
2267 return KERN_FAILURE;
2268 }
2269
2270 /*
2271 * Called internally by the platform kernel to invoke the registered callback for a class
2272 */
2273 ex_cb_action_t
2274 ex_cb_invoke(
2275 ex_cb_class_t cb_class,
2276 vm_offset_t far)
2277 {
2278 ex_cb_info_t *pInfo = &ex_cb_info[cb_class];
2279 ex_cb_state_t state = {far};
2280
2281 if (cb_class >= EXCB_CLASS_MAX) {
2282 panic("Invalid exception callback class 0x%x\n", cb_class);
2283 }
2284
2285 if (pInfo->cb) {
2286 return pInfo->cb(cb_class, pInfo->refcon, &state);
2287 }
2288 return EXCB_ACTION_NONE;
2289 }
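
/*
 * Illustrative sketch (not part of this file): a subsystem registers one
 * callback per class at initialization time. The callback prototype below
 * mirrors the invocation in ex_cb_invoke() above; the exact ex_cb_t typedef
 * lives in the machine_routines header. The callback name, class value, and
 * refcon are hypothetical.
 */
#if 0   /* illustrative sketch; not compiled */
static ex_cb_action_t
example_ex_cb(ex_cb_class_t cb_class, void *refcon, const ex_cb_state_t *state)
{
    /* Inspect the fault address recorded in *state, then let the normal
     * exception handling path continue. */
    return EXCB_ACTION_NONE;
}

static void
example_ex_cb_init(ex_cb_class_t cb_class)
{
    kern_return_t kr = ex_cb_register(cb_class, example_ex_cb, NULL);
    assert(kr == KERN_SUCCESS);
}
#endif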
2290
2291 #if defined(HAS_APPLE_PAC)
2292 static inline bool
2293 cpu_supports_userkeyen()
2294 {
2295 #if defined(APPLEFIRESTORM)
2296 return __builtin_arm_rsr64(ARM64_REG_APCTL_EL1) & APCTL_EL1_UserKeyEn;
2297 #elif HAS_APCTL_EL1_USERKEYEN
2298 return true;
2299 #else
2300 return false;
2301 #endif
2302 }
2303
2304 /**
2305 * Returns the default JOP key. Depending on how the CPU diversifies userspace
2306 * JOP keys, this value may reflect either KERNKeyLo or APIAKeyLo.
2307 */
2308 uint64_t
2309 ml_default_jop_pid(void)
2310 {
2311 if (cpu_supports_userkeyen()) {
2312 return KERNEL_KERNKEY_ID;
2313 } else {
2314 return KERNEL_JOP_ID;
2315 }
2316 }
2317
2318 void
2319 ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop)
2320 {
2321 assert(task);
2322 task->disable_user_jop = disable_user_jop;
2323 }
2324
2325 void
2326 ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop)
2327 {
2328 assert(thread);
2329 thread->machine.disable_user_jop = disable_user_jop;
2330 }
2331
2332 void
2333 ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2334 {
2335 if (inherit) {
2336 task->rop_pid = parent_task->rop_pid;
2337 } else {
2338 task->rop_pid = early_random();
2339 }
2340 }
2341
2342 /**
2343 * jop_pid may be inherited from the parent task or generated inside the shared
2344 * region. Unfortunately these two parameters are available at very different
2345 * times during task creation, so we need to split this into two steps.
2346 */
2347 void
2348 ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit)
2349 {
2350 if (inherit) {
2351 task->jop_pid = parent_task->jop_pid;
2352 } else {
2353 task->jop_pid = ml_default_jop_pid();
2354 }
2355 }
2356
2357 void
2358 ml_task_set_jop_pid_from_shared_region(task_t task)
2359 {
2360 vm_shared_region_t sr = vm_shared_region_get(task);
2361 /*
2362 * If there's no shared region, we can assign the key arbitrarily. This
2363 * typically happens when Mach-O image activation failed part of the way
2364 * through, and this task is in the middle of dying with SIGKILL anyway.
2365 */
2366 if (__improbable(!sr)) {
2367 task->jop_pid = early_random();
2368 return;
2369 }
2370 vm_shared_region_deallocate(sr);
2371
2372 /*
2373 * Similarly we have to worry about jetsam having killed the task and
2374 * already cleared the shared_region_id.
2375 */
2376 task_lock(task);
2377 if (task->shared_region_id != NULL) {
2378 task->jop_pid = shared_region_find_key(task->shared_region_id);
2379 } else {
2380 task->jop_pid = early_random();
2381 }
2382 task_unlock(task);
2383 }
2384
2385 void
2386 ml_thread_set_jop_pid(thread_t thread, task_t task)
2387 {
2388 thread->machine.jop_pid = task->jop_pid;
2389 }
2390 #endif /* defined(HAS_APPLE_PAC) */
2391
2392 #if defined(HAS_APPLE_PAC)
2393 #define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \
2394 asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier));
2395
2396 /*
2397 * ml_auth_ptr_unchecked: call this instead of ptrauth_auth_data
2398 * intrinsic when you don't want to trap on auth failure.
2399 *
2400 */
2401 void *
2402 ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier)
2403 {
2404 switch (key & 0x3) {
2405 case ptrauth_key_asia:
2406 _ml_auth_ptr_unchecked(ptr, ia, modifier);
2407 break;
2408 case ptrauth_key_asib:
2409 _ml_auth_ptr_unchecked(ptr, ib, modifier);
2410 break;
2411 case ptrauth_key_asda:
2412 _ml_auth_ptr_unchecked(ptr, da, modifier);
2413 break;
2414 case ptrauth_key_asdb:
2415 _ml_auth_ptr_unchecked(ptr, db, modifier);
2416 break;
2417 }
2418
2419 return ptr;
2420 }
2421 #endif /* defined(HAS_APPLE_PAC) */
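
/*
 * Illustrative sketch (not part of this file): authenticating a signed
 * pointer without risking a PAC trap. On authentication failure the returned
 * pointer carries the architectural error bits rather than faulting, so the
 * caller must still validate it before use. The storage address, key choice,
 * and EXAMPLE_DISCRIMINATOR constant are hypothetical.
 */
#if 0   /* illustrative sketch; not compiled */
static void *
example_auth_without_trap(void *signed_ptr, const void *storage_addr)
{
    uint64_t modifier = ptrauth_blend_discriminator(storage_addr,
        EXAMPLE_DISCRIMINATOR);     /* hypothetical per-field constant */
    return ml_auth_ptr_unchecked(signed_ptr, ptrauth_key_asda, modifier);
}
#endif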
2422
2423 #ifdef CONFIG_XNUPOST
2424 void
2425 ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
2426 {
2427 thread_t thread = current_thread();
2428 thread->machine.expected_fault_handler = expected_fault_handler;
2429 thread->machine.expected_fault_addr = expected_fault_addr;
2430 }
2431
2432 void
2433 ml_expect_fault_end(void)
2434 {
2435 thread_t thread = current_thread();
2436 thread->machine.expected_fault_handler = NULL;
2437 thread->machine.expected_fault_addr = 0;
2438 }
2439 #endif /* CONFIG_XNUPOST */
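
/*
 * Illustrative sketch (not part of this file): an XNUPOST-style test that
 * deliberately faults brackets the access with the calls above so the fault
 * handler can recognize and swallow the expected fault. The handler and the
 * faulting address below are hypothetical.
 */
#if 0   /* illustrative sketch; not compiled */
    ml_expect_fault_begin(example_fault_handler, (uintptr_t)bad_address);
    /* ...perform the access that is expected to fault... */
    ml_expect_fault_end();
#endif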
2440
2441 void
2442 ml_hibernate_active_pre(void)
2443 {
2444 #if HIBERNATION
2445 if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
2446
2447 hibernate_rebuild_vm_structs();
2448 }
2449 #endif /* HIBERNATION */
2450 }
2451
2452 void
2453 ml_hibernate_active_post(void)
2454 {
2455 #if HIBERNATION
2456 if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) {
2457 hibernate_machine_init();
2458 hibernate_vm_lock_end();
2459 current_cpu_datap()->cpu_hibernate = 0;
2460 }
2461 #endif /* HIBERNATION */
2462 }
2463
2464 /**
2465 * Return a machine-dependent array of address space regions that should be
2466 * reserved by the VM (pre-mapped in the address space). This will prevent user
2467 * processes from allocating or deallocating from within these regions.
2468 *
2469 * @param vm_is64bit True if the process has a 64-bit address space.
2470 * @param regions An out parameter representing an array of regions to reserve.
2471 *
2472 * @return The number of reserved regions returned through `regions`.
2473 */
2474 size_t
2475 ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
2476 {
2477 assert(regions != NULL);
2478
2479 /**
2480 * Reserved regions only apply to 64-bit address spaces. This is because
2481 * we only expect to grow the maximum user VA address on 64-bit address spaces
2482 * (we've essentially already reached the max for 32-bit spaces). The reserved
2483 * regions should safely fall outside of the max user VA for 32-bit processes.
2484 */
2485 if (vm_is64bit) {
2486 *regions = vm_reserved_regions;
2487 return ARRAY_COUNT(vm_reserved_regions);
2488 } else {
2489 /* Don't reserve any VA regions on arm64_32 processes. */
2490 *regions = NULL;
2491 return 0;
2492 }
2493 }
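
/*
 * Illustrative sketch (not part of this file): the VM layer is expected to
 * query this during map setup and pre-map each returned entry so user code
 * can neither allocate nor deallocate within it, roughly:
 *
 *     struct vm_reserved_region *rgns;
 *     size_t n = ml_get_vm_reserved_regions(is64bit, &rgns);
 *     for (size_t i = 0; i < n; i++) {
 *         ...reserve rgns[i] in the new map...
 *     }
 *
 * is64bit and the reservation step stand in for the real VM-side code.
 */
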
2494 /* These WFE recommendations are expected to be updated on a relatively
2495 * infrequent cadence, possibly from a different cluster, hence
2496 * false cacheline sharing isn't expected to be material
2497 */
2498 static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];
2499
2500 uint32_t
2501 ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
2502 {
2503 assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
2504 assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
2505 os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
2506 return 0; /* Success */
2507 }
2508
2509 uint64_t
2510 ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
2511 {
2512 /* This and its consumer do not synchronize vis-a-vis updates
2513 * of the recommendation; races are acceptable.
2514 */
2515 uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
2516 return wfet;
2517 }
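
/*
 * Illustrative sketch (not part of this file): a WFE-based wait loop could cap
 * its dwell time with the per-cluster recommendation, roughly:
 *
 *     uint64_t wfet = ml_cluster_wfe_timeout(cluster_id);
 *     if (wfet != 0) {
 *         deadline = ml_get_speculative_timebase() + wfet;
 *     }
 *
 * where cluster_id and deadline are hypothetical names standing in for the
 * real caller's state.
 */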