/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/machine_routines.h>
#include <i386/io_map_entries.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <mach/processor.h>
#include <kern/processor.h>
#include <kern/machine.h>

#include <kern/cpu_number.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <kern/policy_internal.h>

#include <prng/random.h>
#include <prng/entropy.h>
#include <i386/machine_cpu.h>
#include <i386/lapic.h>
#include <i386/bit_routines.h>
#include <i386/mp_events.h>
#include <i386/pmCPU.h>
#include <i386/trap.h>
#include <i386/tsc.h>
#include <i386/cpu_threads.h>
#include <i386/proc_reg.h>
#include <mach/vm_param.h>
#include <i386/pmap.h>
#include <i386/pmap_internal.h>
#include <i386/misc_protos.h>
#include <kern/timer_queue.h>
#include <vm/vm_map.h>
#if KPC
#include <kern/kpc.h>
#endif
#include <architecture/i386/pio.h>
#include <i386/cpu_data.h>
#if DEBUG
#define DBG(x...) kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

#if MONOTONIC
#include <kern/monotonic.h>
#endif /* MONOTONIC */

extern void wakeup(void *);

uint64_t LockTimeOut;
uint64_t TLBTimeOut;
uint64_t LockTimeOutTSC;
uint32_t LockTimeOutUsec;
uint64_t MutexSpin;
uint64_t low_MutexSpin;
int64_t high_MutexSpin;
uint64_t LastDebuggerEntryAllowance;
uint64_t delay_spin_threshold;

extern uint64_t panic_restart_timeout;

boolean_t virtualized = FALSE;

decl_simple_lock_data(static, ml_timer_evaluation_slock);
uint32_t ml_timer_eager_evaluations;
uint64_t ml_timer_eager_evaluation_max;
static boolean_t ml_timer_evaluation_in_progress = FALSE;

LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
static int max_cpus_initialized = 0;
#define MAX_CPUS_SET 0x1
#define MAX_CPUS_WAIT 0x2

/* IO memory map services */

/* Map memory map IO space */
vm_offset_t
ml_io_map(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_IO);
}
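/*
 * Reader's note (not from the original source): a hypothetical caller, for
 * context only. A driver that needs CPU access to a device's MMIO window
 * could map it with something like
 *
 *	vm_offset_t regs = ml_io_map(mmio_phys_base, PAGE_SIZE);
 *
 * where mmio_phys_base is a placeholder for the device's physical base
 * address. The mapping is created with VM_WIMG_IO attributes, i.e.
 * uncached, which is what device registers require.
 */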

/* boot memory allocation */
vm_offset_t
ml_static_malloc(
	__unused vm_size_t size)
{
	return (vm_offset_t)NULL;
}


void
ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
{
	*phys_addr = 0;
	*size = 0;
}


vm_offset_t
ml_static_ptovirt(
	vm_offset_t paddr)
{
#if defined(__x86_64__)
	return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
#else
	return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
#endif
}

vm_offset_t
ml_static_slide(
	vm_offset_t vaddr)
{
	return VM_KERNEL_SLIDE(vaddr);
}

/*
 * base must be page-aligned, and size must be a multiple of PAGE_SIZE
 */
kern_return_t
ml_static_verify_page_protections(
	uint64_t base, uint64_t size, vm_prot_t prot)
{
	vm_prot_t pageprot;
	uint64_t offset;

	DBG("ml_static_verify_page_protections: vaddr 0x%llx sz 0x%llx prot 0x%x\n", base, size, prot);

	/*
	 * base must be within the static bounds, defined to be:
	 * (vm_kernel_stext, kc_highest_nonlinkedit_vmaddr)
	 */
#if DEVELOPMENT || DEBUG || KASAN
	assert(kc_highest_nonlinkedit_vmaddr > 0 && base > vm_kernel_stext && base < kc_highest_nonlinkedit_vmaddr);
#else /* On release kernels, assume this is a protection mismatch failure. */
	if (kc_highest_nonlinkedit_vmaddr == 0 || base < vm_kernel_stext || base >= kc_highest_nonlinkedit_vmaddr) {
		return KERN_FAILURE;
	}
#endif

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		if (pmap_get_prot(kernel_pmap, base + offset, &pageprot) == KERN_FAILURE) {
			return KERN_FAILURE;
		}
		if ((pageprot & prot) != prot) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}
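/*
 * Reader's note (not from the original source): the function above answers
 * "does every page in [base, base + size) carry at least the protections in
 * prot?". A hypothetical caller checking that a range really is mapped
 * read-only might do:
 *
 *	if (ml_static_verify_page_protections(addr, len,
 *	    VM_PROT_READ) != KERN_SUCCESS) {
 *		panic("expected static mapping missing or misprotected");
 *	}
 *
 * Both addr and len are placeholders; len must be a multiple of PAGE_SIZE.
 */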

vm_offset_t
ml_static_unslide(
	vm_offset_t vaddr)
{
	return VM_KERNEL_UNSLIDE(vaddr);
}

/*
 * Reclaim memory, by virtual address, that was used in early boot and is no
 * longer needed by the kernel.
 */
void
ml_static_mfree(
	vm_offset_t vaddr,
	vm_size_t size)
{
	addr64_t vaddr_cur;
	ppnum_t ppn;
	uint32_t freed_pages = 0;
	vm_size_t map_size;

	assert(vaddr >= VM_MIN_KERNEL_ADDRESS);

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) {
		map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur);

		/* just skip if nothing mapped here */
		if (map_size == 0) {
			vaddr_cur += PAGE_SIZE;
			continue;
		}

		/*
		 * Can't free from the middle of a large page.
		 */
		assert((vaddr_cur & (map_size - 1)) == 0);

		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		assert(ppn != (ppnum_t)NULL);

		pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size);
		while (map_size > 0) {
			if (++kernel_pmap->stats.resident_count > kernel_pmap->stats.resident_max) {
				kernel_pmap->stats.resident_max = kernel_pmap->stats.resident_count;
			}

			assert(pmap_valid_page(ppn));
			if (IS_MANAGED_PAGE(ppn)) {
				vm_page_create(ppn, (ppn + 1));
				freed_pages++;
			}
			map_size -= PAGE_SIZE;
			vaddr_cur += PAGE_SIZE;
			ppn++;
		}
	}
	vm_page_lockspin_queues();
	vm_page_wire_count -= freed_pages;
	vm_page_wire_count_initial -= freed_pages;
	if (vm_page_wire_count_on_boot != 0) {
		assert(vm_page_wire_count_on_boot >= freed_pages);
		vm_page_wire_count_on_boot -= freed_pages;
	}
	vm_page_unlock_queues();

#if DEBUG
	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
#endif
}
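/*
 * Reader's note (not from the original source): the loop above walks the
 * range one mapping at a time (honoring large pages via
 * pmap_query_pagesize()), unmaps each mapping, hands every managed physical
 * page back to the VM free list with vm_page_create(), and then debits the
 * wired-page counters so the reclaimed early-boot memory shows up as
 * ordinary free memory.
 */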

/* Change page protections for addresses previously loaded by efiboot */
kern_return_t
ml_static_protect(vm_offset_t vmaddr, vm_size_t size, vm_prot_t prot)
{
	boolean_t NX = !!!(prot & VM_PROT_EXECUTE), ro = !!!(prot & VM_PROT_WRITE);

	assert(prot & VM_PROT_READ);

	pmap_mark_range(kernel_pmap, vmaddr, size, NX, ro);

	return KERN_SUCCESS;
}

/* virtual to physical on wired pages */
vm_offset_t
ml_vtophys(
	vm_offset_t vaddr)
{
	return (vm_offset_t)kvtophys(vaddr);
}

/*
 *	Routine:        ml_nofault_copy
 *	Function:       Perform a physical mode copy if the source and
 *		destination have valid translations in the kernel pmap.
 *		If translations are present, they are assumed to
 *		be wired; i.e. no attempt is made to guarantee that the
 *		translations obtained remain valid for
 *		the duration of the copy process.
 */

vm_size_t
ml_nofault_copy(
	vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
{
	addr64_t cur_phys_dst, cur_phys_src;
	uint32_t count, nbytes = 0;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			break;
		}
		if (!(cur_phys_dst = kvtophys(virtdst))) {
			break;
		}
		if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
			break;
		}
		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
		if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
			count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
		}
		if (count > size) {
			count = (uint32_t)size;
		}

		bcopy_phys(cur_phys_src, cur_phys_dst, count);

		nbytes += count;
		virtsrc += count;
		virtdst += count;
		size -= count;
	}

	return nbytes;
}
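/*
 * Reader's note (not from the original source): on each iteration 'count'
 * is clamped to whatever remains of the current source page, the current
 * destination page, and the remaining size, so each bcopy_phys() never
 * crosses a page boundary on either side. A caller (for example a debugger
 * or diagnostics path) can therefore attempt a copy through addresses that
 * may be unmapped: the copy simply stops early and the number of bytes
 * actually copied is returned.
 */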

/*
 *	Routine:        ml_validate_nofault
 *	Function: Validate that this address range has valid translations
 *		in the kernel pmap. If translations are present, they are
 *		assumed to be wired; i.e. no attempt is made to guarantee
 *		that the translations persist after the check.
 *	Returns: TRUE if the range is mapped and will not cause a fault,
 *		FALSE otherwise.
 */

boolean_t
ml_validate_nofault(
	vm_offset_t virtsrc, vm_size_t size)
{
	addr64_t cur_phys_src;
	uint32_t count;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			return FALSE;
		}
		if (!pmap_valid_page(i386_btop(cur_phys_src))) {
			return FALSE;
		}
		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
		if (count > size) {
			count = (uint32_t)size;
		}

		virtsrc += count;
		size -= count;
	}

	return TRUE;
}

/* Interrupt handling */

/* Initialize Interrupts */
void
ml_init_interrupt(void)
{
	(void) ml_set_interrupts_enabled(TRUE);
}


/* Get Interrupts Enabled */
boolean_t
ml_get_interrupts_enabled(void)
{
	unsigned long flags;

	__asm__ volatile ("pushf; pop %0": "=r" (flags));
	return (flags & EFL_IF) != 0;
}

/* Set Interrupts Enabled */
boolean_t
ml_set_interrupts_enabled(boolean_t enable)
{
	unsigned long flags;
	boolean_t istate;

	__asm__ volatile ("pushf; pop %0" : "=r" (flags));

	assert(get_interrupt_level() ? (enable == FALSE) : TRUE);

	istate = ((flags & EFL_IF) != 0);

	if (enable) {
		__asm__ volatile ("sti;nop");

		if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
		}
	} else {
		if (istate) {
			__asm__ volatile ("cli");
		}
	}

	return istate;
}
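/*
 * Reader's note (not from the original source): ml_set_interrupts_enabled()
 * returns the previous interrupt state, so the conventional pattern in
 * callers is a save/restore pair:
 *
 *	boolean_t istate = ml_set_interrupts_enabled(FALSE);
 *	... critical section that must not be interrupted ...
 *	(void) ml_set_interrupts_enabled(istate);
 *
 * Note also that re-enabling interrupts at preemption level 0 with an
 * urgent AST pending immediately takes the T_PREEMPT software trap, as the
 * code above shows.
 */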

/* Early Set Interrupts Enabled */
boolean_t
ml_early_set_interrupts_enabled(boolean_t enable)
{
	if (enable == TRUE) {
		kprintf("Caller attempted to enable interrupts too early in "
		    "kernel startup. Halting.\n");
		hlt();
		/*NOTREACHED*/
	}

	/* On x86, do not allow interrupts to be enabled very early */
	return FALSE;
}

/* Check if running at interrupt context */
boolean_t
ml_at_interrupt_context(void)
{
	return get_interrupt_level() != 0;
}

void
ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
{
	*icp = (get_interrupt_level() != 0);
	/* These will be technically inaccurate for interrupts that occur
	 * successively within a single "idle exit" event, but shouldn't
	 * matter statistically.
	 */
	*pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
}

/* Generate a fake interrupt */
__dead2
void
ml_cause_interrupt(void)
{
	panic("ml_cause_interrupt not defined yet on Intel");
}

/*
 * TODO: transition users of this to kernel_thread_start_priority
 * ml_thread_policy is an unsupported KPI
 */
void
ml_thread_policy(
	thread_t thread,
	__unused unsigned policy_id,
	unsigned policy_info)
{
	if (policy_info & MACHINE_NETWORK_WORKLOOP) {
		thread_precedence_policy_data_t info;
		__assert_only kern_return_t kret;

		info.importance = 1;

		kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
		    (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		assert(kret == KERN_SUCCESS);
	}
}

/* Install an interrupt handler */
void
ml_install_interrupt_handler(
	void *nub,
	int source,
	void *target,
	IOInterruptHandler handler,
	void *refCon)
{
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);

	PE_install_interrupt_handler(nub, source, target,
	    (IOInterruptHandler) handler, refCon);

	(void) ml_set_interrupts_enabled(current_state);
}


void
machine_signal_idle(
	processor_t processor)
{
	cpu_interrupt(processor->cpu_id);
}

__dead2
void
machine_signal_idle_deferred(
	__unused processor_t processor)
{
	panic("Unimplemented");
}

__dead2
void
machine_signal_idle_cancel(
	__unused processor_t processor)
{
	panic("Unimplemented");
}

static kern_return_t
register_cpu(
	uint32_t lapic_id,
	processor_t *processor_out,
	boolean_t boot_cpu )
{
	int target_cpu;
	cpu_data_t *this_cpu_datap;

	this_cpu_datap = cpu_data_alloc(boot_cpu);
	if (this_cpu_datap == NULL) {
		return KERN_FAILURE;
	}
	target_cpu = this_cpu_datap->cpu_number;
	assert((boot_cpu && (target_cpu == 0)) ||
	    (!boot_cpu && (target_cpu != 0)));

	lapic_cpu_map(lapic_id, target_cpu);

	/* The cpu_id is not known at registration phase. Just use the
	 * lapic_id for now.
	 */
	this_cpu_datap->cpu_phys_number = lapic_id;

	this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
	if (this_cpu_datap->cpu_console_buf == NULL) {
		goto failed;
	}

#if KPC
	if (kpc_register_cpu(this_cpu_datap) != TRUE) {
		goto failed;
	}
#endif

	if (!boot_cpu) {
		cpu_thread_alloc(this_cpu_datap->cpu_number);
		if (this_cpu_datap->lcpu.core == NULL) {
			goto failed;
		}
	}

	/*
	 * processor_init() deferred to topology start
	 * because "slot numbers" a.k.a. logical processor numbers
	 * are not yet finalized.
	 */
	*processor_out = this_cpu_datap->cpu_processor;

	return KERN_SUCCESS;

failed:
	console_cpu_free(this_cpu_datap->cpu_console_buf);
#if KPC
	kpc_unregister_cpu(this_cpu_datap);
#endif /* KPC */

	return KERN_FAILURE;
}


kern_return_t
ml_processor_register(
	cpu_id_t cpu_id,
	uint32_t lapic_id,
	processor_t *processor_out,
	boolean_t boot_cpu,
	boolean_t start )
{
	static boolean_t done_topo_sort = FALSE;
	static uint32_t num_registered = 0;

	/* Register all CPUs first, and track max */
	if (start == FALSE) {
		num_registered++;

		DBG( "registering CPU lapic id %d\n", lapic_id );

		return register_cpu( lapic_id, processor_out, boot_cpu );
	}

	/* Sort by topology before we start anything */
	if (!done_topo_sort) {
		DBG( "about to start CPUs. %d registered\n", num_registered );

		cpu_topology_sort( num_registered );
		done_topo_sort = TRUE;
	}

	/* Assign the cpu ID */
	uint32_t cpunum = -1;
	cpu_data_t *this_cpu_datap = NULL;

	/* find cpu num and pointer */
	cpunum = ml_get_cpuid( lapic_id );

	if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
		panic( "trying to start invalid/unregistered CPU %d\n", lapic_id );
	}

	this_cpu_datap = cpu_datap(cpunum);

	/* fix the CPU id */
	this_cpu_datap->cpu_id = cpu_id;

	/* allocate and initialize other per-cpu structures */
	if (!boot_cpu) {
		mp_cpus_call_cpu_init(cpunum);
		random_cpu_init(cpunum);
	}

	/* output arg */
	*processor_out = this_cpu_datap->cpu_processor;

	/* OK, try and start this CPU */
	return cpu_topology_start_cpu( cpunum );
}
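/*
 * Reader's note (not from the original source): ml_processor_register() is
 * called in two phases. With start == FALSE each CPU is merely registered
 * (per-cpu data, console buffer, LAPIC-to-cpu-number mapping) and counted;
 * once a caller passes start == TRUE, the accumulated set is sorted by
 * topology exactly once, and the requested CPU is then started via
 * cpu_topology_start_cpu().
 */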


void
ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
{
	boolean_t os_supports_sse;
	i386_cpu_info_t *cpuid_infop;

	if (cpu_infop == NULL) {
		return;
	}

	/*
	 * Are we supporting MMX/SSE/SSE2/SSE3?
	 * As distinct from whether the cpu has these capabilities.
	 */
	os_supports_sse = !!(get_cr4() & CR4_OSXMM);

	if (ml_fpu_avx_enabled()) {
		cpu_infop->vector_unit = 9;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
		cpu_infop->vector_unit = 8;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
		cpu_infop->vector_unit = 7;
	} else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
		cpu_infop->vector_unit = 6;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
		cpu_infop->vector_unit = 5;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
		cpu_infop->vector_unit = 4;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
		cpu_infop->vector_unit = 3;
	} else if (cpuid_features() & CPUID_FEATURE_MMX) {
		cpu_infop->vector_unit = 2;
	} else {
		cpu_infop->vector_unit = 0;
	}

	cpuid_infop = cpuid_info();

	cpu_infop->cache_line_size = cpuid_infop->cache_linesize;

	cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
	cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];

	if (cpuid_infop->cache_size[L2U] > 0) {
		cpu_infop->l2_settings = 1;
		cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
	} else {
		cpu_infop->l2_settings = 0;
		cpu_infop->l2_cache_size = 0xFFFFFFFF;
	}

	if (cpuid_infop->cache_size[L3U] > 0) {
		cpu_infop->l3_settings = 1;
		cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
	} else {
		cpu_infop->l3_settings = 0;
		cpu_infop->l3_cache_size = 0xFFFFFFFF;
	}
}

int
ml_early_cpu_max_number(void)
{
	int n = max_ncpus;

	assert(startup_phase >= STARTUP_SUB_TUNABLES);
	if (max_cpus_from_firmware) {
		n = MIN(n, max_cpus_from_firmware);
	}
	return n - 1;
}

void
ml_set_max_cpus(unsigned int max_cpus)
{
	lck_mtx_lock(&max_cpus_lock);
	if (max_cpus_initialized != MAX_CPUS_SET) {
		if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
			/*
			 * Note: max_cpus is the number of enabled processors
			 * that ACPI found; max_ncpus is the maximum number
			 * that the kernel supports or that the "cpus="
			 * boot-arg has set. Here we take the minimum.
			 */
			machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
		}
		if (max_cpus_initialized == MAX_CPUS_WAIT) {
			thread_wakeup((event_t) &max_cpus_initialized);
		}
		max_cpus_initialized = MAX_CPUS_SET;
	}
	lck_mtx_unlock(&max_cpus_lock);
}

unsigned int
ml_wait_max_cpus(void)
{
	lck_mtx_lock(&max_cpus_lock);
	while (max_cpus_initialized != MAX_CPUS_SET) {
		max_cpus_initialized = MAX_CPUS_WAIT;
		lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT);
	}
	lck_mtx_unlock(&max_cpus_lock);
	return machine_info.max_cpus;
}
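/*
 * Reader's note (not from the original source): ml_set_max_cpus() and
 * ml_wait_max_cpus() form a simple handshake around max_cpus_lock. A waiter
 * marks the state MAX_CPUS_WAIT and sleeps on &max_cpus_initialized; once
 * ACPI reports the enabled-processor count, ml_set_max_cpus() records it in
 * machine_info.max_cpus, wakes any waiter, and latches the state to
 * MAX_CPUS_SET so the value is published exactly once.
 */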

void
ml_panic_trap_to_debugger(__unused const char *panic_format_str,
    __unused va_list *panic_args,
    __unused unsigned int reason,
    __unused void *ctx,
    __unused uint64_t panic_options_mask,
    __unused unsigned long panic_caller)
{
	return;
}

static uint64_t
virtual_timeout_inflate64(unsigned int vti, uint64_t timeout, uint64_t max_timeout)
{
	if (vti >= 64) {
		return max_timeout;
	}

	if ((timeout << vti) >> vti != timeout) {
		return max_timeout;
	}

	if ((timeout << vti) > max_timeout) {
		return max_timeout;
	}

	return timeout << vti;
}
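/*
 * Reader's note (not from the original source): the left shift multiplies
 * the timeout by 2^vti, and the shift-back comparison is the overflow
 * guard: if (timeout << vti) >> vti no longer equals timeout, high bits
 * were lost and the capped maximum is returned instead. For example, with
 * vti = 6 a timeout of 1000 inflates to 64000, while any timeout at or
 * above 2^58 would overflow a 64-bit value and therefore clamps to
 * max_timeout.
 */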

static uint32_t
virtual_timeout_inflate32(unsigned int vti, uint32_t timeout, uint32_t max_timeout)
{
	if (vti >= 32) {
		return max_timeout;
	}

	if ((timeout << vti) >> vti != timeout) {
		return max_timeout;
	}

	return timeout << vti;
}

/*
 * Some timeouts are later adjusted or used in calculations setting
 * other values. In order to avoid overflow, cap the max timeout at
 * 2^47 ns (~39 hours).
 */
static const uint64_t max_timeout_ns = 1ULL << 47;

/*
 * Inflate a timeout in absolutetime.
 */
static uint64_t
virtual_timeout_inflate_abs(unsigned int vti, uint64_t timeout)
{
	uint64_t max_timeout;
	nanoseconds_to_absolutetime(max_timeout_ns, &max_timeout);
	return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a value in TSC ticks.
 */
static uint64_t
virtual_timeout_inflate_tsc(unsigned int vti, uint64_t timeout)
{
	const uint64_t max_timeout = tmrCvt(max_timeout_ns, tscFCvtn2t);
	return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a timeout in microseconds.
 */
static uint32_t
virtual_timeout_inflate_us(unsigned int vti, uint64_t timeout)
{
	const uint32_t max_timeout = ~0;
	return virtual_timeout_inflate32(vti, timeout, max_timeout);
}

/*
 * Routine: ml_init_lock_timeout
 * Function:
 */
void
ml_init_lock_timeout(void)
{
	uint64_t abstime;
	uint32_t mtxspin;
#if DEVELOPMENT || DEBUG
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
#else
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 1;
#endif
	uint32_t slto;
	uint32_t prt;

	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
	}

	/*
	 * LockTimeOut is in absolute time, LockTimeOutTSC is in TSC ticks,
	 * and LockTimeOutUsec is in microseconds (a 32-bit quantity).
	 */
	LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
	nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
	LockTimeOut = abstime;
	LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);

	/*
	 * TLBTimeOut dictates the TLB flush timeout period. It defaults to
	 * LockTimeOut but can be overridden separately. In particular, a
	 * zero value inhibits the timeout-panic and cuts a trace event instead
	 * - see pmap_flush_tlbs().
	 */
	if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
		TLBTimeOut = (uint32_t) abstime;
	} else {
		TLBTimeOut = LockTimeOut;
	}

#if DEVELOPMENT || DEBUG
	reportphyreaddelayabs = LockTimeOut >> 1;
#endif
	if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
		reportphyreaddelayabs = abstime;
	}

	if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		reportphywritedelayabs = abstime;
	}

	if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		tracephyreaddelayabs = abstime;
	}

	if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		tracephywritedelayabs = abstime;
	}

	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
		if (mtxspin > USEC_PER_SEC >> 4) {
			mtxspin = USEC_PER_SEC >> 4;
		}
		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
	} else {
		nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
	}
	MutexSpin = (unsigned int)abstime;
	low_MutexSpin = MutexSpin;
	/*
	 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
	 * real_ncpus is not set at this time
	 */
	high_MutexSpin = -1;

	nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
	if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
		nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
	}

	virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
	if (virtualized) {
		unsigned int vti;

		if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
			vti = 6;
		}
		printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
		kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
#define VIRTUAL_TIMEOUT_INFLATE_ABS(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%016llx ", #_timeout, _timeout);		\
	_timeout = virtual_timeout_inflate_abs(vti, _timeout);		\
	kprintf("-> 0x%016llx\n", _timeout);				\
MACRO_END

#define VIRTUAL_TIMEOUT_INFLATE_TSC(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%016llx ", #_timeout, _timeout);		\
	_timeout = virtual_timeout_inflate_tsc(vti, _timeout);		\
	kprintf("-> 0x%016llx\n", _timeout);				\
MACRO_END
#define VIRTUAL_TIMEOUT_INFLATE_US(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%08x ", #_timeout, _timeout);			\
	_timeout = virtual_timeout_inflate_us(vti, _timeout);		\
	kprintf("-> 0x%08x\n", _timeout);				\
MACRO_END
		VIRTUAL_TIMEOUT_INFLATE_US(LockTimeOutUsec);
		VIRTUAL_TIMEOUT_INFLATE_ABS(LockTimeOut);
		VIRTUAL_TIMEOUT_INFLATE_TSC(LockTimeOutTSC);
		VIRTUAL_TIMEOUT_INFLATE_ABS(TLBTimeOut);
		VIRTUAL_TIMEOUT_INFLATE_ABS(MutexSpin);
		VIRTUAL_TIMEOUT_INFLATE_ABS(low_MutexSpin);
		VIRTUAL_TIMEOUT_INFLATE_ABS(reportphyreaddelayabs);
	}

	interrupt_latency_tracker_setup();
	simple_lock_init(&ml_timer_evaluation_slock, 0);
}

/*
 * Threshold above which we should attempt to block
 * instead of spinning for clock_delay_until().
 */

void
ml_init_delay_spin_threshold(int threshold_us)
{
	nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
}

boolean_t
ml_delay_should_spin(uint64_t interval)
{
	return (interval < delay_spin_threshold) ? TRUE : FALSE;
}

TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);

void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
	if (yield_delay_us) {
		delay(yield_delay_us);
	}
#endif
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates. Defer to cpu_thread_init().
 */
void
ml_cpu_up(void)
{
	return;
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates.
 */
void
ml_cpu_down(void)
{
	i386_deactivate_cpu();

	return;
}

/*
 * The following are required for parts of the kernel
 * that cannot resolve these functions as inlines:
 */
extern thread_t current_act(void) __attribute__((const));
thread_t
current_act(void)
{
	return current_thread_fast();
}

#undef current_thread
extern thread_t current_thread(void) __attribute__((const));
thread_t
current_thread(void)
{
	return current_thread_fast();
}


boolean_t
ml_is64bit(void)
{
	return cpu_mode_is64bit();
}


boolean_t
ml_thread_is64bit(thread_t thread)
{
	return thread_is_64bit_addr(thread);
}


boolean_t
ml_state_is64bit(void *saved_state)
{
	return is_saved_state64(saved_state);
}

void
ml_cpu_set_ldt(int selector)
{
	/*
	 * Avoid loading the LDT
	 * if we're setting the KERNEL LDT and it's already set.
	 */
	if (selector == KERNEL_LDT &&
	    current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
		return;
	}

	lldt(selector);
	current_cpu_datap()->cpu_ldt = selector;
}

void
ml_fp_setvalid(boolean_t value)
{
	fp_setvalid(value);
}

uint64_t
ml_cpu_int_event_time(void)
{
	return current_cpu_datap()->cpu_int_event_time;
}

vm_offset_t
ml_stack_remaining(void)
{
	uintptr_t local = (uintptr_t) &local;

	if (ml_at_interrupt_context() != 0) {
		return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
	} else {
		return local - current_thread()->kernel_stack;
	}
}
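/*
 * Reader's note (not from the original source): taking the address of the
 * local variable above is a cheap way to sample the current stack pointer;
 * subtracting the base of the active stack (the interrupt stack or the
 * thread's kernel stack) yields an approximation of the bytes still
 * available.
 */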

#if KASAN
vm_offset_t ml_stack_base(void);
vm_size_t ml_stack_size(void);

vm_offset_t
ml_stack_base(void)
{
	if (ml_at_interrupt_context()) {
		return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
	} else {
		return current_thread()->kernel_stack;
	}
}

vm_size_t
ml_stack_size(void)
{
	if (ml_at_interrupt_context()) {
		return INTSTACK_SIZE;
	} else {
		return kernel_stack_size;
	}
}
#endif

void
kernel_preempt_check(void)
{
	boolean_t intr;
	unsigned long flags;

	assert(get_preemption_level() == 0);

	if (__improbable(*ast_pending() & AST_URGENT)) {
		/*
		 * can handle interrupts and preemptions
		 * at this point
		 */
		__asm__ volatile ("pushf; pop %0" : "=r" (flags));

		intr = ((flags & EFL_IF) != 0);

		/*
		 * now cause the PRE-EMPTION trap
		 */
		if (intr == TRUE) {
			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
		}
	}
}

boolean_t
machine_timeout_suspended(void)
{
	return pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake();
}

/* Eagerly evaluate all pending timer and thread callouts
 */
void
ml_timer_evaluate(void)
{
	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);

	uint64_t te_end, te_start = mach_absolute_time();
	simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
	ml_timer_evaluation_in_progress = TRUE;
	thread_call_delayed_timer_rescan_all();
	mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
	ml_timer_evaluation_in_progress = FALSE;
	ml_timer_eager_evaluations++;
	te_end = mach_absolute_time();
	ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
	simple_unlock(&ml_timer_evaluation_slock);

	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

boolean_t
ml_timer_forced_evaluation(void)
{
	return ml_timer_evaluation_in_progress;
}

/* 32-bit right-rotate n bits */
static inline uint32_t
ror32(uint32_t val, const unsigned int n)
{
	__asm__ volatile ("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n));
	return val;
}

void
ml_entropy_collect(void)
{
	uint32_t tsc_lo, tsc_hi;
	uint32_t *ep;

	assert(cpu_number() == master_cpu);

	/* update buffer pointer cyclically */
	ep = EntropyData.buffer + (EntropyData.sample_count & EntropyData.buffer_index_mask);
	EntropyData.sample_count += 1;

	rdtsc_nofence(tsc_lo, tsc_hi);
	*ep = (ror32(*ep, 9) & EntropyData.ror_mask) ^ tsc_lo;
}
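/*
 * Reader's note (not from the original source): each sample folds the low
 * 32 bits of the TSC into a circular buffer slot by rotating the existing
 * word 9 bits and XORing in the fresh timestamp, so timer-interrupt jitter
 * accumulates as raw entropy for the kernel PRNG to condition later.
 */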

uint64_t
ml_energy_stat(__unused thread_t t)
{
	return 0;
}

void
ml_gpu_stat_update(uint64_t gpu_ns_delta)
{
	current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
}

uint64_t
ml_gpu_stat(thread_t t)
{
	return t->machine.thread_gpu_ns;
}

int plctrace_enabled = 0;

void
_disable_preemption(void)
{
	disable_preemption_internal();
}

void
_enable_preemption(void)
{
	enable_preemption_internal();
}

void
plctrace_disable(void)
{
	plctrace_enabled = 0;
}

static boolean_t ml_quiescing;

void
ml_set_is_quiescing(boolean_t quiescing)
{
	ml_quiescing = quiescing;
}

boolean_t
ml_is_quiescing(void)
{
	return ml_quiescing;
}

uint64_t
ml_get_booter_memory_size(void)
{
	return 0;
}

void
machine_lockdown(void)
{
	x86_64_protect_data_const();
}

bool
ml_cpu_can_exit(__unused int cpu_id)
{
	return true;
}

void
ml_cpu_init_state(void)
{
}

void
ml_cpu_begin_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_end_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_begin_loop(void)
{
}

void
ml_cpu_end_loop(void)
{
}

size_t
ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
{
#pragma unused(vm_is64bit)
	assert(regions != NULL);

	*regions = NULL;
	return 0;
}