osfmk/i386/machine_routines.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <i386/machine_routines.h>
  30 #include <i386/io_map_entries.h>
  31 #include <i386/cpuid.h>
  32 #include <i386/fpu.h>
  33 #include <mach/processor.h>
  34 #include <kern/processor.h>
  35 #include <kern/machine.h>
  36
  37 #include <kern/cpu_number.h>
  38 #include <kern/thread.h>
  39 #include <kern/thread_call.h>
  40 #include <kern/policy_internal.h>
  41
  42 #include <prng/random.h>
  43 #include <i386/machine_cpu.h>
  44 #include <i386/lapic.h>
  45 #include <i386/bit_routines.h>
  46 #include <i386/mp_events.h>
  47 #include <i386/pmCPU.h>
  48 #include <i386/trap.h>
  49 #include <i386/tsc.h>
  50 #include <i386/cpu_threads.h>
  51 #include <i386/proc_reg.h>
  52 #include <mach/vm_param.h>
  53 #include <i386/pmap.h>
  54 #include <i386/pmap_internal.h>
  55 #include <i386/misc_protos.h>
  56 #include <kern/timer_queue.h>
  57 #if KPC
  58 #include <kern/kpc.h>
  59 #endif
  60 #include <architecture/i386/pio.h>
  61 #include <i386/cpu_data.h>
  62 #if DEBUG
  63 #define DBG(x...)       kprintf("DBG: " x)
  64 #else
  65 #define DBG(x...)
  66 #endif
  67
  68 #if MONOTONIC
  69 #include <kern/monotonic.h>
  70 #endif /* MONOTONIC */
  71
  72 extern void     wakeup(void *);     /* called by ml_init_max_cpus() when a waiter was recorded */
  73
  74 static int max_cpus_initialized = 0;        /* 0, MAX_CPUS_SET or MAX_CPUS_WAIT (defined below) */
  75
  76 uint64_t        LockTimeOut;                /* lock timeout as absolute time; set in ml_init_lock_timeout() */
  77 uint64_t        TLBTimeOut;                 /* TLB flush timeout; LockTimeOut unless the "tlbto_us" boot-arg is given */
  78 uint64_t        LockTimeOutTSC;             /* LockTimeOut converted to TSC ticks */
  79 uint32_t        LockTimeOutUsec;            /* lock timeout in microseconds (32-bit) */
  80 uint64_t        MutexSpin;                  /* absolute time derived from the "mtxspin" boot-arg, else from 10 us */
  81 uint64_t        LastDebuggerEntryAllowance; /* 4 seconds converted to absolute time */
  82 uint64_t        delay_spin_threshold;       /* set by ml_init_delay_spin_threshold(); read by ml_delay_should_spin() */
  83
  84 extern uint64_t panic_restart_timeout;      /* overwritten here when the "panic_restart_timeout" boot-arg is given */
  85
  86 boolean_t virtualized = FALSE;              /* set in ml_init_lock_timeout() from CPUID_FEATURE_VMM */
  87
  88 decl_simple_lock_data(static, ml_timer_evaluation_slock);   /* initialized at the end of ml_init_lock_timeout() */
  89 uint32_t ml_timer_eager_evaluations;
  90 uint64_t ml_timer_eager_evaluation_max;
  91 static boolean_t ml_timer_evaluation_in_progress = FALSE;
  92
  93
  94 #define MAX_CPUS_SET    0x1     /* max_cpus_initialized: ml_init_max_cpus() has completed */
  95 #define MAX_CPUS_WAIT   0x2     /* max_cpus_initialized: ml_get_max_cpus() is waiting for it */
  96
  97 /* IO memory map services */
  98
  99 /* Map memory map IO space */
 100 vm_offset_t
 101 ml_io_map(
 102         vm_offset_t phys_addr,
 103         vm_size_t size)
 104 {
 105         return io_map(phys_addr, size, VM_WIMG_IO);
 106 }
 107
 108 /* boot memory allocation */
 109 vm_offset_t
 110 ml_static_malloc(
 111         __unused vm_size_t size)
 112 {
 113         return (vm_offset_t)NULL;
 114 }
 115
 116
 117 void
 118 ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
 119 {
 120         *phys_addr = 0;
 121         *size      = 0;
 122 }
 123
 124
 125 vm_offset_t
 126 ml_static_ptovirt(
 127         vm_offset_t paddr)
 128 {
 129 #if defined(__x86_64__)
 130         return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
 131 #else
 132         return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
 133 #endif
 134 }
 135
 136 vm_offset_t
 137 ml_static_slide(
 138         vm_offset_t vaddr)
 139 {
 140         return VM_KERNEL_SLIDE(vaddr);
 141 }
 142
 143 vm_offset_t
 144 ml_static_unslide(
 145         vm_offset_t vaddr)
 146 {
 147         return VM_KERNEL_UNSLIDE(vaddr);
 148 }
 149
 150 /*
 151  * Reclaim memory, by virtual address, that was used in early boot that is no longer needed
 152  * by the kernel.
 153  */
 154 void
 155 ml_static_mfree(
 156         vm_offset_t vaddr,
 157         vm_size_t size)
 158 {
 159         addr64_t vaddr_cur;
 160         ppnum_t ppn;
 161         uint32_t freed_pages = 0;
 162         vm_size_t map_size;
 163
 164         assert(vaddr >= VM_MIN_KERNEL_ADDRESS);
 165
 166         assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
 167
 168         for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) {
 169                 map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur);
 170
 171                 /* just skip if nothing mapped here */
 172                 if (map_size == 0) {
 173                         vaddr_cur += PAGE_SIZE;
 174                         continue;
 175                 }
 176
 177                 /*
 178                  * Can't free from the middle of a large page.
 179                  */
 180                 assert((vaddr_cur & (map_size - 1)) == 0);
 181
 182                 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
 183                 assert(ppn != (ppnum_t)NULL);
 184
 185                 pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size);
 186                 while (map_size > 0) {
 187                         if (++kernel_pmap->stats.resident_count > kernel_pmap->stats.resident_max) {
 188                                 kernel_pmap->stats.resident_max = kernel_pmap->stats.resident_count;
 189                         }
 190
 191                         assert(pmap_valid_page(ppn));
 192                         if (IS_MANAGED_PAGE(ppn)) {
 193                                 vm_page_create(ppn, (ppn + 1));
 194                                 freed_pages++;
 195                         }
 196                         map_size -= PAGE_SIZE;
 197                         vaddr_cur += PAGE_SIZE;
 198                         ppn++;
 199                 }
 200         }
 201         vm_page_lockspin_queues();
 202         vm_page_wire_count -= freed_pages;
 203         vm_page_wire_count_initial -= freed_pages;
 204         if (vm_page_wire_count_on_boot != 0) {
 205                 assert(vm_page_wire_count_on_boot >= freed_pages);
 206                 vm_page_wire_count_on_boot -= freed_pages;
 207         }
 208         vm_page_unlock_queues();
 209
 210 #if     DEBUG
 211         kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
 212 #endif
 213 }
 214
 215
 216 /* virtual to physical on wired pages */
 217 vm_offset_t
 218 ml_vtophys(
 219         vm_offset_t vaddr)
 220 {
 221         return (vm_offset_t)kvtophys(vaddr);
 222 }
 223
 224 /*
 225  *      Routine:        ml_nofault_copy
 226  *      Function:       Perform a physical mode copy if the source and
 227  *                      destination have valid translations in the kernel pmap.
 228  *                      If translations are present, they are assumed to
 229  *                      be wired; i.e. no attempt is made to guarantee that the
 230  *                      translations obtained remained valid for
 231  *                      the duration of the copy process.
 232  */
 233
 234 vm_size_t
 235 ml_nofault_copy(
 236         vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
 237 {
 238         addr64_t cur_phys_dst, cur_phys_src;
 239         uint32_t count, nbytes = 0;
 240
 241         while (size > 0) {
 242                 if (!(cur_phys_src = kvtophys(virtsrc))) {
 243                         break;
 244                 }
 245                 if (!(cur_phys_dst = kvtophys(virtdst))) {
 246                         break;
 247                 }
 248                 if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
 249                         break;
 250                 }
 251                 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
 252                 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
 253                         count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
 254                 }
 255                 if (count > size) {
 256                         count = (uint32_t)size;
 257                 }
 258
 259                 bcopy_phys(cur_phys_src, cur_phys_dst, count);
 260
 261                 nbytes += count;
 262                 virtsrc += count;
 263                 virtdst += count;
 264                 size -= count;
 265         }
 266
 267         return nbytes;
 268 }
 269
 270 /*
 271  *      Routine:        ml_validate_nofault
 272  *      Function: Validate that ths address range has a valid translations
 273  *                      in the kernel pmap.  If translations are present, they are
 274  *                      assumed to be wired; i.e. no attempt is made to guarantee
 275  *                      that the translation persist after the check.
 276  *  Returns: TRUE if the range is mapped and will not cause a fault,
 277  *                      FALSE otherwise.
 278  */
 279
 280 boolean_t
 281 ml_validate_nofault(
 282         vm_offset_t virtsrc, vm_size_t size)
 283 {
 284         addr64_t cur_phys_src;
 285         uint32_t count;
 286
 287         while (size > 0) {
 288                 if (!(cur_phys_src = kvtophys(virtsrc))) {
 289                         return FALSE;
 290                 }
 291                 if (!pmap_valid_page(i386_btop(cur_phys_src))) {
 292                         return FALSE;
 293                 }
 294                 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
 295                 if (count > size) {
 296                         count = (uint32_t)size;
 297                 }
 298
 299                 virtsrc += count;
 300                 size -= count;
 301         }
 302
 303         return TRUE;
 304 }
 305
 306 /* Interrupt handling */
 307
 308 /* Initialize Interrupts */
 309 void
 310 ml_init_interrupt(void)
 311 {
 312         (void) ml_set_interrupts_enabled(TRUE);
 313 }
 314
 315
 316 /* Get Interrupts Enabled */
 317 boolean_t
 318 ml_get_interrupts_enabled(void)
 319 {
 320         unsigned long flags;
 321
 322         __asm__ volatile ("pushf; pop   %0":  "=r" (flags));
 323         return (flags & EFL_IF) != 0;
 324 }
 325
 326 /* Set Interrupts Enabled */
 327 boolean_t
 328 ml_set_interrupts_enabled(boolean_t enable)
 329 {
 330         unsigned long flags;
 331         boolean_t istate;
 332
 333         __asm__ volatile ("pushf; pop   %0"  :  "=r" (flags));
 334
 335         assert(get_interrupt_level() ? (enable == FALSE) : TRUE);
 336
 337         istate = ((flags & EFL_IF) != 0);
 338
 339         if (enable) {
 340                 __asm__ volatile ("sti;nop");
 341
 342                 if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
 343                         __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
 344                 }
 345         } else {
 346                 if (istate) {
 347                         __asm__ volatile ("cli");
 348                 }
 349         }
 350
 351         return istate;
 352 }
 353
 354 /* Early Set Interrupts Enabled */
 355 boolean_t
 356 ml_early_set_interrupts_enabled(boolean_t enable)
 357 {
 358         if (enable == TRUE) {
 359                 kprintf("Caller attempted to enable interrupts too early in "
 360                     "kernel startup. Halting.\n");
 361                 hlt();
 362                 /*NOTREACHED*/
 363         }
 364
 365         /* On x86, do not allow interrupts to be enabled very early */
 366         return FALSE;
 367 }
 368
 369 /* Check if running at interrupt context */
 370 boolean_t
 371 ml_at_interrupt_context(void)
 372 {
 373         return get_interrupt_level() != 0;
 374 }
 375
 376 void
 377 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
 378 {
 379         *icp = (get_interrupt_level() != 0);
 380         /* These will be technically inaccurate for interrupts that occur
 381          * successively within a single "idle exit" event, but shouldn't
 382          * matter statistically.
 383          */
 384         *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
 385 }
 386
 387 /* Generate a fake interrupt */
 388 __dead2
 389 void
 390 ml_cause_interrupt(void)
 391 {
 392         panic("ml_cause_interrupt not defined yet on Intel");
 393 }
 394
 395 /*
 396  * TODO: transition users of this to kernel_thread_start_priority
 397  * ml_thread_policy is an unsupported KPI
 398  */
 399 void
 400 ml_thread_policy(
 401         thread_t thread,
 402         __unused        unsigned policy_id,
 403         unsigned policy_info)
 404 {
 405         if (policy_info & MACHINE_NETWORK_WORKLOOP) {
 406                 thread_precedence_policy_data_t info;
 407                 __assert_only kern_return_t kret;
 408
 409                 info.importance = 1;
 410
 411                 kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
 412                     (thread_policy_t)&info,
 413                     THREAD_PRECEDENCE_POLICY_COUNT);
 414                 assert(kret == KERN_SUCCESS);
 415         }
 416 }
 417
 418 /* Initialize Interrupts */
 419 void
 420 ml_install_interrupt_handler(
 421         void *nub,
 422         int source,
 423         void *target,
 424         IOInterruptHandler handler,
 425         void *refCon)
 426 {
 427         boolean_t current_state;
 428
 429         current_state = ml_set_interrupts_enabled(FALSE);
 430
 431         PE_install_interrupt_handler(nub, source, target,
 432             (IOInterruptHandler) handler, refCon);
 433
 434         (void) ml_set_interrupts_enabled(current_state);
 435
 436         initialize_screen(NULL, kPEAcquireScreen);
 437 }
 438
 439
 440 void
 441 machine_signal_idle(
 442         processor_t processor)
 443 {
 444         cpu_interrupt(processor->cpu_id);
 445 }
 446
 447 __dead2
 448 void
 449 machine_signal_idle_deferred(
 450         __unused processor_t processor)
 451 {
 452         panic("Unimplemented");
 453 }
 454
 455 __dead2
 456 void
 457 machine_signal_idle_cancel(
 458         __unused processor_t processor)
 459 {
 460         panic("Unimplemented");
 461 }
 462
 463 static kern_return_t
 464 register_cpu(
 465         uint32_t        lapic_id,
 466         processor_t     *processor_out,
 467         boolean_t       boot_cpu )
 468 {
 469         int             target_cpu;
 470         cpu_data_t      *this_cpu_datap;
 471
 472         this_cpu_datap = cpu_data_alloc(boot_cpu);
 473         if (this_cpu_datap == NULL) {
 474                 return KERN_FAILURE;
 475         }
 476         target_cpu = this_cpu_datap->cpu_number;
 477         assert((boot_cpu && (target_cpu == 0)) ||
 478             (!boot_cpu && (target_cpu != 0)));
 479
 480         lapic_cpu_map(lapic_id, target_cpu);
 481
 482         /* The cpu_id is not known at registration phase. Just do
 483          * lapic_id for now
 484          */
 485         this_cpu_datap->cpu_phys_number = lapic_id;
 486
 487         this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
 488         if (this_cpu_datap->cpu_console_buf == NULL) {
 489                 goto failed;
 490         }
 491
 492 #if KPC
 493         if (kpc_register_cpu(this_cpu_datap) != TRUE) {
 494                 goto failed;
 495         }
 496 #endif
 497
 498         if (!boot_cpu) {
 499                 cpu_thread_alloc(this_cpu_datap->cpu_number);
 500                 if (this_cpu_datap->lcpu.core == NULL) {
 501                         goto failed;
 502                 }
 503
 504 #if NCOPY_WINDOWS > 0
 505                 this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu);
 506                 if (this_cpu_datap->cpu_pmap == NULL) {
 507                         goto failed;
 508                 }
 509 #endif
 510
 511                 this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu);
 512                 if (this_cpu_datap->cpu_processor == NULL) {
 513                         goto failed;
 514                 }
 515                 /*
 516                  * processor_init() deferred to topology start
 517                  * because "slot numbers" a.k.a. logical processor numbers
 518                  * are not yet finalized.
 519                  */
 520         }
 521
 522         *processor_out = this_cpu_datap->cpu_processor;
 523
 524         return KERN_SUCCESS;
 525
 526 failed:
 527         cpu_processor_free(this_cpu_datap->cpu_processor);
 528 #if NCOPY_WINDOWS > 0
 529         pmap_cpu_free(this_cpu_datap->cpu_pmap);
 530 #endif
 531         console_cpu_free(this_cpu_datap->cpu_console_buf);
 532 #if KPC
 533         kpc_unregister_cpu(this_cpu_datap);
 534 #endif /* KPC */
 535
 536         return KERN_FAILURE;
 537 }
 538
 539
 540 kern_return_t
 541 ml_processor_register(
 542         cpu_id_t        cpu_id,
 543         uint32_t        lapic_id,
 544         processor_t     *processor_out,
 545         boolean_t       boot_cpu,
 546         boolean_t       start )
 547 {
 548         static boolean_t done_topo_sort = FALSE;
 549         static uint32_t num_registered = 0;
 550
 551         /* Register all CPUs first, and track max */
 552         if (start == FALSE) {
 553                 num_registered++;
 554
 555                 DBG( "registering CPU lapic id %d\n", lapic_id );
 556
 557                 return register_cpu( lapic_id, processor_out, boot_cpu );
 558         }
 559
 560         /* Sort by topology before we start anything */
 561         if (!done_topo_sort) {
 562                 DBG( "about to start CPUs. %d registered\n", num_registered );
 563
 564                 cpu_topology_sort( num_registered );
 565                 done_topo_sort = TRUE;
 566         }
 567
 568         /* Assign the cpu ID */
 569         uint32_t cpunum = -1;
 570         cpu_data_t  *this_cpu_datap = NULL;
 571
 572         /* find cpu num and pointer */
 573         cpunum = ml_get_cpuid( lapic_id );
 574
 575         if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
 576                 panic( "trying to start invalid/unregistered CPU %d\n", lapic_id );
 577         }
 578
 579         this_cpu_datap = cpu_datap(cpunum);
 580
 581         /* fix the CPU id */
 582         this_cpu_datap->cpu_id = cpu_id;
 583
 584         /* allocate and initialize other per-cpu structures */
 585         if (!boot_cpu) {
 586                 mp_cpus_call_cpu_init(cpunum);
 587                 random_cpu_init(cpunum);
 588         }
 589
 590         /* output arg */
 591         *processor_out = this_cpu_datap->cpu_processor;
 592
 593         /* OK, try and start this CPU */
 594         return cpu_topology_start_cpu( cpunum );
 595 }
 596
 597
 598 void
 599 ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
 600 {
 601         boolean_t       os_supports_sse;
 602         i386_cpu_info_t *cpuid_infop;
 603
 604         if (cpu_infop == NULL) {
 605                 return;
 606         }
 607
 608         /*
 609          * Are we supporting MMX/SSE/SSE2/SSE3?
 610          * As distinct from whether the cpu has these capabilities.
 611          */
 612         os_supports_sse = !!(get_cr4() & CR4_OSXMM);
 613
 614         if (ml_fpu_avx_enabled()) {
 615                 cpu_infop->vector_unit = 9;
 616         } else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
 617                 cpu_infop->vector_unit = 8;
 618         } else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
 619                 cpu_infop->vector_unit = 7;
 620         } else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
 621                 cpu_infop->vector_unit = 6;
 622         } else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
 623                 cpu_infop->vector_unit = 5;
 624         } else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
 625                 cpu_infop->vector_unit = 4;
 626         } else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
 627                 cpu_infop->vector_unit = 3;
 628         } else if (cpuid_features() & CPUID_FEATURE_MMX) {
 629                 cpu_infop->vector_unit = 2;
 630         } else {
 631                 cpu_infop->vector_unit = 0;
 632         }
 633
 634         cpuid_infop  = cpuid_info();
 635
 636         cpu_infop->cache_line_size = cpuid_infop->cache_linesize;
 637
 638         cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
 639         cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];
 640
 641         if (cpuid_infop->cache_size[L2U] > 0) {
 642                 cpu_infop->l2_settings = 1;
 643                 cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
 644         } else {
 645                 cpu_infop->l2_settings = 0;
 646                 cpu_infop->l2_cache_size = 0xFFFFFFFF;
 647         }
 648
 649         if (cpuid_infop->cache_size[L3U] > 0) {
 650                 cpu_infop->l3_settings = 1;
 651                 cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
 652         } else {
 653                 cpu_infop->l3_settings = 0;
 654                 cpu_infop->l3_cache_size = 0xFFFFFFFF;
 655         }
 656 }
 657
 658 void
 659 ml_init_max_cpus(unsigned long max_cpus)
 660 {
 661         boolean_t current_state;
 662
 663         current_state = ml_set_interrupts_enabled(FALSE);
 664         if (max_cpus_initialized != MAX_CPUS_SET) {
 665                 if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
 666                         /*
 667                          * Note: max_cpus is the number of enabled processors
 668                          * that ACPI found; max_ncpus is the maximum number
 669                          * that the kernel supports or that the "cpus="
 670                          * boot-arg has set. Here we take int minimum.
 671                          */
 672                         machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
 673                 }
 674                 if (max_cpus_initialized == MAX_CPUS_WAIT) {
 675                         wakeup((event_t)&max_cpus_initialized);
 676                 }
 677                 max_cpus_initialized = MAX_CPUS_SET;
 678         }
 679         (void) ml_set_interrupts_enabled(current_state);
 680 }
 681
 682 int
 683 ml_get_max_cpus(void)
 684 {
 685         boolean_t current_state;
 686
 687         current_state = ml_set_interrupts_enabled(FALSE);
 688         if (max_cpus_initialized != MAX_CPUS_SET) {
 689                 max_cpus_initialized = MAX_CPUS_WAIT;
 690                 assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT);
 691                 (void)thread_block(THREAD_CONTINUE_NULL);
 692         }
 693         (void) ml_set_interrupts_enabled(current_state);
 694         return machine_info.max_cpus;
 695 }
 696
 697 boolean_t
 698 ml_wants_panic_trap_to_debugger(void)
 699 {
 700         return FALSE;
 701 }
 702
 703 void
 704 ml_panic_trap_to_debugger(__unused const char *panic_format_str,
 705     __unused va_list *panic_args,
 706     __unused unsigned int reason,
 707     __unused void *ctx,
 708     __unused uint64_t panic_options_mask,
 709     __unused unsigned long panic_caller)
 710 {
 711         return;
 712 }
 713
 714 /*
 715  *      Routine:        ml_init_lock_timeout
 716  *      Function:
 717  */
 718 void
 719 ml_init_lock_timeout(void)
 720 {
 721         uint64_t        abstime;
 722         uint32_t        mtxspin;
 723 #if DEVELOPMENT || DEBUG
 724         uint64_t        default_timeout_ns = NSEC_PER_SEC >> 2;
 725 #else
 726         uint64_t        default_timeout_ns = NSEC_PER_SEC >> 1;
 727 #endif
 728         uint32_t        slto;
 729         uint32_t        prt;
 730
 731         if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
 732                 default_timeout_ns = slto * NSEC_PER_USEC;
 733         }
 734
 735         /*
 736          * LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks,
 737          * and LockTimeOutUsec is in microseconds and it's 32-bits.
 738          */
 739         LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
 740         nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
 741         LockTimeOut = abstime;
 742         LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);
 743
 744         /*
 745          * TLBTimeOut dictates the TLB flush timeout period. It defaults to
 746          * LockTimeOut but can be overriden separately. In particular, a
 747          * zero value inhibits the timeout-panic and cuts a trace evnt instead
 748          * - see pmap_flush_tlbs().
 749          */
 750         if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
 751                 default_timeout_ns = slto * NSEC_PER_USEC;
 752                 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
 753                 TLBTimeOut = (uint32_t) abstime;
 754         } else {
 755                 TLBTimeOut = LockTimeOut;
 756         }
 757
 758 #if DEVELOPMENT || DEBUG
 759         reportphyreaddelayabs = LockTimeOut >> 1;
 760 #endif
 761         if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
 762                 default_timeout_ns = slto * NSEC_PER_USEC;
 763                 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
 764                 reportphyreaddelayabs = abstime;
 765         }
 766
 767         if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
 768                 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
 769                 reportphywritedelayabs = abstime;
 770         }
 771
 772         if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
 773                 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
 774                 tracephyreaddelayabs = abstime;
 775         }
 776
 777         if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
 778                 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
 779                 tracephywritedelayabs = abstime;
 780         }
 781
 782         if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
 783                 if (mtxspin > USEC_PER_SEC >> 4) {
 784                         mtxspin =  USEC_PER_SEC >> 4;
 785                 }
 786                 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
 787         } else {
 788                 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
 789         }
 790         MutexSpin = (unsigned int)abstime;
 791
 792         nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
 793         if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
 794                 nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
 795         }
 796
 797         virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
 798         if (virtualized) {
 799                 int     vti;
 800
 801                 if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
 802                         vti = 6;
 803                 }
 804                 printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
 805                 kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
 806 #define VIRTUAL_TIMEOUT_INFLATE64(_timeout)                     \
 807 MACRO_BEGIN                                                     \
 808         kprintf("%24s: 0x%016llx ", #_timeout, _timeout);       \
 809         _timeout <<= vti;                                       \
 810         kprintf("-> 0x%016llx\n",  _timeout);                   \
 811 MACRO_END
 812 #define VIRTUAL_TIMEOUT_INFLATE32(_timeout)                     \
 813 MACRO_BEGIN                                                     \
 814         kprintf("%24s:         0x%08x ", #_timeout, _timeout);  \
 815         if ((_timeout <<vti) >> vti == _timeout)                \
 816                 _timeout <<= vti;                               \
 817         else                                                    \
 818                 _timeout = ~0; /* cap rather than overflow */   \
 819         kprintf("-> 0x%08x\n",  _timeout);                      \
 820 MACRO_END
 821                 VIRTUAL_TIMEOUT_INFLATE32(LockTimeOutUsec);
 822                 VIRTUAL_TIMEOUT_INFLATE64(LockTimeOut);
 823                 VIRTUAL_TIMEOUT_INFLATE64(LockTimeOutTSC);
 824                 VIRTUAL_TIMEOUT_INFLATE64(TLBTimeOut);
 825                 VIRTUAL_TIMEOUT_INFLATE64(MutexSpin);
 826                 VIRTUAL_TIMEOUT_INFLATE64(reportphyreaddelayabs);
 827         }
 828
 829         interrupt_latency_tracker_setup();
 830         simple_lock_init(&ml_timer_evaluation_slock, 0);
 831 }
 832
 833 /*
 834  * Threshold above which we should attempt to block
 835  * instead of spinning for clock_delay_until().
 836  */
 837
 838 void
 839 ml_init_delay_spin_threshold(int threshold_us)
 840 {
 841         nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
 842 }
 843
 844 boolean_t
 845 ml_delay_should_spin(uint64_t interval)
 846 {
 847         return (interval < delay_spin_threshold) ? TRUE : FALSE;
 848 }
 849
 850 uint32_t yield_delay_us = 0;
 851
 852 void
 853 ml_delay_on_yield(void)
 854 {
 855 #if DEVELOPMENT || DEBUG
 856         if (yield_delay_us) {
 857                 delay(yield_delay_us);
 858         }
 859 #endif
 860 }
 861
 862 /*
 863  * This is called from the machine-independent layer
 864  * to perform machine-dependent info updates. Defer to cpu_thread_init().
 865  */
 866 void
 867 ml_cpu_up(void)
 868 {
 869         return;
 870 }
 871
 872 /*
 873  * This is called from the machine-independent layer
 874  * to perform machine-dependent info updates.
 875  */
 876 void
 877 ml_cpu_down(void)
 878 {
 879         i386_deactivate_cpu();
 880
 881         return;
 882 }
 883
 884 /*
 885  * The following are required for parts of the kernel
 886  * that cannot resolve these functions as inlines:
 887  */
 888 extern thread_t current_act(void) __attribute__((const));
 889 thread_t
 890 current_act(void)
 891 {
 892         return current_thread_fast();
 893 }
 894
 895 #undef current_thread
 896 extern thread_t current_thread(void) __attribute__((const));
 897 thread_t
 898 current_thread(void)
 899 {
 900         return current_thread_fast();
 901 }
 902
 903
 904 boolean_t
 905 ml_is64bit(void)
 906 {
 907         return cpu_mode_is64bit();
 908 }
 909
 910
 911 boolean_t
 912 ml_thread_is64bit(thread_t thread)
 913 {
 914         return thread_is_64bit_addr(thread);
 915 }
 916
 917
 918 boolean_t
 919 ml_state_is64bit(void *saved_state)
 920 {
 921         return is_saved_state64(saved_state);
 922 }
 923
 924 void
 925 ml_cpu_set_ldt(int selector)
 926 {
 927         /*
 928          * Avoid loading the LDT
 929          * if we're setting the KERNEL LDT and it's already set.
 930          */
 931         if (selector == KERNEL_LDT &&
 932             current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
 933                 return;
 934         }
 935
 936         lldt(selector);
 937         current_cpu_datap()->cpu_ldt = selector;
 938 }
 939
 940 void
 941 ml_fp_setvalid(boolean_t value)
 942 {
 943         fp_setvalid(value);
 944 }
 945
 946 uint64_t
 947 ml_cpu_int_event_time(void)
 948 {
 949         return current_cpu_datap()->cpu_int_event_time;
 950 }
 951
 952 vm_offset_t
 953 ml_stack_remaining(void)
 954 {
 955         uintptr_t local = (uintptr_t) &local;
 956
 957         if (ml_at_interrupt_context() != 0) {
 958                 return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
 959         } else {
 960                 return local - current_thread()->kernel_stack;
 961         }
 962 }
 963
 964 #if KASAN
 965 vm_offset_t ml_stack_base(void);
 966 vm_size_t ml_stack_size(void);
 967
 968 vm_offset_t
 969 ml_stack_base(void)
 970 {
 971         if (ml_at_interrupt_context()) {
 972                 return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
 973         } else {
 974                 return current_thread()->kernel_stack;
 975         }
 976 }
 977
 978 vm_size_t
 979 ml_stack_size(void)
 980 {
 981         if (ml_at_interrupt_context()) {
 982                 return INTSTACK_SIZE;
 983         } else {
 984                 return kernel_stack_size;
 985         }
 986 }
 987 #endif
 988
 989 void
 990 kernel_preempt_check(void)
 991 {
 992         boolean_t       intr;
 993         unsigned long flags;
 994
 995         assert(get_preemption_level() == 0);
 996
 997         if (__improbable(*ast_pending() & AST_URGENT)) {
 998                 /*
 999                  * can handle interrupts and preemptions
1000                  * at this point
1001                  */
1002                 __asm__ volatile ("pushf; pop   %0"  :  "=r" (flags));
1003
1004                 intr = ((flags & EFL_IF) != 0);
1005
1006                 /*
1007                  * now cause the PRE-EMPTION trap
1008                  */
1009                 if (intr == TRUE) {
1010                         __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
1011                 }
1012         }
1013 }
1014
1015 boolean_t
1016 machine_timeout_suspended(void)
1017 {
1018         return pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake();
1019 }
1020
1021 /* Eagerly evaluate all pending timer and thread callouts
1022  */
1023 void
1024 ml_timer_evaluate(void)
1025 {
1026         KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);
1027
1028         uint64_t te_end, te_start = mach_absolute_time();
1029         simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
1030         ml_timer_evaluation_in_progress = TRUE;
1031         thread_call_delayed_timer_rescan_all();
1032         mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
1033         ml_timer_evaluation_in_progress = FALSE;
1034         ml_timer_eager_evaluations++;
1035         te_end = mach_absolute_time();
1036         ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
1037         simple_unlock(&ml_timer_evaluation_slock);
1038
1039         KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
1040 }
1041
1042 boolean_t
1043 ml_timer_forced_evaluation(void)
1044 {
1045         return ml_timer_evaluation_in_progress;
1046 }
1047
/*
 * 32-bit right-rotate by n bits.
 *
 * The rotate count is taken modulo 32, so n == 0 and n == 32 both
 * return val unchanged (the same count masking the rorl instruction
 * applies). Written as the masked shift/or idiom rather than inline
 * assembly: both shift amounts stay in [0, 31], so there is no
 * undefined shift, and the compiler is free to constant-fold the
 * expression or emit a single rotate instruction.
 */
static inline uint32_t
ror32(uint32_t val, const unsigned int n)
{
        const unsigned int shift = n & 31u;

        return (val >> shift) | (val << ((32u - shift) & 31u));
}
1055
1056 void
1057 ml_entropy_collect(void)
1058 {
1059         uint32_t        tsc_lo, tsc_hi;
1060         uint32_t        *ep;
1061
1062         assert(cpu_number() == master_cpu);
1063
1064         /* update buffer pointer cyclically */
1065         ep = EntropyData.buffer + (EntropyData.sample_count & ENTROPY_BUFFER_INDEX_MASK);
1066         EntropyData.sample_count += 1;
1067
1068         rdtsc_nofence(tsc_lo, tsc_hi);
1069         *ep = ror32(*ep, 9) ^ tsc_lo;
1070 }
1071
1072 uint64_t
1073 ml_energy_stat(__unused thread_t t)
1074 {
1075         return 0;
1076 }
1077
1078 void
1079 ml_gpu_stat_update(uint64_t gpu_ns_delta)
1080 {
1081         current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
1082 }
1083
1084 uint64_t
1085 ml_gpu_stat(thread_t t)
1086 {
1087         return t->machine.thread_gpu_ns;
1088 }
1089
/* Flag that starts out 0 and is reset to 0 by plctrace_disable(). */
int plctrace_enabled = 0;
1091
/*
 * Callable wrapper that forwards to disable_preemption_internal().
 */
void
_disable_preemption(void)
{
        disable_preemption_internal();
}
1097
/*
 * Callable wrapper that forwards to enable_preemption_internal().
 */
void
_enable_preemption(void)
{
        enable_preemption_internal();
}
1103
1104 void
1105 plctrace_disable(void)
1106 {
1107         plctrace_enabled = 0;
1108 }
1109
1110 static boolean_t ml_quiescing;
1111
1112 void
1113 ml_set_is_quiescing(boolean_t quiescing)
1114 {
1115         assert(FALSE == ml_get_interrupts_enabled());
1116         ml_quiescing = quiescing;
1117 }
1118
1119 boolean_t
1120 ml_is_quiescing(void)
1121 {
1122         assert(FALSE == ml_get_interrupts_enabled());
1123         return ml_quiescing;
1124 }
1125
/*
 * Booter memory size query. This implementation always reports 0.
 */
uint64_t
ml_get_booter_memory_size(void)
{
        const uint64_t booter_memory_size = 0;

        return booter_memory_size;
}