1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <i386/machine_routines.h>
30 #include <i386/io_map_entries.h>
31 #include <i386/cpuid.h>
32 #include <i386/fpu.h>
33 #include <mach/processor.h>
34 #include <kern/processor.h>
35 #include <kern/machine.h>
36
37 #include <kern/cpu_number.h>
38 #include <kern/thread.h>
39 #include <kern/thread_call.h>
40 #include <kern/policy_internal.h>
41
42 #include <prng/random.h>
43 #include <i386/machine_cpu.h>
44 #include <i386/lapic.h>
45 #include <i386/bit_routines.h>
46 #include <i386/mp_events.h>
47 #include <i386/pmCPU.h>
48 #include <i386/trap.h>
49 #include <i386/tsc.h>
50 #include <i386/cpu_threads.h>
51 #include <i386/proc_reg.h>
52 #include <mach/vm_param.h>
53 #include <i386/pmap.h>
54 #include <i386/pmap_internal.h>
55 #include <i386/misc_protos.h>
56 #include <kern/timer_queue.h>
57 #if KPC
58 #include <kern/kpc.h>
59 #endif
60 #include <architecture/i386/pio.h>
61 #include <i386/cpu_data.h>
62 #if DEBUG
63 #define DBG(x...) kprintf("DBG: " x)
64 #else
65 #define DBG(x...)
66 #endif
67
68 #if MONOTONIC
69 #include <kern/monotonic.h>
70 #endif /* MONOTONIC */
71
72 extern void wakeup(void *);
73
74 static int max_cpus_initialized = 0;
75
76 uint64_t LockTimeOut;
77 uint64_t TLBTimeOut;
78 uint64_t LockTimeOutTSC;
79 uint32_t LockTimeOutUsec;
80 uint64_t MutexSpin;
81 uint64_t low_MutexSpin;
82 int64_t high_MutexSpin;
83 uint64_t LastDebuggerEntryAllowance;
84 uint64_t delay_spin_threshold;
85
86 extern uint64_t panic_restart_timeout;
87
88 boolean_t virtualized = FALSE;
89
90 decl_simple_lock_data(static, ml_timer_evaluation_slock);
91 uint32_t ml_timer_eager_evaluations;
92 uint64_t ml_timer_eager_evaluation_max;
93 static boolean_t ml_timer_evaluation_in_progress = FALSE;
94
95
96 #define MAX_CPUS_SET 0x1
97 #define MAX_CPUS_WAIT 0x2
98
99 /* IO memory map services */
100
101 /* Map memory-mapped IO space */
102 vm_offset_t
103 ml_io_map(
104 vm_offset_t phys_addr,
105 vm_size_t size)
106 {
107 return io_map(phys_addr, size, VM_WIMG_IO);
108 }
109
110 /* boot memory allocation */
111 vm_offset_t
112 ml_static_malloc(
113 __unused vm_size_t size)
114 {
115 return (vm_offset_t)NULL;
116 }
117
118
119 void
120 ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
121 {
122 *phys_addr = 0;
123 *size = 0;
124 }
125
126
127 vm_offset_t
128 ml_static_ptovirt(
129 vm_offset_t paddr)
130 {
131 #if defined(__x86_64__)
132 return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
133 #else
134 return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
135 #endif
136 }
137
138 vm_offset_t
139 ml_static_slide(
140 vm_offset_t vaddr)
141 {
142 return VM_KERNEL_SLIDE(vaddr);
143 }
144
145 vm_offset_t
146 ml_static_unslide(
147 vm_offset_t vaddr)
148 {
149 return VM_KERNEL_UNSLIDE(vaddr);
150 }
151
152 /*
153 * Reclaim memory, by virtual address, that was used in early boot and is no longer needed
154 * by the kernel.
155 */
156 void
157 ml_static_mfree(
158 vm_offset_t vaddr,
159 vm_size_t size)
160 {
161 addr64_t vaddr_cur;
162 ppnum_t ppn;
163 uint32_t freed_pages = 0;
164 vm_size_t map_size;
165
166 assert(vaddr >= VM_MIN_KERNEL_ADDRESS);
167
168 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
169
170 for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) {
171 map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur);
172
173 /* just skip if nothing mapped here */
174 if (map_size == 0) {
175 vaddr_cur += PAGE_SIZE;
176 continue;
177 }
178
179 /*
180 * Can't free from the middle of a large page.
181 */
182 assert((vaddr_cur & (map_size - 1)) == 0);
183
184 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
185 assert(ppn != (ppnum_t)NULL);
186
187 pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size);
188 while (map_size > 0) {
189 if (++kernel_pmap->stats.resident_count > kernel_pmap->stats.resident_max) {
190 kernel_pmap->stats.resident_max = kernel_pmap->stats.resident_count;
191 }
192
193 assert(pmap_valid_page(ppn));
194 if (IS_MANAGED_PAGE(ppn)) {
195 vm_page_create(ppn, (ppn + 1));
196 freed_pages++;
197 }
198 map_size -= PAGE_SIZE;
199 vaddr_cur += PAGE_SIZE;
200 ppn++;
201 }
202 }
203 vm_page_lockspin_queues();
204 vm_page_wire_count -= freed_pages;
205 vm_page_wire_count_initial -= freed_pages;
206 if (vm_page_wire_count_on_boot != 0) {
207 assert(vm_page_wire_count_on_boot >= freed_pages);
208 vm_page_wire_count_on_boot -= freed_pages;
209 }
210 vm_page_unlock_queues();
211
212 #if DEBUG
213 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
214 #endif
215 }
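
/*
 * Usage sketch (kept under #if 0, not compiled): releasing a page-aligned
 * region that was only needed during early boot. The symbols
 * bootstrap_table_base and bootstrap_table_size are hypothetical.
 */
#if 0
	assert((bootstrap_table_base & PAGE_MASK) == 0);
	ml_static_mfree(bootstrap_table_base, bootstrap_table_size);
#endif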
216
217
218 /* virtual to physical on wired pages */
219 vm_offset_t
220 ml_vtophys(
221 vm_offset_t vaddr)
222 {
223 return (vm_offset_t)kvtophys(vaddr);
224 }
225
226 /*
227 * Routine: ml_nofault_copy
228 * Function: Perform a physical mode copy if the source and
229 * destination have valid translations in the kernel pmap.
230 * If translations are present, they are assumed to
231 * be wired; i.e. no attempt is made to guarantee that the
232 * translations obtained remain valid for
233 * the duration of the copy process.
234 */
235
236 vm_size_t
237 ml_nofault_copy(
238 vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
239 {
240 addr64_t cur_phys_dst, cur_phys_src;
241 uint32_t count, nbytes = 0;
242
243 while (size > 0) {
244 if (!(cur_phys_src = kvtophys(virtsrc))) {
245 break;
246 }
247 if (!(cur_phys_dst = kvtophys(virtdst))) {
248 break;
249 }
250 if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
251 break;
252 }
253 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
254 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
255 count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
256 }
257 if (count > size) {
258 count = (uint32_t)size;
259 }
260
261 bcopy_phys(cur_phys_src, cur_phys_dst, count);
262
263 nbytes += count;
264 virtsrc += count;
265 virtdst += count;
266 size -= count;
267 }
268
269 return nbytes;
270 }
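
/*
 * Usage sketch (kept under #if 0, not compiled): a caller that wants a
 * best-effort copy from a kernel virtual address that may be unmapped checks
 * the byte count returned by ml_nofault_copy() rather than assuming the full
 * copy succeeded. The wrapper below and its parameters are hypothetical.
 */
#if 0
static boolean_t
copy_if_mapped(vm_offset_t src, vm_offset_t dst, vm_size_t len)
{
	/* ml_nofault_copy() stops early at the first untranslated page */
	vm_size_t copied = ml_nofault_copy(src, dst, len);

	return (copied == len) ? TRUE : FALSE;
}
#endif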
271
272 /*
273 * Routine: ml_validate_nofault
274 * Function: Validate that this address range has valid translations
275 * in the kernel pmap. If translations are present, they are
276 * assumed to be wired; i.e. no attempt is made to guarantee
277 * that the translations persist after the check.
278 * Returns: TRUE if the range is mapped and will not cause a fault,
279 * FALSE otherwise.
280 */
281
282 boolean_t
283 ml_validate_nofault(
284 vm_offset_t virtsrc, vm_size_t size)
285 {
286 addr64_t cur_phys_src;
287 uint32_t count;
288
289 while (size > 0) {
290 if (!(cur_phys_src = kvtophys(virtsrc))) {
291 return FALSE;
292 }
293 if (!pmap_valid_page(i386_btop(cur_phys_src))) {
294 return FALSE;
295 }
296 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
297 if (count > size) {
298 count = (uint32_t)size;
299 }
300
301 virtsrc += count;
302 size -= count;
303 }
304
305 return TRUE;
306 }
307
308 /* Interrupt handling */
309
310 /* Initialize Interrupts */
311 void
312 ml_init_interrupt(void)
313 {
314 (void) ml_set_interrupts_enabled(TRUE);
315 }
316
317
318 /* Get Interrupts Enabled */
319 boolean_t
320 ml_get_interrupts_enabled(void)
321 {
322 unsigned long flags;
323
324 __asm__ volatile ("pushf; pop %0": "=r" (flags));
325 return (flags & EFL_IF) != 0;
326 }
327
328 /* Set Interrupts Enabled */
329 boolean_t
330 ml_set_interrupts_enabled(boolean_t enable)
331 {
332 unsigned long flags;
333 boolean_t istate;
334
335 __asm__ volatile ("pushf; pop %0" : "=r" (flags));
336
337 assert(get_interrupt_level() ? (enable == FALSE) : TRUE);
338
339 istate = ((flags & EFL_IF) != 0);
340
341 if (enable) {
342 __asm__ volatile ("sti;nop");
343
344 if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
345 __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
346 }
347 } else {
348 if (istate) {
349 __asm__ volatile ("cli");
350 }
351 }
352
353 return istate;
354 }
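
/*
 * Illustrative sketch (kept under #if 0, not compiled): the usual pattern for
 * a critical section is to save the previous interrupt state and restore it,
 * rather than unconditionally re-enabling, so nested callers behave correctly.
 * This mirrors the usage elsewhere in this file.
 */
#if 0
	boolean_t istate = ml_set_interrupts_enabled(FALSE);

	/* ... interrupt-free work ... */

	(void) ml_set_interrupts_enabled(istate);	/* restore, don't force TRUE */
#endif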
355
356 /* Early Set Interrupts Enabled */
357 boolean_t
358 ml_early_set_interrupts_enabled(boolean_t enable)
359 {
360 if (enable == TRUE) {
361 kprintf("Caller attempted to enable interrupts too early in "
362 "kernel startup. Halting.\n");
363 hlt();
364 /*NOTREACHED*/
365 }
366
367 /* On x86, do not allow interrupts to be enabled very early */
368 return FALSE;
369 }
370
371 /* Check if running in interrupt context */
372 boolean_t
373 ml_at_interrupt_context(void)
374 {
375 return get_interrupt_level() != 0;
376 }
377
378 void
379 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
380 {
381 *icp = (get_interrupt_level() != 0);
382 /* These will be technically inaccurate for interrupts that occur
383 * successively within a single "idle exit" event, but shouldn't
384 * matter statistically.
385 */
386 *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
387 }
388
389 /* Generate a fake interrupt */
390 __dead2
391 void
392 ml_cause_interrupt(void)
393 {
394 panic("ml_cause_interrupt not defined yet on Intel");
395 }
396
397 /*
398 * TODO: transition users of this to kernel_thread_start_priority
399 * ml_thread_policy is an unsupported KPI
400 */
401 void
402 ml_thread_policy(
403 thread_t thread,
404 __unused unsigned policy_id,
405 unsigned policy_info)
406 {
407 if (policy_info & MACHINE_NETWORK_WORKLOOP) {
408 thread_precedence_policy_data_t info;
409 __assert_only kern_return_t kret;
410
411 info.importance = 1;
412
413 kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
414 (thread_policy_t)&info,
415 THREAD_PRECEDENCE_POLICY_COUNT);
416 assert(kret == KERN_SUCCESS);
417 }
418 }
419
420 /* Install an interrupt handler */
421 void
422 ml_install_interrupt_handler(
423 void *nub,
424 int source,
425 void *target,
426 IOInterruptHandler handler,
427 void *refCon)
428 {
429 boolean_t current_state;
430
431 current_state = ml_set_interrupts_enabled(FALSE);
432
433 PE_install_interrupt_handler(nub, source, target,
434 (IOInterruptHandler) handler, refCon);
435
436 (void) ml_set_interrupts_enabled(current_state);
437
438 initialize_screen(NULL, kPEAcquireScreen);
439 }
440
441
442 void
443 machine_signal_idle(
444 processor_t processor)
445 {
446 cpu_interrupt(processor->cpu_id);
447 }
448
449 __dead2
450 void
451 machine_signal_idle_deferred(
452 __unused processor_t processor)
453 {
454 panic("Unimplemented");
455 }
456
457 __dead2
458 void
459 machine_signal_idle_cancel(
460 __unused processor_t processor)
461 {
462 panic("Unimplemented");
463 }
464
465 static kern_return_t
466 register_cpu(
467 uint32_t lapic_id,
468 processor_t *processor_out,
469 boolean_t boot_cpu )
470 {
471 int target_cpu;
472 cpu_data_t *this_cpu_datap;
473
474 this_cpu_datap = cpu_data_alloc(boot_cpu);
475 if (this_cpu_datap == NULL) {
476 return KERN_FAILURE;
477 }
478 target_cpu = this_cpu_datap->cpu_number;
479 assert((boot_cpu && (target_cpu == 0)) ||
480 (!boot_cpu && (target_cpu != 0)));
481
482 lapic_cpu_map(lapic_id, target_cpu);
483
484 /* The cpu_id is not known at registration time. Just use the
485 * lapic_id for now.
486 */
487 this_cpu_datap->cpu_phys_number = lapic_id;
488
489 this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
490 if (this_cpu_datap->cpu_console_buf == NULL) {
491 goto failed;
492 }
493
494 #if KPC
495 if (kpc_register_cpu(this_cpu_datap) != TRUE) {
496 goto failed;
497 }
498 #endif
499
500 if (!boot_cpu) {
501 cpu_thread_alloc(this_cpu_datap->cpu_number);
502 if (this_cpu_datap->lcpu.core == NULL) {
503 goto failed;
504 }
505
506 #if NCOPY_WINDOWS > 0
507 this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu);
508 if (this_cpu_datap->cpu_pmap == NULL) {
509 goto failed;
510 }
511 #endif
512
513 this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu);
514 if (this_cpu_datap->cpu_processor == NULL) {
515 goto failed;
516 }
517 /*
518 * processor_init() deferred to topology start
519 * because "slot numbers" a.k.a. logical processor numbers
520 * are not yet finalized.
521 */
522 }
523
524 *processor_out = this_cpu_datap->cpu_processor;
525
526 return KERN_SUCCESS;
527
528 failed:
529 cpu_processor_free(this_cpu_datap->cpu_processor);
530 #if NCOPY_WINDOWS > 0
531 pmap_cpu_free(this_cpu_datap->cpu_pmap);
532 #endif
533 console_cpu_free(this_cpu_datap->cpu_console_buf);
534 #if KPC
535 kpc_unregister_cpu(this_cpu_datap);
536 #endif /* KPC */
537
538 return KERN_FAILURE;
539 }
540
541
542 kern_return_t
543 ml_processor_register(
544 cpu_id_t cpu_id,
545 uint32_t lapic_id,
546 processor_t *processor_out,
547 boolean_t boot_cpu,
548 boolean_t start )
549 {
550 static boolean_t done_topo_sort = FALSE;
551 static uint32_t num_registered = 0;
552
553 /* Register all CPUs first, and track max */
554 if (start == FALSE) {
555 num_registered++;
556
557 DBG( "registering CPU lapic id %d\n", lapic_id );
558
559 return register_cpu( lapic_id, processor_out, boot_cpu );
560 }
561
562 /* Sort by topology before we start anything */
563 if (!done_topo_sort) {
564 DBG( "about to start CPUs. %d registered\n", num_registered );
565
566 cpu_topology_sort( num_registered );
567 done_topo_sort = TRUE;
568 }
569
570 /* Assign the cpu ID */
571 uint32_t cpunum = -1;
572 cpu_data_t *this_cpu_datap = NULL;
573
574 /* find cpu num and pointer */
575 cpunum = ml_get_cpuid( lapic_id );
576
577 if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
578 panic( "trying to start invalid/unregistered CPU %d\n", lapic_id );
579 }
580
581 this_cpu_datap = cpu_datap(cpunum);
582
583 /* fix the CPU id */
584 this_cpu_datap->cpu_id = cpu_id;
585
586 /* allocate and initialize other per-cpu structures */
587 if (!boot_cpu) {
588 mp_cpus_call_cpu_init(cpunum);
589 random_cpu_init(cpunum);
590 }
591
592 /* output arg */
593 *processor_out = this_cpu_datap->cpu_processor;
594
595 /* OK, try and start this CPU */
596 return cpu_topology_start_cpu( cpunum );
597 }
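
/*
 * Illustrative sketch (kept under #if 0, not compiled): the platform code is
 * expected to call ml_processor_register() twice per CPU - once with
 * start == FALSE to register it, and later with start == TRUE to sort the
 * topology and bring it up. The ncpus, lapic_ids[], cpu_ids[] and procs[]
 * names below are hypothetical.
 */
#if 0
	processor_t	procs[MAX_CPUS];

	for (i = 0; i < ncpus; i++) {		/* phase 1: register everything */
		ml_processor_register(NULL, lapic_ids[i], &procs[i],
		    (i == 0), FALSE);
	}
	for (i = 0; i < ncpus; i++) {		/* phase 2: sort topology, then start */
		ml_processor_register(cpu_ids[i], lapic_ids[i], &procs[i],
		    (i == 0), TRUE);
	}
#endif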
598
599
600 void
601 ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
602 {
603 boolean_t os_supports_sse;
604 i386_cpu_info_t *cpuid_infop;
605
606 if (cpu_infop == NULL) {
607 return;
608 }
609
610 /*
611 * Does the OS support MMX/SSE/SSE2/SSE3,
612 * as distinct from whether the CPU has these capabilities?
613 */
614 os_supports_sse = !!(get_cr4() & CR4_OSXMM);
615
616 if (ml_fpu_avx_enabled()) {
617 cpu_infop->vector_unit = 9;
618 } else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
619 cpu_infop->vector_unit = 8;
620 } else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
621 cpu_infop->vector_unit = 7;
622 } else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
623 cpu_infop->vector_unit = 6;
624 } else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
625 cpu_infop->vector_unit = 5;
626 } else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
627 cpu_infop->vector_unit = 4;
628 } else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
629 cpu_infop->vector_unit = 3;
630 } else if (cpuid_features() & CPUID_FEATURE_MMX) {
631 cpu_infop->vector_unit = 2;
632 } else {
633 cpu_infop->vector_unit = 0;
634 }
635
636 cpuid_infop = cpuid_info();
637
638 cpu_infop->cache_line_size = cpuid_infop->cache_linesize;
639
640 cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
641 cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];
642
643 if (cpuid_infop->cache_size[L2U] > 0) {
644 cpu_infop->l2_settings = 1;
645 cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
646 } else {
647 cpu_infop->l2_settings = 0;
648 cpu_infop->l2_cache_size = 0xFFFFFFFF;
649 }
650
651 if (cpuid_infop->cache_size[L3U] > 0) {
652 cpu_infop->l3_settings = 1;
653 cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
654 } else {
655 cpu_infop->l3_settings = 0;
656 cpu_infop->l3_cache_size = 0xFFFFFFFF;
657 }
658 }
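
/*
 * Usage sketch (kept under #if 0, not compiled): a caller fills an
 * ml_cpu_info_t on the stack and inspects the reported vector unit and
 * cache geometry, as populated by the ladder above.
 */
#if 0
	ml_cpu_info_t info;

	ml_cpu_get_info(&info);
	if (info.vector_unit >= 9) {
		/* the OS has AVX (or later) enabled */
	}
	if (info.l3_settings == 0) {
		/* no L3 cache: l3_cache_size is reported as 0xFFFFFFFF */
	}
#endif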
659
660 void
661 ml_init_max_cpus(unsigned long max_cpus)
662 {
663 boolean_t current_state;
664
665 current_state = ml_set_interrupts_enabled(FALSE);
666 if (max_cpus_initialized != MAX_CPUS_SET) {
667 if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
668 /*
669 * Note: max_cpus is the number of enabled processors
670 * that ACPI found; max_ncpus is the maximum number
671 * that the kernel supports or that the "cpus="
672 * boot-arg has set. Here we take the minimum of the two.
673 */
674 machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
675 }
676 if (max_cpus_initialized == MAX_CPUS_WAIT) {
677 wakeup((event_t)&max_cpus_initialized);
678 }
679 max_cpus_initialized = MAX_CPUS_SET;
680 }
681 (void) ml_set_interrupts_enabled(current_state);
682 }
683
684 int
685 ml_get_max_cpus(void)
686 {
687 boolean_t current_state;
688
689 current_state = ml_set_interrupts_enabled(FALSE);
690 if (max_cpus_initialized != MAX_CPUS_SET) {
691 max_cpus_initialized = MAX_CPUS_WAIT;
692 assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT);
693 (void)thread_block(THREAD_CONTINUE_NULL);
694 }
695 (void) ml_set_interrupts_enabled(current_state);
696 return machine_info.max_cpus;
697 }
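
/*
 * Illustrative sketch (kept under #if 0, not compiled): callers that run
 * before ACPI has reported the processor set simply block inside
 * ml_get_max_cpus() until ml_init_max_cpus() posts the wakeup above.
 */
#if 0
	int ncpu = ml_get_max_cpus();	/* may block until initialization */

	for (int i = 0; i < ncpu; i++) {
		/* size per-CPU resources, etc. */
	}
#endif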
698
699 boolean_t
700 ml_wants_panic_trap_to_debugger(void)
701 {
702 return FALSE;
703 }
704
705 void
706 ml_panic_trap_to_debugger(__unused const char *panic_format_str,
707 __unused va_list *panic_args,
708 __unused unsigned int reason,
709 __unused void *ctx,
710 __unused uint64_t panic_options_mask,
711 __unused unsigned long panic_caller)
712 {
713 return;
714 }
715
716 /*
717 * Routine: ml_init_lock_timeout
718 * Function:
719 */
720 void
721 ml_init_lock_timeout(void)
722 {
723 uint64_t abstime;
724 uint32_t mtxspin;
725 #if DEVELOPMENT || DEBUG
726 uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
727 #else
728 uint64_t default_timeout_ns = NSEC_PER_SEC >> 1;
729 #endif
730 uint32_t slto;
731 uint32_t prt;
732
733 if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
734 default_timeout_ns = slto * NSEC_PER_USEC;
735 }
736
737 /*
738 * LockTimeOut is in absolute time, LockTimeOutTSC is in TSC ticks,
739 * and LockTimeOutUsec is in microseconds (a 32-bit value).
740 */
741 LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
742 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
743 LockTimeOut = abstime;
744 LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);
745
746 /*
747 * TLBTimeOut dictates the TLB flush timeout period. It defaults to
748 * LockTimeOut but can be overridden separately. In particular, a
749 * zero value inhibits the timeout panic and cuts a trace event instead
750 * - see pmap_flush_tlbs().
751 */
752 if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
753 default_timeout_ns = slto * NSEC_PER_USEC;
754 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
755 TLBTimeOut = (uint32_t) abstime;
756 } else {
757 TLBTimeOut = LockTimeOut;
758 }
759
760 #if DEVELOPMENT || DEBUG
761 reportphyreaddelayabs = LockTimeOut >> 1;
762 #endif
763 if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
764 default_timeout_ns = slto * NSEC_PER_USEC;
765 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
766 reportphyreaddelayabs = abstime;
767 }
768
769 if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
770 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
771 reportphywritedelayabs = abstime;
772 }
773
774 if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
775 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
776 tracephyreaddelayabs = abstime;
777 }
778
779 if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
780 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
781 tracephywritedelayabs = abstime;
782 }
783
784 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
785 if (mtxspin > USEC_PER_SEC >> 4) {
786 mtxspin = USEC_PER_SEC >> 4;
787 }
788 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
789 } else {
790 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
791 }
792 MutexSpin = (unsigned int)abstime;
793 low_MutexSpin = MutexSpin;
794 /*
795 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
796 * real_ncpus has not been set at this point.
797 */
798 high_MutexSpin = -1;
799
800 nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
801 if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
802 nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
803 }
804
805 virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
806 if (virtualized) {
807 int vti;
808
809 if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
810 vti = 6;
811 }
812 printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
813 kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
814 #define VIRTUAL_TIMEOUT_INFLATE64(_timeout) \
815 MACRO_BEGIN \
816 kprintf("%24s: 0x%016llx ", #_timeout, _timeout); \
817 _timeout <<= vti; \
818 kprintf("-> 0x%016llx\n", _timeout); \
819 MACRO_END
820 #define VIRTUAL_TIMEOUT_INFLATE32(_timeout) \
821 MACRO_BEGIN \
822 kprintf("%24s: 0x%08x ", #_timeout, _timeout); \
823 if ((_timeout <<vti) >> vti == _timeout) \
824 _timeout <<= vti; \
825 else \
826 _timeout = ~0; /* cap rather than overflow */ \
827 kprintf("-> 0x%08x\n", _timeout); \
828 MACRO_END
829 VIRTUAL_TIMEOUT_INFLATE32(LockTimeOutUsec);
830 VIRTUAL_TIMEOUT_INFLATE64(LockTimeOut);
831 VIRTUAL_TIMEOUT_INFLATE64(LockTimeOutTSC);
832 VIRTUAL_TIMEOUT_INFLATE64(TLBTimeOut);
833 VIRTUAL_TIMEOUT_INFLATE64(MutexSpin);
834 VIRTUAL_TIMEOUT_INFLATE64(low_MutexSpin);
835 VIRTUAL_TIMEOUT_INFLATE64(reportphyreaddelayabs);
836 }
837
838 interrupt_latency_tracker_setup();
839 simple_lock_init(&ml_timer_evaluation_slock, 0);
840 }
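
/*
 * Illustrative sketch (kept under #if 0, not compiled): the 32-bit timeout
 * inflation above shifts the value left by "vti" and saturates to ~0 if the
 * shift would overflow. Expressed as a plain helper for clarity:
 */
#if 0
static uint32_t
inflate32_saturating(uint32_t timeout, int vti)
{
	/* shifting back recovers the original only when no bits were lost */
	if (((timeout << vti) >> vti) == timeout) {
		return timeout << vti;
	}
	return ~0U;	/* cap rather than overflow */
}
#endif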
841
842 /*
843 * Threshold above which we should attempt to block
844 * instead of spinning for clock_delay_until().
845 */
846
847 void
848 ml_init_delay_spin_threshold(int threshold_us)
849 {
850 nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
851 }
852
853 boolean_t
854 ml_delay_should_spin(uint64_t interval)
855 {
856 return (interval < delay_spin_threshold) ? TRUE : FALSE;
857 }
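
/*
 * Illustrative sketch (kept under #if 0, not compiled): a delay routine can
 * use this threshold to decide between busy-waiting and blocking.
 * spin_wait() and block_wait() are hypothetical stand-ins.
 */
#if 0
	if (ml_delay_should_spin(interval)) {
		spin_wait(interval);	/* short interval: cheaper to spin */
	} else {
		block_wait(interval);	/* long interval: give up the CPU */
	}
#endif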
858
859 uint32_t yield_delay_us = 0;
860
861 void
862 ml_delay_on_yield(void)
863 {
864 #if DEVELOPMENT || DEBUG
865 if (yield_delay_us) {
866 delay(yield_delay_us);
867 }
868 #endif
869 }
870
871 /*
872 * This is called from the machine-independent layer
873 * to perform machine-dependent info updates. Defer to cpu_thread_init().
874 */
875 void
876 ml_cpu_up(void)
877 {
878 return;
879 }
880
881 /*
882 * This is called from the machine-independent layer
883 * to perform machine-dependent info updates.
884 */
885 void
886 ml_cpu_down(void)
887 {
888 i386_deactivate_cpu();
889
890 return;
891 }
892
893 /*
894 * The following are required for parts of the kernel
895 * that cannot resolve these functions as inlines:
896 */
897 extern thread_t current_act(void) __attribute__((const));
898 thread_t
899 current_act(void)
900 {
901 return current_thread_fast();
902 }
903
904 #undef current_thread
905 extern thread_t current_thread(void) __attribute__((const));
906 thread_t
907 current_thread(void)
908 {
909 return current_thread_fast();
910 }
911
912
913 boolean_t
914 ml_is64bit(void)
915 {
916 return cpu_mode_is64bit();
917 }
918
919
920 boolean_t
921 ml_thread_is64bit(thread_t thread)
922 {
923 return thread_is_64bit_addr(thread);
924 }
925
926
927 boolean_t
928 ml_state_is64bit(void *saved_state)
929 {
930 return is_saved_state64(saved_state);
931 }
932
933 void
934 ml_cpu_set_ldt(int selector)
935 {
936 /*
937 * Avoid loading the LDT
938 * if we're setting the KERNEL LDT and it's already set.
939 */
940 if (selector == KERNEL_LDT &&
941 current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
942 return;
943 }
944
945 lldt(selector);
946 current_cpu_datap()->cpu_ldt = selector;
947 }
948
949 void
950 ml_fp_setvalid(boolean_t value)
951 {
952 fp_setvalid(value);
953 }
954
955 uint64_t
956 ml_cpu_int_event_time(void)
957 {
958 return current_cpu_datap()->cpu_int_event_time;
959 }
960
961 vm_offset_t
962 ml_stack_remaining(void)
963 {
964 uintptr_t local = (uintptr_t) &local;
965
966 if (ml_at_interrupt_context() != 0) {
967 return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
968 } else {
969 return local - current_thread()->kernel_stack;
970 }
971 }
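
/*
 * Usage sketch (kept under #if 0, not compiled): a deeply recursive path
 * might check its remaining headroom before descending further. The 4 KB
 * threshold is an arbitrary example value.
 */
#if 0
	if (ml_stack_remaining() < 4096) {
		/* too little kernel stack left; bail out or defer the work */
		return KERN_RESOURCE_SHORTAGE;
	}
#endif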
972
973 #if KASAN
974 vm_offset_t ml_stack_base(void);
975 vm_size_t ml_stack_size(void);
976
977 vm_offset_t
978 ml_stack_base(void)
979 {
980 if (ml_at_interrupt_context()) {
981 return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
982 } else {
983 return current_thread()->kernel_stack;
984 }
985 }
986
987 vm_size_t
988 ml_stack_size(void)
989 {
990 if (ml_at_interrupt_context()) {
991 return INTSTACK_SIZE;
992 } else {
993 return kernel_stack_size;
994 }
995 }
996 #endif
997
998 void
999 kernel_preempt_check(void)
1000 {
1001 boolean_t intr;
1002 unsigned long flags;
1003
1004 assert(get_preemption_level() == 0);
1005
1006 if (__improbable(*ast_pending() & AST_URGENT)) {
1007 /*
1008 * can handle interrupts and preemptions
1009 * at this point
1010 */
1011 __asm__ volatile ("pushf; pop %0" : "=r" (flags));
1012
1013 intr = ((flags & EFL_IF) != 0);
1014
1015 /*
1016 * now cause the PRE-EMPTION trap
1017 */
1018 if (intr == TRUE) {
1019 __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
1020 }
1021 }
1022 }
1023
1024 boolean_t
1025 machine_timeout_suspended(void)
1026 {
1027 return pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake();
1028 }
1029
1030 /* Eagerly evaluate all pending timer and thread callouts
1031 */
1032 void
1033 ml_timer_evaluate(void)
1034 {
1035 KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);
1036
1037 uint64_t te_end, te_start = mach_absolute_time();
1038 simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
1039 ml_timer_evaluation_in_progress = TRUE;
1040 thread_call_delayed_timer_rescan_all();
1041 mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
1042 ml_timer_evaluation_in_progress = FALSE;
1043 ml_timer_eager_evaluations++;
1044 te_end = mach_absolute_time();
1045 ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
1046 simple_unlock(&ml_timer_evaluation_slock);
1047
1048 KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
1049 }
1050
1051 boolean_t
1052 ml_timer_forced_evaluation(void)
1053 {
1054 return ml_timer_evaluation_in_progress;
1055 }
1056
1057 /* 32-bit right-rotate n bits */
1058 static inline uint32_t
1059 ror32(uint32_t val, const unsigned int n)
1060 {
1061 __asm__ volatile ("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n));
1062 return val;
1063 }
1064
1065 void
1066 ml_entropy_collect(void)
1067 {
1068 uint32_t tsc_lo, tsc_hi;
1069 uint32_t *ep;
1070
1071 assert(cpu_number() == master_cpu);
1072
1073 /* update buffer pointer cyclically */
1074 ep = EntropyData.buffer + (EntropyData.sample_count & ENTROPY_BUFFER_INDEX_MASK);
1075 EntropyData.sample_count += 1;
1076
1077 rdtsc_nofence(tsc_lo, tsc_hi);
1078 *ep = ror32(*ep, 9) ^ tsc_lo;
1079 }
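
/*
 * Illustrative sketch (kept under #if 0, not compiled): the mixing step above
 * rotates the current buffer slot right by 9 and xors in the low TSC bits.
 * The same update in portable C, without the inline rorl, where old, ep and
 * tsc_lo mirror the locals above:
 */
#if 0
	uint32_t old = *ep;
	*ep = ((old >> 9) | (old << (32 - 9))) ^ tsc_lo;
#endif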
1080
1081 uint64_t
1082 ml_energy_stat(__unused thread_t t)
1083 {
1084 return 0;
1085 }
1086
1087 void
1088 ml_gpu_stat_update(uint64_t gpu_ns_delta)
1089 {
1090 current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
1091 }
1092
1093 uint64_t
1094 ml_gpu_stat(thread_t t)
1095 {
1096 return t->machine.thread_gpu_ns;
1097 }
1098
1099 int plctrace_enabled = 0;
1100
1101 void
1102 _disable_preemption(void)
1103 {
1104 disable_preemption_internal();
1105 }
1106
1107 void
1108 _enable_preemption(void)
1109 {
1110 enable_preemption_internal();
1111 }
1112
1113 void
1114 plctrace_disable(void)
1115 {
1116 plctrace_enabled = 0;
1117 }
1118
1119 static boolean_t ml_quiescing;
1120
1121 void
1122 ml_set_is_quiescing(boolean_t quiescing)
1123 {
1124 assert(FALSE == ml_get_interrupts_enabled());
1125 ml_quiescing = quiescing;
1126 }
1127
1128 boolean_t
1129 ml_is_quiescing(void)
1130 {
1131 assert(FALSE == ml_get_interrupts_enabled());
1132 return ml_quiescing;
1133 }
1134
1135 uint64_t
1136 ml_get_booter_memory_size(void)
1137 {
1138 return 0;
1139 }