/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/machine_routines.h>
#include <i386/io_map_entries.h>
#include <i386/cpuid.h>

#include <mach/processor.h>
#include <kern/processor.h>
#include <kern/machine.h>

#include <kern/cpu_number.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <kern/policy_internal.h>

#include <prng/random.h>
#include <prng/entropy.h>
#include <i386/machine_cpu.h>
#include <i386/lapic.h>
#include <i386/bit_routines.h>
#include <i386/mp_events.h>
#include <i386/pmCPU.h>
#include <i386/trap.h>

#include <i386/cpu_threads.h>
#include <i386/proc_reg.h>
#include <mach/vm_param.h>
#include <i386/pmap.h>
#include <i386/pmap_internal.h>
#include <i386/misc_protos.h>
#include <kern/timer_queue.h>
#include <vm/vm_map.h>

#include <architecture/i386/pio.h>
#include <i386/cpu_data.h>
#define DBG(x...)   kprintf("DBG: " x)

#if MONOTONIC
#include <kern/monotonic.h>
#endif /* MONOTONIC */

extern void wakeup(void *);
uint64_t    LockTimeOutTSC;
uint32_t    LockTimeOutUsec;
uint64_t    low_MutexSpin;
int64_t     high_MutexSpin;
uint64_t    LastDebuggerEntryAllowance;
uint64_t    delay_spin_threshold;

extern uint64_t panic_restart_timeout;

boolean_t virtualized = FALSE;
decl_simple_lock_data(static, ml_timer_evaluation_slock);
uint32_t ml_timer_eager_evaluations;
uint64_t ml_timer_eager_evaluation_max;
static boolean_t ml_timer_evaluation_in_progress = FALSE;

LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
static int max_cpus_initialized = 0;
#define MAX_CPUS_SET    0x1
#define MAX_CPUS_WAIT   0x2
/* IO memory map services */

/* Map memory map IO space */
vm_offset_t
ml_io_map(
    vm_offset_t phys_addr,
    vm_size_t size)
{
    return io_map(phys_addr, size, VM_WIMG_IO);
}
/* boot memory allocation */
vm_offset_t
ml_static_malloc(
    __unused vm_size_t size)
{
    return (vm_offset_t)NULL;
}
void
ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
{
    *phys_addr = 0;
    *size      = 0;
}
vm_offset_t
ml_static_ptovirt(
    vm_offset_t paddr)
{
#if defined(__x86_64__)
    return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
#else
    return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
#endif
}

vm_offset_t
ml_static_slide(
    vm_offset_t vaddr)
{
    return VM_KERNEL_SLIDE(vaddr);
}
/*
 * Verify that a range of statically mapped kernel memory carries at least
 * the requested protections.
 * base must be page-aligned, and size must be a multiple of PAGE_SIZE.
 */
kern_return_t
ml_static_verify_page_protections(
    uint64_t base, uint64_t size, vm_prot_t prot)
{
    vm_prot_t pageprot;
    uint64_t offset;

    DBG("ml_static_verify_page_protections: vaddr 0x%llx sz 0x%llx prot 0x%x\n", base, size, prot);

    /*
     * base must be within the static bounds, defined to be:
     * (vm_kernel_stext, kc_highest_nonlinkedit_vmaddr)
     */
#if DEVELOPMENT || DEBUG || KASAN
    assert(kc_highest_nonlinkedit_vmaddr > 0 && base > vm_kernel_stext && base < kc_highest_nonlinkedit_vmaddr);
#else /* On release kernels, assume this is a protection mismatch failure. */
    if (kc_highest_nonlinkedit_vmaddr == 0 || base < vm_kernel_stext || base >= kc_highest_nonlinkedit_vmaddr) {
        return KERN_FAILURE;
    }
#endif

    for (offset = 0; offset < size; offset += PAGE_SIZE) {
        if (pmap_get_prot(kernel_pmap, base + offset, &pageprot) == KERN_FAILURE) {
            return KERN_FAILURE;
        }
        if ((pageprot & prot) != prot) {
            return KERN_FAILURE;
        }
    }

    return KERN_SUCCESS;
}

vm_offset_t
ml_static_unslide(
    vm_offset_t vaddr)
{
    return VM_KERNEL_UNSLIDE(vaddr);
}
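
/*
 * Illustrative sketch (not part of the original source): a hypothetical caller
 * could use ml_static_verify_page_protections() to confirm that a page-aligned
 * kernel text range is still mapped read-only and executable.  The function
 * and variable names below are assumptions for illustration only.
 */
#if 0   /* example only, not compiled */
static void
example_check_text_protections(uint64_t text_start, uint64_t text_size)
{
    /* text_start must be page-aligned; text_size a multiple of PAGE_SIZE */
    if (ml_static_verify_page_protections(text_start, text_size,
        VM_PROT_READ | VM_PROT_EXECUTE) != KERN_SUCCESS) {
        panic("kernel text protections unexpectedly changed");
    }
}
#endif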
/*
 *	Routine:	ml_static_mfree
 *	Function:	Reclaim memory, by virtual address, that was used in
 *			early boot and is no longer needed by the kernel.
 */
void
ml_static_mfree(
    vm_offset_t vaddr,
    vm_size_t size)
{
    addr64_t vaddr_cur;
    ppnum_t ppn;
    uint32_t freed_pages = 0;
    vm_size_t map_size;

    assert(vaddr >= VM_MIN_KERNEL_ADDRESS);

    assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

    for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) {
        map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur);

        /* just skip if nothing mapped here */
        if (map_size == 0) {
            vaddr_cur += PAGE_SIZE;
            continue;
        }

        /*
         * Can't free from the middle of a large page.
         */
        assert((vaddr_cur & (map_size - 1)) == 0);

        ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
        assert(ppn != (ppnum_t)NULL);

        pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size);
        while (map_size > 0) {
            if (++kernel_pmap->stats.resident_count > kernel_pmap->stats.resident_max) {
                kernel_pmap->stats.resident_max = kernel_pmap->stats.resident_count;
            }

            assert(pmap_valid_page(ppn));
            if (IS_MANAGED_PAGE(ppn)) {
                vm_page_create(ppn, (ppn + 1));
                freed_pages++;
            }
            map_size -= PAGE_SIZE;
            vaddr_cur += PAGE_SIZE;
            ppn++;
        }
    }
    vm_page_lockspin_queues();
    vm_page_wire_count -= freed_pages;
    vm_page_wire_count_initial -= freed_pages;
    if (vm_page_wire_count_on_boot != 0) {
        assert(vm_page_wire_count_on_boot >= freed_pages);
        vm_page_wire_count_on_boot -= freed_pages;
    }
    vm_page_unlock_queues();

#if DEBUG
    kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
#endif
}
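
/*
 * Illustrative sketch (not part of the original source): ml_static_mfree()
 * expects a page-aligned virtual address inside the static kernel mapping; a
 * typical caller releases an early-boot buffer once nothing references it.
 * The names below are hypothetical.
 */
#if 0   /* example only, not compiled */
static void
example_release_bootstrap_buffer(vm_offset_t buf, vm_size_t buf_size)
{
    assert((buf & PAGE_MASK) == 0);
    ml_static_mfree(buf, buf_size);  /* pages are handed back to the VM free list */
}
#endif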
/* Change page protections for addresses previously loaded by efiboot */
kern_return_t
ml_static_protect(vm_offset_t vmaddr, vm_size_t size, vm_prot_t prot)
{
    boolean_t NX = !!!(prot & VM_PROT_EXECUTE), ro = !!!(prot & VM_PROT_WRITE);

    assert(prot & VM_PROT_READ);

    pmap_mark_range(kernel_pmap, vmaddr, size, NX, ro);

    return KERN_SUCCESS;
}
/* virtual to physical on wired pages */
vm_offset_t
ml_vtophys(
    vm_offset_t vaddr)
{
    return (vm_offset_t)kvtophys(vaddr);
}
/*
 *	Routine:	ml_nofault_copy
 *	Function:	Perform a physical mode copy if the source and
 *			destination have valid translations in the kernel pmap.
 *			If translations are present, they are assumed to
 *			be wired; i.e. no attempt is made to guarantee that the
 *			translations obtained remain valid for
 *			the duration of the copy process.
 */
vm_size_t
ml_nofault_copy(
    vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
{
    addr64_t cur_phys_dst, cur_phys_src;
    uint32_t count, nbytes = 0;

    while (size > 0) {
        if (!(cur_phys_src = kvtophys(virtsrc))) {
            break;
        }
        if (!(cur_phys_dst = kvtophys(virtdst))) {
            break;
        }
        if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
            break;
        }
        count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
        if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
            count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
        }
        if (count > size) {
            count = (uint32_t)size;
        }

        bcopy_phys(cur_phys_src, cur_phys_dst, count);

        nbytes += count;
        virtsrc += count;
        virtdst += count;
        size -= count;
    }

    return nbytes;
}
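
/*
 * Illustrative sketch (not part of the original source): because
 * ml_nofault_copy() stops at the first address without a valid translation,
 * callers should treat a short return value as a partial copy rather than
 * assuming the whole buffer was transferred.  The helper name below is
 * hypothetical.
 */
#if 0   /* example only, not compiled */
static boolean_t
example_copy_if_mapped(vm_offset_t src, vm_offset_t dst, vm_size_t len)
{
    return ml_nofault_copy(src, dst, len) == len;
}
#endif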
/*
 *	Routine:	ml_validate_nofault
 *	Function:	Validate that this address range has valid translations
 *			in the kernel pmap. If translations are present, they
 *			are assumed to be wired; i.e. no attempt is made to
 *			guarantee that the translations persist after the check.
 *	Returns:	TRUE if the range is mapped and will not cause a fault,
 *			FALSE otherwise.
 */
boolean_t
ml_validate_nofault(
    vm_offset_t virtsrc, vm_size_t size)
{
    addr64_t cur_phys_src;
    uint32_t count;

    while (size > 0) {
        if (!(cur_phys_src = kvtophys(virtsrc))) {
            return FALSE;
        }
        if (!pmap_valid_page(i386_btop(cur_phys_src))) {
            return FALSE;
        }
        count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
        if (count > size) {
            count = (uint32_t)size;
        }

        virtsrc += count;
        size -= count;
    }

    return TRUE;
}
/* Interrupt handling */

/* Initialize Interrupts */
void
ml_init_interrupt(void)
{
    (void) ml_set_interrupts_enabled(TRUE);
}
/* Get Interrupts Enabled */
boolean_t
ml_get_interrupts_enabled(void)
{
    unsigned long flags;

    __asm__ volatile ("pushf; pop %0" : "=r" (flags));
    return (flags & EFL_IF) != 0;
}
/* Set Interrupts Enabled */
boolean_t
ml_set_interrupts_enabled(boolean_t enable)
{
    unsigned long flags;
    boolean_t istate;

    __asm__ volatile ("pushf; pop %0" : "=r" (flags));

    assert(get_interrupt_level() ? (enable == FALSE) : TRUE);

    istate = ((flags & EFL_IF) != 0);

    if (enable) {
        __asm__ volatile ("sti;nop");

        if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
            __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
        }
    } else {
        if (istate) {
            __asm__ volatile ("cli");
        }
    }

    return istate;
}
/* Early Set Interrupts Enabled */
boolean_t
ml_early_set_interrupts_enabled(boolean_t enable)
{
    if (enable == TRUE) {
        kprintf("Caller attempted to enable interrupts too early in "
            "kernel startup. Halting.\n");
        panic("Halting kernel.");
        /* NOTREACHED */
    }

    /* On x86, do not allow interrupts to be enabled very early */
    return FALSE;
}
/* Check if running at interrupt context */
boolean_t
ml_at_interrupt_context(void)
{
    return get_interrupt_level() != 0;
}
void
ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
{
    *icp = (get_interrupt_level() != 0);
    /* These will be technically inaccurate for interrupts that occur
     * successively within a single "idle exit" event, but shouldn't
     * matter statistically.
     */
    *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
}
/* Generate a fake interrupt */
void
ml_cause_interrupt(void)
{
    panic("ml_cause_interrupt not defined yet on Intel");
}
/*
 * TODO: transition users of this to kernel_thread_start_priority
 * ml_thread_policy is an unsupported KPI
 */
void
ml_thread_policy(
    thread_t thread,
    __unused unsigned policy_id,
    unsigned policy_info)
{
    if (policy_info & MACHINE_NETWORK_WORKLOOP) {
        thread_precedence_policy_data_t info;
        __assert_only kern_return_t kret;

        info.importance = 1;

        kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
            (thread_policy_t)&info,
            THREAD_PRECEDENCE_POLICY_COUNT);
        assert(kret == KERN_SUCCESS);
    }
}
/* Install an interrupt handler */
void
ml_install_interrupt_handler(
    void *nub,
    int source,
    void *target,
    IOInterruptHandler handler,
    void *refCon)
{
    boolean_t current_state;

    current_state = ml_set_interrupts_enabled(FALSE);

    PE_install_interrupt_handler(nub, source, target,
        (IOInterruptHandler) handler, refCon);

    (void) ml_set_interrupts_enabled(current_state);
}
void
machine_signal_idle(
    processor_t processor)
{
    cpu_interrupt(processor->cpu_id);
}

void
machine_signal_idle_deferred(
    __unused processor_t processor)
{
    panic("Unimplemented");
}

void
machine_signal_idle_cancel(
    __unused processor_t processor)
{
    panic("Unimplemented");
}
static kern_return_t
register_cpu(
    uint32_t        lapic_id,
    processor_t     *processor_out,
    boolean_t       boot_cpu)
{
    int             target_cpu;
    cpu_data_t      *this_cpu_datap;

    this_cpu_datap = cpu_data_alloc(boot_cpu);
    if (this_cpu_datap == NULL) {
        return KERN_FAILURE;
    }
    target_cpu = this_cpu_datap->cpu_number;
    assert((boot_cpu && (target_cpu == 0)) ||
        (!boot_cpu && (target_cpu != 0)));

    lapic_cpu_map(lapic_id, target_cpu);

    /* The cpu_id is not known at registration phase. Just do
     * lapic_id for now.
     */
    this_cpu_datap->cpu_phys_number = lapic_id;

    this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
    if (this_cpu_datap->cpu_console_buf == NULL) {
        goto failed;
    }

#if KPC
    if (kpc_register_cpu(this_cpu_datap) != TRUE) {
        goto failed;
    }
#endif /* KPC */

    if (!boot_cpu) {
        cpu_thread_alloc(this_cpu_datap->cpu_number);
        if (this_cpu_datap->lcpu.core == NULL) {
            goto failed;
        }
    }

    /*
     * processor_init() deferred to topology start
     * because "slot numbers" a.k.a. logical processor numbers
     * are not yet finalized.
     */
    *processor_out = this_cpu_datap->cpu_processor;

    return KERN_SUCCESS;

failed:
    console_cpu_free(this_cpu_datap->cpu_console_buf);
#if KPC
    kpc_unregister_cpu(this_cpu_datap);
#endif /* KPC */

    return KERN_FAILURE;
}
kern_return_t
ml_processor_register(
    cpu_id_t        cpu_id,
    uint32_t        lapic_id,
    processor_t     *processor_out,
    boolean_t       boot_cpu,
    boolean_t       start)
{
    static boolean_t done_topo_sort = FALSE;
    static uint32_t num_registered = 0;

    /* Register all CPUs first, and track max */
    if (start == FALSE) {
        num_registered++;

        DBG("registering CPU lapic id %d\n", lapic_id);

        return register_cpu(lapic_id, processor_out, boot_cpu);
    }

    /* Sort by topology before we start anything */
    if (!done_topo_sort) {
        DBG("about to start CPUs. %d registered\n", num_registered);

        cpu_topology_sort(num_registered);
        done_topo_sort = TRUE;
    }

    /* Assign the cpu ID */
    uint32_t cpunum = -1;
    cpu_data_t      *this_cpu_datap = NULL;

    /* find cpu num and pointer */
    cpunum = ml_get_cpuid(lapic_id);

    if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
        panic("trying to start invalid/unregistered CPU %d\n", lapic_id);
    }

    this_cpu_datap = cpu_datap(cpunum);

    /* fix the CPU id */
    this_cpu_datap->cpu_id = cpu_id;

    /* allocate and initialize other per-cpu structures */
    if (!boot_cpu) {
        mp_cpus_call_cpu_init(cpunum);
        random_cpu_init(cpunum);
    }

    /* output arg */
    *processor_out = this_cpu_datap->cpu_processor;

    /* OK, try and start this CPU */
    return cpu_topology_start_cpu(cpunum);
}
void
ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
{
    boolean_t       os_supports_sse;
    i386_cpu_info_t *cpuid_infop;

    if (cpu_infop == NULL) {
        return;
    }

    /*
     * Are we supporting MMX/SSE/SSE2/SSE3?
     * As distinct from whether the cpu has these capabilities.
     */
    os_supports_sse = !!(get_cr4() & CR4_OSXMM);

    if (ml_fpu_avx_enabled()) {
        cpu_infop->vector_unit = 9;
    } else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
        cpu_infop->vector_unit = 8;
    } else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
        cpu_infop->vector_unit = 7;
    } else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
        cpu_infop->vector_unit = 6;
    } else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
        cpu_infop->vector_unit = 5;
    } else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
        cpu_infop->vector_unit = 4;
    } else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
        cpu_infop->vector_unit = 3;
    } else if (cpuid_features() & CPUID_FEATURE_MMX) {
        cpu_infop->vector_unit = 2;
    } else {
        cpu_infop->vector_unit = 0;
    }

    cpuid_infop = cpuid_info();

    cpu_infop->cache_line_size = cpuid_infop->cache_linesize;

    cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
    cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];

    if (cpuid_infop->cache_size[L2U] > 0) {
        cpu_infop->l2_settings = 1;
        cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
    } else {
        cpu_infop->l2_settings = 0;
        cpu_infop->l2_cache_size = 0xFFFFFFFF;
    }

    if (cpuid_infop->cache_size[L3U] > 0) {
        cpu_infop->l3_settings = 1;
        cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
    } else {
        cpu_infop->l3_settings = 0;
        cpu_infop->l3_cache_size = 0xFFFFFFFF;
    }
}
int
ml_early_cpu_max_number(void)
{
    int n = max_ncpus;

    assert(startup_phase >= STARTUP_SUB_TUNABLES);
    if (max_cpus_from_firmware) {
        n = MIN(n, max_cpus_from_firmware);
    }
    return n - 1;
}
void
ml_set_max_cpus(unsigned int max_cpus)
{
    lck_mtx_lock(&max_cpus_lock);
    if (max_cpus_initialized != MAX_CPUS_SET) {
        if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
            /*
             * Note: max_cpus is the number of enabled processors
             * that ACPI found; max_ncpus is the maximum number
             * that the kernel supports or that the "cpus="
             * boot-arg has set. Here we take the minimum.
             */
            machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
        }
        if (max_cpus_initialized == MAX_CPUS_WAIT) {
            thread_wakeup((event_t) &max_cpus_initialized);
        }
        max_cpus_initialized = MAX_CPUS_SET;
    }
    lck_mtx_unlock(&max_cpus_lock);
}
unsigned int
ml_wait_max_cpus(void)
{
    lck_mtx_lock(&max_cpus_lock);
    while (max_cpus_initialized != MAX_CPUS_SET) {
        max_cpus_initialized = MAX_CPUS_WAIT;
        lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT);
    }
    lck_mtx_unlock(&max_cpus_lock);
    return machine_info.max_cpus;
}
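
/*
 * Illustrative sketch (not part of the original source): ml_set_max_cpus() is
 * the producer side of this handshake (called once ACPI has counted the
 * enabled processors), while ml_wait_max_cpus() blocks any early consumer
 * until that count has been published.  The consumer below is hypothetical.
 */
#if 0   /* example only, not compiled */
static void
example_size_per_cpu_table(void)
{
    unsigned int ncpus = ml_wait_max_cpus();  /* sleeps until ml_set_max_cpus() runs */
    /* ... size a per-CPU table with ncpus entries ... */
}
#endif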
void
ml_panic_trap_to_debugger(__unused const char *panic_format_str,
    __unused va_list *panic_args,
    __unused unsigned int reason,
    __unused void *ctx,
    __unused uint64_t panic_options_mask,
    __unused unsigned long panic_caller)
{
    return;
}
static uint64_t
virtual_timeout_inflate64(unsigned int vti, uint64_t timeout, uint64_t max_timeout)
{
    if ((timeout << vti) >> vti != timeout) {
        return max_timeout;
    }

    if ((timeout << vti) > max_timeout) {
        return max_timeout;
    }

    return timeout << vti;
}

static uint32_t
virtual_timeout_inflate32(unsigned int vti, uint32_t timeout, uint32_t max_timeout)
{
    if ((timeout << vti) >> vti != timeout) {
        return max_timeout;
    }

    return timeout << vti;
}
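
/*
 * Worked example (added for clarity, not in the original source): shifting a
 * timeout left by vti and back right again only reproduces the input when no
 * high-order bits were lost.  With vti = 6 and timeout = 0x0400000000000000
 * (2^58), timeout << 6 wraps to 0 in 64 bits, and (0 >> 6) != timeout, so the
 * inflated value is clamped to max_timeout instead of silently collapsing to
 * zero.
 */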
/*
 * Some timeouts are later adjusted or used in calculations setting
 * other values. In order to avoid overflow, cap the max timeout at
 * 2^47 ns (~39 hours).
 */
static const uint64_t max_timeout_ns = 1ULL << 47;

/*
 * Inflate a timeout in absolutetime.
 */
static uint64_t
virtual_timeout_inflate_abs(unsigned int vti, uint64_t timeout)
{
    uint64_t max_timeout;
    nanoseconds_to_absolutetime(max_timeout_ns, &max_timeout);
    return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a value in TSC ticks.
 */
static uint64_t
virtual_timeout_inflate_tsc(unsigned int vti, uint64_t timeout)
{
    const uint64_t max_timeout = tmrCvt(max_timeout_ns, tscFCvtn2t);
    return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a timeout in microseconds.
 */
static uint32_t
virtual_timeout_inflate_us(unsigned int vti, uint64_t timeout)
{
    const uint32_t max_timeout = ~0;
    return virtual_timeout_inflate32(vti, timeout, max_timeout);
}
/*
 * Routine:	ml_init_lock_timeout
 * Function:	Initialize lock, TLB-flush, mutex-spin and related timeouts,
 *		inflating them when running virtualized.
 */
void
ml_init_lock_timeout(void)
{
    uint64_t        abstime;
    uint32_t        mtxspin;
#if DEVELOPMENT || DEBUG
    uint64_t        default_timeout_ns = NSEC_PER_SEC >> 2;
#else
    uint64_t        default_timeout_ns = NSEC_PER_SEC >> 1;
#endif
    uint32_t        slto;
    uint32_t        prt;

    if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
        default_timeout_ns = slto * NSEC_PER_USEC;
    }

    /*
     * LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks,
     * and LockTimeOutUsec is in microseconds and it's 32-bits.
     */
    LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
    nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
    LockTimeOut = abstime;
    LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);

    /*
     * TLBTimeOut dictates the TLB flush timeout period. It defaults to
     * LockTimeOut but can be overridden separately. In particular, a
     * zero value inhibits the timeout-panic and cuts a trace event instead
     * - see pmap_flush_tlbs().
     */
    if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
        default_timeout_ns = slto * NSEC_PER_USEC;
        nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
        TLBTimeOut = (uint32_t) abstime;
    } else {
        TLBTimeOut = LockTimeOut;
    }

#if DEVELOPMENT || DEBUG
    reportphyreaddelayabs = LockTimeOut >> 1;
#endif
    if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
        default_timeout_ns = slto * NSEC_PER_USEC;
        nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
        reportphyreaddelayabs = abstime;
    }

    if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
        nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
        reportphywritedelayabs = abstime;
    }

    if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
        nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
        tracephyreaddelayabs = abstime;
    }

    if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
        nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
        tracephywritedelayabs = abstime;
    }

    if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
        if (mtxspin > USEC_PER_SEC >> 4) {
            mtxspin = USEC_PER_SEC >> 4;
        }
        nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
    } else {
        nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
    }
    MutexSpin = (unsigned int)abstime;
    low_MutexSpin = MutexSpin;
    /*
     * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
     * real_ncpus is not set at this time
     */
    high_MutexSpin = -1;

    nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
    if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
        nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
    }

    virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
    if (virtualized) {
        unsigned int vti;

        if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
            vti = 6;
        }
        printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
        kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
#define VIRTUAL_TIMEOUT_INFLATE_ABS(_timeout)                           \
MACRO_BEGIN                                                             \
	kprintf("%24s: 0x%016llx ", #_timeout, _timeout);               \
	_timeout = virtual_timeout_inflate_abs(vti, _timeout);          \
	kprintf("-> 0x%016llx\n", _timeout);                            \
MACRO_END

#define VIRTUAL_TIMEOUT_INFLATE_TSC(_timeout)                           \
MACRO_BEGIN                                                             \
	kprintf("%24s: 0x%016llx ", #_timeout, _timeout);               \
	_timeout = virtual_timeout_inflate_tsc(vti, _timeout);          \
	kprintf("-> 0x%016llx\n", _timeout);                            \
MACRO_END

#define VIRTUAL_TIMEOUT_INFLATE_US(_timeout)                            \
MACRO_BEGIN                                                             \
	kprintf("%24s: 0x%08x ", #_timeout, _timeout);                  \
	_timeout = virtual_timeout_inflate_us(vti, _timeout);           \
	kprintf("-> 0x%08x\n", _timeout);                               \
MACRO_END

        VIRTUAL_TIMEOUT_INFLATE_US(LockTimeOutUsec);
        VIRTUAL_TIMEOUT_INFLATE_ABS(LockTimeOut);
        VIRTUAL_TIMEOUT_INFLATE_TSC(LockTimeOutTSC);
        VIRTUAL_TIMEOUT_INFLATE_ABS(TLBTimeOut);
        VIRTUAL_TIMEOUT_INFLATE_ABS(MutexSpin);
        VIRTUAL_TIMEOUT_INFLATE_ABS(low_MutexSpin);
        VIRTUAL_TIMEOUT_INFLATE_ABS(reportphyreaddelayabs);
    }

    interrupt_latency_tracker_setup();
    simple_lock_init(&ml_timer_evaluation_slock, 0);
}
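
/*
 * Worked example (added for clarity, not in the original source): booting with
 * slto_us=500000 makes default_timeout_ns = 500000 * NSEC_PER_USEC = 0.5 s, so
 * LockTimeOutUsec becomes 500000, LockTimeOut the same interval in
 * absolutetime units, and LockTimeOutTSC that interval converted to TSC ticks
 * via tmrCvt().  Under virtualization with vti left at 6, each of these is
 * then shifted left by 6 (a 64x inflation), subject to the caps applied by the
 * virtual_timeout_inflate_* helpers above.
 */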
/*
 * Threshold above which we should attempt to block
 * instead of spinning for clock_delay_until().
 */
void
ml_init_delay_spin_threshold(int threshold_us)
{
    nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
}

boolean_t
ml_delay_should_spin(uint64_t interval)
{
    return (interval < delay_spin_threshold) ? TRUE : FALSE;
}
TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);

void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
    if (yield_delay_us) {
        delay(yield_delay_us);
    }
#endif
}
/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates. Defer to cpu_thread_init().
 */
void
ml_cpu_up(void)
{
    return;
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates.
 */
void
ml_cpu_down(void)
{
    i386_deactivate_cpu();

    return;
}
/*
 * The following are required for parts of the kernel
 * that cannot resolve these functions as inlines:
 */
extern thread_t current_act(void) __attribute__((const));
thread_t
current_act(void)
{
    return current_thread_fast();
}

#undef current_thread
extern thread_t current_thread(void) __attribute__((const));
thread_t
current_thread(void)
{
    return current_thread_fast();
}
boolean_t
ml_is64bit(void)
{
    return cpu_mode_is64bit();
}

boolean_t
ml_thread_is64bit(thread_t thread)
{
    return thread_is_64bit_addr(thread);
}

boolean_t
ml_state_is64bit(void *saved_state)
{
    return is_saved_state64(saved_state);
}
void
ml_cpu_set_ldt(int selector)
{
    /*
     * Avoid loading the LDT
     * if we're setting the KERNEL LDT and it's already set.
     */
    if (selector == KERNEL_LDT &&
        current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
        return;
    }

    lldt(selector);
    current_cpu_datap()->cpu_ldt = selector;
}
void
ml_fp_setvalid(boolean_t value)
{
    fp_setvalid(value);
}

uint64_t
ml_cpu_int_event_time(void)
{
    return current_cpu_datap()->cpu_int_event_time;
}
vm_offset_t
ml_stack_remaining(void)
{
    uintptr_t local = (uintptr_t) &local;

    if (ml_at_interrupt_context() != 0) {
        return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
    } else {
        return local - current_thread()->kernel_stack;
    }
}
vm_offset_t ml_stack_base(void);
vm_size_t ml_stack_size(void);

vm_offset_t
ml_stack_base(void)
{
    if (ml_at_interrupt_context()) {
        return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
    } else {
        return current_thread()->kernel_stack;
    }
}

vm_size_t
ml_stack_size(void)
{
    if (ml_at_interrupt_context()) {
        return INTSTACK_SIZE;
    } else {
        return kernel_stack_size;
    }
}
void
kernel_preempt_check(void)
{
    boolean_t       intr;
    unsigned long   flags;

    assert(get_preemption_level() == 0);

    if (__improbable(*ast_pending() & AST_URGENT)) {
        /*
         * can handle interrupts and preemptions
         * at this point
         */
        __asm__ volatile ("pushf; pop %0" : "=r" (flags));

        intr = ((flags & EFL_IF) != 0);

        /*
         * now cause the PRE-EMPTION trap
         */
        if (intr == TRUE) {
            __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
        }
    }
}
boolean_t
machine_timeout_suspended(void)
{
    return pmap_tlb_flush_timeout || spinlock_timed_out ||
           panic_active() || mp_recent_debugger_activity() || ml_recent_wake();
}
/* Eagerly evaluate all pending timer and thread callouts. */
void
ml_timer_evaluate(void)
{
    KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);

    uint64_t te_end, te_start = mach_absolute_time();
    simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
    ml_timer_evaluation_in_progress = TRUE;
    thread_call_delayed_timer_rescan_all();
    mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
    ml_timer_evaluation_in_progress = FALSE;
    ml_timer_eager_evaluations++;
    te_end = mach_absolute_time();
    ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
    simple_unlock(&ml_timer_evaluation_slock);

    KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
boolean_t
ml_timer_forced_evaluation(void)
{
    return ml_timer_evaluation_in_progress;
}
/* 32-bit right-rotate n bits */
static inline uint32_t
ror32(uint32_t val, const unsigned int n)
{
    __asm__ volatile ("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n));
    return val;
}

void
ml_entropy_collect(void)
{
    uint32_t        tsc_lo, tsc_hi;
    uint32_t        *ep;

    assert(cpu_number() == master_cpu);

    /* update buffer pointer cyclically */
    ep = EntropyData.buffer + (EntropyData.sample_count & EntropyData.buffer_index_mask);
    EntropyData.sample_count += 1;

    rdtsc_nofence(tsc_lo, tsc_hi);
    *ep = (ror32(*ep, 9) & EntropyData.ror_mask) ^ tsc_lo;
}
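
/*
 * Worked example (added for clarity, not in the original source): the mixing
 * step rotates the previous buffer word right by 9 bits, masks it, and XORs in
 * the low 32 bits of the TSC.  For instance, with *ep == 0x80000001 and
 * ror_mask == ~0u, ror32(0x80000001, 9) == 0x00C00000, which is then XORed
 * with tsc_lo to form the new buffer word.
 */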
uint64_t
ml_energy_stat(__unused thread_t t)
{
    return 0;
}

void
ml_gpu_stat_update(uint64_t gpu_ns_delta)
{
    current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
}

uint64_t
ml_gpu_stat(thread_t t)
{
    return t->machine.thread_gpu_ns;
}
int plctrace_enabled = 0;

void
_disable_preemption(void)
{
    disable_preemption_internal();
}

void
_enable_preemption(void)
{
    enable_preemption_internal();
}

void
plctrace_disable(void)
{
    plctrace_enabled = 0;
}
static boolean_t ml_quiescing;

void
ml_set_is_quiescing(boolean_t quiescing)
{
    ml_quiescing = quiescing;
}

boolean_t
ml_is_quiescing(void)
{
    return ml_quiescing;
}
uint64_t
ml_get_booter_memory_size(void)
{
    return 0;
}

void
machine_lockdown(void)
{
    x86_64_protect_data_const();
}
bool
ml_cpu_can_exit(__unused int cpu_id)
{
    return true;
}

void
ml_cpu_init_state(void)
{
}

void
ml_cpu_begin_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_end_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_begin_loop(void)
{
}

void
ml_cpu_end_loop(void)
{
}
size_t
ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
{
#pragma unused(vm_is64bit)
    assert(regions != NULL);

    /* x86 declares no reserved VM regions. */
    *regions = NULL;
    return 0;
}