osfmk/i386/machine_routines.c (apple/xnu, xnu-4903.270.47)
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <i386/machine_routines.h>
30 #include <i386/io_map_entries.h>
31 #include <i386/cpuid.h>
32 #include <i386/fpu.h>
33 #include <mach/processor.h>
34 #include <kern/processor.h>
35 #include <kern/machine.h>
36
37 #include <kern/cpu_number.h>
38 #include <kern/thread.h>
39 #include <kern/thread_call.h>
40 #include <kern/policy_internal.h>
41
42 #include <prng/random.h>
43 #include <i386/machine_cpu.h>
44 #include <i386/lapic.h>
45 #include <i386/bit_routines.h>
46 #include <i386/mp_events.h>
47 #include <i386/pmCPU.h>
48 #include <i386/trap.h>
49 #include <i386/tsc.h>
50 #include <i386/cpu_threads.h>
51 #include <i386/proc_reg.h>
52 #include <mach/vm_param.h>
53 #include <i386/pmap.h>
54 #include <i386/pmap_internal.h>
55 #include <i386/misc_protos.h>
56 #include <kern/timer_queue.h>
57 #if KPC
58 #include <kern/kpc.h>
59 #endif
60 #include <architecture/i386/pio.h>
61 #include <i386/cpu_data.h>
62 #if DEBUG
63 #define DBG(x...) kprintf("DBG: " x)
64 #else
65 #define DBG(x...)
66 #endif
67
68 #if MONOTONIC
69 #include <kern/monotonic.h>
70 #endif /* MONOTONIC */
71
72 extern void wakeup(void *);
73
74 static int max_cpus_initialized = 0;
75
76 uint64_t LockTimeOut;
77 uint64_t TLBTimeOut;
78 uint64_t LockTimeOutTSC;
79 uint32_t LockTimeOutUsec;
80 uint64_t MutexSpin;
81 uint64_t LastDebuggerEntryAllowance;
82 uint64_t delay_spin_threshold;
83
84 extern uint64_t panic_restart_timeout;
85
86 boolean_t virtualized = FALSE;
87
88 decl_simple_lock_data(static, ml_timer_evaluation_slock);
89 uint32_t ml_timer_eager_evaluations;
90 uint64_t ml_timer_eager_evaluation_max;
91 static boolean_t ml_timer_evaluation_in_progress = FALSE;
92
93
94 #define MAX_CPUS_SET 0x1
95 #define MAX_CPUS_WAIT 0x2
96
97 /* IO memory map services */
98
99 /* Map memory-mapped IO space */
100 vm_offset_t
101 ml_io_map(
102 vm_offset_t phys_addr,
103 vm_size_t size)
104 {
105 return io_map(phys_addr, size, VM_WIMG_IO);
106 }
107
108 /* boot memory allocation */
109 vm_offset_t
110 ml_static_malloc(
111 __unused vm_size_t size)
112 {
113 return (vm_offset_t)NULL;
114 }
115
116
117 void
118 ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
119 {
120 *phys_addr = 0;
121 *size = 0;
122 }
123
124
125 vm_offset_t
126 ml_static_ptovirt(
127 vm_offset_t paddr)
128 {
129 #if defined(__x86_64__)
130 return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
131 #else
132 return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
133 #endif
134 }
135
136 vm_offset_t
137 ml_static_slide(
138 vm_offset_t vaddr)
139 {
140 return VM_KERNEL_SLIDE(vaddr);
141 }
142
143 vm_offset_t
144 ml_static_unslide(
145 vm_offset_t vaddr)
146 {
147 return VM_KERNEL_UNSLIDE(vaddr);
148 }
149
150
151 /*
152 * Routine: ml_static_mfree
153 * Function: Return boot-time wired memory in [vaddr, vaddr + size) to the VM free list.
154 */
155 void
156 ml_static_mfree(
157 vm_offset_t vaddr,
158 vm_size_t size)
159 {
160 addr64_t vaddr_cur;
161 ppnum_t ppn;
162 uint32_t freed_pages = 0;
163
164 assert(vaddr >= VM_MIN_KERNEL_ADDRESS);
165
166 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
167
168 for (vaddr_cur = vaddr;
169 vaddr_cur < round_page_64(vaddr + size);
170 vaddr_cur += PAGE_SIZE) {
171 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
172 if (ppn != (vm_offset_t)NULL) {
173 kernel_pmap->stats.resident_count++;
174 if (kernel_pmap->stats.resident_count >
175 kernel_pmap->stats.resident_max) {
176 kernel_pmap->stats.resident_max =
177 kernel_pmap->stats.resident_count;
178 }
179 pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + PAGE_SIZE);
180 assert(pmap_valid_page(ppn));
181 if (IS_MANAGED_PAGE(ppn)) {
182 vm_page_create(ppn, (ppn + 1));
183 freed_pages++;
184 }
185 }
186 }
187 vm_page_lockspin_queues();
188 vm_page_wire_count -= freed_pages;
189 vm_page_wire_count_initial -= freed_pages;
190 if (vm_page_wire_count_on_boot != 0) {
191 assert(vm_page_wire_count_on_boot >= freed_pages);
192 vm_page_wire_count_on_boot -= freed_pages;
193 }
194 vm_page_unlock_queues();
195
196 #if DEBUG
197 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
198 #endif
199 }
200
201
202 /* virtual to physical on wired pages */
203 vm_offset_t
204 ml_vtophys(
205 vm_offset_t vaddr)
206 {
207 return (vm_offset_t)kvtophys(vaddr);
208 }
209
210 /*
211 * Routine: ml_nofault_copy
212 * Function: Perform a physical mode copy if the source and
213 * destination have valid translations in the kernel pmap.
214 * If translations are present, they are assumed to
215 * be wired; i.e. no attempt is made to guarantee that the
216 * translations obtained remain valid for
217 * the duration of the copy process.
218 */
219
220 vm_size_t
221 ml_nofault_copy(
222 vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
223 {
224 addr64_t cur_phys_dst, cur_phys_src;
225 uint32_t count, nbytes = 0;
226
227 while (size > 0) {
228 if (!(cur_phys_src = kvtophys(virtsrc))) {
229 break;
230 }
231 if (!(cur_phys_dst = kvtophys(virtdst))) {
232 break;
233 }
234 if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
235 break;
236 }
237 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
238 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
239 count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
240 }
241 if (count > size) {
242 count = (uint32_t)size;
243 }
244
245 bcopy_phys(cur_phys_src, cur_phys_dst, count);
246
247 nbytes += count;
248 virtsrc += count;
249 virtdst += count;
250 size -= count;
251 }
252
253 return nbytes;
254 }
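/*
 * Illustrative usage sketch (not part of the original source): because
 * ml_nofault_copy() stops at the first page without a valid kernel
 * translation, a caller can detect a short copy from the return value:
 *
 *     vm_size_t copied = ml_nofault_copy(src_va, dst_va, len);
 *     if (copied != len) {
 *         // a page in the source or destination range was unmapped;
 *         // only the first 'copied' bytes were transferred
 *     }
 */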
255
256 /*
257 * Routine: ml_validate_nofault
258 * Function: Validate that this address range has valid translations
259 * in the kernel pmap. If translations are present, they are
260 * assumed to be wired; i.e. no attempt is made to guarantee
261 * that the translations persist after the check.
262 * Returns: TRUE if the range is mapped and will not cause a fault,
263 * FALSE otherwise.
264 */
265
266 boolean_t
267 ml_validate_nofault(
268 vm_offset_t virtsrc, vm_size_t size)
269 {
270 addr64_t cur_phys_src;
271 uint32_t count;
272
273 while (size > 0) {
274 if (!(cur_phys_src = kvtophys(virtsrc))) {
275 return FALSE;
276 }
277 if (!pmap_valid_page(i386_btop(cur_phys_src))) {
278 return FALSE;
279 }
280 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
281 if (count > size) {
282 count = (uint32_t)size;
283 }
284
285 virtsrc += count;
286 size -= count;
287 }
288
289 return TRUE;
290 }
291
292 /* Interrupt handling */
293
294 /* Initialize Interrupts */
295 void
296 ml_init_interrupt(void)
297 {
298 (void) ml_set_interrupts_enabled(TRUE);
299 }
300
301
302 /* Get Interrupts Enabled */
303 boolean_t
304 ml_get_interrupts_enabled(void)
305 {
306 unsigned long flags;
307
308 __asm__ volatile ("pushf; pop %0": "=r" (flags));
309 return (flags & EFL_IF) != 0;
310 }
311
312 /* Set Interrupts Enabled */
313 boolean_t
314 ml_set_interrupts_enabled(boolean_t enable)
315 {
316 unsigned long flags;
317 boolean_t istate;
318
319 __asm__ volatile ("pushf; pop %0" : "=r" (flags));
320
321 assert(get_interrupt_level() ? (enable == FALSE) : TRUE);
322
323 istate = ((flags & EFL_IF) != 0);
324
325 if (enable) {
326 __asm__ volatile ("sti;nop");
327
328 if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
329 __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
330 }
331 } else {
332 if (istate) {
333 __asm__ volatile ("cli");
334 }
335 }
336
337 return istate;
338 }
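/*
 * Illustrative usage sketch (not part of the original source): the return
 * value is the previous interrupt state, so the usual pattern (used later
 * in this file as well) is save, mask, restore:
 *
 *     boolean_t istate = ml_set_interrupts_enabled(FALSE);
 *     // ... critical section with interrupts masked ...
 *     (void) ml_set_interrupts_enabled(istate);
 */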
339
340 /* Early Set Interrupts Enabled */
341 boolean_t
342 ml_early_set_interrupts_enabled(boolean_t enable)
343 {
344 if (enable == TRUE) {
345 kprintf("Caller attempted to enable interrupts too early in "
346 "kernel startup. Halting.\n");
347 hlt();
348 /*NOTREACHED*/
349 }
350
351 /* On x86, do not allow interrupts to be enabled very early */
352 return FALSE;
353 }
354
355 /* Check if running in interrupt context */
356 boolean_t
357 ml_at_interrupt_context(void)
358 {
359 return get_interrupt_level() != 0;
360 }
361
362 void
363 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
364 {
365 *icp = (get_interrupt_level() != 0);
366 /* These will be technically inaccurate for interrupts that occur
367 * successively within a single "idle exit" event, but shouldn't
368 * matter statistically.
369 */
370 *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
371 }
372
373 /* Generate a fake interrupt */
374 void
375 ml_cause_interrupt(void)
376 {
377 panic("ml_cause_interrupt not defined yet on Intel");
378 }
379
380 /*
381 * TODO: transition users of this to kernel_thread_start_priority
382 * ml_thread_policy is an unsupported KPI
383 */
384 void
385 ml_thread_policy(
386 thread_t thread,
387 __unused unsigned policy_id,
388 unsigned policy_info)
389 {
390 if (policy_info & MACHINE_NETWORK_WORKLOOP) {
391 thread_precedence_policy_data_t info;
392 __assert_only kern_return_t kret;
393
394 info.importance = 1;
395
396 kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
397 (thread_policy_t)&info,
398 THREAD_PRECEDENCE_POLICY_COUNT);
399 assert(kret == KERN_SUCCESS);
400 }
401 }
402
403 /* Install an interrupt handler */
404 void
405 ml_install_interrupt_handler(
406 void *nub,
407 int source,
408 void *target,
409 IOInterruptHandler handler,
410 void *refCon)
411 {
412 boolean_t current_state;
413
414 current_state = ml_set_interrupts_enabled(FALSE);
415
416 PE_install_interrupt_handler(nub, source, target,
417 (IOInterruptHandler) handler, refCon);
418
419 (void) ml_set_interrupts_enabled(current_state);
420
421 initialize_screen(NULL, kPEAcquireScreen);
422 }
423
424
425 void
426 machine_signal_idle(
427 processor_t processor)
428 {
429 cpu_interrupt(processor->cpu_id);
430 }
431
432 void
433 machine_signal_idle_deferred(
434 __unused processor_t processor)
435 {
436 panic("Unimplemented");
437 }
438
439 void
440 machine_signal_idle_cancel(
441 __unused processor_t processor)
442 {
443 panic("Unimplemented");
444 }
445
446 static kern_return_t
447 register_cpu(
448 uint32_t lapic_id,
449 processor_t *processor_out,
450 boolean_t boot_cpu )
451 {
452 int target_cpu;
453 cpu_data_t *this_cpu_datap;
454
455 this_cpu_datap = cpu_data_alloc(boot_cpu);
456 if (this_cpu_datap == NULL) {
457 return KERN_FAILURE;
458 }
459 target_cpu = this_cpu_datap->cpu_number;
460 assert((boot_cpu && (target_cpu == 0)) ||
461 (!boot_cpu && (target_cpu != 0)));
462
463 lapic_cpu_map(lapic_id, target_cpu);
464
465 /* The cpu_id is not known at registration time. Just use the
466 * lapic_id for now.
467 */
468 this_cpu_datap->cpu_phys_number = lapic_id;
469
470 this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
471 if (this_cpu_datap->cpu_console_buf == NULL) {
472 goto failed;
473 }
474
475 #if KPC
476 if (kpc_register_cpu(this_cpu_datap) != TRUE) {
477 goto failed;
478 }
479 #endif
480
481 if (!boot_cpu) {
482 cpu_thread_alloc(this_cpu_datap->cpu_number);
483 if (this_cpu_datap->lcpu.core == NULL) {
484 goto failed;
485 }
486
487 #if NCOPY_WINDOWS > 0
488 this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu);
489 if (this_cpu_datap->cpu_pmap == NULL) {
490 goto failed;
491 }
492 #endif
493
494 this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu);
495 if (this_cpu_datap->cpu_processor == NULL) {
496 goto failed;
497 }
498 /*
499 * processor_init() deferred to topology start
500 * because "slot numbers" a.k.a. logical processor numbers
501 * are not yet finalized.
502 */
503 }
504
505 *processor_out = this_cpu_datap->cpu_processor;
506
507 return KERN_SUCCESS;
508
509 failed:
510 cpu_processor_free(this_cpu_datap->cpu_processor);
511 #if NCOPY_WINDOWS > 0
512 pmap_cpu_free(this_cpu_datap->cpu_pmap);
513 #endif
514 console_cpu_free(this_cpu_datap->cpu_console_buf);
515 #if KPC
516 kpc_unregister_cpu(this_cpu_datap);
517 #endif /* KPC */
518
519 return KERN_FAILURE;
520 }
521
522
523 kern_return_t
524 ml_processor_register(
525 cpu_id_t cpu_id,
526 uint32_t lapic_id,
527 processor_t *processor_out,
528 boolean_t boot_cpu,
529 boolean_t start )
530 {
531 static boolean_t done_topo_sort = FALSE;
532 static uint32_t num_registered = 0;
533
534 /* Register all CPUs first, and track max */
535 if (start == FALSE) {
536 num_registered++;
537
538 DBG( "registering CPU lapic id %d\n", lapic_id );
539
540 return register_cpu( lapic_id, processor_out, boot_cpu );
541 }
542
543 /* Sort by topology before we start anything */
544 if (!done_topo_sort) {
545 DBG( "about to start CPUs. %d registered\n", num_registered );
546
547 cpu_topology_sort( num_registered );
548 done_topo_sort = TRUE;
549 }
550
551 /* Assign the cpu ID */
552 uint32_t cpunum = -1;
553 cpu_data_t *this_cpu_datap = NULL;
554
555 /* find cpu num and pointer */
556 cpunum = ml_get_cpuid( lapic_id );
557
558 if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
559 panic( "trying to start invalid/unregistered CPU %d\n", lapic_id );
560 }
561
562 this_cpu_datap = cpu_datap(cpunum);
563
564 /* fix the CPU id */
565 this_cpu_datap->cpu_id = cpu_id;
566
567 /* allocate and initialize other per-cpu structures */
568 if (!boot_cpu) {
569 mp_cpus_call_cpu_init(cpunum);
570 early_random_cpu_init(cpunum);
571 }
572
573 /* output arg */
574 *processor_out = this_cpu_datap->cpu_processor;
575
576 /* OK, try and start this CPU */
577 return cpu_topology_start_cpu( cpunum );
578 }
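/*
 * Note added for reference (not part of the original source):
 * ml_processor_register() is a two-phase protocol. Calls with start == FALSE
 * only register and count CPUs via register_cpu(); the first call with
 * start == TRUE sorts the registered set with cpu_topology_sort(), after
 * which each call maps its lapic_id to a cpu number, fills in cpu_id, does
 * per-cpu init for non-boot CPUs, and starts the CPU through
 * cpu_topology_start_cpu().
 */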
579
580
581 void
582 ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
583 {
584 boolean_t os_supports_sse;
585 i386_cpu_info_t *cpuid_infop;
586
587 if (cpu_infop == NULL) {
588 return;
589 }
590
591 /*
592 * Are we supporting MMX/SSE/SSE2/SSE3?
593 * As distinct from whether the cpu has these capabilities.
594 */
595 os_supports_sse = !!(get_cr4() & CR4_OSXMM);
596
597 if (ml_fpu_avx_enabled()) {
598 cpu_infop->vector_unit = 9;
599 } else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
600 cpu_infop->vector_unit = 8;
601 } else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
602 cpu_infop->vector_unit = 7;
603 } else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
604 cpu_infop->vector_unit = 6;
605 } else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
606 cpu_infop->vector_unit = 5;
607 } else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
608 cpu_infop->vector_unit = 4;
609 } else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
610 cpu_infop->vector_unit = 3;
611 } else if (cpuid_features() & CPUID_FEATURE_MMX) {
612 cpu_infop->vector_unit = 2;
613 } else {
614 cpu_infop->vector_unit = 0;
615 }
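/*
 * Summary of the vector_unit encoding above (comment added for reference,
 * not part of the original source):
 *   9 = AVX, 8 = SSE4.2, 7 = SSE4.1, 6 = SSSE3, 5 = SSE3, 4 = SSE2,
 *   3 = SSE, 2 = MMX, 0 = none. The SSE-family levels also require that
 *   the OS has enabled SSE (os_supports_sse, i.e. CR4_OSXMM set).
 */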
616
617 cpuid_infop = cpuid_info();
618
619 cpu_infop->cache_line_size = cpuid_infop->cache_linesize;
620
621 cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
622 cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];
623
624 if (cpuid_infop->cache_size[L2U] > 0) {
625 cpu_infop->l2_settings = 1;
626 cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
627 } else {
628 cpu_infop->l2_settings = 0;
629 cpu_infop->l2_cache_size = 0xFFFFFFFF;
630 }
631
632 if (cpuid_infop->cache_size[L3U] > 0) {
633 cpu_infop->l3_settings = 1;
634 cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
635 } else {
636 cpu_infop->l3_settings = 0;
637 cpu_infop->l3_cache_size = 0xFFFFFFFF;
638 }
639 }
640
641 void
642 ml_init_max_cpus(unsigned long max_cpus)
643 {
644 boolean_t current_state;
645
646 current_state = ml_set_interrupts_enabled(FALSE);
647 if (max_cpus_initialized != MAX_CPUS_SET) {
648 if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
649 /*
650 * Note: max_cpus is the number of enabled processors
651 * that ACPI found; max_ncpus is the maximum number
652 * that the kernel supports or that the "cpus="
653 * boot-arg has set. Here we take the minimum.
654 */
655 machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
656 }
657 if (max_cpus_initialized == MAX_CPUS_WAIT) {
658 wakeup((event_t)&max_cpus_initialized);
659 }
660 max_cpus_initialized = MAX_CPUS_SET;
661 }
662 (void) ml_set_interrupts_enabled(current_state);
663 }
664
665 int
666 ml_get_max_cpus(void)
667 {
668 boolean_t current_state;
669
670 current_state = ml_set_interrupts_enabled(FALSE);
671 if (max_cpus_initialized != MAX_CPUS_SET) {
672 max_cpus_initialized = MAX_CPUS_WAIT;
673 assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT);
674 (void)thread_block(THREAD_CONTINUE_NULL);
675 }
676 (void) ml_set_interrupts_enabled(current_state);
677 return machine_info.max_cpus;
678 }
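/*
 * Note added for reference (not part of the original source):
 * ml_init_max_cpus() and ml_get_max_cpus() form a handshake on
 * max_cpus_initialized. A caller of ml_get_max_cpus() arriving before the
 * CPU count is known marks the state MAX_CPUS_WAIT and blocks;
 * ml_init_max_cpus() records MIN(max_cpus, max_ncpus) in
 * machine_info.max_cpus, wakes any waiter, and sets MAX_CPUS_SET.
 */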
679
680 boolean_t
681 ml_wants_panic_trap_to_debugger(void)
682 {
683 return FALSE;
684 }
685
686 void
687 ml_panic_trap_to_debugger(__unused const char *panic_format_str,
688 __unused va_list *panic_args,
689 __unused unsigned int reason,
690 __unused void *ctx,
691 __unused uint64_t panic_options_mask,
692 __unused unsigned long panic_caller)
693 {
694 return;
695 }
696
697 /*
698 * Routine: ml_init_lock_timeout
699 * Function: Initialize lock, TLB-flush and spin timeouts from defaults and boot-args.
700 */
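/*
 * Boot-args consumed below (summary added for reference, not part of the
 * original source): slto_us, tlbto_us, phyreadmaxus, phywritemaxus,
 * tracephyreadus, tracephywriteus and mtxspin take microseconds;
 * panic_restart_timeout takes seconds; vti is a left-shift count applied
 * to the timeouts when running virtualized.
 */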
701 void
702 ml_init_lock_timeout(void)
703 {
704 uint64_t abstime;
705 uint32_t mtxspin;
706 #if DEVELOPMENT || DEBUG
707 uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
708 #else
709 uint64_t default_timeout_ns = NSEC_PER_SEC >> 1;
710 #endif
711 uint32_t slto;
712 uint32_t prt;
713
714 if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
715 default_timeout_ns = slto * NSEC_PER_USEC;
716 }
717
718 /*
719 * LockTimeOut is in absolute-time units, LockTimeOutTSC is in TSC ticks,
720 * and LockTimeOutUsec is a 32-bit value in microseconds.
721 */
722 LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
723 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
724 LockTimeOut = abstime;
725 LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);
726
727 /*
728 * TLBTimeOut dictates the TLB flush timeout period. It defaults to
729 * LockTimeOut but can be overridden separately. In particular, a
730 * zero value inhibits the timeout-panic and cuts a trace event instead
731 * - see pmap_flush_tlbs().
732 */
733 if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
734 default_timeout_ns = slto * NSEC_PER_USEC;
735 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
736 TLBTimeOut = (uint32_t) abstime;
737 } else {
738 TLBTimeOut = LockTimeOut;
739 }
740
741 #if DEVELOPMENT || DEBUG
742 reportphyreaddelayabs = LockTimeOut >> 1;
743 #endif
744 if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
745 default_timeout_ns = slto * NSEC_PER_USEC;
746 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
747 reportphyreaddelayabs = abstime;
748 }
749
750 if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
751 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
752 reportphywritedelayabs = abstime;
753 }
754
755 if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
756 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
757 tracephyreaddelayabs = abstime;
758 }
759
760 if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
761 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
762 tracephywritedelayabs = abstime;
763 }
764
765 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
766 if (mtxspin > USEC_PER_SEC >> 4) {
767 mtxspin = USEC_PER_SEC >> 4;
768 }
769 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
770 } else {
771 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
772 }
773 MutexSpin = (unsigned int)abstime;
774
775 nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
776 if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
777 nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
778 }
779
780 virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
781 if (virtualized) {
782 int vti;
783
784 if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
785 vti = 6;
786 }
787 printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
788 kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
789 #define VIRTUAL_TIMEOUT_INFLATE64(_timeout) \
790 MACRO_BEGIN \
791 kprintf("%24s: 0x%016llx ", #_timeout, _timeout); \
792 _timeout <<= vti; \
793 kprintf("-> 0x%016llx\n", _timeout); \
794 MACRO_END
795 #define VIRTUAL_TIMEOUT_INFLATE32(_timeout) \
796 MACRO_BEGIN \
797 kprintf("%24s: 0x%08x ", #_timeout, _timeout); \
798 if ((_timeout <<vti) >> vti == _timeout) \
799 _timeout <<= vti; \
800 else \
801 _timeout = ~0; /* cap rather than overflow */ \
802 kprintf("-> 0x%08x\n", _timeout); \
803 MACRO_END
804 VIRTUAL_TIMEOUT_INFLATE32(LockTimeOutUsec);
805 VIRTUAL_TIMEOUT_INFLATE64(LockTimeOut);
806 VIRTUAL_TIMEOUT_INFLATE64(LockTimeOutTSC);
807 VIRTUAL_TIMEOUT_INFLATE64(TLBTimeOut);
808 VIRTUAL_TIMEOUT_INFLATE64(MutexSpin);
809 VIRTUAL_TIMEOUT_INFLATE64(reportphyreaddelayabs);
810 }
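/*
 * Illustrative arithmetic (not part of the original source): with the
 * default vti of 6 each timeout above is shifted left by 6, i.e. scaled by
 * 2^6 = 64; the 32-bit LockTimeOutUsec is clamped to ~0 instead if the
 * shift would overflow.
 */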
811
812 interrupt_latency_tracker_setup();
813 simple_lock_init(&ml_timer_evaluation_slock, 0);
814 }
815
816 /*
817 * Threshold above which we should attempt to block
818 * instead of spinning for clock_delay_until().
819 */
820
821 void
822 ml_init_delay_spin_threshold(int threshold_us)
823 {
824 nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
825 }
826
827 boolean_t
828 ml_delay_should_spin(uint64_t interval)
829 {
830 return (interval < delay_spin_threshold) ? TRUE : FALSE;
831 }
832
833 uint32_t yield_delay_us = 0;
834
835 void
836 ml_delay_on_yield(void)
837 {
838 #if DEVELOPMENT || DEBUG
839 if (yield_delay_us) {
840 delay(yield_delay_us);
841 }
842 #endif
843 }
844
845 /*
846 * This is called from the machine-independent layer
847 * to perform machine-dependent info updates. Defer to cpu_thread_init().
848 */
849 void
850 ml_cpu_up(void)
851 {
852 return;
853 }
854
855 /*
856 * This is called from the machine-independent layer
857 * to perform machine-dependent info updates.
858 */
859 void
860 ml_cpu_down(void)
861 {
862 i386_deactivate_cpu();
863
864 return;
865 }
866
867 /*
868 * The following are required for parts of the kernel
869 * that cannot resolve these functions as inlines:
870 */
871 extern thread_t current_act(void);
872 thread_t
873 current_act(void)
874 {
875 return current_thread_fast();
876 }
877
878 #undef current_thread
879 extern thread_t current_thread(void);
880 thread_t
881 current_thread(void)
882 {
883 return current_thread_fast();
884 }
885
886
887 boolean_t
888 ml_is64bit(void)
889 {
890 return cpu_mode_is64bit();
891 }
892
893
894 boolean_t
895 ml_thread_is64bit(thread_t thread)
896 {
897 return thread_is_64bit_addr(thread);
898 }
899
900
901 boolean_t
902 ml_state_is64bit(void *saved_state)
903 {
904 return is_saved_state64(saved_state);
905 }
906
907 void
908 ml_cpu_set_ldt(int selector)
909 {
910 /*
911 * Avoid loading the LDT
912 * if we're setting the KERNEL LDT and it's already set.
913 */
914 if (selector == KERNEL_LDT &&
915 current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
916 return;
917 }
918
919 lldt(selector);
920 current_cpu_datap()->cpu_ldt = selector;
921 }
922
923 void
924 ml_fp_setvalid(boolean_t value)
925 {
926 fp_setvalid(value);
927 }
928
929 uint64_t
930 ml_cpu_int_event_time(void)
931 {
932 return current_cpu_datap()->cpu_int_event_time;
933 }
934
935 vm_offset_t
936 ml_stack_remaining(void)
937 {
938 uintptr_t local = (uintptr_t) &local;
939
940 if (ml_at_interrupt_context() != 0) {
941 return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
942 } else {
943 return local - current_thread()->kernel_stack;
944 }
945 }
946
947 #if KASAN
948 vm_offset_t ml_stack_base(void);
949 vm_size_t ml_stack_size(void);
950
951 vm_offset_t
952 ml_stack_base(void)
953 {
954 if (ml_at_interrupt_context()) {
955 return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
956 } else {
957 return current_thread()->kernel_stack;
958 }
959 }
960
961 vm_size_t
962 ml_stack_size(void)
963 {
964 if (ml_at_interrupt_context()) {
965 return INTSTACK_SIZE;
966 } else {
967 return kernel_stack_size;
968 }
969 }
970 #endif
971
972 void
973 kernel_preempt_check(void)
974 {
975 boolean_t intr;
976 unsigned long flags;
977
978 assert(get_preemption_level() == 0);
979
980 if (__improbable(*ast_pending() & AST_URGENT)) {
981 /*
982 * can handle interrupts and preemptions
983 * at this point
984 */
985 __asm__ volatile ("pushf; pop %0" : "=r" (flags));
986
987 intr = ((flags & EFL_IF) != 0);
988
989 /*
990 * now cause the PRE-EMPTION trap
991 */
992 if (intr == TRUE) {
993 __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
994 }
995 }
996 }
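/*
 * Note added for reference (not part of the original source): with the
 * preemption level at 0 and AST_URGENT pending, kernel_preempt_check()
 * takes the T_PREEMPT software interrupt, but only if EFLAGS.IF shows that
 * interrupts are currently enabled.
 */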
997
998 boolean_t
999 machine_timeout_suspended(void)
1000 {
1001 return pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake();
1002 }
1003
1004 /* Eagerly evaluate all pending timer and thread callouts
1005 */
1006 void
1007 ml_timer_evaluate(void)
1008 {
1009 KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);
1010
1011 uint64_t te_end, te_start = mach_absolute_time();
1012 simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
1013 ml_timer_evaluation_in_progress = TRUE;
1014 thread_call_delayed_timer_rescan_all();
1015 mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
1016 ml_timer_evaluation_in_progress = FALSE;
1017 ml_timer_eager_evaluations++;
1018 te_end = mach_absolute_time();
1019 ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
1020 simple_unlock(&ml_timer_evaluation_slock);
1021
1022 KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
1023 }
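/*
 * Note added for reference (not part of the original source):
 * ml_timer_evaluate() rescans the thread-call delayed timers and asks every
 * CPU (CPUMASK_ALL) to rescan its timer queue, counting invocations in
 * ml_timer_eager_evaluations and tracking the longest rescan in
 * ml_timer_eager_evaluation_max.
 */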
1024
1025 boolean_t
1026 ml_timer_forced_evaluation(void)
1027 {
1028 return ml_timer_evaluation_in_progress;
1029 }
1030
1031 /* 32-bit right-rotate n bits */
1032 static inline uint32_t
1033 ror32(uint32_t val, const unsigned int n)
1034 {
1035 __asm__ volatile ("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n));
1036 return val;
1037 }
1038
1039 void
1040 ml_entropy_collect(void)
1041 {
1042 uint32_t tsc_lo, tsc_hi;
1043 uint32_t *ep;
1044
1045 assert(cpu_number() == master_cpu);
1046
1047 /* update buffer pointer cyclically */
1048 if (EntropyData.index_ptr - EntropyData.buffer == ENTROPY_BUFFER_SIZE) {
1049 ep = EntropyData.index_ptr = EntropyData.buffer;
1050 } else {
1051 ep = EntropyData.index_ptr++;
1052 }
1053
1054 rdtsc_nofence(tsc_lo, tsc_hi);
1055 *ep = ror32(*ep, 9) ^ tsc_lo;
1056 }
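/*
 * Note added for reference (not part of the original source):
 * ml_entropy_collect() treats EntropyData.buffer as a ring of 32-bit words,
 * wrapping the index at ENTROPY_BUFFER_SIZE. Each call mixes the low 32
 * bits of the TSC into the current word with a rotate-and-XOR:
 *
 *     *ep = ror32(*ep, 9) ^ tsc_lo;   // tsc_lo from rdtsc_nofence()
 */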
1057
1058 uint64_t
1059 ml_energy_stat(__unused thread_t t)
1060 {
1061 return 0;
1062 }
1063
1064 void
1065 ml_gpu_stat_update(uint64_t gpu_ns_delta)
1066 {
1067 current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
1068 }
1069
1070 uint64_t
1071 ml_gpu_stat(thread_t t)
1072 {
1073 return t->machine.thread_gpu_ns;
1074 }
1075
1076 int plctrace_enabled = 0;
1077
1078 void
1079 _disable_preemption(void)
1080 {
1081 disable_preemption_internal();
1082 }
1083
1084 void
1085 _enable_preemption(void)
1086 {
1087 enable_preemption_internal();
1088 }
1089
1090 void
1091 plctrace_disable(void)
1092 {
1093 plctrace_enabled = 0;
1094 }
1095
1096 static boolean_t ml_quiescing;
1097
1098 void
1099 ml_set_is_quiescing(boolean_t quiescing)
1100 {
1101 assert(FALSE == ml_get_interrupts_enabled());
1102 ml_quiescing = quiescing;
1103 }
1104
1105 boolean_t
1106 ml_is_quiescing(void)
1107 {
1108 assert(FALSE == ml_get_interrupts_enabled());
1109 return ml_quiescing;
1110 }
1111
1112 uint64_t
1113 ml_get_booter_memory_size(void)
1114 {
1115 return 0;
1116 }