1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <i386/machine_routines.h>
30 #include <i386/io_map_entries.h>
31 #include <i386/cpuid.h>
32 #include <i386/fpu.h>
33 #include <mach/processor.h>
34 #include <kern/processor.h>
35 #include <kern/machine.h>
36
37 #include <kern/cpu_number.h>
38 #include <kern/thread.h>
39 #include <kern/thread_call.h>
40 #include <kern/policy_internal.h>
41
42 #include <prng/random.h>
43 #include <i386/machine_cpu.h>
44 #include <i386/lapic.h>
45 #include <i386/bit_routines.h>
46 #include <i386/mp_events.h>
47 #include <i386/pmCPU.h>
48 #include <i386/trap.h>
49 #include <i386/tsc.h>
50 #include <i386/cpu_threads.h>
51 #include <i386/proc_reg.h>
52 #include <mach/vm_param.h>
53 #include <i386/pmap.h>
54 #include <i386/pmap_internal.h>
55 #include <i386/misc_protos.h>
56 #include <kern/timer_queue.h>
57 #if KPC
58 #include <kern/kpc.h>
59 #endif
60 #include <architecture/i386/pio.h>
61 #include <i386/cpu_data.h>
62 #if DEBUG
63 #define DBG(x...) kprintf("DBG: " x)
64 #else
65 #define DBG(x...)
66 #endif
67
68 #if MONOTONIC
69 #include <kern/monotonic.h>
70 #endif /* MONOTONIC */
71
72 extern void wakeup(void *);
73
74 static int max_cpus_initialized = 0;
75
76 uint64_t LockTimeOut;
77 uint64_t TLBTimeOut;
78 uint64_t LockTimeOutTSC;
79 uint32_t LockTimeOutUsec;
80 uint64_t MutexSpin;
81 uint64_t low_MutexSpin;
82 int64_t high_MutexSpin;
83 uint64_t LastDebuggerEntryAllowance;
84 uint64_t delay_spin_threshold;
85
86 extern uint64_t panic_restart_timeout;
87
88 boolean_t virtualized = FALSE;
89
90 decl_simple_lock_data(static, ml_timer_evaluation_slock);
91 uint32_t ml_timer_eager_evaluations;
92 uint64_t ml_timer_eager_evaluation_max;
93 static boolean_t ml_timer_evaluation_in_progress = FALSE;
94
95
96 #define MAX_CPUS_SET 0x1
97 #define MAX_CPUS_WAIT 0x2
98
99 /* IO memory map services */
100
101 /* Map memory-mapped IO space */
102 vm_offset_t
103 ml_io_map(
104 vm_offset_t phys_addr,
105 vm_size_t size)
106 {
107 return io_map(phys_addr, size, VM_WIMG_IO);
108 }
109
110 /* boot memory allocation */
111 vm_offset_t
112 ml_static_malloc(
113 __unused vm_size_t size)
114 {
115 return (vm_offset_t)NULL;
116 }
117
118
119 void
120 ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
121 {
122 *phys_addr = 0;
123 *size = 0;
124 }
125
126
127 vm_offset_t
128 ml_static_ptovirt(
129 vm_offset_t paddr)
130 {
131 #if defined(__x86_64__)
132 return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
133 #else
134 return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
135 #endif
136 }
137
138 vm_offset_t
139 ml_static_slide(
140 vm_offset_t vaddr)
141 {
142 return VM_KERNEL_SLIDE(vaddr);
143 }
144
145 vm_offset_t
146 ml_static_unslide(
147 vm_offset_t vaddr)
148 {
149 return VM_KERNEL_UNSLIDE(vaddr);
150 }
151
152 /*
153 * Reclaim memory, by virtual address, that was used in early boot and is no longer needed
154 * by the kernel.
155 */
156 void
157 ml_static_mfree(
158 vm_offset_t vaddr,
159 vm_size_t size)
160 {
161 addr64_t vaddr_cur;
162 ppnum_t ppn;
163 uint32_t freed_pages = 0;
164 vm_size_t map_size;
165
166 assert(vaddr >= VM_MIN_KERNEL_ADDRESS);
167
168 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
169
170 for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) {
171 map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur);
172
173 /* just skip if nothing mapped here */
174 if (map_size == 0) {
175 vaddr_cur += PAGE_SIZE;
176 continue;
177 }
178
179 /*
180 * Can't free from the middle of a large page.
181 */
182 assert((vaddr_cur & (map_size - 1)) == 0);
183
184 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
185 assert(ppn != (ppnum_t)NULL);
186
187 pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size);
188 while (map_size > 0) {
189 if (++kernel_pmap->stats.resident_count > kernel_pmap->stats.resident_max) {
190 kernel_pmap->stats.resident_max = kernel_pmap->stats.resident_count;
191 }
192
193 assert(pmap_valid_page(ppn));
194 if (IS_MANAGED_PAGE(ppn)) {
195 vm_page_create(ppn, (ppn + 1));
196 freed_pages++;
197 }
198 map_size -= PAGE_SIZE;
199 vaddr_cur += PAGE_SIZE;
200 ppn++;
201 }
202 }
203 vm_page_lockspin_queues();
204 vm_page_wire_count -= freed_pages;
205 vm_page_wire_count_initial -= freed_pages;
206 if (vm_page_wire_count_on_boot != 0) {
207 assert(vm_page_wire_count_on_boot >= freed_pages);
208 vm_page_wire_count_on_boot -= freed_pages;
209 }
210 vm_page_unlock_queues();
211
212 #if DEBUG
213 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
214 #endif
215 }
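
/*
 * Usage sketch (kept under #if 0, not compiled): releasing a page-aligned
 * region that was only needed during early boot. The symbols
 * bootstrap_table_base and bootstrap_table_size are hypothetical.
 */
#if 0
	assert((bootstrap_table_base & PAGE_MASK) == 0);
	ml_static_mfree(bootstrap_table_base, bootstrap_table_size);
#endif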
216
217
218 /* virtual to physical on wired pages */
219 vm_offset_t
220 ml_vtophys(
221 vm_offset_t vaddr)
222 {
223 return (vm_offset_t)kvtophys(vaddr);
224 }
225
226 /*
227 * Routine: ml_nofault_copy
228 * Function: Perform a physical mode copy if the source and
229 * destination have valid translations in the kernel pmap.
230 * If translations are present, they are assumed to
231 * be wired; i.e. no attempt is made to guarantee that the
232 * translations obtained remain valid for
233 * the duration of the copy process.
234 */
235
236 vm_size_t
237 ml_nofault_copy(
238 vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
239 {
240 addr64_t cur_phys_dst, cur_phys_src;
241 uint32_t count, nbytes = 0;
242
243 while (size > 0) {
244 if (!(cur_phys_src = kvtophys(virtsrc))) {
245 break;
246 }
247 if (!(cur_phys_dst = kvtophys(virtdst))) {
248 break;
249 }
250 if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
251 break;
252 }
253 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
254 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
255 count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
256 }
257 if (count > size) {
258 count = (uint32_t)size;
259 }
260
261 bcopy_phys(cur_phys_src, cur_phys_dst, count);
262
263 nbytes += count;
264 virtsrc += count;
265 virtdst += count;
266 size -= count;
267 }
268
269 return nbytes;
270 }
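
/*
 * Usage sketch (kept under #if 0, not compiled): a caller that wants a
 * best-effort copy from a kernel virtual address that may be unmapped checks
 * the byte count returned by ml_nofault_copy() rather than assuming the full
 * copy succeeded. The wrapper below and its parameters are hypothetical.
 */
#if 0
static boolean_t
copy_if_mapped(vm_offset_t src, vm_offset_t dst, vm_size_t len)
{
	/* ml_nofault_copy() stops early at the first untranslated page */
	vm_size_t copied = ml_nofault_copy(src, dst, len);

	return (copied == len) ? TRUE : FALSE;
}
#endif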
271
272 /*
273 * Routine: ml_validate_nofault
274 * Function: Validate that this address range has valid translations
275 * in the kernel pmap. If translations are present, they are
276 * assumed to be wired; i.e. no attempt is made to guarantee
277 * that the translations persist after the check.
278 * Returns: TRUE if the range is mapped and will not cause a fault,
279 * FALSE otherwise.
280 */
281
282 boolean_t
283 ml_validate_nofault(
284 vm_offset_t virtsrc, vm_size_t size)
285 {
286 addr64_t cur_phys_src;
287 uint32_t count;
288
289 while (size > 0) {
290 if (!(cur_phys_src = kvtophys(virtsrc))) {
291 return FALSE;
292 }
293 if (!pmap_valid_page(i386_btop(cur_phys_src))) {
294 return FALSE;
295 }
296 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
297 if (count > size) {
298 count = (uint32_t)size;
299 }
300
301 virtsrc += count;
302 size -= count;
303 }
304
305 return TRUE;
306 }
307
308 /* Interrupt handling */
309
310 /* Initialize Interrupts */
311 void
312 ml_init_interrupt(void)
313 {
314 (void) ml_set_interrupts_enabled(TRUE);
315 }
316
317
318 /* Get Interrupts Enabled */
319 boolean_t
320 ml_get_interrupts_enabled(void)
321 {
322 unsigned long flags;
323
324 __asm__ volatile ("pushf; pop %0": "=r" (flags));
325 return (flags & EFL_IF) != 0;
326 }
327
328 /* Set Interrupts Enabled */
329 boolean_t
330 ml_set_interrupts_enabled(boolean_t enable)
331 {
332 unsigned long flags;
333 boolean_t istate;
334
335 __asm__ volatile ("pushf; pop %0" : "=r" (flags));
336
337 assert(get_interrupt_level() ? (enable == FALSE) : TRUE);
338
339 istate = ((flags & EFL_IF) != 0);
340
341 if (enable) {
342 __asm__ volatile ("sti;nop");
343
344 if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
345 __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
346 }
347 } else {
348 if (istate) {
349 __asm__ volatile ("cli");
350 }
351 }
352
353 return istate;
354 }
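
/*
 * Illustrative sketch (kept under #if 0, not compiled): the usual pattern for
 * a critical section is to save the previous interrupt state and restore it,
 * rather than unconditionally re-enabling, so nested callers behave correctly.
 * This mirrors the usage elsewhere in this file.
 */
#if 0
	boolean_t istate = ml_set_interrupts_enabled(FALSE);

	/* ... interrupt-free work ... */

	(void) ml_set_interrupts_enabled(istate);	/* restore, don't force TRUE */
#endif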
355
356 /* Early Set Interrupts Enabled */
357 boolean_t
358 ml_early_set_interrupts_enabled(boolean_t enable)
359 {
360 if (enable == TRUE) {
361 kprintf("Caller attempted to enable interrupts too early in "
362 "kernel startup. Halting.\n");
363 hlt();
364 /*NOTREACHED*/
365 }
366
367 /* On x86, do not allow interrupts to be enabled very early */
368 return FALSE;
369 }
370
371 /* Check if running in interrupt context */
372 boolean_t
373 ml_at_interrupt_context(void)
374 {
375 return get_interrupt_level() != 0;
376 }
377
378 void
379 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
380 {
381 *icp = (get_interrupt_level() != 0);
382 /* These will be technically inaccurate for interrupts that occur
383 * successively within a single "idle exit" event, but shouldn't
384 * matter statistically.
385 */
386 *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
387 }
388
389 /* Generate a fake interrupt */
390 __dead2
391 void
392 ml_cause_interrupt(void)
393 {
394 panic("ml_cause_interrupt not defined yet on Intel");
395 }
396
397 /*
398 * TODO: transition users of this to kernel_thread_start_priority
399 * ml_thread_policy is an unsupported KPI
400 */
401 void
402 ml_thread_policy(
403 thread_t thread,
404 __unused unsigned policy_id,
405 unsigned policy_info)
406 {
407 if (policy_info & MACHINE_NETWORK_WORKLOOP) {
408 thread_precedence_policy_data_t info;
409 __assert_only kern_return_t kret;
410
411 info.importance = 1;
412
413 kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
414 (thread_policy_t)&info,
415 THREAD_PRECEDENCE_POLICY_COUNT);
416 assert(kret == KERN_SUCCESS);
417 }
418 }
419
420 /* Install an interrupt handler */
421 void
422 ml_install_interrupt_handler(
423 void *nub,
424 int source,
425 void *target,
426 IOInterruptHandler handler,
427 void *refCon)
428 {
429 boolean_t current_state;
430
431 current_state = ml_set_interrupts_enabled(FALSE);
432
433 PE_install_interrupt_handler(nub, source, target,
434 (IOInterruptHandler) handler, refCon);
435
436 (void) ml_set_interrupts_enabled(current_state);
437
438 initialize_screen(NULL, kPEAcquireScreen);
439 }
440
441
442 void
443 machine_signal_idle(
444 processor_t processor)
445 {
446 cpu_interrupt(processor->cpu_id);
447 }
448
449 __dead2
450 void
451 machine_signal_idle_deferred(
452 __unused processor_t processor)
453 {
454 panic("Unimplemented");
455 }
456
457 __dead2
458 void
459 machine_signal_idle_cancel(
460 __unused processor_t processor)
461 {
462 panic("Unimplemented");
463 }
464
465 static kern_return_t
466 register_cpu(
467 uint32_t lapic_id,
468 processor_t *processor_out,
469 boolean_t boot_cpu )
470 {
471 int target_cpu;
472 cpu_data_t *this_cpu_datap;
473
474 this_cpu_datap = cpu_data_alloc(boot_cpu);
475 if (this_cpu_datap == NULL) {
476 return KERN_FAILURE;
477 }
478 target_cpu = this_cpu_datap->cpu_number;
479 assert((boot_cpu && (target_cpu == 0)) ||
480 (!boot_cpu && (target_cpu != 0)));
481
482 lapic_cpu_map(lapic_id, target_cpu);
483
484 /* The cpu_id is not known at registration time. Just use the
485 * lapic_id for now.
486 */
487 this_cpu_datap->cpu_phys_number = lapic_id;
488
489 this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
490 if (this_cpu_datap->cpu_console_buf == NULL) {
491 goto failed;
492 }
493
494 #if KPC
495 if (kpc_register_cpu(this_cpu_datap) != TRUE) {
496 goto failed;
497 }
498 #endif
499
500 if (!boot_cpu) {
501 cpu_thread_alloc(this_cpu_datap->cpu_number);
502 if (this_cpu_datap->lcpu.core == NULL) {
503 goto failed;
504 }
505
506 #if NCOPY_WINDOWS > 0
507 this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu);
508 if (this_cpu_datap->cpu_pmap == NULL) {
509 goto failed;
510 }
511 #endif
512
513 this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu);
514 if (this_cpu_datap->cpu_processor == NULL) {
515 goto failed;
516 }
517 /*
518 * processor_init() deferred to topology start
519 * because "slot numbers" a.k.a. logical processor numbers
520 * are not yet finalized.
521 */
522 }
523
524 *processor_out = this_cpu_datap->cpu_processor;
525
526 return KERN_SUCCESS;
527
528 failed:
529 cpu_processor_free(this_cpu_datap->cpu_processor);
530 #if NCOPY_WINDOWS > 0
531 pmap_cpu_free(this_cpu_datap->cpu_pmap);
532 #endif
533 console_cpu_free(this_cpu_datap->cpu_console_buf);
534 #if KPC
535 kpc_unregister_cpu(this_cpu_datap);
536 #endif /* KPC */
537
538 return KERN_FAILURE;
539 }
540
541
542 kern_return_t
543 ml_processor_register(
544 cpu_id_t cpu_id,
545 uint32_t lapic_id,
546 processor_t *processor_out,
547 boolean_t boot_cpu,
548 boolean_t start )
549 {
550 static boolean_t done_topo_sort = FALSE;
551 static uint32_t num_registered = 0;
552
553 /* Register all CPUs first, and track max */
554 if (start == FALSE) {
555 num_registered++;
556
557 DBG( "registering CPU lapic id %d\n", lapic_id );
558
559 return register_cpu( lapic_id, processor_out, boot_cpu );
560 }
561
562 /* Sort by topology before we start anything */
563 if (!done_topo_sort) {
564 DBG( "about to start CPUs. %d registered\n", num_registered );
565
566 cpu_topology_sort( num_registered );
567 done_topo_sort = TRUE;
568 }
569
570 /* Assign the cpu ID */
571 uint32_t cpunum = -1;
572 cpu_data_t *this_cpu_datap = NULL;
573
574 /* find cpu num and pointer */
575 cpunum = ml_get_cpuid( lapic_id );
576
577 if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
578 panic( "trying to start invalid/unregistered CPU %d\n", lapic_id );
579 }
580
581 this_cpu_datap = cpu_datap(cpunum);
582
583 /* fix the CPU id */
584 this_cpu_datap->cpu_id = cpu_id;
585
586 /* allocate and initialize other per-cpu structures */
587 if (!boot_cpu) {
588 mp_cpus_call_cpu_init(cpunum);
589 random_cpu_init(cpunum);
590 }
591
592 /* output arg */
593 *processor_out = this_cpu_datap->cpu_processor;
594
595 /* OK, try and start this CPU */
596 return cpu_topology_start_cpu( cpunum );
597 }
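
/*
 * Illustrative sketch (kept under #if 0, not compiled): the platform code is
 * expected to call ml_processor_register() twice per CPU - once with
 * start == FALSE to register it, and later with start == TRUE to sort the
 * topology and bring it up. The ncpus, lapic_ids[], cpu_ids[] and procs[]
 * names below are hypothetical.
 */
#if 0
	processor_t	procs[MAX_CPUS];

	for (i = 0; i < ncpus; i++) {		/* phase 1: register everything */
		ml_processor_register(NULL, lapic_ids[i], &procs[i],
		    (i == 0), FALSE);
	}
	for (i = 0; i < ncpus; i++) {		/* phase 2: sort topology, then start */
		ml_processor_register(cpu_ids[i], lapic_ids[i], &procs[i],
		    (i == 0), TRUE);
	}
#endif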
598
599
600 void
601 ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
602 {
603 boolean_t os_supports_sse;
604 i386_cpu_info_t *cpuid_infop;
605
606 if (cpu_infop == NULL) {
607 return;
608 }
609
610 /*
611 * Does the OS support MMX/SSE/SSE2/SSE3,
612 * as distinct from whether the CPU has these capabilities?
613 */
614 os_supports_sse = !!(get_cr4() & CR4_OSXMM);
615
616 if (ml_fpu_avx_enabled()) {
617 cpu_infop->vector_unit = 9;
618 } else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
619 cpu_infop->vector_unit = 8;
620 } else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
621 cpu_infop->vector_unit = 7;
622 } else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
623 cpu_infop->vector_unit = 6;
624 } else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
625 cpu_infop->vector_unit = 5;
626 } else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
627 cpu_infop->vector_unit = 4;
628 } else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
629 cpu_infop->vector_unit = 3;
630 } else if (cpuid_features() & CPUID_FEATURE_MMX) {
631 cpu_infop->vector_unit = 2;
632 } else {
633 cpu_infop->vector_unit = 0;
634 }
635
636 cpuid_infop = cpuid_info();
637
638 cpu_infop->cache_line_size = cpuid_infop->cache_linesize;
639
640 cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
641 cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];
642
643 if (cpuid_infop->cache_size[L2U] > 0) {
644 cpu_infop->l2_settings = 1;
645 cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
646 } else {
647 cpu_infop->l2_settings = 0;
648 cpu_infop->l2_cache_size = 0xFFFFFFFF;
649 }
650
651 if (cpuid_infop->cache_size[L3U] > 0) {
652 cpu_infop->l3_settings = 1;
653 cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
654 } else {
655 cpu_infop->l3_settings = 0;
656 cpu_infop->l3_cache_size = 0xFFFFFFFF;
657 }
658 }
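
/*
 * Usage sketch (kept under #if 0, not compiled): a caller fills an
 * ml_cpu_info_t on the stack and inspects the reported vector unit and
 * cache geometry, as populated by the ladder above.
 */
#if 0
	ml_cpu_info_t info;

	ml_cpu_get_info(&info);
	if (info.vector_unit >= 9) {
		/* the OS has AVX (or later) enabled */
	}
	if (info.l3_settings == 0) {
		/* no L3 cache: l3_cache_size is reported as 0xFFFFFFFF */
	}
#endif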
659
660 void
661 ml_init_max_cpus(unsigned long max_cpus)
662 {
663 boolean_t current_state;
664
665 current_state = ml_set_interrupts_enabled(FALSE);
666 if (max_cpus_initialized != MAX_CPUS_SET) {
667 if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
668 /*
669 * Note: max_cpus is the number of enabled processors
670 * that ACPI found; max_ncpus is the maximum number
671 * that the kernel supports or that the "cpus="
672 * boot-arg has set. Here we take the minimum of the two.
673 */
674 machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
675 }
676 if (max_cpus_initialized == MAX_CPUS_WAIT) {
677 wakeup((event_t)&max_cpus_initialized);
678 }
679 max_cpus_initialized = MAX_CPUS_SET;
680 }
681 (void) ml_set_interrupts_enabled(current_state);
682 }
683
684 int
685 ml_get_max_cpus(void)
686 {
687 boolean_t current_state;
688
689 current_state = ml_set_interrupts_enabled(FALSE);
690 if (max_cpus_initialized != MAX_CPUS_SET) {
691 max_cpus_initialized = MAX_CPUS_WAIT;
692 assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT);
693 (void)thread_block(THREAD_CONTINUE_NULL);
694 }
695 (void) ml_set_interrupts_enabled(current_state);
696 return machine_info.max_cpus;
697 }
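
/*
 * Illustrative sketch (kept under #if 0, not compiled): callers that run
 * before ACPI has reported the processor set simply block inside
 * ml_get_max_cpus() until ml_init_max_cpus() posts the wakeup above.
 */
#if 0
	int ncpu = ml_get_max_cpus();	/* may block until initialization */

	for (int i = 0; i < ncpu; i++) {
		/* size per-CPU resources, etc. */
	}
#endif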
698
699 boolean_t
700 ml_wants_panic_trap_to_debugger(void)
701 {
702 return FALSE;
703 }
704
705 void
706 ml_panic_trap_to_debugger(__unused const char *panic_format_str,
707 __unused va_list *panic_args,
708 __unused unsigned int reason,
709 __unused void *ctx,
710 __unused uint64_t panic_options_mask,
711 __unused unsigned long panic_caller)
712 {
713 return;
714 }
715
716 /*
717 * Routine: ml_init_lock_timeout
718 * Function:
719 */
720 void
721 ml_init_lock_timeout(void)
722 {
723 uint64_t abstime;
724 uint32_t mtxspin;
725 #if DEVELOPMENT || DEBUG
726 uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
727 #else
728 uint64_t default_timeout_ns = NSEC_PER_SEC >> 1;
729 #endif
730 uint32_t slto;
731 uint32_t prt;
732
733 if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
734 default_timeout_ns = slto * NSEC_PER_USEC;
735 }
736
737 /*
738 * LockTimeOut is in absolute time, LockTimeOutTSC is in TSC ticks,
739 * and LockTimeOutUsec is in microseconds (a 32-bit value).
740 */
741 LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
742 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
743 LockTimeOut = abstime;
744 LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);
745
746 /*
747 * TLBTimeOut dictates the TLB flush timeout period. It defaults to
748 * LockTimeOut but can be overridden separately. In particular, a
749 * zero value inhibits the timeout panic and cuts a trace event instead
750 * - see pmap_flush_tlbs().
751 */
752 if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
753 default_timeout_ns = slto * NSEC_PER_USEC;
754 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
755 TLBTimeOut = (uint32_t) abstime;
756 } else {
757 TLBTimeOut = LockTimeOut;
758 }
759
760 #if DEVELOPMENT || DEBUG
761 reportphyreaddelayabs = LockTimeOut >> 1;
762 #endif
763 if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
764 default_timeout_ns = slto * NSEC_PER_USEC;
765 nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
766 reportphyreaddelayabs = abstime;
767 }
768
769 if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
770 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
771 reportphywritedelayabs = abstime;
772 }
773
774 if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
775 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
776 tracephyreaddelayabs = abstime;
777 }
778
779 if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
780 nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
781 tracephywritedelayabs = abstime;
782 }
783
784 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
785 if (mtxspin > USEC_PER_SEC >> 4) {
786 mtxspin = USEC_PER_SEC >> 4;
787 }
788 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
789 } else {
790 nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
791 }
792 MutexSpin = (unsigned int)abstime;
793 low_MutexSpin = MutexSpin;
794 /*
795 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
796 * real_ncpus has not been set at this point.
797 */
798 high_MutexSpin = -1;
799
800 nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
801 if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
802 nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
803 }
804
805 virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
806 if (virtualized) {
807 int vti;
808
809 if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
810 vti = 6;
811 }
812 printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
813 kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
814 #define VIRTUAL_TIMEOUT_INFLATE64(_timeout) \
815 MACRO_BEGIN \
816 kprintf("%24s: 0x%016llx ", #_timeout, _timeout); \
817 _timeout <<= vti; \
818 kprintf("-> 0x%016llx\n", _timeout); \
819 MACRO_END
820 #define VIRTUAL_TIMEOUT_INFLATE32(_timeout) \
821 MACRO_BEGIN \
822 kprintf("%24s: 0x%08x ", #_timeout, _timeout); \
823 if ((_timeout <<vti) >> vti == _timeout) \
824 _timeout <<= vti; \
825 else \
826 _timeout = ~0; /* cap rather than overflow */ \
827 kprintf("-> 0x%08x\n", _timeout); \
828 MACRO_END
829 VIRTUAL_TIMEOUT_INFLATE32(LockTimeOutUsec);
830 VIRTUAL_TIMEOUT_INFLATE64(LockTimeOut);
831 VIRTUAL_TIMEOUT_INFLATE64(LockTimeOutTSC);
832 VIRTUAL_TIMEOUT_INFLATE64(TLBTimeOut);
833 VIRTUAL_TIMEOUT_INFLATE64(MutexSpin);
834 VIRTUAL_TIMEOUT_INFLATE64(low_MutexSpin);
835 VIRTUAL_TIMEOUT_INFLATE64(reportphyreaddelayabs);
836 }
837
838 interrupt_latency_tracker_setup();
839 simple_lock_init(&ml_timer_evaluation_slock, 0);
840 }
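
/*
 * Illustrative sketch (kept under #if 0, not compiled): the 32-bit timeout
 * inflation above shifts the value left by "vti" and saturates to ~0 if the
 * shift would overflow. Expressed as a plain helper for clarity:
 */
#if 0
static uint32_t
inflate32_saturating(uint32_t timeout, int vti)
{
	/* shifting back recovers the original only when no bits were lost */
	if (((timeout << vti) >> vti) == timeout) {
		return timeout << vti;
	}
	return ~0U;	/* cap rather than overflow */
}
#endif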
841
842 /*
843 * Threshold above which we should attempt to block
844 * instead of spinning for clock_delay_until().
845 */
846
847 void
848 ml_init_delay_spin_threshold(int threshold_us)
849 {
850 nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
851 }
852
853 boolean_t
854 ml_delay_should_spin(uint64_t interval)
855 {
856 return (interval < delay_spin_threshold) ? TRUE : FALSE;
857 }
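
/*
 * Illustrative sketch (kept under #if 0, not compiled): a delay routine can
 * use this threshold to decide between busy-waiting and blocking.
 * spin_wait() and block_wait() are hypothetical stand-ins.
 */
#if 0
	if (ml_delay_should_spin(interval)) {
		spin_wait(interval);	/* short interval: cheaper to spin */
	} else {
		block_wait(interval);	/* long interval: give up the CPU */
	}
#endif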
858
859 uint32_t yield_delay_us = 0;
860
861 void
862 ml_delay_on_yield(void)
863 {
864 #if DEVELOPMENT || DEBUG
865 if (yield_delay_us) {
866 delay(yield_delay_us);
867 }
868 #endif
869 }
870
871 /*
872 * This is called from the machine-independent layer
873 * to perform machine-dependent info updates. Defer to cpu_thread_init().
874 */
875 void
876 ml_cpu_up(void)
877 {
878 return;
879 }
880
881 /*
882 * This is called from the machine-independent layer
883 * to perform machine-dependent info updates.
884 */
885 void
886 ml_cpu_down(void)
887 {
888 i386_deactivate_cpu();
889
890 return;
891 }
892
893 /*
894 * The following are required for parts of the kernel
895 * that cannot resolve these functions as inlines:
896 */
897 extern thread_t current_act(void) __attribute__((const));
898 thread_t
899 current_act(void)
900 {
901 return current_thread_fast();
902 }
903
904 #undef current_thread
905 extern thread_t current_thread(void) __attribute__((const));
906 thread_t
907 current_thread(void)
908 {
909 return current_thread_fast();
910 }
911
912
913 boolean_t
914 ml_is64bit(void)
915 {
916 return cpu_mode_is64bit();
917 }
918
919
920 boolean_t
921 ml_thread_is64bit(thread_t thread)
922 {
923 return thread_is_64bit_addr(thread);
924 }
925
926
927 boolean_t
928 ml_state_is64bit(void *saved_state)
929 {
930 return is_saved_state64(saved_state);
931 }
932
933 void
934 ml_cpu_set_ldt(int selector)
935 {
936 /*
937 * Avoid loading the LDT
938 * if we're setting the KERNEL LDT and it's already set.
939 */
940 if (selector == KERNEL_LDT &&
941 current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
942 return;
943 }
944
945 lldt(selector);
946 current_cpu_datap()->cpu_ldt = selector;
947 }
948
949 void
950 ml_fp_setvalid(boolean_t value)
951 {
952 fp_setvalid(value);
953 }
954
955 uint64_t
956 ml_cpu_int_event_time(void)
957 {
958 return current_cpu_datap()->cpu_int_event_time;
959 }
960
961 vm_offset_t
962 ml_stack_remaining(void)
963 {
964 uintptr_t local = (uintptr_t) &local;
965
966 if (ml_at_interrupt_context() != 0) {
967 return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
968 } else {
969 return local - current_thread()->kernel_stack;
970 }
971 }
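
/*
 * Usage sketch (kept under #if 0, not compiled): a deeply recursive path
 * might check its remaining headroom before descending further. The 4 KB
 * threshold is an arbitrary example value.
 */
#if 0
	if (ml_stack_remaining() < 4096) {
		/* too little kernel stack left; bail out or defer the work */
		return KERN_RESOURCE_SHORTAGE;
	}
#endif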
972
973 #if KASAN
974 vm_offset_t ml_stack_base(void);
975 vm_size_t ml_stack_size(void);
976
977 vm_offset_t
978 ml_stack_base(void)
979 {
980 if (ml_at_interrupt_context()) {
981 return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
982 } else {
983 return current_thread()->kernel_stack;
984 }
985 }
986
987 vm_size_t
988 ml_stack_size(void)
989 {
990 if (ml_at_interrupt_context()) {
991 return INTSTACK_SIZE;
992 } else {
993 return kernel_stack_size;
994 }
995 }
996 #endif
997
998 void
999 kernel_preempt_check(void)
1000 {
1001 boolean_t intr;
1002 unsigned long flags;
1003
1004 assert(get_preemption_level() == 0);
1005
1006 if (__improbable(*ast_pending() & AST_URGENT)) {
1007 /*
1008 * can handle interrupts and preemptions
1009 * at this point
1010 */
1011 __asm__ volatile ("pushf; pop %0" : "=r" (flags));
1012
1013 intr = ((flags & EFL_IF) != 0);
1014
1015 /*
1016 * now cause the PRE-EMPTION trap
1017 */
1018 if (intr == TRUE) {
1019 __asm__ volatile ("int %0" :: "N" (T_PREEMPT));
1020 }
1021 }
1022 }
1023
1024 boolean_t
1025 machine_timeout_suspended(void)
1026 {
1027 return pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake();
1028 }
1029
1030 /* Eagerly evaluate all pending timer and thread callouts
1031 */
1032 void
1033 ml_timer_evaluate(void)
1034 {
1035 KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);
1036
1037 uint64_t te_end, te_start = mach_absolute_time();
1038 simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
1039 ml_timer_evaluation_in_progress = TRUE;
1040 thread_call_delayed_timer_rescan_all();
1041 mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
1042 ml_timer_evaluation_in_progress = FALSE;
1043 ml_timer_eager_evaluations++;
1044 te_end = mach_absolute_time();
1045 ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
1046 simple_unlock(&ml_timer_evaluation_slock);
1047
1048 KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
1049 }
1050
1051 boolean_t
1052 ml_timer_forced_evaluation(void)
1053 {
1054 return ml_timer_evaluation_in_progress;
1055 }
1056
1057 /* 32-bit right-rotate n bits */
1058 static inline uint32_t
1059 ror32(uint32_t val, const unsigned int n)
1060 {
1061 __asm__ volatile ("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n));
1062 return val;
1063 }
1064
1065 void
1066 ml_entropy_collect(void)
1067 {
1068 uint32_t tsc_lo, tsc_hi;
1069 uint32_t *ep;
1070
1071 assert(cpu_number() == master_cpu);
1072
1073 /* update buffer pointer cyclically */
1074 ep = EntropyData.buffer + (EntropyData.sample_count & ENTROPY_BUFFER_INDEX_MASK);
1075 EntropyData.sample_count += 1;
1076
1077 rdtsc_nofence(tsc_lo, tsc_hi);
1078 *ep = ror32(*ep, 9) ^ tsc_lo;
1079 }
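
/*
 * Illustrative sketch (kept under #if 0, not compiled): the mixing step above
 * rotates the current buffer slot right by 9 and xors in the low TSC bits.
 * The same update in portable C, without the inline rorl, where old, ep and
 * tsc_lo mirror the locals above:
 */
#if 0
	uint32_t old = *ep;
	*ep = ((old >> 9) | (old << (32 - 9))) ^ tsc_lo;
#endif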
1080
1081 uint64_t
1082 ml_energy_stat(__unused thread_t t)
1083 {
1084 return 0;
1085 }
1086
1087 void
1088 ml_gpu_stat_update(uint64_t gpu_ns_delta)
1089 {
1090 current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
1091 }
1092
1093 uint64_t
1094 ml_gpu_stat(thread_t t)
1095 {
1096 return t->machine.thread_gpu_ns;
1097 }
1098
1099 int plctrace_enabled = 0;
1100
1101 void
1102 _disable_preemption(void)
1103 {
1104 disable_preemption_internal();
1105 }
1106
1107 void
1108 _enable_preemption(void)
1109 {
1110 enable_preemption_internal();
1111 }
1112
1113 void
1114 plctrace_disable(void)
1115 {
1116 plctrace_enabled = 0;
1117 }
1118
1119 static boolean_t ml_quiescing;
1120
1121 void
1122 ml_set_is_quiescing(boolean_t quiescing)
1123 {
1124 assert(FALSE == ml_get_interrupts_enabled());
1125 ml_quiescing = quiescing;
1126 }
1127
1128 boolean_t
1129 ml_is_quiescing(void)
1130 {
1131 assert(FALSE == ml_get_interrupts_enabled());
1132 return ml_quiescing;
1133 }
1134
1135 uint64_t
1136 ml_get_booter_memory_size(void)
1137 {
1138 return 0;
1139 }