/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/machine_routines.h>
#include <i386/io_map_entries.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <mach/processor.h>
#include <kern/processor.h>
#include <kern/machine.h>

#include <kern/cpu_number.h>
#include <kern/thread.h>
#include <kern/thread_call.h>
#include <kern/policy_internal.h>

#include <prng/random.h>
#include <prng/entropy.h>
#include <i386/machine_cpu.h>
#include <i386/lapic.h>
#include <i386/bit_routines.h>
#include <i386/mp_events.h>
#include <i386/pmCPU.h>
#include <i386/trap.h>
#include <i386/tsc.h>
#include <i386/cpu_threads.h>
#include <i386/proc_reg.h>
#include <mach/vm_param.h>
#include <i386/pmap.h>
#include <i386/pmap_internal.h>
#include <i386/misc_protos.h>
#include <kern/timer_queue.h>
#include <vm/vm_map.h>
#if KPC
#include <kern/kpc.h>
#endif
#include <architecture/i386/pio.h>
#include <i386/cpu_data.h>
#if DEBUG
#define DBG(x...) kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

#if MONOTONIC
#include <kern/monotonic.h>
#endif /* MONOTONIC */

extern void wakeup(void *);

uint64_t LockTimeOut;
uint64_t TLBTimeOut;
uint64_t LockTimeOutTSC;
uint32_t LockTimeOutUsec;
uint64_t MutexSpin;
uint64_t low_MutexSpin;
int64_t high_MutexSpin;
uint64_t LastDebuggerEntryAllowance;
uint64_t delay_spin_threshold;

extern uint64_t panic_restart_timeout;

boolean_t virtualized = FALSE;

decl_simple_lock_data(static, ml_timer_evaluation_slock);
uint32_t ml_timer_eager_evaluations;
uint64_t ml_timer_eager_evaluation_max;
static boolean_t ml_timer_evaluation_in_progress = FALSE;

LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
static int max_cpus_initialized = 0;
#define MAX_CPUS_SET 0x1
#define MAX_CPUS_WAIT 0x2

/* IO memory map services */

/* Map memory map IO space */
vm_offset_t
ml_io_map(
	vm_offset_t phys_addr,
	vm_size_t size)
{
	return io_map(phys_addr, size, VM_WIMG_IO);
}
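/*
 * Reader's note (not from the original source): a hypothetical caller, for
 * context only. A driver that needs CPU access to a device's MMIO window
 * could map it with something like
 *
 *	vm_offset_t regs = ml_io_map(mmio_phys_base, PAGE_SIZE);
 *
 * where mmio_phys_base is a placeholder for the device's physical base
 * address. The mapping is created with VM_WIMG_IO attributes, i.e.
 * uncached, which is what device registers require.
 */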

/* boot memory allocation */
vm_offset_t
ml_static_malloc(
	__unused vm_size_t size)
{
	return (vm_offset_t)NULL;
}


void
ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
{
	*phys_addr = 0;
	*size = 0;
}


vm_offset_t
ml_static_ptovirt(
	vm_offset_t paddr)
{
#if defined(__x86_64__)
	return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS);
#else
	return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS);
#endif
}

vm_offset_t
ml_static_slide(
	vm_offset_t vaddr)
{
	return VM_KERNEL_SLIDE(vaddr);
}

/*
 * base must be page-aligned, and size must be a multiple of PAGE_SIZE
 */
kern_return_t
ml_static_verify_page_protections(
	uint64_t base, uint64_t size, vm_prot_t prot)
{
	vm_prot_t pageprot;
	uint64_t offset;

	DBG("ml_static_verify_page_protections: vaddr 0x%llx sz 0x%llx prot 0x%x\n", base, size, prot);

	/*
	 * base must be within the static bounds, defined to be:
	 * (vm_kernel_stext, kc_highest_nonlinkedit_vmaddr)
	 */
#if DEVELOPMENT || DEBUG || KASAN
	assert(kc_highest_nonlinkedit_vmaddr > 0 && base > vm_kernel_stext && base < kc_highest_nonlinkedit_vmaddr);
#else /* On release kernels, assume this is a protection mismatch failure. */
	if (kc_highest_nonlinkedit_vmaddr == 0 || base < vm_kernel_stext || base >= kc_highest_nonlinkedit_vmaddr) {
		return KERN_FAILURE;
	}
#endif

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		if (pmap_get_prot(kernel_pmap, base + offset, &pageprot) == KERN_FAILURE) {
			return KERN_FAILURE;
		}
		if ((pageprot & prot) != prot) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}
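/*
 * Reader's note (not from the original source): the function above answers
 * "does every page in [base, base + size) carry at least the protections in
 * prot?". A hypothetical caller checking that a range really is mapped
 * read-only might do:
 *
 *	if (ml_static_verify_page_protections(addr, len,
 *	    VM_PROT_READ) != KERN_SUCCESS) {
 *		panic("expected static mapping missing or misprotected");
 *	}
 *
 * Both addr and len are placeholders; len must be a multiple of PAGE_SIZE.
 */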

vm_offset_t
ml_static_unslide(
	vm_offset_t vaddr)
{
	return VM_KERNEL_UNSLIDE(vaddr);
}

/*
 * Reclaim memory, by virtual address, that was used in early boot and is no
 * longer needed by the kernel.
 */
void
ml_static_mfree(
	vm_offset_t vaddr,
	vm_size_t size)
{
	addr64_t vaddr_cur;
	ppnum_t ppn;
	uint32_t freed_pages = 0;
	vm_size_t map_size;

	assert(vaddr >= VM_MIN_KERNEL_ADDRESS);

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	for (vaddr_cur = vaddr; vaddr_cur < round_page_64(vaddr + size);) {
		map_size = pmap_query_pagesize(kernel_pmap, vaddr_cur);

		/* just skip if nothing mapped here */
		if (map_size == 0) {
			vaddr_cur += PAGE_SIZE;
			continue;
		}

		/*
		 * Can't free from the middle of a large page.
		 */
		assert((vaddr_cur & (map_size - 1)) == 0);

		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		assert(ppn != (ppnum_t)NULL);

		pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur + map_size);
		while (map_size > 0) {
			if (++kernel_pmap->stats.resident_count > kernel_pmap->stats.resident_max) {
				kernel_pmap->stats.resident_max = kernel_pmap->stats.resident_count;
			}

			assert(pmap_valid_page(ppn));
			if (IS_MANAGED_PAGE(ppn)) {
				vm_page_create(ppn, (ppn + 1));
				freed_pages++;
			}
			map_size -= PAGE_SIZE;
			vaddr_cur += PAGE_SIZE;
			ppn++;
		}
	}
	vm_page_lockspin_queues();
	vm_page_wire_count -= freed_pages;
	vm_page_wire_count_initial -= freed_pages;
	if (vm_page_wire_count_on_boot != 0) {
		assert(vm_page_wire_count_on_boot >= freed_pages);
		vm_page_wire_count_on_boot -= freed_pages;
	}
	vm_page_unlock_queues();

#if DEBUG
	kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn);
#endif
}
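/*
 * Reader's note (not from the original source): the loop above walks the
 * range one mapping at a time (honoring large pages via
 * pmap_query_pagesize()), unmaps each mapping, hands every managed physical
 * page back to the VM free list with vm_page_create(), and then debits the
 * wired-page counters so the reclaimed early-boot memory shows up as
 * ordinary free memory.
 */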

/* Change page protections for addresses previously loaded by efiboot */
kern_return_t
ml_static_protect(vm_offset_t vmaddr, vm_size_t size, vm_prot_t prot)
{
	boolean_t NX = !!!(prot & VM_PROT_EXECUTE), ro = !!!(prot & VM_PROT_WRITE);

	assert(prot & VM_PROT_READ);

	pmap_mark_range(kernel_pmap, vmaddr, size, NX, ro);

	return KERN_SUCCESS;
}

/* virtual to physical on wired pages */
vm_offset_t
ml_vtophys(
	vm_offset_t vaddr)
{
	return (vm_offset_t)kvtophys(vaddr);
}

/*
 *	Routine:        ml_nofault_copy
 *	Function:       Perform a physical mode copy if the source and
 *		destination have valid translations in the kernel pmap.
 *		If translations are present, they are assumed to
 *		be wired; i.e. no attempt is made to guarantee that the
 *		translations obtained remain valid for
 *		the duration of the copy process.
 */

vm_size_t
ml_nofault_copy(
	vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
{
	addr64_t cur_phys_dst, cur_phys_src;
	uint32_t count, nbytes = 0;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			break;
		}
		if (!(cur_phys_dst = kvtophys(virtdst))) {
			break;
		}
		if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) {
			break;
		}
		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
		if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
			count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK));
		}
		if (count > size) {
			count = (uint32_t)size;
		}

		bcopy_phys(cur_phys_src, cur_phys_dst, count);

		nbytes += count;
		virtsrc += count;
		virtdst += count;
		size -= count;
	}

	return nbytes;
}
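/*
 * Reader's note (not from the original source): on each iteration 'count'
 * is clamped to whatever remains of the current source page, the current
 * destination page, and the remaining size, so each bcopy_phys() never
 * crosses a page boundary on either side. A caller (for example a debugger
 * or diagnostics path) can therefore attempt a copy through addresses that
 * may be unmapped: the copy simply stops early and the number of bytes
 * actually copied is returned.
 */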

/*
 *	Routine:        ml_validate_nofault
 *	Function: Validate that this address range has valid translations
 *		in the kernel pmap. If translations are present, they are
 *		assumed to be wired; i.e. no attempt is made to guarantee
 *		that the translations persist after the check.
 *	Returns: TRUE if the range is mapped and will not cause a fault,
 *		FALSE otherwise.
 */

boolean_t
ml_validate_nofault(
	vm_offset_t virtsrc, vm_size_t size)
{
	addr64_t cur_phys_src;
	uint32_t count;

	while (size > 0) {
		if (!(cur_phys_src = kvtophys(virtsrc))) {
			return FALSE;
		}
		if (!pmap_valid_page(i386_btop(cur_phys_src))) {
			return FALSE;
		}
		count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
		if (count > size) {
			count = (uint32_t)size;
		}

		virtsrc += count;
		size -= count;
	}

	return TRUE;
}

/* Interrupt handling */

/* Initialize Interrupts */
void
ml_init_interrupt(void)
{
	(void) ml_set_interrupts_enabled(TRUE);
}


/* Get Interrupts Enabled */
boolean_t
ml_get_interrupts_enabled(void)
{
	unsigned long flags;

	__asm__ volatile ("pushf; pop %0": "=r" (flags));
	return (flags & EFL_IF) != 0;
}

/* Set Interrupts Enabled */
boolean_t
ml_set_interrupts_enabled(boolean_t enable)
{
	unsigned long flags;
	boolean_t istate;

	__asm__ volatile ("pushf; pop %0" : "=r" (flags));

	assert(get_interrupt_level() ? (enable == FALSE) : TRUE);

	istate = ((flags & EFL_IF) != 0);

	if (enable) {
		__asm__ volatile ("sti;nop");

		if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT)) {
			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
		}
	} else {
		if (istate) {
			__asm__ volatile ("cli");
		}
	}

	return istate;
}
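/*
 * Reader's note (not from the original source): ml_set_interrupts_enabled()
 * returns the previous interrupt state, so the conventional pattern in
 * callers is a save/restore pair:
 *
 *	boolean_t istate = ml_set_interrupts_enabled(FALSE);
 *	... critical section that must not be interrupted ...
 *	(void) ml_set_interrupts_enabled(istate);
 *
 * Note also that re-enabling interrupts at preemption level 0 with an
 * urgent AST pending immediately takes the T_PREEMPT software trap, as the
 * code above shows.
 */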

/* Early Set Interrupts Enabled */
boolean_t
ml_early_set_interrupts_enabled(boolean_t enable)
{
	if (enable == TRUE) {
		kprintf("Caller attempted to enable interrupts too early in "
		    "kernel startup. Halting.\n");
		hlt();
		/*NOTREACHED*/
	}

	/* On x86, do not allow interrupts to be enabled very early */
	return FALSE;
}

/* Check if running at interrupt context */
boolean_t
ml_at_interrupt_context(void)
{
	return get_interrupt_level() != 0;
}

void
ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
{
	*icp = (get_interrupt_level() != 0);
	/* These will be technically inaccurate for interrupts that occur
	 * successively within a single "idle exit" event, but shouldn't
	 * matter statistically.
	 */
	*pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage);
}

/* Generate a fake interrupt */
__dead2
void
ml_cause_interrupt(void)
{
	panic("ml_cause_interrupt not defined yet on Intel");
}

/*
 * TODO: transition users of this to kernel_thread_start_priority
 * ml_thread_policy is an unsupported KPI
 */
void
ml_thread_policy(
	thread_t thread,
	__unused unsigned policy_id,
	unsigned policy_info)
{
	if (policy_info & MACHINE_NETWORK_WORKLOOP) {
		thread_precedence_policy_data_t info;
		__assert_only kern_return_t kret;

		info.importance = 1;

		kret = thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
		    (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
		assert(kret == KERN_SUCCESS);
	}
}

/* Install an interrupt handler */
void
ml_install_interrupt_handler(
	void *nub,
	int source,
	void *target,
	IOInterruptHandler handler,
	void *refCon)
{
	boolean_t current_state;

	current_state = ml_set_interrupts_enabled(FALSE);

	PE_install_interrupt_handler(nub, source, target,
	    (IOInterruptHandler) handler, refCon);

	(void) ml_set_interrupts_enabled(current_state);
}


void
machine_signal_idle(
	processor_t processor)
{
	cpu_interrupt(processor->cpu_id);
}

__dead2
void
machine_signal_idle_deferred(
	__unused processor_t processor)
{
	panic("Unimplemented");
}

__dead2
void
machine_signal_idle_cancel(
	__unused processor_t processor)
{
	panic("Unimplemented");
}

static kern_return_t
register_cpu(
	uint32_t lapic_id,
	processor_t *processor_out,
	boolean_t boot_cpu )
{
	int target_cpu;
	cpu_data_t *this_cpu_datap;

	this_cpu_datap = cpu_data_alloc(boot_cpu);
	if (this_cpu_datap == NULL) {
		return KERN_FAILURE;
	}
	target_cpu = this_cpu_datap->cpu_number;
	assert((boot_cpu && (target_cpu == 0)) ||
	    (!boot_cpu && (target_cpu != 0)));

	lapic_cpu_map(lapic_id, target_cpu);

	/* The cpu_id is not known at registration phase. Just use the
	 * lapic_id for now.
	 */
	this_cpu_datap->cpu_phys_number = lapic_id;

	this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu);
	if (this_cpu_datap->cpu_console_buf == NULL) {
		goto failed;
	}

#if KPC
	if (kpc_register_cpu(this_cpu_datap) != TRUE) {
		goto failed;
	}
#endif

	if (!boot_cpu) {
		cpu_thread_alloc(this_cpu_datap->cpu_number);
		if (this_cpu_datap->lcpu.core == NULL) {
			goto failed;
		}
	}

	/*
	 * processor_init() deferred to topology start
	 * because "slot numbers" a.k.a. logical processor numbers
	 * are not yet finalized.
	 */
	*processor_out = this_cpu_datap->cpu_processor;

	return KERN_SUCCESS;

failed:
	console_cpu_free(this_cpu_datap->cpu_console_buf);
#if KPC
	kpc_unregister_cpu(this_cpu_datap);
#endif /* KPC */

	return KERN_FAILURE;
}


kern_return_t
ml_processor_register(
	cpu_id_t cpu_id,
	uint32_t lapic_id,
	processor_t *processor_out,
	boolean_t boot_cpu,
	boolean_t start )
{
	static boolean_t done_topo_sort = FALSE;
	static uint32_t num_registered = 0;

	/* Register all CPUs first, and track max */
	if (start == FALSE) {
		num_registered++;

		DBG( "registering CPU lapic id %d\n", lapic_id );

		return register_cpu( lapic_id, processor_out, boot_cpu );
	}

	/* Sort by topology before we start anything */
	if (!done_topo_sort) {
		DBG( "about to start CPUs. %d registered\n", num_registered );

		cpu_topology_sort( num_registered );
		done_topo_sort = TRUE;
	}

	/* Assign the cpu ID */
	uint32_t cpunum = -1;
	cpu_data_t *this_cpu_datap = NULL;

	/* find cpu num and pointer */
	cpunum = ml_get_cpuid( lapic_id );

	if (cpunum == 0xFFFFFFFF) { /* never heard of it? */
		panic( "trying to start invalid/unregistered CPU %d\n", lapic_id );
	}

	this_cpu_datap = cpu_datap(cpunum);

	/* fix the CPU id */
	this_cpu_datap->cpu_id = cpu_id;

	/* allocate and initialize other per-cpu structures */
	if (!boot_cpu) {
		mp_cpus_call_cpu_init(cpunum);
		random_cpu_init(cpunum);
	}

	/* output arg */
	*processor_out = this_cpu_datap->cpu_processor;

	/* OK, try and start this CPU */
	return cpu_topology_start_cpu( cpunum );
}
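/*
 * Reader's note (not from the original source): ml_processor_register() is
 * called in two phases. With start == FALSE each CPU is merely registered
 * (per-cpu data, console buffer, LAPIC-to-cpu-number mapping) and counted;
 * once a caller passes start == TRUE, the accumulated set is sorted by
 * topology exactly once, and the requested CPU is then started via
 * cpu_topology_start_cpu().
 */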


void
ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
{
	boolean_t os_supports_sse;
	i386_cpu_info_t *cpuid_infop;

	if (cpu_infop == NULL) {
		return;
	}

	/*
	 * Are we supporting MMX/SSE/SSE2/SSE3?
	 * As distinct from whether the cpu has these capabilities.
	 */
	os_supports_sse = !!(get_cr4() & CR4_OSXMM);

	if (ml_fpu_avx_enabled()) {
		cpu_infop->vector_unit = 9;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) {
		cpu_infop->vector_unit = 8;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) {
		cpu_infop->vector_unit = 7;
	} else if ((cpuid_features() & CPUID_FEATURE_SSSE3) && os_supports_sse) {
		cpu_infop->vector_unit = 6;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) {
		cpu_infop->vector_unit = 5;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) {
		cpu_infop->vector_unit = 4;
	} else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) {
		cpu_infop->vector_unit = 3;
	} else if (cpuid_features() & CPUID_FEATURE_MMX) {
		cpu_infop->vector_unit = 2;
	} else {
		cpu_infop->vector_unit = 0;
	}

	cpuid_infop = cpuid_info();

	cpu_infop->cache_line_size = cpuid_infop->cache_linesize;

	cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I];
	cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D];

	if (cpuid_infop->cache_size[L2U] > 0) {
		cpu_infop->l2_settings = 1;
		cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U];
	} else {
		cpu_infop->l2_settings = 0;
		cpu_infop->l2_cache_size = 0xFFFFFFFF;
	}

	if (cpuid_infop->cache_size[L3U] > 0) {
		cpu_infop->l3_settings = 1;
		cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U];
	} else {
		cpu_infop->l3_settings = 0;
		cpu_infop->l3_cache_size = 0xFFFFFFFF;
	}
}

int
ml_early_cpu_max_number(void)
{
	int n = max_ncpus;

	assert(startup_phase >= STARTUP_SUB_TUNABLES);
	if (max_cpus_from_firmware) {
		n = MIN(n, max_cpus_from_firmware);
	}
	return n - 1;
}

void
ml_set_max_cpus(unsigned int max_cpus)
{
	lck_mtx_lock(&max_cpus_lock);
	if (max_cpus_initialized != MAX_CPUS_SET) {
		if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
			/*
			 * Note: max_cpus is the number of enabled processors
			 * that ACPI found; max_ncpus is the maximum number
			 * that the kernel supports or that the "cpus="
			 * boot-arg has set. Here we take the minimum.
			 */
			machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus);
		}
		if (max_cpus_initialized == MAX_CPUS_WAIT) {
			thread_wakeup((event_t) &max_cpus_initialized);
		}
		max_cpus_initialized = MAX_CPUS_SET;
	}
	lck_mtx_unlock(&max_cpus_lock);
}

unsigned int
ml_wait_max_cpus(void)
{
	lck_mtx_lock(&max_cpus_lock);
	while (max_cpus_initialized != MAX_CPUS_SET) {
		max_cpus_initialized = MAX_CPUS_WAIT;
		lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT);
	}
	lck_mtx_unlock(&max_cpus_lock);
	return machine_info.max_cpus;
}
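/*
 * Reader's note (not from the original source): ml_set_max_cpus() and
 * ml_wait_max_cpus() form a simple handshake around max_cpus_lock. A waiter
 * marks the state MAX_CPUS_WAIT and sleeps on &max_cpus_initialized; once
 * ACPI reports the enabled-processor count, ml_set_max_cpus() records it in
 * machine_info.max_cpus, wakes any waiter, and latches the state to
 * MAX_CPUS_SET so the value is published exactly once.
 */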

void
ml_panic_trap_to_debugger(__unused const char *panic_format_str,
    __unused va_list *panic_args,
    __unused unsigned int reason,
    __unused void *ctx,
    __unused uint64_t panic_options_mask,
    __unused unsigned long panic_caller)
{
	return;
}

static uint64_t
virtual_timeout_inflate64(unsigned int vti, uint64_t timeout, uint64_t max_timeout)
{
	if (vti >= 64) {
		return max_timeout;
	}

	if ((timeout << vti) >> vti != timeout) {
		return max_timeout;
	}

	if ((timeout << vti) > max_timeout) {
		return max_timeout;
	}

	return timeout << vti;
}
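/*
 * Reader's note (not from the original source): the left shift multiplies
 * the timeout by 2^vti, and the shift-back comparison is the overflow
 * guard: if (timeout << vti) >> vti no longer equals timeout, high bits
 * were lost and the capped maximum is returned instead. For example, with
 * vti = 6 a timeout of 1000 inflates to 64000, while any timeout at or
 * above 2^58 would overflow a 64-bit value and therefore clamps to
 * max_timeout.
 */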

static uint32_t
virtual_timeout_inflate32(unsigned int vti, uint32_t timeout, uint32_t max_timeout)
{
	if (vti >= 32) {
		return max_timeout;
	}

	if ((timeout << vti) >> vti != timeout) {
		return max_timeout;
	}

	return timeout << vti;
}

/*
 * Some timeouts are later adjusted or used in calculations setting
 * other values. In order to avoid overflow, cap the max timeout at
 * 2^47 ns (~39 hours).
 */
static const uint64_t max_timeout_ns = 1ULL << 47;

/*
 * Inflate a timeout in absolutetime.
 */
static uint64_t
virtual_timeout_inflate_abs(unsigned int vti, uint64_t timeout)
{
	uint64_t max_timeout;
	nanoseconds_to_absolutetime(max_timeout_ns, &max_timeout);
	return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a value in TSC ticks.
 */
static uint64_t
virtual_timeout_inflate_tsc(unsigned int vti, uint64_t timeout)
{
	const uint64_t max_timeout = tmrCvt(max_timeout_ns, tscFCvtn2t);
	return virtual_timeout_inflate64(vti, timeout, max_timeout);
}

/*
 * Inflate a timeout in microseconds.
 */
static uint32_t
virtual_timeout_inflate_us(unsigned int vti, uint64_t timeout)
{
	const uint32_t max_timeout = ~0;
	return virtual_timeout_inflate32(vti, timeout, max_timeout);
}

/*
 * Routine: ml_init_lock_timeout
 * Function:
 */
void
ml_init_lock_timeout(void)
{
	uint64_t abstime;
	uint32_t mtxspin;
#if DEVELOPMENT || DEBUG
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 2;
#else
	uint64_t default_timeout_ns = NSEC_PER_SEC >> 1;
#endif
	uint32_t slto;
	uint32_t prt;

	if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
	}

	/*
	 * LockTimeOut is in absolute time, LockTimeOutTSC is in TSC ticks,
	 * and LockTimeOutUsec is in microseconds (a 32-bit quantity).
	 */
	LockTimeOutUsec = (uint32_t) (default_timeout_ns / NSEC_PER_USEC);
	nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
	LockTimeOut = abstime;
	LockTimeOutTSC = tmrCvt(abstime, tscFCvtn2t);

	/*
	 * TLBTimeOut dictates the TLB flush timeout period. It defaults to
	 * LockTimeOut but can be overridden separately. In particular, a
	 * zero value inhibits the timeout-panic and cuts a trace event instead
	 * - see pmap_flush_tlbs().
	 */
	if (PE_parse_boot_argn("tlbto_us", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
		TLBTimeOut = (uint32_t) abstime;
	} else {
		TLBTimeOut = LockTimeOut;
	}

#if DEVELOPMENT || DEBUG
	reportphyreaddelayabs = LockTimeOut >> 1;
#endif
	if (PE_parse_boot_argn("phyreadmaxus", &slto, sizeof(slto))) {
		default_timeout_ns = slto * NSEC_PER_USEC;
		nanoseconds_to_absolutetime(default_timeout_ns, &abstime);
		reportphyreaddelayabs = abstime;
	}

	if (PE_parse_boot_argn("phywritemaxus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		reportphywritedelayabs = abstime;
	}

	if (PE_parse_boot_argn("tracephyreadus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		tracephyreaddelayabs = abstime;
	}

	if (PE_parse_boot_argn("tracephywriteus", &slto, sizeof(slto))) {
		nanoseconds_to_absolutetime((uint64_t)slto * NSEC_PER_USEC, &abstime);
		tracephywritedelayabs = abstime;
	}

	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
		if (mtxspin > USEC_PER_SEC >> 4) {
			mtxspin = USEC_PER_SEC >> 4;
		}
		nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &abstime);
	} else {
		nanoseconds_to_absolutetime(10 * NSEC_PER_USEC, &abstime);
	}
	MutexSpin = (unsigned int)abstime;
	low_MutexSpin = MutexSpin;
	/*
	 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
	 * real_ncpus is not set at this time
	 */
	high_MutexSpin = -1;

	nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
	if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof(prt))) {
		nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
	}

	virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
	if (virtualized) {
		unsigned int vti;

		if (!PE_parse_boot_argn("vti", &vti, sizeof(vti))) {
			vti = 6;
		}
		printf("Timeouts adjusted for virtualization (<<%d)\n", vti);
		kprintf("Timeouts adjusted for virtualization (<<%d):\n", vti);
#define VIRTUAL_TIMEOUT_INFLATE_ABS(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%016llx ", #_timeout, _timeout);		\
	_timeout = virtual_timeout_inflate_abs(vti, _timeout);		\
	kprintf("-> 0x%016llx\n", _timeout);				\
MACRO_END

#define VIRTUAL_TIMEOUT_INFLATE_TSC(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%016llx ", #_timeout, _timeout);		\
	_timeout = virtual_timeout_inflate_tsc(vti, _timeout);		\
	kprintf("-> 0x%016llx\n", _timeout);				\
MACRO_END
#define VIRTUAL_TIMEOUT_INFLATE_US(_timeout)				\
MACRO_BEGIN								\
	kprintf("%24s: 0x%08x ", #_timeout, _timeout);			\
	_timeout = virtual_timeout_inflate_us(vti, _timeout);		\
	kprintf("-> 0x%08x\n", _timeout);				\
MACRO_END
		VIRTUAL_TIMEOUT_INFLATE_US(LockTimeOutUsec);
		VIRTUAL_TIMEOUT_INFLATE_ABS(LockTimeOut);
		VIRTUAL_TIMEOUT_INFLATE_TSC(LockTimeOutTSC);
		VIRTUAL_TIMEOUT_INFLATE_ABS(TLBTimeOut);
		VIRTUAL_TIMEOUT_INFLATE_ABS(MutexSpin);
		VIRTUAL_TIMEOUT_INFLATE_ABS(low_MutexSpin);
		VIRTUAL_TIMEOUT_INFLATE_ABS(reportphyreaddelayabs);
	}

	interrupt_latency_tracker_setup();
	simple_lock_init(&ml_timer_evaluation_slock, 0);
}

/*
 * Threshold above which we should attempt to block
 * instead of spinning for clock_delay_until().
 */

void
ml_init_delay_spin_threshold(int threshold_us)
{
	nanoseconds_to_absolutetime(threshold_us * NSEC_PER_USEC, &delay_spin_threshold);
}

boolean_t
ml_delay_should_spin(uint64_t interval)
{
	return (interval < delay_spin_threshold) ? TRUE : FALSE;
}

TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);

void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
	if (yield_delay_us) {
		delay(yield_delay_us);
	}
#endif
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates. Defer to cpu_thread_init().
 */
void
ml_cpu_up(void)
{
	return;
}

/*
 * This is called from the machine-independent layer
 * to perform machine-dependent info updates.
 */
void
ml_cpu_down(void)
{
	i386_deactivate_cpu();

	return;
}

/*
 * The following are required for parts of the kernel
 * that cannot resolve these functions as inlines:
 */
extern thread_t current_act(void) __attribute__((const));
thread_t
current_act(void)
{
	return current_thread_fast();
}

#undef current_thread
extern thread_t current_thread(void) __attribute__((const));
thread_t
current_thread(void)
{
	return current_thread_fast();
}


boolean_t
ml_is64bit(void)
{
	return cpu_mode_is64bit();
}


boolean_t
ml_thread_is64bit(thread_t thread)
{
	return thread_is_64bit_addr(thread);
}


boolean_t
ml_state_is64bit(void *saved_state)
{
	return is_saved_state64(saved_state);
}

void
ml_cpu_set_ldt(int selector)
{
	/*
	 * Avoid loading the LDT
	 * if we're setting the KERNEL LDT and it's already set.
	 */
	if (selector == KERNEL_LDT &&
	    current_cpu_datap()->cpu_ldt == KERNEL_LDT) {
		return;
	}

	lldt(selector);
	current_cpu_datap()->cpu_ldt = selector;
}

void
ml_fp_setvalid(boolean_t value)
{
	fp_setvalid(value);
}

uint64_t
ml_cpu_int_event_time(void)
{
	return current_cpu_datap()->cpu_int_event_time;
}

vm_offset_t
ml_stack_remaining(void)
{
	uintptr_t local = (uintptr_t) &local;

	if (ml_at_interrupt_context() != 0) {
		return local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE);
	} else {
		return local - current_thread()->kernel_stack;
	}
}
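/*
 * Reader's note (not from the original source): taking the address of the
 * local variable above is a cheap way to sample the current stack pointer;
 * subtracting the base of the active stack (the interrupt stack or the
 * thread's kernel stack) yields an approximation of the bytes still
 * available.
 */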

#if KASAN
vm_offset_t ml_stack_base(void);
vm_size_t ml_stack_size(void);

vm_offset_t
ml_stack_base(void)
{
	if (ml_at_interrupt_context()) {
		return current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE;
	} else {
		return current_thread()->kernel_stack;
	}
}

vm_size_t
ml_stack_size(void)
{
	if (ml_at_interrupt_context()) {
		return INTSTACK_SIZE;
	} else {
		return kernel_stack_size;
	}
}
#endif

void
kernel_preempt_check(void)
{
	boolean_t intr;
	unsigned long flags;

	assert(get_preemption_level() == 0);

	if (__improbable(*ast_pending() & AST_URGENT)) {
		/*
		 * can handle interrupts and preemptions
		 * at this point
		 */
		__asm__ volatile ("pushf; pop %0" : "=r" (flags));

		intr = ((flags & EFL_IF) != 0);

		/*
		 * now cause the PRE-EMPTION trap
		 */
		if (intr == TRUE) {
			__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
		}
	}
}

boolean_t
machine_timeout_suspended(void)
{
	return pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity() || ml_recent_wake();
}

/* Eagerly evaluate all pending timer and thread callouts
 */
void
ml_timer_evaluate(void)
{
	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_START, 0, 0, 0, 0, 0);

	uint64_t te_end, te_start = mach_absolute_time();
	simple_lock(&ml_timer_evaluation_slock, LCK_GRP_NULL);
	ml_timer_evaluation_in_progress = TRUE;
	thread_call_delayed_timer_rescan_all();
	mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL);
	ml_timer_evaluation_in_progress = FALSE;
	ml_timer_eager_evaluations++;
	te_end = mach_absolute_time();
	ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start));
	simple_unlock(&ml_timer_evaluation_slock);

	KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

boolean_t
ml_timer_forced_evaluation(void)
{
	return ml_timer_evaluation_in_progress;
}

/* 32-bit right-rotate n bits */
static inline uint32_t
ror32(uint32_t val, const unsigned int n)
{
	__asm__ volatile ("rorl %%cl,%0" : "=r" (val) : "0" (val), "c" (n));
	return val;
}

void
ml_entropy_collect(void)
{
	uint32_t tsc_lo, tsc_hi;
	uint32_t *ep;

	assert(cpu_number() == master_cpu);

	/* update buffer pointer cyclically */
	ep = EntropyData.buffer + (EntropyData.sample_count & EntropyData.buffer_index_mask);
	EntropyData.sample_count += 1;

	rdtsc_nofence(tsc_lo, tsc_hi);
	*ep = (ror32(*ep, 9) & EntropyData.ror_mask) ^ tsc_lo;
}
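/*
 * Reader's note (not from the original source): each sample folds the low
 * 32 bits of the TSC into a circular buffer slot by rotating the existing
 * word 9 bits and XORing in the fresh timestamp, so timer-interrupt jitter
 * accumulates as raw entropy for the kernel PRNG to condition later.
 */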

uint64_t
ml_energy_stat(__unused thread_t t)
{
	return 0;
}

void
ml_gpu_stat_update(uint64_t gpu_ns_delta)
{
	current_thread()->machine.thread_gpu_ns += gpu_ns_delta;
}

uint64_t
ml_gpu_stat(thread_t t)
{
	return t->machine.thread_gpu_ns;
}

int plctrace_enabled = 0;

void
_disable_preemption(void)
{
	disable_preemption_internal();
}

void
_enable_preemption(void)
{
	enable_preemption_internal();
}

void
plctrace_disable(void)
{
	plctrace_enabled = 0;
}

static boolean_t ml_quiescing;

void
ml_set_is_quiescing(boolean_t quiescing)
{
	ml_quiescing = quiescing;
}

boolean_t
ml_is_quiescing(void)
{
	return ml_quiescing;
}

uint64_t
ml_get_booter_memory_size(void)
{
	return 0;
}

void
machine_lockdown(void)
{
	x86_64_protect_data_const();
}

bool
ml_cpu_can_exit(__unused int cpu_id)
{
	return true;
}

void
ml_cpu_init_state(void)
{
}

void
ml_cpu_begin_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_end_state_transition(__unused int cpu_id)
{
}

void
ml_cpu_begin_loop(void)
{
}

void
ml_cpu_end_loop(void)
{
}

size_t
ml_get_vm_reserved_regions(bool vm_is64bit, struct vm_reserved_region **regions)
{
#pragma unused(vm_is64bit)
	assert(regions != NULL);

	*regions = NULL;
	return 0;
}