X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0b4c1975fb5e4eccf1012a35081f7e7799b81046..3e170ce000f1506b7b5d2c5c7faec85ceabb573d:/osfmk/x86_64/machine_routines_asm.s diff --git a/osfmk/x86_64/machine_routines_asm.s b/osfmk/x86_64/machine_routines_asm.s index f8fecaccf..af962f2f4 100644 --- a/osfmk/x86_64/machine_routines_asm.s +++ b/osfmk/x86_64/machine_routines_asm.s @@ -27,20 +27,19 @@ */ #include -#include +#include #include #include #include #include +#include #include /* ** ml_get_timebase() ** -** Entry - %rdi contains pointer to 64 bit structure. -** -** Exit - 64 bit structure filled in. +** Returns TSC in RAX ** */ ENTRY(ml_get_timebase) @@ -50,7 +49,6 @@ ENTRY(ml_get_timebase) lfence shlq $32,%rdx orq %rdx,%rax - movq %rax, (%rdi) ret @@ -80,36 +78,17 @@ ENTRY(ml_get_timebase) * */ ENTRY(tmrCvt) + cmpq $1,%rsi /* check for unity fastpath */ + je 1f movq %rdi,%rax mulq %rsi /* result is %rdx:%rax */ shrdq $32,%rdx,%rax /* %rdx:%rax >>= 32 */ ret - - -/* - * void _rtc_nanotime_store( - * uint64_t tsc, // %rdi - * uint64_t nsec, // %rsi - * uint32_t scale, // %rdx - * uint32_t shift, // %rcx - * rtc_nanotime_t *dst); // %r8 - */ -ENTRY(_rtc_nanotime_store) - movl RNT_GENERATION(%r8),%eax /* get current generation */ - movl $0,RNT_GENERATION(%r8) /* flag data as being updated */ - movq %rdi,RNT_TSC_BASE(%r8) - movq %rsi,RNT_NS_BASE(%r8) - movl %edx,RNT_SCALE(%r8) - movl %ecx,RNT_SHIFT(%r8) - - incl %eax /* next generation */ - jnz 1f - incl %eax /* skip 0, which is a flag */ -1: movl %eax,RNT_GENERATION(%r8) /* update generation */ - +1: + mov %rdi,%rax ret -/* + /* * void _rtc_nanotime_adjust( * uint64_t tsc_base_delta, // %rdi * rtc_nanotime_t *dst); // %rsi @@ -127,28 +106,36 @@ ENTRY(_rtc_nanotime_adjust) ret /* - * unint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow); + * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp); * * This is the same as the commpage nanotime routine, except that it uses the * kernel internal 
"rtc_nanotime_info" data instead of the commpage data. * These two copies of data are kept in sync by rtc_clock_napped(). * - * Warning! There is another copy of this code in osfmk/x86_64/idt64.s. - * These are kept in sync by both using the RTC_NANOTIME_READ() macro. + * Warning! There are several copies of this code in the trampolines found in + * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h. + * They're all kept in sync by using the RTC_NANOTIME_READ() macro. + * + * The algorithm we use is: + * + * ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base; * - * There are two versions of this algorithm, for "slow" and "fast" processors. - * The more common "fast" algorithm is: + * rnt_shift, a constant computed during initialization, is the smallest value for which: + * + * (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD * - * ns = (((rdtsc - rnt_tsc_base)*rnt_tsc_scale) / 2**32) + rnt_ns_base; + * Where SLOW_TSC_THRESHOLD is about 10e9. Since most processor's tscFreqs are greater + * than 1GHz, rnt_shift is usually 0. rnt_tsc_scale is also a 32-bit constant: * - * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant - * computed during initialization: + * rnt_tsc_scale = (10e9 * 2**32) / (tscFreq << rnt_shift); * - * rnt_tsc_scale = (10e9 * 2**32) / tscFreq; + * On 64-bit processors this algorithm could be simplified by doing a 64x64 bit + * multiply of rdtsc by tscFCvtt2n: * - * The "slow" algorithm uses long division: + * ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base; * - * ns = (((rdtsc - rnt_tsc_base) * 10e9) / tscFreq) + rnt_ns_base; + * We don't do so in order to use the same algorithm in 32- and 64-bit mode. + * When U32 goes away, we should reconsider. * * Since this routine is not synchronized and can be called in any context, * we use a generation count to guard against seeing partially updated data. @@ -159,30 +146,99 @@ ENTRY(_rtc_nanotime_adjust) * the generation is zero. 
* * unint64_t _rtc_nanotime_read( - * rtc_nanotime_t *rntp, // %rdi - * int slow); // %rsi + * rtc_nanotime_t *rntp); // %rdi * */ ENTRY(_rtc_nanotime_read) - test %rsi,%rsi - jnz Lslow - - /* - * Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD - */ - RTC_NANOTIME_READ_FAST() + PAL_RTC_NANOTIME_READ_FAST() + + ret + +/* + * extern uint64_t _rtc_tsc_to_nanoseconds( + * uint64_t value, // %rdi + * pal_rtc_nanotime_t *rntp); // %rsi + * + * Converts TSC units to nanoseconds, using an abbreviated form of the above + * algorithm. Note that while we could have simply used tmrCvt(value,tscFCvtt2n), + * which would avoid the need for this asm, doing so is a bit more risky since + * we'd be using a different algorithm with possibly different rounding etc. + */ + +ENTRY(_rtc_tsc_to_nanoseconds) + movq %rdi,%rax /* copy value (in TSC units) to convert */ + movl RNT_SHIFT(%rsi),%ecx + movl RNT_SCALE(%rsi),%edx + shlq %cl,%rax /* tscUnits << shift */ + mulq %rdx /* (tscUnits << shift) * scale */ + shrdq $32,%rdx,%rax /* %rdx:%rax >>= 32 */ + ret + + + +Entry(call_continuation) + movq %rdi,%rcx /* get continuation */ + movq %rsi,%rdi /* continuation param */ + movq %rdx,%rsi /* wait result */ + movq %gs:CPU_KERNEL_STACK,%rsp /* set the stack */ + xorq %rbp,%rbp /* zero frame pointer */ + call *%rcx /* call continuation */ + movq %gs:CPU_ACTIVE_THREAD,%rdi + call EXT(thread_terminate) + +Entry(x86_init_wrapper) + xor %rbp, %rbp + movq %rsi, %rsp + callq *%rdi + +#if CONFIG_VMX + +/* + * __vmxon -- Enter VMX Operation + * int __vmxon(addr64_t v); + */ +Entry(__vmxon) + FRAME + push %rdi + + mov $(VMX_FAIL_INVALID), %ecx + mov $(VMX_FAIL_VALID), %edx + mov $(VMX_SUCCEED), %eax + vmxon (%rsp) + cmovcl %ecx, %eax /* CF = 1, ZF = 0 */ + cmovzl %edx, %eax /* CF = 0, ZF = 1 */ + + pop %rdi + EMARF ret - /* - * Processor whose TSC frequency is not faster than SLOW_TSC_THRESHOLD - * But K64 doesn't support this... 
- */ -Lslow: - lea 1f(%rip),%rdi - xorb %al,%al - call EXT(panic) - hlt - .data -1: String "_rtc_nanotime_read() - slow algorithm not supported" +/* + * __vmxoff -- Leave VMX Operation + * int __vmxoff(void); + */ +Entry(__vmxoff) + FRAME + + mov $(VMX_FAIL_INVALID), %ecx + mov $(VMX_FAIL_VALID), %edx + mov $(VMX_SUCCEED), %eax + vmxoff + cmovcl %ecx, %eax /* CF = 1, ZF = 0 */ + cmovzl %edx, %eax /* CF = 0, ZF = 1 */ + + EMARF + ret +#endif /* CONFIG_VMX */ + +/* + * mfence -- Memory Barrier + * Use out-of-line assembly to get + * standard x86-64 ABI guarantees + * about what the caller's codegen + * has in registers vs. memory + */ +Entry(do_mfence) + mfence + ret