*/
#include <i386/asm.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_asm.h>
#include <i386/proc_reg.h>
#include <i386/eflags.h>
#include <i386/postcode.h>
#include <i386/apic.h>
+#include <i386/vmx/vmx_asm.h>
#include <assym.s>
/*
** ml_get_timebase()
**
-** Entry - %rdi contains pointer to 64 bit structure.
-**
-** Exit - 64 bit structure filled in.
+** Returns TSC in RAX
**
*/
ENTRY(ml_get_timebase)
	lfence
	rdtsc				/* TSC returned in %edx:%eax */
shlq $32,%rdx
orq %rdx,%rax
- movq %rax, (%rdi)
ret
*
*/
ENTRY(tmrCvt)
+ cmpq $1,%rsi /* check for unity fastpath */
+ je 1f
movq %rdi,%rax
mulq %rsi /* result is %rdx:%rax */
shrdq $32,%rdx,%rax /* %rdx:%rax >>= 32 */
ret
-
-
-/*
- * void _rtc_nanotime_store(
- * uint64_t tsc, // %rdi
- * uint64_t nsec, // %rsi
- * uint32_t scale, // %rdx
- * uint32_t shift, // %rcx
- * rtc_nanotime_t *dst); // %r8
- */
-ENTRY(_rtc_nanotime_store)
- movl RNT_GENERATION(%r8),%eax /* get current generation */
- movl $0,RNT_GENERATION(%r8) /* flag data as being updated */
- movq %rdi,RNT_TSC_BASE(%r8)
- movq %rsi,RNT_NS_BASE(%r8)
- movl %edx,RNT_SCALE(%r8)
- movl %ecx,RNT_SHIFT(%r8)
-
- incl %eax /* next generation */
- jnz 1f
- incl %eax /* skip 0, which is a flag */
-1: movl %eax,RNT_GENERATION(%r8) /* update generation */
-
+1:
+	mov	%rdi,%rax		/* unity conversion: return value unchanged */
ret
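+
+/*
+ * For reference, a rough C sketch of what tmrCvt() computes.  The helper
+ * below is illustrative only and not part of this file:
+ *
+ *	#include <stdint.h>
+ *
+ *	uint64_t tmrCvt_sketch(uint64_t value, uint64_t conversion)
+ *	{
+ *		if (conversion == 1)		// unity fastpath: identity
+ *			return value;
+ *		// 64x64 -> 128-bit product; keep bits 32..95, dropping the
+ *		// overflow high word and the fractional low word, as the
+ *		// mulq/shrdq pair above does.
+ *		return (uint64_t)(((unsigned __int128)value * conversion) >> 32);
+ *	}
+ */
+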
-/*
+ /*
* void _rtc_nanotime_adjust(
* uint64_t tsc_base_delta, // %rdi
* rtc_nanotime_t *dst); // %rsi
ret
/*
- * unint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
+ * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp);
*
* This is the same as the commpage nanotime routine, except that it uses the
* kernel internal "rtc_nanotime_info" data instead of the commpage data.
* These two copies of data are kept in sync by rtc_clock_napped().
*
- * Warning! There is another copy of this code in osfmk/x86_64/idt64.s.
- * These are kept in sync by both using the RTC_NANOTIME_READ() macro.
+ * Warning! There are several copies of this code in the trampolines found in
+ * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
+ * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
+ *
+ * The algorithm we use is:
*
- * There are two versions of this algorithm, for "slow" and "fast" processors.
- * The more common "fast" algorithm is:
+ * ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
*
- * ns = (((rdtsc - rnt_tsc_base)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ * rnt_shift, a constant computed during initialization, is the smallest value for which:
*
- * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant
- * computed during initialization:
+ * (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
*
- * rnt_tsc_scale = (10e9 * 2**32) / tscFreq;
+ * Where SLOW_TSC_THRESHOLD is about 10e9. Since most processors' tscFreqs are greater
+ * than 1GHz, rnt_shift is usually 0. rnt_tsc_scale is also a 32-bit constant:
*
- * The "slow" algorithm uses long division:
+ * rnt_tsc_scale = (10e9 * 2**32) / (tscFreq << rnt_shift);
*
- * ns = (((rdtsc - rnt_tsc_base) * 10e9) / tscFreq) + rnt_ns_base;
+ * On 64-bit processors this algorithm could be simplified by doing a 64x64 bit
+ * multiply of rdtsc by tscFCvtt2n:
+ *
+ * ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base;
+ *
+ * We don't do so in order to use the same algorithm in 32- and 64-bit mode.
+ * When U32 goes away, we should reconsider.
*
* Since this routine is not synchronized and can be called in any context,
* we use a generation count to guard against seeing partially updated data.
 * Readers retry whenever an update is in progress, which is flagged by
 * the generation being zero.
*
 * uint64_t _rtc_nanotime_read(
- * rtc_nanotime_t *rntp, // %rdi
- * int slow); // %rsi
+ * rtc_nanotime_t *rntp); // %rdi
*
*/
ENTRY(_rtc_nanotime_read)
- test %rsi,%rsi
- jnz Lslow
-
- /*
- * Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD
- */
- RTC_NANOTIME_READ_FAST()
+ PAL_RTC_NANOTIME_READ_FAST()
+
+ ret
+
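+/*
+ * For reference, a rough C sketch of the formula documented above.  The field
+ * and type names below are illustrative only, not the real pal_rtc_nanotime_t
+ * layout, and the generation-count handling is omitted:
+ *
+ *	#include <stdint.h>
+ *
+ *	typedef struct {
+ *		uint64_t	tsc_base;
+ *		uint64_t	ns_base;
+ *		uint32_t	scale;
+ *		uint32_t	shift;
+ *	} nt_sketch_t;
+ *
+ *	uint64_t nanotime_sketch(uint64_t tsc, const nt_sketch_t *p)
+ *	{
+ *		uint64_t delta = (tsc - p->tsc_base) << p->shift;
+ *		// 64x32 multiply, keeping the middle 64 bits (the divide by 2**32)
+ *		return (uint64_t)(((unsigned __int128)delta * p->scale) >> 32)
+ *		    + p->ns_base;
+ *	}
+ *
+ * The real macro also spins on and re-checks the generation word, as described
+ * above, so it never computes a result from a half-updated record.
+ */
+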
+/*
+ * extern uint64_t _rtc_tsc_to_nanoseconds(
+ * uint64_t value, // %rdi
+ * pal_rtc_nanotime_t *rntp); // %rsi
+ *
+ * Converts TSC units to nanoseconds, using an abbreviated form of the above
+ * algorithm. Note that while we could have simply used tmrCvt(value,tscFCvtt2n),
+ * which would avoid the need for this asm, doing so is a bit more risky since
+ * we'd be using a different algorithm with possibly different rounding etc.
+ */
+
+ENTRY(_rtc_tsc_to_nanoseconds)
+ movq %rdi,%rax /* copy value (in TSC units) to convert */
+ movl RNT_SHIFT(%rsi),%ecx
+ movl RNT_SCALE(%rsi),%edx
+ shlq %cl,%rax /* tscUnits << shift */
+ mulq %rdx /* (tscUnits << shift) * scale */
+ shrdq $32,%rdx,%rax /* %rdx:%rax >>= 32 */
ret
+
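+/*
+ * In C terms the conversion above is roughly (a sketch only, with the same
+ * caveats as the _rtc_nanotime_read() sketch above):
+ *
+ *	ns = (uint64_t)(((unsigned __int128)(value << rnt_shift) * rnt_tsc_scale) >> 32);
+ *
+ * i.e. the nanotime formula without the rnt_tsc_base/rnt_ns_base offsets.
+ */
+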
- /*
- * Processor whose TSC frequency is not faster than SLOW_TSC_THRESHOLD
- * But K64 doesn't support this...
- */
-Lslow:
- lea 1f(%rip),%rdi
- xorb %al,%al
- call EXT(panic)
- hlt
- .data
-1: String "_rtc_nanotime_read() - slow algorithm not supported"
+/*
+ * typedef void (*thread_continue_t)(void *param, wait_result_t)
+ *
+ * void call_continuation( thread_continue_t continuation,
+ * void *param,
+ * wait_result_t wresult,
+ *				bool enable_interrupts)
+ */
+Entry(call_continuation)
+
+ movq %rdi, %r12 /* continuation */
+ movq %rsi, %r13 /* continuation param */
+ movq %rdx, %r14 /* wait result */
+
+ movq %gs:CPU_KERNEL_STACK,%rsp /* set the stack */
+ xorq %rbp,%rbp /* zero frame pointer */
+
+	test	%ecx, %ecx			/* enable interrupts? */
+	jz	1f
+	mov	$1, %edi
+	call	_ml_set_interrupts_enabled	/* yes: turn them on before the continuation runs */
+1:
+
+ movq %r12,%rcx /* continuation */
+ movq %r13,%rdi /* continuation param */
+ movq %r14,%rsi /* wait result */
+
+ call *%rcx /* call continuation */
+ movq %gs:CPU_ACTIVE_THREAD,%rdi
+ call EXT(thread_terminate)
+
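+/*
+ * For context, the C-level shape of a continuation (a hypothetical example;
+ * my_continuation and event are illustrative names, not code from this file):
+ *
+ *	static void my_continuation(void *param, wait_result_t wresult)
+ *	{
+ *		...			// runs on a fresh kernel stack
+ *	}
+ *
+ *	assert_wait(event, THREAD_UNINT);
+ *	thread_block(my_continuation);	// does not return here if we block
+ *
+ * call_continuation() is the last step of resuming such a thread: it resets
+ * %rsp to the top of the kernel stack, optionally re-enables interrupts, and
+ * calls the continuation.  If the continuation returns, the thread is
+ * self-terminated by the thread_terminate() call above.
+ */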
+
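+/*
+ * x86_init_wrapper() -- switch to the stack passed in %rsi, clear the frame
+ * pointer so backtraces terminate here, and call the routine passed in %rdi.
+ * The routine is not expected to return.
+ */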
+Entry(x86_init_wrapper)
+ xor %rbp, %rbp
+ movq %rsi, %rsp
+ callq *%rdi
+
+#if CONFIG_VMX
+
+/*
+ * __vmxon -- Enter VMX Operation
+ * int __vmxon(addr64_t v);
+ */
+Entry(__vmxon)
+ FRAME
+ push %rdi
+
+ mov $(VMX_FAIL_INVALID), %ecx
+ mov $(VMX_FAIL_VALID), %edx
+ mov $(VMX_SUCCEED), %eax
+ vmxon (%rsp)
+ cmovcl %ecx, %eax /* CF = 1, ZF = 0 */
+ cmovzl %edx, %eax /* CF = 0, ZF = 1 */
+
+ pop %rdi
+ EMARF
+ ret
+
+/*
+ * __vmxoff -- Leave VMX Operation
+ * int __vmxoff(void);
+ */
+Entry(__vmxoff)
+ FRAME
+
+ mov $(VMX_FAIL_INVALID), %ecx
+ mov $(VMX_FAIL_VALID), %edx
+ mov $(VMX_SUCCEED), %eax
+ vmxoff
+ cmovcl %ecx, %eax /* CF = 1, ZF = 0 */
+ cmovzl %edx, %eax /* CF = 0, ZF = 1 */
+
+ EMARF
+ ret
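+
+/*
+ * Illustrative use of the two helpers above (a sketch, not taken from the
+ * vmx driver; vmxon_region_pa is a placeholder name):
+ *
+ *	if (__vmxon(vmxon_region_pa) != VMX_SUCCEED)
+ *		...	// VMXON refused: CF/ZF reported a VMX_FAIL_* condition
+ *
+ * vmxon_region_pa must be the physical address of a page-aligned VMXON region
+ * whose first word holds the VMCS revision identifier, and CR4.VMXE must
+ * already be set, per the Intel SDM.
+ */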
+
+#endif /* CONFIG_VMX */
+
+/*
+ * mfence -- Memory Barrier
+ * Use out-of-line assembly to get
+ * standard x86-64 ABI guarantees
+ * about what the caller's codegen
+ * has in registers vs. memory
+ */
+Entry(do_mfence)
+ mfence
+ ret
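+
+/*
+ * The C-level view is simply an external call (the prototype here is shown
+ * for illustration; the actual declaration lives elsewhere in osfmk):
+ *
+ *	extern void do_mfence(void);
+ *
+ * Because the call is opaque to the compiler, it must assume any global or
+ * escaped memory may be read or written across it, so such values cannot be
+ * kept cached in registers over the barrier.
+ */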