X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0b4c1975fb5e4eccf1012a35081f7e7799b81046..3e170ce000f1506b7b5d2c5c7faec85ceabb573d:/osfmk/x86_64/machine_routines_asm.s

diff --git a/osfmk/x86_64/machine_routines_asm.s b/osfmk/x86_64/machine_routines_asm.s
index f8fecaccf..af962f2f4 100644
--- a/osfmk/x86_64/machine_routines_asm.s
+++ b/osfmk/x86_64/machine_routines_asm.s
@@ -27,20 +27,19 @@
  */
  
 #include <i386/asm.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_asm.h>
 #include <i386/proc_reg.h>
 #include <i386/eflags.h>
        
 #include <i386/postcode.h>
 #include <i386/apic.h>
+#include <i386/vmx/vmx_asm.h>
 #include <assym.s>
 
 /*
 **      ml_get_timebase()
 **
-**      Entry   - %rdi contains pointer to 64 bit structure.
-**
-**      Exit    - 64 bit structure filled in.
+**      Returns TSC in RAX
 **
 */
 ENTRY(ml_get_timebase)
@@ -50,7 +49,6 @@ ENTRY(ml_get_timebase)
 	lfence
         shlq	$32,%rdx 
         orq	%rdx,%rax
-	movq    %rax, (%rdi)
 			
 	ret
 
@@ -80,36 +78,17 @@ ENTRY(ml_get_timebase)
  *
  */
 ENTRY(tmrCvt)
+	cmpq	$1,%rsi				/* check for unity fastpath */
+	je	1f				/* %rsi == 1: return %rdi untouched */
 	movq	%rdi,%rax
 	mulq	%rsi				/* result is %rdx:%rax */
 	shrdq   $32,%rdx,%rax			/* %rdx:%rax >>= 32 */
 	ret
-
-
-/*
- * void _rtc_nanotime_store(
- *		uint64_t        tsc,		// %rdi
- *		uint64_t        nsec,		// %rsi
- *		uint32_t        scale,		// %rdx
- *		uint32_t        shift,		// %rcx
- *		rtc_nanotime_t  *dst);		// %r8
- */
-ENTRY(_rtc_nanotime_store)
-	movl	RNT_GENERATION(%r8),%eax	/* get current generation */
-	movl	$0,RNT_GENERATION(%r8)		/* flag data as being updated */
-	movq	%rdi,RNT_TSC_BASE(%r8)
-	movq	%rsi,RNT_NS_BASE(%r8)
-	movl	%edx,RNT_SCALE(%r8)
-	movl	%ecx,RNT_SHIFT(%r8)
-
-	incl	%eax				/* next generation */
-	jnz	1f
-	incl	%eax				/* skip 0, which is a flag */
-1:	movl	%eax,RNT_GENERATION(%r8)	/* update generation */
-
+1:						/* unity case: no multiply, no >>32 */
+	mov	%rdi,%rax			/* result = input (general path would give %rdi >> 32 here) */
 	ret
 
-/*
+ /*
  * void _rtc_nanotime_adjust(
  *		uint64_t        tsc_base_delta,	// %rdi
  *		rtc_nanotime_t  *dst);		// %rsi
@@ -127,28 +106,36 @@ ENTRY(_rtc_nanotime_adjust)
 	ret
 
 /*
- * unint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
+ * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp);
  *
  * This is the same as the commpage nanotime routine, except that it uses the
  * kernel internal "rtc_nanotime_info" data instead of the commpage data.
  * These two copies of data are kept in sync by rtc_clock_napped().
  *
- * Warning!  There is another copy of this code in osfmk/x86_64/idt64.s.
- * These are kept in sync by both using the RTC_NANOTIME_READ() macro.
+ * Warning!  There are several copies of this code in the trampolines found in
+ * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
+ * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
+ *
+ * The algorithm we use is:
+ *
+ *	ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ *
+ * rnt_shift, a constant computed during initialization, is the smallest value for which:
  *
- * There are two versions of this algorithm, for "slow" and "fast" processors.
- * The more common "fast" algorithm is:
+ *	(tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
  *
- *	ns = (((rdtsc - rnt_tsc_base)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ * Where SLOW_TSC_THRESHOLD is about 10**9.  Since most processors' tscFreqs are greater
+ * than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
  *
- * Of course, the divide by 2**32 is a nop.  rnt_tsc_scale is a constant
- * computed during initialization:
+ *	rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
  *
- *	rnt_tsc_scale = (10e9 * 2**32) / tscFreq;
+ * On 64-bit processors this algorithm could be simplified by doing a 64x64 bit
+ * multiply of rdtsc by tscFCvtt2n:
  *
- * The "slow" algorithm uses long division:
+ *	ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base;
  *
- *	ns = (((rdtsc - rnt_tsc_base) * 10e9) / tscFreq) + rnt_ns_base;
+ * We don't do so in order to use the same algorithm in 32- and 64-bit mode.
+ * When U32 goes away, we should reconsider.
  *
  * Since this routine is not synchronized and can be called in any context, 
  * we use a generation count to guard against seeing partially updated data.
@@ -159,30 +146,99 @@ ENTRY(_rtc_nanotime_adjust)
  * the generation is zero.
  *
  * unint64_t _rtc_nanotime_read(
- *			rtc_nanotime_t *rntp,		// %rdi
- *			int            slow);		// %rsi
+ *			rtc_nanotime_t *rntp);		// %rdi
  *
  */
 ENTRY(_rtc_nanotime_read)
-	test		%rsi,%rsi
-	jnz		Lslow
-		
-	/*
-	 * Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD
-	 */
-	RTC_NANOTIME_READ_FAST()
 
+	PAL_RTC_NANOTIME_READ_FAST()		/* macro defined outside this file; presumably leaves ns in %rax -- confirm */
+
+	ret
+    
+/*
+ * extern uint64_t _rtc_tsc_to_nanoseconds(
+ *          uint64_t    value,              // %rdi
+ *          pal_rtc_nanotime_t	*rntp);     // %rsi
+ *
+ * Converts TSC units to nanoseconds, using an abbreviated form of the above
+ * algorithm.  Note that while we could have simply used tmrCvt(value,tscFCvtt2n),
+ * which would avoid the need for this asm, doing so is a bit more risky since
+ * we'd be using a different algorithm with possibly different rounding etc.
+ */
+
+ENTRY(_rtc_tsc_to_nanoseconds)
+	movq    %rdi,%rax			/* copy value (in TSC units) to convert */
+	movl    RNT_SHIFT(%rsi),%ecx		/* shift count from *rntp; consumed via %cl below */
+	movl    RNT_SCALE(%rsi),%edx		/* 32-bit scale from *rntp; movl zero-extends into %rdx */
+	shlq    %cl,%rax			/* tscUnits << shift (bits shifted out of %rax are dropped) */
+	mulq    %rdx				/* (tscUnits << shift) * scale, 128-bit product in %rdx:%rax */
+	shrdq   $32,%rdx,%rax			/* %rax = low 64 bits of (%rdx:%rax >> 32) */
+	ret
+    
+    
+
+Entry(call_continuation)
+	movq	%rdi,%rcx			/* get continuation */
+	movq	%rsi,%rdi			/* continuation param */
+	movq	%rdx,%rsi			/* wait result */
+	movq	%gs:CPU_KERNEL_STACK,%rsp	/* set the stack */
+	xorq	%rbp,%rbp			/* zero frame pointer */
+	call	*%rcx				/* call continuation */
+	movq	%gs:CPU_ACTIVE_THREAD,%rdi	/* continuation returned: pass the %gs-relative active thread as arg 1 */
+	call	EXT(thread_terminate)		/* nothing follows this call here; presumably it does not return -- confirm */
+
+Entry(x86_init_wrapper)
+	xor	%rbp, %rbp			/* zero frame pointer */
+	movq	%rsi, %rsp			/* switch to the stack pointer passed as arg 2 */
+	callq	*%rdi				/* call the function passed as arg 1; no ret follows, so it is presumably not expected to return -- confirm */
+
+#if CONFIG_VMX
+
+/*
+ *	__vmxon -- Enter VMX Operation
+ *	int __vmxon(addr64_t v);
+ */
+Entry(__vmxon)
+	FRAME					/* frame macro defined outside this file; paired with EMARF below */
+	push	%rdi				/* place v in memory: vmxon below reads its operand from (%rsp) */
+	
+	mov	$(VMX_FAIL_INVALID), %ecx	/* candidate result, selected below when CF is set */
+	mov	$(VMX_FAIL_VALID), %edx		/* candidate result, selected below when ZF is set */
+	mov	$(VMX_SUCCEED), %eax		/* default result when neither flag is set */
+	vmxon	(%rsp)				/* memory operand is the 64-bit value pushed above */
+	cmovcl 	%ecx, %eax	/* CF = 1, ZF = 0: return VMX_FAIL_INVALID */
+	cmovzl	%edx, %eax	/* CF = 0, ZF = 1: return VMX_FAIL_VALID */
+
+	pop	%rdi				/* remove the spilled argument from the stack */
+	EMARF
 	ret
 
-	/*
-	 * Processor whose TSC frequency is not faster than SLOW_TSC_THRESHOLD
-	 * But K64 doesn't support this...
-	 */
-Lslow:
-	lea	1f(%rip),%rdi
-	xorb	%al,%al
-	call	EXT(panic)
-	hlt
-	.data
-1: 	String	"_rtc_nanotime_read() - slow algorithm not supported"
+/*
+ *	__vmxoff -- Leave VMX Operation
+ *	int __vmxoff(void);
+ */
+Entry(__vmxoff)
+	FRAME					/* frame macro defined outside this file; paired with EMARF below */
+	
+	mov	$(VMX_FAIL_INVALID), %ecx	/* candidate result, selected below when CF is set */
+	mov	$(VMX_FAIL_VALID), %edx		/* candidate result, selected below when ZF is set */
+	mov	$(VMX_SUCCEED), %eax		/* default result when neither flag is set */
+	vmxoff
+	cmovcl 	%ecx, %eax	/* CF = 1, ZF = 0: return VMX_FAIL_INVALID */
+	cmovzl	%edx, %eax	/* CF = 0, ZF = 1: return VMX_FAIL_VALID */
+
+	EMARF
+	ret
 
+#endif /* CONFIG_VMX */
+
+/*
+ *	mfence -- Memory Barrier
+ *	Use out-of-line assembly to get
+ *	standard x86-64 ABI guarantees
+ *	about what the caller's codegen
+ *	has in registers vs. memory
+ */
+Entry(do_mfence)
+	mfence					/* the memory barrier itself; no registers are read or written */
+	ret