xnu-3248.40.184.tar.gz

[apple/xnu.git] / osfmk / x86_64 / machine_routines_asm.s
diff --git a/osfmk/x86_64/machine_routines_asm.s b/osfmk/x86_64/machine_routines_asm.s

index 3628875868bad43f22a33de71f2e6af3ac828b8f..af962f2f44238dc46b88b3fe701941950163045e 100644 (file)
--- a/osfmk/x86_64/machine_routines_asm.s
+++ b/osfmk/x86_64/machine_routines_asm.s
@@ -39,9 +39,7 @@
  /*
  **      ml_get_timebase()
  **
-**      Entry   - %rdi contains pointer to 64 bit structure.
-**
-**      Exit    - 64 bit structure filled in.
+**      Returns TSC in RAX
  **
  */
  ENTRY(ml_get_timebase)
@@ -51,7 +49,6 @@ ENTRY(ml_get_timebase)
         lfence
          shlq   $32,%rdx 
          orq    %rdx,%rax
-       movq    %rax, (%rdi)
                         
         ret
  
@@ -81,10 +78,15 @@ ENTRY(ml_get_timebase)
   *
   */
  ENTRY(tmrCvt)
+       cmpq    $1,%rsi                         /* check for unity fastpath: is the %rsi multiplier exactly 1? */
+       je      1f                              /* yes: skip the multiply and shift entirely */
         movq    %rdi,%rax                       /* mulq takes its implicit operand from %rax */
         mulq    %rsi                            /* result is %rdx:%rax */
         shrdq   $32,%rdx,%rax                   /* %rdx:%rax >>= 32 */
         ret
+1:
+       mov     %rdi,%rax                       /* fastpath returns %rdi unchanged (note: no >> 32 applied here) */
+       ret
  
   /*
   * void _rtc_nanotime_adjust(
@@ -104,28 +106,36 @@ ENTRY(_rtc_nanotime_adjust)
         ret
  
  /*
- * unint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
+ * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp);
   *
   * This is the same as the commpage nanotime routine, except that it uses the
   * kernel internal "rtc_nanotime_info" data instead of the commpage data.
   * These two copies of data are kept in sync by rtc_clock_napped().
   *
- * Warning!  There is another copy of this code in osfmk/x86_64/idt64.s.
- * These are kept in sync by both using the RTC_NANOTIME_READ() macro.
+ * Warning!  There are several copies of this code in the trampolines found in
+ * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
+ * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
+ *
+ * The algorithm we use is:
+ *
+ *     ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ *
+ * rnt_shift, a constant computed during initialization, is the smallest value for which:
   *
- * There are two versions of this algorithm, for "slow" and "fast" processors.
- * The more common "fast" algorithm is:
+ *     (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
   *
- *     ns = (((rdtsc - rnt_tsc_base)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ * Where SLOW_TSC_THRESHOLD is about 10**9.  Since most processors' tscFreqs are greater
+ * than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
   *
- * Of course, the divide by 2**32 is a nop.  rnt_tsc_scale is a constant
- * computed during initialization:
+ *     rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
   *
- *     rnt_tsc_scale = (10e9 * 2**32) / tscFreq;
+ * On 64-bit processors this algorithm could be simplified by doing a 64x64 bit
+ * multiply of rdtsc by tscFCvtt2n:
   *
- * The "slow" algorithm uses long division:
+ *     ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base;
   *
- *     ns = (((rdtsc - rnt_tsc_base) * 10e9) / tscFreq) + rnt_ns_base;
+ * We don't do so in order to use the same algorithm in 32- and 64-bit mode.
+ * When U32 goes away, we should reconsider.
   *
   * Since this routine is not synchronized and can be called in any context, 
   * we use a generation count to guard against seeing partially updated data.
@@ -136,33 +146,36 @@ ENTRY(_rtc_nanotime_adjust)
   * the generation is zero.
   *
  * uint64_t _rtc_nanotime_read(
- *                     rtc_nanotime_t *rntp,           // %rdi
- *                     int            slow);           // %rsi
+ *                     rtc_nanotime_t *rntp);          // %rdi
   *
   */
  ENTRY(_rtc_nanotime_read)
-       test            %rsi,%rsi
-       jnz             Lslow
-               
-       /*
-        * Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD
-        */
+
         PAL_RTC_NANOTIME_READ_FAST()            /* NOTE(review): macro body is not visible here; presumably takes rntp in %rdi and leaves the result in %rax -- confirm */
  
         ret
+    
+/*
+ * extern uint64_t _rtc_tsc_to_nanoseconds(
+ *          uint64_t    value,              // %rdi
+ *          pal_rtc_nanotime_t *rntp);     // %rsi
+ *
+ * Converts TSC units to nanoseconds, using an abbreviated form of the above
+ * algorithm.  Note that while we could have simply used tmrCvt(value,tscFCvtt2n),
+ * which would avoid the need for this asm, doing so is a bit more risky since
+ * we'd be using a different algorithm with possibly different rounding etc.
+ */
  
-       /*
-        * Processor whose TSC frequency is not faster than SLOW_TSC_THRESHOLD
-        * But K64 doesn't support this...
-        */
-Lslow:
-       lea     1f(%rip),%rdi
-       xorb    %al,%al
-       call    EXT(panic)
-       hlt
-       .data
-1:     String  "_rtc_nanotime_read() - slow algorithm not supported"
-       .text
+ENTRY(_rtc_tsc_to_nanoseconds)
+       movq    %rdi,%rax                       /* copy value (in TSC units) to convert */
+       movl    RNT_SHIFT(%rsi),%ecx            /* load shift from rntp; its low byte (%cl) is the shlq count below */
+       movl    RNT_SCALE(%rsi),%edx            /* load 32-bit scale from rntp; movl zero-extends it into %rdx for mulq */
+       shlq    %cl,%rax                        /* tscUnits << shift */
+       mulq    %rdx                            /* (tscUnits << shift) * scale */
+       shrdq   $32,%rdx,%rax                   /* %rdx:%rax >>= 32 */
+       ret                                     /* converted value is returned in %rax */
+    
+    
  
  Entry(call_continuation)
         movq    %rdi,%rcx                       /* get continuation */
@@ -179,45 +192,6 @@ Entry(x86_init_wrapper)
         movq    %rsi, %rsp
         callq   *%rdi
  
-       /*
-       * Generate a 64-bit quantity with possibly random characteristics, intended for use
-       * before the kernel entropy pool is available. The processor's RNG is used if
-       * available, and a value derived from the Time Stamp Counter is returned if not.
-       * Multiple invocations may result in well-correlated values if sourced from the TSC.
-       */
-Entry(ml_early_random)
-       mov     %rbx, %rsi
-       mov     $1, %eax
-       cpuid
-       mov     %rsi, %rbx
-       test    $(1 << 30), %ecx
-       jz      Lnon_rdrand
-       RDRAND_RAX              /* RAX := 64 bits of DRBG entropy */
-       jnc     Lnon_rdrand
-       ret
-Lnon_rdrand:
-       rdtsc /* EDX:EAX := TSC */
-       /* Distribute low order bits */
-       mov     %eax, %ecx
-       xor     %al, %ah
-       shl     $16, %rcx
-       xor     %rcx, %rax
-       xor     %eax, %edx
-
-       /* Incorporate ASLR entropy, if any */
-       lea     (%rip), %rcx
-       shr     $21, %rcx
-       movzbl  %cl, %ecx
-       shl     $16, %ecx
-       xor     %ecx, %edx
-
-       mov     %ah, %cl
-       ror     %cl, %edx /* Right rotate EDX (TSC&0xFF ^ (TSC>>8 & 0xFF))&1F */
-       shl     $32, %rdx
-       xor     %rdx, %rax
-       mov     %cl, %al
-       ret
-       
  #if CONFIG_VMX
  
  /*
@@ -257,3 +231,14 @@ Entry(__vmxoff)
         ret
  
  #endif /* CONFIG_VMX */
+
+/*
+ *     mfence -- Memory Barrier
+ *     Use out-of-line assembly to get
+ *     standard x86-64 ABI guarantees
+ *     about what the caller's codegen
+ *     has in registers vs. memory
+ */
+Entry(do_mfence)
+       mfence                                  /* orders all earlier loads and stores before any later ones */
+       ret                                     /* no arguments are read and no return value is set */