/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/asm.h>
#include <i386/rtclock_asm.h>
#include <i386/proc_reg.h>
#include <i386/eflags.h>

#include <i386/postcode.h>
#include <i386/apic.h>
#include <i386/vmx/vmx_asm.h>
#include <assym.s>

/*
** ml_get_timebase()
**
** Entry - %rdi contains a pointer to a 64-bit structure.
**
** Exit  - the 64-bit structure is filled in with the current TSC.
**
*/
ENTRY(ml_get_timebase)

	lfence				/* order prior loads before the read */
	rdtsc				/* EDX:EAX := TSC */
	lfence				/* keep rdtsc from drifting past later code */
	shlq	$32,%rdx
	orq	%rdx,%rax		/* RAX := (EDX << 32) | EAX */
	movq	%rax, (%rdi)		/* store the 64-bit timebase */

	ret

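/*
 * A minimal C sketch of the same serialized TSC read (illustrative only;
 * it assumes the GCC/Clang builtins __rdtsc() and _mm_lfence() from
 * <x86intrin.h>, which are not part of this file):
 *
 *	#include <stdint.h>
 *	#include <x86intrin.h>
 *
 *	static uint64_t ml_get_timebase_sketch(void)
 *	{
 *		_mm_lfence();			// fence before the read
 *		uint64_t tsc = __rdtsc();	// EDX:EAX combined to 64 bits
 *		_mm_lfence();			// fence after the read
 *		return tsc;
 *	}
 */
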
/*
 * Convert between various timer units
 *
 * This code converts 64-bit time units to other units.
 * For example, the TSC is converted to HPET units.
 *
 * Time is a 64-bit integer that is some number of ticks.
 * The conversion factor is a 64-bit fixed-point number composed
 * of a 32-bit integer part and a 32-bit fraction.
 *
 * The time ticks are multiplied by the conversion factor to give
 * a 128-bit product, of which the top and bottom 32 bits are
 * dropped: the top 32 bits are overflow and the bottom 32 bits
 * are the fraction part of the result. The middle 64 bits are
 * returned.
 *
 * Note that we can use this function to multiply 2 conversion factors.
 * We do this in order to calculate the multiplier used to convert
 * directly between any two units.
 *
 * uint64_t tmrCvt(uint64_t time,	 	// %rdi
 *		   uint64_t conversion)		// %rsi
 *
 */
ENTRY(tmrCvt)
	movq	%rdi,%rax
	mulq	%rsi			/* result is %rdx:%rax */
	shrdq	$32,%rdx,%rax		/* %rdx:%rax >>= 32 */
	ret

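/*
 * Hedged C equivalent of tmrCvt (illustrative only; assumes a compiler
 * that provides __uint128_t):
 *
 *	uint64_t tmrCvt_sketch(uint64_t time, uint64_t conversion)
 *	{
 *		// 64x64 -> 128-bit multiply, then keep the middle 64 bits
 *		return (uint64_t)(((__uint128_t)time * conversion) >> 32);
 *	}
 */
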
/*
 * void _rtc_nanotime_adjust(
 *		uint64_t	tsc_base_delta,	// %rdi
 *		rtc_nanotime_t	*dst);		// %rsi
 */
ENTRY(_rtc_nanotime_adjust)
	movl	RNT_GENERATION(%rsi),%eax	/* get current generation */
	movl	$0,RNT_GENERATION(%rsi)		/* flag data as being updated */
	addq	%rdi,RNT_TSC_BASE(%rsi)

	incl	%eax				/* next generation */
	jnz	1f
	incl	%eax				/* skip 0, which is a flag */
1:	movl	%eax,RNT_GENERATION(%rsi)	/* update generation */

	ret

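/*
 * Hedged C sketch of the generation protocol above (field names are
 * assumed from the RNT_* offsets; real code must also keep the compiler
 * from reordering these stores):
 *
 *	void rtc_nanotime_adjust_sketch(uint64_t tsc_base_delta,
 *	    rtc_nanotime_t *dst)
 *	{
 *		uint32_t gen = dst->generation;
 *		dst->generation = 0;		// readers spin while zero
 *		dst->tsc_base += tsc_base_delta;
 *		if (++gen == 0)
 *			gen = 1;		// skip 0, which is a flag
 *		dst->generation = gen;		// publish the new data
 *	}
 */
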
/*
 * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp);
 *
 * This is the same as the commpage nanotime routine, except that it uses the
 * kernel internal "rtc_nanotime_info" data instead of the commpage data.
 * These two copies of data are kept in sync by rtc_clock_napped().
 *
 * Warning! There are several copies of this code in the trampolines found in
 * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h.
 * They're all kept in sync by using the RTC_NANOTIME_READ() macro.
 *
 * The algorithm we use is:
 *
 *	ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * rnt_shift, a constant computed during initialization, is the smallest value for which:
 *
 *	(tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
 *
 * Where SLOW_TSC_THRESHOLD is about 10^9. Since most processors' tscFreqs are greater
 * than 1GHz, rnt_shift is usually 0. rnt_tsc_scale is also a 32-bit constant:
 *
 *	rnt_tsc_scale = (10^9 * 2**32) / (tscFreq << rnt_shift);
 *
 * On 64-bit processors this algorithm could be simplified by doing a 64x64-bit
 * multiply of rdtsc by tscFCvtt2n:
 *
 *	ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base;
 *
 * We don't do so in order to use the same algorithm in 32- and 64-bit mode.
 * When U32 goes away, we should reconsider.
 *
 * Since this routine is not synchronized and can be called in any context,
 * we use a generation count to guard against seeing partially updated data.
 * In addition, the _rtc_nanotime_store() routine zeroes the generation before
 * updating the data, and stores the nonzero generation only after all fields
 * have been stored. Because IA32 guarantees that stores by one processor
 * must be seen in order by another, we can avoid using a lock. We spin while
 * the generation is zero.
 *
 * uint64_t _rtc_nanotime_read(
 *		rtc_nanotime_t *rntp);		// %rdi
 *
 */
ENTRY(_rtc_nanotime_read)

	PAL_RTC_NANOTIME_READ_FAST()

	ret

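/*
 * Hedged C sketch of the generation-guarded read that
 * PAL_RTC_NANOTIME_READ_FAST() expands to (field names are assumed from
 * the RNT_* offsets; rdtsc64() is the inline from i386/proc_reg.h):
 *
 *	uint64_t rtc_nanotime_read_sketch(rtc_nanotime_t *rntp)
 *	{
 *		uint64_t ns;
 *		uint32_t gen;
 *		do {
 *			while ((gen = rntp->generation) == 0)
 *				;		// update in progress: spin
 *			uint64_t tsc = rdtsc64() - rntp->tsc_base;
 *			ns = (uint64_t)(((__uint128_t)(tsc << rntp->shift) *
 *			    rntp->scale) >> 32) + rntp->ns_base;
 *		} while (gen != rntp->generation); // retry if updated mid-read
 *		return ns;
 *	}
 */
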
/*
 * extern uint64_t _rtc_tsc_to_nanoseconds(
 *		uint64_t		value,	// %rdi
 *		pal_rtc_nanotime_t	*rntp);	// %rsi
 *
 * Converts TSC units to nanoseconds, using an abbreviated form of the above
 * algorithm. Note that while we could have simply used tmrCvt(value,tscFCvtt2n),
 * which would avoid the need for this asm, doing so is a bit more risky since
 * we'd be using a different algorithm with possibly different rounding etc.
 */

ENTRY(_rtc_tsc_to_nanoseconds)
	movq	%rdi,%rax		/* copy value (in TSC units) to convert */
	movl	RNT_SHIFT(%rsi),%ecx
	movl	RNT_SCALE(%rsi),%edx
	shlq	%cl,%rax		/* tscUnits << shift */
	mulq	%rdx			/* (tscUnits << shift) * scale */
	shrdq	$32,%rdx,%rax		/* %rdx:%rax >>= 32 */
	ret

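/*
 * Hedged C equivalent of _rtc_tsc_to_nanoseconds (assumes __uint128_t;
 * field names inferred from the RNT_* offsets):
 *
 *	uint64_t rtc_tsc_to_nanoseconds_sketch(uint64_t value,
 *	    pal_rtc_nanotime_t *rntp)
 *	{
 *		return (uint64_t)(((__uint128_t)(value << rntp->shift) *
 *		    rntp->scale) >> 32);
 *	}
 */
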
Entry(call_continuation)
	movq	%rdi,%rcx			/* get continuation */
	movq	%rsi,%rdi			/* continuation param */
	movq	%rdx,%rsi			/* wait result */
	movq	%gs:CPU_KERNEL_STACK,%rsp	/* set the stack */
	xorq	%rbp,%rbp			/* zero frame pointer */
	call	*%rcx				/* call continuation */
	movq	%gs:CPU_ACTIVE_THREAD,%rdi
	call	EXT(thread_terminate)		/* does not return */

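/*
 * Hedged C-level view of call_continuation (the %rsp switch itself cannot
 * be written in C; thread_continue_t and wait_result_t are the xnu types
 * this is assumed to match):
 *
 *	void call_continuation_sketch(thread_continue_t continuation,
 *	    void *parameter, wait_result_t wresult)
 *	{
 *		// switch to the thread's kernel stack, clear the frame pointer
 *		continuation(parameter, wresult);	// normally never returns
 *		thread_terminate(current_thread());	// reached only if it does
 *	}
 */
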
Entry(x86_init_wrapper)
	xor	%rbp, %rbp		/* zero frame pointer for backtraces */
	movq	%rsi, %rsp		/* switch to the supplied stack */
	callq	*%rdi			/* call the init function */

/*
 * Generate a 64-bit quantity with possibly random characteristics, intended for use
 * before the kernel entropy pool is available. The processor's RNG is used if
 * available, and a value derived from the Time Stamp Counter is returned if not.
 * Multiple invocations may result in well-correlated values if sourced from the TSC.
 */
Entry(ml_early_random)
	mov	%rbx, %rsi		/* preserve %rbx across cpuid */
	mov	$1, %eax
	cpuid
	mov	%rsi, %rbx		/* restore %rbx */
	test	$(1 << 30), %ecx	/* CPUID.01H:ECX[30] = RDRAND support */
	jz	Lnon_rdrand
	RDRAND_RAX			/* RAX := 64 bits of DRBG entropy */
	jnc	Lnon_rdrand		/* CF = 0: no entropy available, fall back */
	ret
Lnon_rdrand:
	rdtsc				/* EDX:EAX := TSC */
	/* Distribute low order bits */
	mov	%eax, %ecx
	xor	%al, %ah
	shl	$16, %rcx
	xor	%rcx, %rax
	xor	%eax, %edx

	/* Incorporate ASLR entropy, if any */
	lea	(%rip), %rcx
	shr	$21, %rcx
	movzbl	%cl, %ecx
	shl	$16, %ecx
	xor	%ecx, %edx

	mov	%ah, %cl
	ror	%cl, %edx		/* Right rotate EDX (TSC&0xFF ^ (TSC>>8 & 0xFF))&1F */
	shl	$32, %rdx
	xor	%rdx, %rax
	mov	%cl, %al
	ret

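/*
 * Hedged C sketch of the policy above (illustrative only; _rdrand64_step()
 * is the compiler intrinsic form of RDRAND, while cpu_has_rdrand() and
 * mix_tsc() are hypothetical helpers standing in for the CPUID test and
 * the TSC bit-mixing sequence):
 *
 *	uint64_t ml_early_random_sketch(void)
 *	{
 *		unsigned long long r;
 *		if (cpu_has_rdrand() && _rdrand64_step(&r))
 *			return r;		// hardware DRBG entropy
 *		return mix_tsc(__rdtsc());	// weak, possibly correlated fallback
 *	}
 */
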
#if CONFIG_VMX

/*
 *	__vmxon -- Enter VMX Operation
 *	int __vmxon(addr64_t v);
 */
Entry(__vmxon)
	FRAME
	push	%rdi			/* v: physical address of the VMXON region */

	mov	$(VMX_FAIL_INVALID), %ecx
	mov	$(VMX_FAIL_VALID), %edx
	mov	$(VMX_SUCCEED), %eax
	vmxon	(%rsp)
	cmovcl	%ecx, %eax		/* CF = 1, ZF = 0: VMfailInvalid */
	cmovzl	%edx, %eax		/* CF = 0, ZF = 1: VMfailValid */

	pop	%rdi
	EMARF
	ret

/*
 *	__vmxoff -- Leave VMX Operation
 *	int __vmxoff(void);
 */
Entry(__vmxoff)
	FRAME

	mov	$(VMX_FAIL_INVALID), %ecx
	mov	$(VMX_FAIL_VALID), %edx
	mov	$(VMX_SUCCEED), %eax
	vmxoff
	cmovcl	%ecx, %eax		/* CF = 1, ZF = 0: VMfailInvalid */
	cmovzl	%edx, %eax		/* CF = 0, ZF = 1: VMfailValid */

	EMARF
	ret

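/*
 * Hedged usage sketch (the VMX_* status codes come from i386/vmx/vmx_asm.h;
 * vmxon_region_pa is a hypothetical physical address of a properly
 * initialized VMXON region):
 *
 *	if (__vmxon(vmxon_region_pa) != VMX_SUCCEED) {
 *		// handle VMX_FAIL_INVALID / VMX_FAIL_VALID
 *	}
 *	// ... VMX operation ...
 *	(void)__vmxoff();
 */
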
#endif /* CONFIG_VMX */