]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2010 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <i386/asm.h> | |
30 | #include <i386/rtclock_asm.h> | |
31 | #include <i386/proc_reg.h> | |
32 | #include <i386/eflags.h> | |
33 | ||
34 | #include <i386/postcode.h> | |
35 | #include <i386/apic.h> | |
36 | #include <i386/vmx/vmx_asm.h> | |
37 | #include <assym.s> | |
38 | ||
39 | /* | |
40 | ** ml_get_timebase() | |
41 | ** | |
42 | ** Returns TSC in RAX | |
43 | ** | |
44 | */ | |
45 | ENTRY(ml_get_timebase) /* out: %rax = 64-bit TSC; %rdx is clobbered */ | |
46 | ||
47 | lfence /* fence ahead of the counter read */ | |
48 | rdtsc /* counter arrives split: high half in %edx, low half in %eax */ | |
49 | lfence /* fence behind the counter read */ | |
50 | shlq $32,%rdx /* move the high half up to bits 63:32 */ | |
51 | orq %rdx,%rax /* merge the halves: %rax = full 64-bit TSC */ | |
52 | ||
53 | ret /* %rdx is left holding the shifted high half */ | |
54 | ||
55 | /* | |
56 | * Convert between various timer units | |
57 | * | |
58 | * This code converts 64-bit time units to other units. | |
59 | * For example, the TSC is converted to HPET units. | |
60 | * | |
61 | * Time is a 64-bit integer that is some number of ticks. | |
62 | * Conversion is 64-bit fixed point number which is composed | |
63 | * of a 32 bit integer and a 32 bit fraction. | |
64 | * | |
65 | * The time ticks are multiplied by the conversion factor. The | |
66 | * calculations are done as a 128-bit value but both the high | |
67 | * and low words are dropped. The high word is overflow and the | |
68 | * low word is the fraction part of the result. | |
69 | * | |
70 | * We return a 64-bit value. | |
71 | * | |
72 | * Note that we can use this function to multiply 2 conversion factors. | |
73 | * We do this in order to calculate the multiplier used to convert | |
74 | * directly between any two units. | |
75 | * | |
76 | * uint64_t tmrCvt(uint64_t time, // %rdi | |
77 | * uint64_t conversion) // %rsi | |
78 | * | |
79 | */ | |
80 | ENTRY(tmrCvt) /* in: %rdi = time, %rsi = conversion; out: %rax */ | |
81 | /* NOTE(review): the header describes conversion as 32.32 fixed point, where 1.0 | |
82 | * would be 2**32 rather than 1 -- confirm what callers pass for the fast path. */ | |
83 | movq %rdi,%rax /* both paths start from the unscaled time */ | |
84 | cmpq $1,%rsi /* conversion of exactly 1: hand the time back untouched */ | |
85 | je 9f | |
86 | mulq %rsi /* 128-bit product lands in %rdx:%rax */ | |
87 | shrdq $32,%rdx,%rax /* %rax = bits 95:32 of the product */ | |
88 | 9: | |
89 | ret /* single exit shared by both paths */ | |
90 | ||
91 | /* | |
92 | * void _rtc_nanotime_adjust( | |
93 | * uint64_t tsc_base_delta, // %rdi | |
94 | * rtc_nanotime_t *dst); // %rsi | |
95 | */ | |
96 | ENTRY(_rtc_nanotime_adjust) /* in: %rdi = tsc_base_delta, %rsi = dst */ | |
97 | movl RNT_GENERATION(%rsi),%eax /* %eax = dst's current generation */ | |
98 | movl $0,RNT_GENERATION(%rsi) /* generation 0 flags the data as being updated */ | |
99 | addq %rdi,RNT_TSC_BASE(%rsi) /* add tsc_base_delta to dst's TSC base */ | |
100 | ||
101 | incl %eax /* next generation */ | |
102 | jnz 1f /* a nonzero result is usable as is */ | |
103 | incl %eax /* wrapped around to 0: skip it, 0 is the flag value */ | |
104 | 1: movl %eax,RNT_GENERATION(%rsi) /* publish the new generation after the data store */ | |
105 | ||
106 | ret | |
107 | ||
108 | /* | |
109 | * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp); | |
110 | * | |
111 | * This is the same as the commpage nanotime routine, except that it uses the | |
112 | * kernel internal "rtc_nanotime_info" data instead of the commpage data. | |
113 | * These two copies of data are kept in sync by rtc_clock_napped(). | |
114 | * | |
115 | * Warning! There are several copies of this code in the trampolines found in | |
116 | * osfmk/x86_64/idt64.s, coming from the various TIMER macros in rtclock_asm.h. | |
117 | * They're all kept in sync by using the RTC_NANOTIME_READ() macro. | |
118 | * | |
119 | * The algorithm we use is: | |
120 | * | |
121 | * ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base; | |
122 | * | |
123 | * rnt_shift, a constant computed during initialization, is the smallest value for which: | |
124 | * | |
125 | * (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD | |
126 | * | |
127 | * Where SLOW_TSC_THRESHOLD is about 10**9. Since most processors' tscFreqs are greater | |
128 | * than 1GHz, rnt_shift is usually 0. rnt_tsc_scale is also a 32-bit constant: | |
129 | * | |
130 | * rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift); | |
131 | * | |
132 | * On 64-bit processors this algorithm could be simplified by doing a 64x64 bit | |
133 | * multiply of rdtsc by tscFCvtt2n: | |
134 | * | |
135 | * ns = (((rdtsc - rnt_tsc_base) * tscFCvtt2n) / 2**32) + rnt_ns_base; | |
136 | * | |
137 | * We don't do so in order to use the same algorithm in 32- and 64-bit mode. | |
138 | * When U32 goes away, we should reconsider. | |
139 | * | |
140 | * Since this routine is not synchronized and can be called in any context, | |
141 | * we use a generation count to guard against seeing partially updated data. | |
142 | * In addition, the _rtc_nanotime_store() routine zeroes the generation before | |
143 | * updating the data, and stores the nonzero generation only after all fields | |
144 | * have been stored. Because IA32 guarantees that stores by one processor | |
145 | * must be seen in order by another, we can avoid using a lock. We spin while | |
146 | * the generation is zero. | |
147 | * | |
148 | * uint64_t _rtc_nanotime_read( | |
149 | * rtc_nanotime_t *rntp); // %rdi | |
150 | * | |
151 | */ | |
152 | ENTRY(_rtc_nanotime_read) /* in: %rdi = rntp */ | |
153 | ||
154 | PAL_RTC_NANOTIME_READ_FAST() /* macro defined outside this file; presumably leaves the nanosecond result in %rax -- TODO confirm */ | |
155 | ||
156 | ret | |
157 | ||
158 | /* | |
159 | * extern uint64_t _rtc_tsc_to_nanoseconds( | |
160 | * uint64_t value, // %rdi | |
161 | * pal_rtc_nanotime_t *rntp); // %rsi | |
162 | * | |
163 | * Converts TSC units to nanoseconds, using an abbreviated form of the above | |
164 | * algorithm. Note that while we could have simply used tmrCvt(value,tscFCvtt2n), | |
165 | * which would avoid the need for this asm, doing so is a bit more risky since | |
166 | * we'd be using a different algorithm with possibly different rounding etc. | |
167 | */ | |
168 | ||
169 | ENTRY(_rtc_tsc_to_nanoseconds) /* in: %rdi = value, %rsi = rntp; out: %rax */ | |
170 | movl RNT_SCALE(%rsi),%edx /* %rdx = 32-bit scale, zero-extended by the 32-bit load */ | |
171 | movl RNT_SHIFT(%rsi),%ecx /* %cl = shift count */ | |
172 | movq %rdi,%rax /* %rax = value in TSC units */ | |
173 | shlq %cl,%rax /* apply the shift to the TSC value */ | |
174 | mulq %rdx /* %rdx:%rax = shifted value times scale */ | |
175 | shrdq $32,%rdx,%rax /* %rax = bits 95:32 of the product */ | |
176 | ret | |
177 | ||
178 | ||
179 | ||
180 | Entry(call_continuation) /* in: %rdi = continuation, %rsi = continuation param, %rdx = wait result */ | |
181 | movq %rdi,%rcx /* park the continuation pointer so %rdi can carry its first argument */ | |
182 | movq %rsi,%rdi /* continuation param becomes argument 1 */ | |
183 | movq %rdx,%rsi /* wait result becomes argument 2 */ | |
184 | movq %gs:CPU_KERNEL_STACK,%rsp /* set the stack from %gs:CPU_KERNEL_STACK */ | |
185 | xorq %rbp,%rbp /* zero frame pointer */ | |
186 | call *%rcx /* call continuation */ | |
187 | movq %gs:CPU_ACTIVE_THREAD,%rdi /* continuation returned: pass %gs:CPU_ACTIVE_THREAD to thread_terminate */ | |
188 | call EXT(thread_terminate) | |
189 | ud2 /* not reached unless thread_terminate returns; trap instead of running on into x86_init_wrapper */ | |
190 | Entry(x86_init_wrapper) /* in: %rdi = function to call, %rsi = stack pointer to call it on */ | |
191 | xor %rbp, %rbp /* zero frame pointer */ | |
192 | movq %rsi, %rsp /* switch to the caller-supplied stack */ | |
193 | callq *%rdi /* enter the function; nothing here handles a return */ | |
194 | ud2 /* not reached unless the function returns; trap instead of running on into the next routine */ | |
195 | #if CONFIG_VMX | |
196 | ||
197 | /* | |
198 | * __vmxon -- Enter VMX Operation | |
199 | * int __vmxon(addr64_t v); | |
200 | */ | |
201 | Entry(__vmxon) /* in: %rdi = v; out: %eax = VMX_SUCCEED, VMX_FAIL_INVALID or VMX_FAIL_VALID */ | |
202 | FRAME | |
203 | push %rdi /* spill v so vmxon can take it as a memory operand */ | |
204 | /* Stage all three possible return codes up front so only cmovs follow vmxon. */ | |
205 | mov $(VMX_SUCCEED), %eax /* default result, kept when neither CF nor ZF is set */ | |
206 | mov $(VMX_FAIL_VALID), %edx | |
207 | mov $(VMX_FAIL_INVALID), %ecx | |
208 | vmxon (%rsp) | |
209 | cmovcl %ecx, %eax /* CF set: result becomes VMX_FAIL_INVALID */ | |
210 | cmovzl %edx, %eax /* ZF set: result becomes VMX_FAIL_VALID */ | |
211 | ||
212 | pop %rdi /* release the spill slot */ | |
213 | EMARF | |
214 | ret | |
215 | ||
216 | /* | |
217 | * __vmxoff -- Leave VMX Operation | |
218 | * int __vmxoff(void); | |
219 | */ | |
220 | Entry(__vmxoff) /* out: %eax = VMX_SUCCEED, VMX_FAIL_INVALID or VMX_FAIL_VALID */ | |
221 | FRAME | |
222 | /* Stage all three possible return codes up front so only cmovs follow vmxoff. */ | |
223 | mov $(VMX_SUCCEED), %eax /* default result, kept when neither CF nor ZF is set */ | |
224 | mov $(VMX_FAIL_VALID), %edx | |
225 | mov $(VMX_FAIL_INVALID), %ecx | |
226 | vmxoff | |
227 | cmovcl %ecx, %eax /* CF set: result becomes VMX_FAIL_INVALID */ | |
228 | cmovzl %edx, %eax /* ZF set: result becomes VMX_FAIL_VALID */ | |
229 | ||
230 | EMARF | |
231 | ret | |
232 | ||
233 | #endif /* CONFIG_VMX */ | |
234 | ||
235 | /* | |
236 | * mfence -- Memory Barrier | |
237 | * Use out-of-line assembly to get | |
238 | * standard x86-64 ABI guarantees | |
239 | * about what the caller's codegen | |
240 | * has in registers vs. memory | |
241 | */ | |
242 | Entry(do_mfence) /* no arguments, no result */ | |
243 | mfence /* the routine's entire job: execute one mfence on behalf of the caller */ | |
244 | ret | |