]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/machine_routines_asm.s
xnu-2050.18.24.tar.gz
[apple/xnu.git] / osfmk / i386 / machine_routines_asm.s
1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <i386/asm.h>
30 #include <i386/apic.h>
31 #include <i386/eflags.h>
32 #include <i386/rtclock_asm.h>
33 #include <i386/postcode.h>
34 #include <i386/proc_reg.h>
35 #include <assym.s>
36
/*
 * ml_get_timebase()
 *
 * Entry - the argument read through S_ARG0 is a pointer to a 64-bit
 *         structure (the pointer is taken from the stack, not held in
 *         %esp itself).
 *
 * Exit  - structure filled in from the time-stamp counter:
 *             offset 0 <- %edx (upper 32 bits of the TSC)
 *             offset 4 <- %eax (lower 32 bits of the TSC)
 *
 * Registers written: %eax, %ecx, %edx.
 *
 * NOTE(review): the upper word is stored at the lower address, which is
 * not the in-memory layout of a native little-endian 64-bit integer.
 * Confirm that callers expect a {high, low} pair here.
 */
ENTRY(ml_get_timebase)

	movl	S_ARG0, %ecx		/* %ecx = destination structure */

	lfence				/* rdtsc is bracketed by a fence on each side */
	rdtsc				/* %edx:%eax = current TSC */
	lfence

	movl	%edx, (%ecx)		/* upper half -> offset 0 */
	movl	%eax, 4(%ecx)		/* lower half -> offset 4 */

	ret
57
/*
 * Convert between various timer units
 *
 *	uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *
 * Both arguments are read by value from the stack (see the frame layout
 * below); the conversion factor is not dereferenced through a pointer.
 *
 * This code converts 64-bit time units to other units.
 * For example, the TSC is converted to HPET units.
 *
 * "time" is a 64-bit integer number of ticks.  "conversion" is a 64-bit
 * fixed-point number made of a 32-bit integer part (upper word) and a
 * 32-bit fraction (lower word).
 *
 * The ticks are multiplied by the conversion factor.  Conceptually the
 * product is 128 bits wide, of which only the middle 64 bits are kept:
 * the top 32 bits are overflow and are discarded, and the bottom 32 bits
 * are the fractional part of the result and are discarded as well.
 *
 *	result = (time * conversion) >> 32, truncated to 64 bits
 *
 * The 64-bit result is returned in %edx:%eax.
 *
 * Note that this function can also be used to multiply two conversion
 * factors together, giving the multiplier that converts directly between
 * any two units.
 *
 * %ebp, %ebx, %esi and %edi are saved on entry and restored on exit;
 * %ecx is used as scratch and is not preserved.
 */

	.globl	EXT(tmrCvt)
	.align	FALIGN

LEXT(tmrCvt)

	pushl	%ebp			/* set up a frame to address the arguments */
	movl	%esp,%ebp
	pushl	%ebx			/* preserve the registers used as accumulators */
	pushl	%esi
	pushl	%edi

	/*
	 * Frame layout:
	 *	 8(%ebp)	low-order  word of ts
	 *	12(%ebp)	high-order word of ts
	 *	16(%ebp)	low-order  word of cvt (fraction)
	 *	20(%ebp)	high-order word of cvt (integer)
	 */

	/* ts.lo * cvt.lo: only the upper half of this product survives */
	movl	8(%ebp),%eax
	mull	16(%ebp)
	movl	%edx,%edi

	/* ts.hi * cvt.lo, accumulated on top of the carry-out word above */
	movl	12(%ebp),%eax
	mull	16(%ebp)
	addl	%eax,%edi
	adcl	$0,%edx			/* ripple the carry into the top word */
	movl	%edx,%esi

	/* %esi:%edi now holds the upper 64 bits of the 96-bit product ts * cvt.lo */

	/* ts.lo * cvt.hi: both halves are needed */
	movl	8(%ebp),%eax
	mull	20(%ebp)
	movl	%edx,%ecx		/* upper half */
	movl	%eax,%ebx		/* lower half */

	/* ts.hi * cvt.hi: only %eax is used, %edx is pure overflow */
	movl	12(%ebp),%eax
	mull	20(%ebp)

	/* Sum the partial products into %esi:%ebx */
	addl	%edi,%ebx		/* low result word */
	adcl	%ecx,%esi		/* high word, including the carry from the low add */
	addl	%eax,%esi		/* plus the low half of ts.hi * cvt.hi */

	movl	%esi,%edx		/* return value: high word ... */
	movl	%ebx,%eax		/* ... and low word */

	popl	%edi			/* restore in reverse order of the saves */
	popl	%esi
	popl	%ebx
	popl	%ebp

	ret
136
137
/*
 * void _rtc_nanotime_adjust(
 *		uint64_t	tsc_base_delta,
 *		rtc_nanotime_t	*dst);
 *
 * Adds the full 64-bit tsc_base_delta to the 64-bit TSC base stored at
 * RNT_TSC_BASE in *dst.
 *
 * Stack on entry (no frame is built):
 *	 4(%esp)	low  32 bits of tsc_base_delta
 *	 8(%esp)	high 32 bits of tsc_base_delta
 *	12(%esp)	dst
 *
 * The generation word at RNT_GENERATION is set to 0 while the base is
 * being modified and is then replaced by the next nonzero generation, so
 * that readers which spin on a zero generation and re-check it afterwards
 * (see _rtc_nanotime_read) do not consume a half-updated base.
 *
 * Registers written: %eax, %ecx, %edx.
 */
	.globl	EXT(_rtc_nanotime_adjust)
	.align	FALIGN

LEXT(_rtc_nanotime_adjust)
	movl	12(%esp),%edx			/* ptr to rtc_nanotime_info */

	movl	RNT_GENERATION(%edx),%ecx	/* get current generation */
	movl	$0,RNT_GENERATION(%edx)		/* flag data as being updated */

	movl	4(%esp),%eax			/* low 32 bits of delta */
	addl	%eax,RNT_TSC_BASE(%edx)
	movl	8(%esp),%eax			/* high 32 bits of delta; mov leaves CF untouched */
	adcl	%eax,RNT_TSC_BASE+4(%edx)	/* add high word plus carry from the low add */

	incl	%ecx				/* next generation */
	jnz	1f
	incl	%ecx				/* skip 0, which is the "being updated" flag */
1:	movl	%ecx,RNT_GENERATION(%edx)	/* publish generation and make data usable */

	ret
161
162
/* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow );
 *
 * This is the same as the commpage nanotime routine, except that it uses the
 * kernel internal "rtc_nanotime_info" data instead of the commpage data.  The two copies
 * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
 *
 * Warning!  There is another copy of this code in osfmk/i386/locore.s.  The
 * two versions must be kept in sync with each other!
 *
 * There are actually two versions of the algorithm, one each for "slow" and "fast"
 * processors.  The more common "fast" algorithm (implemented by the
 * PAL_RTC_NANOTIME_READ_FAST() macro, which is defined outside this file) is:
 *
 *	nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * Of course, the divide by 2**32 is a nop.  rnt_tsc_scale is a constant computed during initialization:
 *
 *	rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
 *
 * The "slow" algorithm uses long division:
 *
 *	nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
 *
 * In the slow case the divisor is the 32-bit word stored at RNT_SHIFT,
 * which holds the low 32 bits of tscFreq.
 *
 * Since this routine is not synchronized and can be called in any context,
 * we use a generation count to guard against seeing partially updated data.  In addition,
 * the routines that update the data (for example _rtc_nanotime_adjust(), just above)
 * zero the generation before updating the data, and store the nonzero generation only
 * after all other data has been stored.  Because IA32 guarantees that stores by one
 * processor must be seen in order by another, we can avoid using a lock.  We spin while
 * the generation is zero.
 *
 * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 * %ebp, %esi, %edi and %ebx are saved and restored; %ecx is scratch.
 */

	.globl	EXT(_rtc_nanotime_read)
	.align	FALIGN
LEXT(_rtc_nanotime_read)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	8(%ebp),%edi			/* get ptr to rtc_nanotime_info */
	movl	12(%ebp),%eax			/* get "slow" flag */
	testl	%eax,%eax
	jnz	Lslow

	/* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
	PAL_RTC_NANOTIME_READ_FAST()

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret

	/* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
Lslow:
	movl	RNT_GENERATION(%edi),%esi	/* get generation (0 if being changed) */
	testl	%esi,%esi			/* if being changed, loop until stable */
	jz	Lslow
	pushl	%esi				/* save generation */
	pushl	RNT_SHIFT(%edi)			/* save low 32 bits of tscFreq */

	lfence
	rdtsc					/* get TSC in %edx:%eax */
	lfence
	subl	RNT_TSC_BASE(%edi),%eax		/* %edx:%eax = ticks since tsc base */
	sbbl	RNT_TSC_BASE+4(%edi),%edx

	/*
	 * Do the math to convert tsc ticks to nanoseconds.  We first
	 * do a long multiply of 1 billion times the tsc delta, giving a
	 * 96-bit product, then do long division by the tsc frequency,
	 * one 32-bit word at a time from the most significant word down.
	 */
	movl	$1000000000,%ecx		/* number of nanoseconds in a second */
	movl	%edx,%ebx			/* set the high word of the delta aside */
	mull	%ecx				/* delta.lo * 1e9 */
	movl	%edx,%edi			/* carry word into the next column */
	movl	%eax,%esi			/* lowest word of the product */
	movl	%ebx,%eax
	mull	%ecx				/* delta.hi * 1e9 */
	addl	%edi,%eax
	adcl	$0,%edx				/* product is now in %edx:%eax:%esi */
	movl	%eax,%edi			/* product is now in %edx:%edi:%esi */

	popl	%ecx				/* get low 32 bits of tscFreq */
	xorl	%eax,%eax
	xchgl	%edx,%eax			/* dividend = 0:top word */
	divl	%ecx				/* keep only the remainder in %edx; the quotient is overflow */
	movl	%edi,%eax			/* dividend = remainder:middle word */
	divl	%ecx
	movl	%eax,%ebx			/* quotient is the high word of the result */
	movl	%esi,%eax			/* dividend = remainder:low word */
	divl	%ecx				/* quotient is the low word of the result */
	movl	%ebx,%edx			/* result in %edx:%eax */

	movl	8(%ebp),%edi			/* recover ptr to rtc_nanotime_info */
	popl	%esi				/* recover generation */

	addl	RNT_NS_BASE(%edi),%eax		/* add the nanosecond base */
	adcl	RNT_NS_BASE+4(%edi),%edx

	cmpl	RNT_GENERATION(%edi),%esi	/* have the parameters changed? */
	jne	Lslow				/* yes, loop until stable */

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret					/* result in %edx:%eax */
272
273
274
275 /*
276 * Timing routines.
277 */
/*
 * timer_update
 *
 * Stack on entry (no frame is built):
 *	 4(%esp)	pointer to the timer
 *	 8(%esp)	value written to both TIMER_HIGHCHK and TIMER_HIGH
 *	12(%esp)	value written to TIMER_LOW
 *
 * The three stores are issued in the order HIGHCHK, LOW, HIGH.  That
 * order is deliberate and must be kept: timer_grab reads HIGH, then LOW,
 * then compares HIGH against HIGHCHK and retries on a mismatch.
 *
 * Registers written: %eax, %ecx, %edx.
 */
Entry(timer_update)
	movl	4(%esp),%ecx		/* %ecx = timer */
	movl	12(%esp),%edx		/* %edx = new low word */
	movl	8(%esp),%eax		/* %eax = new high word */
	movl	%eax,TIMER_HIGHCHK(%ecx)	/* 1: check copy of the high word */
	movl	%edx,TIMER_LOW(%ecx)		/* 2: low word */
	movl	%eax,TIMER_HIGH(%ecx)		/* 3: high word, written last */
	ret
286
/*
 * timer_grab
 *
 * Stack on entry (no frame is built):
 *	 4(%esp)	pointer to the timer
 *
 * Returns TIMER_HIGH in %edx and TIMER_LOW in %eax.  After both words
 * have been loaded, the high word is compared with TIMER_HIGHCHK; if the
 * two differ, the whole read is repeated until they match.
 *
 * Registers written: %eax, %ecx, %edx.
 */
Entry(timer_grab)
	movl	4(%esp),%ecx		/* %ecx = timer */
1:
	movl	TIMER_HIGH(%ecx),%edx	/* high word first */
	movl	TIMER_LOW(%ecx),%eax	/* then low word */
	cmpl	TIMER_HIGHCHK(%ecx),%edx	/* does the check copy still agree? */
	jne	1b			/* no: read again */
	ret
294
295
/*
 * call_continuation
 *
 * Arguments (read through S_ARG0..S_ARG2 before the stack is replaced):
 *	S_ARG0	continuation to call
 *	S_ARG1	continuation parameter
 *	S_ARG2	wait result
 *
 * Resets %esp to the value held in %gs:CPU_KERNEL_STACK, clears the
 * frame pointer, and calls the continuation with (parameter, wait result)
 * as its two stack arguments.  If the continuation returns, the value of
 * %gs:CPU_ACTIVE_THREAD is pushed and thread_terminate is called with it.
 *
 * NOTE(review): no instruction follows the call to thread_terminate, so
 * this presumably relies on thread_terminate not returning; if it did,
 * execution would run on into whatever code follows this entry point.
 */
Entry(call_continuation)
	movl	S_ARG2,%ecx			/* wait result */
	movl	S_ARG1,%edx			/* continuation param */
	movl	S_ARG0,%eax			/* continuation */
	xorl	%ebp,%ebp			/* zero frame pointer */
	movl	%gs:CPU_KERNEL_STACK,%esp	/* discard the current stack contents */
	subl	$8,%esp				/* pad so the two pushes below total 16 bytes */
	pushl	%ecx				/* second argument: wait result */
	pushl	%edx				/* first argument: param */
	call	*%eax				/* call continuation */
	addl	$16,%esp			/* drop padding and arguments */
	movl	%gs:CPU_ACTIVE_THREAD,%eax
	pushl	%eax				/* argument: active thread */
	call	EXT(thread_terminate)
310
311
/*
 * ml_early_random
 *
 * Takes no arguments from the stack and always returns with %eax = 0.
 * No other register is written.
 */
Entry(ml_early_random)
	xorl	%eax,%eax		/* return value is always zero */
	ret