]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
0b4c1975 | 2 | * Copyright (c) 2000-2010 Apple Inc. All rights reserved. |
1c79356b | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b | 27 | */ |
0c530ab8 | 28 | |
1c79356b | 29 | #include <i386/asm.h> |
6d2010ae | 30 | #include <i386/apic.h> |
0c530ab8 | 31 | #include <i386/eflags.h> |
6d2010ae | 32 | #include <i386/rtclock_asm.h> |
0c530ab8 | 33 | #include <i386/postcode.h> |
6d2010ae | 34 | #include <i386/proc_reg.h> |
0c530ab8 A |
35 | #include <assym.s> |
36 | ||
1c79356b A |
37 | /* |
38 | ** ml_get_timebase() | |
39 | ** | |
40 | ** Entry - first stack argument (S_ARG0) points to a 64 bit structure. | |
41 | ** | |
42 | ** Exit - 64 bit structure filled in: high 32 bits of the TSC at offset 0, low 32 bits at offset 4. | |
43 | ** | |
44 | */ | |
45 | ENTRY(ml_get_timebase) | |
46 | ||
0c530ab8 A |
47 | movl S_ARG0, %ecx /* %ecx = S_ARG0, the pointer to the 64-bit result structure */ |
48 | ||
c910b4d9 | 49 | lfence /* fence ahead of the TSC read (presumably to stop it executing early -- confirm) */ |
0c530ab8 | 50 | rdtsc /* TSC -> %edx:%eax (high:low) */ |
593a1d5f | 51 | lfence /* fence after the TSC read */ |
0c530ab8 A |
52 | |
53 | movl %edx, 0(%ecx) /* high 32 bits go to offset 0 */ | |
54 | movl %eax, 4(%ecx) /* low 32 bits go to offset 4 */ | |
55 | ||
56 | ret | |
57 | ||
0c530ab8 A |
58 | /* |
59 | * Convert between various timer units | |
60 | * | |
61 | * uint64_t tmrCvt(uint64_t time, uint64_t conversion) | |
62 | * | |
63 | * This code converts 64-bit time units to other units. | |
64 | * For example, the TSC is converted to HPET units. | |
65 | * | |
66 | * Time is a 64-bit integer that is some number of ticks. | |
67 | * Conversion is 64-bit fixed point number which is composed | |
68 | * of a 32 bit integer and a 32 bit fraction. | |
69 | * | |
70 | * The time ticks are multiplied by the conversion factor. The | |
71 | * calculations are done as a 128-bit value but both the high | |
72 | * and low words are dropped. The high word is overflow and the | |
73 | * low word is the fraction part of the result. | |
74 | * | |
75 | * We return a 64-bit value. | |
76 | * | |
77 | * Note that we can use this function to multiply 2 conversion factors. | |
78 | * We do this in order to calculate the multiplier used to convert | |
79 | * directly between any two units. | |
80 | * | |
81 | */ | |
82 | ||
83 | .globl EXT(tmrCvt) | |
84 | .align FALIGN | |
85 | ||
86 | LEXT(tmrCvt) | |
87 | ||
88 | pushl %ebp // Save caller's frame pointer | |
89 | movl %esp,%ebp // Set up frame: first argument is at 8(%ebp) | |
90 | pushl %ebx // Save %ebx (restored before return) | |
91 | pushl %esi // Save %esi (restored before return) | |
92 | pushl %edi // Save %edi (restored before return) | |
93 | ||
94 | // %ebp + 8 - low-order ts | |
95 | // %ebp + 12 - high-order ts | |
96 | // %ebp + 16 - low-order cvt | |
97 | // %ebp + 20 - high-order cvt | |
98 | ||
99 | movl 8(%ebp),%eax // Get low-order ts | |
100 | mull 16(%ebp) // %edx:%eax = ts.lo * cvt.lo | |
101 | movl %edx,%edi // Keep only the high half; the low half is dropped | |
102 | ||
103 | movl 12(%ebp),%eax // Get the high-order ts | |
104 | mull 16(%ebp) // %edx:%eax = ts.hi * cvt.lo | |
105 | addl %eax,%edi // Add in the overflow from the low x low calculation | |
106 | adcl $0,%edx // Propagate the carry into the high part | |
107 | movl %edx,%esi // Save high part | |
108 | ||
109 | // We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt | |
110 | // in %esi:%edi | |
111 | ||
112 | movl 8(%ebp),%eax // Get low-order ts | |
113 | mull 20(%ebp) // %edx:%eax = ts.lo * cvt.hi | |
114 | movl %eax,%ebx // Need to save the low order part | |
115 | movl %edx,%ecx // Need to save the high order part | |
116 | ||
117 | movl 12(%ebp),%eax // Get the high-order ts | |
118 | mull 20(%ebp) // %edx:%eax = ts.hi * cvt.hi | |
119 | ||
120 | // Now have %ecx:%ebx = ts.lo * cvt.hi and %edx:%eax = ts.hi * cvt.hi | |
121 | // %edx is not used below: it is overflow beyond the 64-bit result | |
122 | ||
123 | addl %edi,%ebx // Add the low words | |
124 | adcl %ecx,%esi // Add in the high plus carry from low | |
125 | addl %eax,%esi // Add in the low half of ts.hi * cvt.hi | |
126 | ||
127 | movl %ebx,%eax // Pass back low word | |
128 | movl %esi,%edx // and the high word | |
129 | ||
130 | popl %edi // Restore %edi | |
131 | popl %esi // Restore %esi | |
132 | popl %ebx // Restore %ebx | |
133 | popl %ebp // Restore caller's frame pointer | |
134 | ||
2d21ac55 | 135 | ret // Return with result in %edx:%eax |
0c530ab8 | 136 | |
b0d623f7 | 137 | |
0b4c1975 A |
138 | /* void _rtc_nanotime_adjust( |
139 | uint64_t tsc_base_delta, | |
140 | rtc_nanotime_t *dst); | |
141 | */ | |
142 | .globl EXT(_rtc_nanotime_adjust) | |
143 | .align FALIGN | |
144 | ||
145 | LEXT(_rtc_nanotime_adjust) | |
146 | mov 12(%esp),%edx /* dst: ptr to rtc_nanotime_info (follows the 8-byte delta on the stack) */ | |
147 | ||
148 | movl RNT_GENERATION(%edx),%ecx /* get current generation */ | |
149 | movl $0,RNT_GENERATION(%edx) /* generation 0 flags the data as being updated */ | |
150 | ||
151 | movl 4(%esp),%eax /* get lower 32-bits of delta */ | |
152 | addl %eax,RNT_TSC_BASE(%edx) /* add it to the low word of the TSC base */ | |
153 | adcl $0,RNT_TSC_BASE+4(%edx) /* propagate carry; NOTE(review): the upper 32 bits of the delta at 8(%esp) are never added -- confirm callers only pass deltas below 2**32 */ | |
154 | ||
155 | incl %ecx /* next generation */ | |
156 | jnz 1f /* nonzero: usable as is */ | |
157 | incl %ecx /* skip 0, which is a flag */ | |
158 | 1: movl %ecx,RNT_GENERATION(%edx) /* store the new nonzero generation last, making the data usable */ | |
159 | ||
160 | ret | |
161 | ||
162 | ||
2d21ac55 A |
163 | /* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow ); |
164 | * | |
165 | * This is the same as the commpage nanotime routine, except that it uses the | |
166 | * kernel internal "rtc_nanotime_info" data instead of the commpage data. The two copies | |
cf7d32b8 A |
167 | * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped(). |
168 | * | |
169 | * Warning! There is another copy of this code in osfmk/i386/locore.s. The | |
170 | * two versions must be kept in sync with each other! | |
2d21ac55 A |
171 | * |
172 | * There are actually two versions of the algorithm, one each for "slow" and "fast" | |
173 | * processors. The more common "fast" algorithm is: | |
174 | * | |
175 | * nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base; | |
176 | * | |
177 | * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant computed during initialization: | |
178 | * | |
179 | * rnt_tsc_scale = (1e9 * 2**32) / tscFreq; | |
180 | * | |
181 | * The "slow" algorithm uses long division: | |
182 | * | |
183 | * nanoseconds = (((rdtsc - rnt_tsc_base) * 1e9) / tscFreq) + rnt_ns_base; | |
184 | * | |
185 | * Since this routine is not synchronized and can be called in any context, | |
186 | * we use a generation count to guard against seeing partially updated data. In addition, | |
187 | * the update routines -- e.g. _rtc_nanotime_adjust(), just above -- zero the generation before | |
188 | * updating the data, and store the nonzero generation only after all other data has been | |
189 | * stored. Because IA32 guarantees that stores by one processor must be seen in order | |
190 | * by another, we can avoid using a lock. We spin while the generation is zero. | |
191 | * | |
192 | * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax. | |
193 | */ | |
194 | ||
195 | .globl EXT(_rtc_nanotime_read) | |
196 | .align FALIGN | |
197 | LEXT(_rtc_nanotime_read) | |
198 | pushl %ebp /* standard frame: arguments start at 8(%ebp) */ | |
199 | movl %esp,%ebp | |
200 | pushl %esi /* %esi, %edi and %ebx are saved here and restored on both exit paths */ | |
201 | pushl %edi | |
202 | pushl %ebx | |
203 | movl 8(%ebp),%edi /* get ptr to rtc_nanotime_info */ | |
204 | movl 12(%ebp),%eax /* get "slow" flag */ | |
205 | testl %eax,%eax | |
206 | jnz Lslow /* nonzero flag selects the long-division path */ | |
207 | ||
208 | /* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */ | |
6d2010ae | 209 | PAL_RTC_NANOTIME_READ_FAST() |
1c79356b | 210 | |
2d21ac55 A |
211 | popl %ebx |
212 | popl %edi | |
213 | popl %esi | |
214 | popl %ebp | |
215 | ret | |
216 | ||
217 | /* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */ | |
218 | Lslow: | |
219 | movl RNT_GENERATION(%edi),%esi /* get generation (0 if being changed) */ | |
220 | testl %esi,%esi /* if being changed, loop until stable */ | |
221 | jz Lslow | |
222 | pushl %esi /* save generation; %esi is reused by the math below */ | |
223 | pushl RNT_SHIFT(%edi) /* save low 32 bits of tscFreq (read from the RNT_SHIFT field) */ | |
224 | ||
c910b4d9 A |
225 | lfence |
226 | rdtsc /* get TSC in %edx:%eax */ | |
227 | lfence | |
2d21ac55 A |
228 | subl RNT_TSC_BASE(%edi),%eax |
229 | sbbl RNT_TSC_BASE+4(%edi),%edx /* %edx:%eax = TSC - rnt_tsc_base */ | |
230 | ||
231 | /* | |
232 | * Do the math to convert tsc ticks to nanoseconds. We first | |
233 | * do long multiply of 1 billion times the tsc. Then we do | |
234 | * long division by the tsc frequency | |
235 | */ | |
236 | mov $1000000000, %ecx /* number of nanoseconds in a second */ | |
237 | mov %edx, %ebx /* keep high word of the tick delta */ | |
238 | mul %ecx /* %edx:%eax = delta.lo * 1e9 */ | |
239 | mov %edx, %edi /* high half of that product (overwrites the info pointer) */ | |
240 | mov %eax, %esi /* lowest word of the 96-bit product */ | |
241 | mov %ebx, %eax | |
242 | mul %ecx /* %edx:%eax = delta.hi * 1e9 */ | |
243 | add %edi, %eax | |
244 | adc $0, %edx /* result in edx:eax:esi */ | |
245 | mov %eax, %edi /* middle word of the product */ | |
246 | popl %ecx /* get low 32 tscFreq */ | |
247 | xor %eax, %eax | |
248 | xchg %edx, %eax /* %edx:%eax = 0:top word */ | |
249 | div %ecx /* quotient is discarded below (overflow); remainder stays in %edx */ | |
250 | xor %eax, %eax /* redundant: %eax is overwritten by the next instruction */ | |
251 | mov %edi, %eax | |
252 | div %ecx /* (remainder:middle word) / tscFreq */ | |
253 | mov %eax, %ebx /* high 32 bits of the result */ | |
254 | mov %esi, %eax | |
255 | div %ecx /* (remainder:low word) / tscFreq -> low 32 bits of the result */ | |
256 | mov %ebx, %edx /* result in edx:eax */ | |
257 | ||
258 | movl 8(%ebp),%edi /* recover ptr to rtc_nanotime_info */ | |
259 | popl %esi /* recover generation */ | |
43866e37 | 260 | |
2d21ac55 A |
261 | addl RNT_NS_BASE(%edi),%eax |
262 | adcl RNT_NS_BASE+4(%edi),%edx /* add the 64-bit rnt_ns_base */ | |
263 | ||
264 | cmpl RNT_GENERATION(%edi),%esi /* have the parameters changed? */ | |
265 | jne Lslow /* yes, loop until stable */ | |
266 | ||
267 | pop %ebx | |
268 | pop %edi | |
269 | pop %esi | |
0c530ab8 | 270 | pop %ebp |
2d21ac55 A |
271 | ret /* result in edx:eax */ | |
272 | ||
6d2010ae A |
273 | |
274 | ||
275 | /* | |
276 | * Timing routines. | |
277 | */ | |
278 | Entry(timer_update) | |
279 | movl 4(%esp),%ecx /* arg 1: pointer to the timer */ | |
280 | movl 8(%esp),%eax /* arg 2: new high word */ | |
281 | movl 12(%esp),%edx /* arg 3: new low word */ | |
282 | movl %eax,TIMER_HIGHCHK(%ecx) /* write the check copy of the high word first */ | |
283 | movl %edx,TIMER_LOW(%ecx) /* then the low word */ | |
284 | movl %eax,TIMER_HIGH(%ecx) /* high word last; timer_grab retries while HIGH differs from HIGHCHK */ | |
285 | ret | |
286 | ||
287 | Entry(timer_grab) | |
288 | movl 4(%esp),%ecx /* arg 1: pointer to the timer */ | |
289 | 0: movl TIMER_HIGH(%ecx),%edx /* read high word */ | |
290 | movl TIMER_LOW(%ecx),%eax /* read low word */ | |
291 | cmpl TIMER_HIGHCHK(%ecx),%edx /* does the check copy match the high word just read? */ | |
292 | jne 0b /* no: re-read all three fields */ | |
293 | ret /* timer value returned in %edx:%eax */ | |
294 | ||
295 | ||
296 | Entry(call_continuation) | |
297 | movl S_ARG0,%eax /* get continuation */ | |
298 | movl S_ARG1,%edx /* continuation param */ | |
299 | movl S_ARG2,%ecx /* wait result */ | |
300 | movl %gs:CPU_KERNEL_STACK,%esp /* pop the stack: reload %esp from the %gs:CPU_KERNEL_STACK slot */ | |
301 | xorl %ebp,%ebp /* zero frame pointer */ | |
302 | subl $8,%esp /* align the stack: 8 bytes of padding plus the two pushes below make 16 */ | |
303 | pushl %ecx /* second argument: wait result */ | |
304 | pushl %edx /* first argument: continuation param */ | |
305 | call *%eax /* call continuation */ | |
306 | addl $16,%esp /* drop the padding and both arguments */ | |
307 | movl %gs:CPU_ACTIVE_THREAD,%eax /* value from the %gs:CPU_ACTIVE_THREAD slot */ | |
308 | pushl %eax /* pass it as the argument */ | |
309 | call EXT(thread_terminate) /* reached only if the continuation returns; no ret follows, so this call presumably does not return -- confirm */ | |
310 | ||
311 |