/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <i386/asm.h>
#include <i386/rtclock.h>
#include <i386/proc_reg.h>
#include <i386/eflags.h>

#include <i386/postcode.h>
#include <i386/apic.h>
#include <assym.s>

/*
**      ml_get_timebase()
**
**      Entry   - %esp contains pointer to 64 bit structure.
**
**      Exit    - 64 bit structure filled in.
**
*/
ENTRY(ml_get_timebase)

        movl    S_ARG0, %ecx

        lfence
        rdtsc
        lfence

        movl    %edx, 0(%ecx)
        movl    %eax, 4(%ecx)

        ret

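/*
 * Illustrative sketch (not part of the build): a hypothetical C equivalent of
 * ml_get_timebase() above.  The stores above place %edx (the high 32 TSC bits)
 * at offset 0 and %eax (the low 32 bits) at offset 4, so the sketch assumes a
 * structure laid out high word first.
 *
 *      typedef struct { uint32_t hi; uint32_t lo; } timebase64_t;  // assumed layout
 *
 *      static void ml_get_timebase_c(timebase64_t *tbp)
 *      {
 *              uint32_t lo, hi;
 *              // lfence on either side of rdtsc keeps the TSC read from being
 *              // reordered with surrounding loads, matching the code above.
 *              __asm__ volatile("lfence; rdtsc; lfence" : "=a" (lo), "=d" (hi));
 *              tbp->hi = hi;
 *              tbp->lo = lo;
 *      }
 */
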
/*
 *      Convert between various timer units
 *
 *      uint64_t tmrCvt(uint64_t time, uint64_t conversion)
 *
 *      This code converts 64-bit time units to other units.
 *      For example, the TSC is converted to HPET units.
 *
 *      Time is a 64-bit integer that is some number of ticks.
 *      Conversion is a 64-bit fixed-point number which is composed
 *      of a 32-bit integer and a 32-bit fraction.
 *
 *      The time ticks are multiplied by the conversion factor.  The
 *      calculation is done as a 128-bit value but both the high
 *      and low words are dropped.  The high word is overflow and the
 *      low word is the fraction part of the result.
 *
 *      We return a 64-bit value.
 *
 *      Note that we can use this function to multiply 2 conversion factors.
 *      We do this in order to calculate the multiplier used to convert
 *      directly between any two units.
 *
 */

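/*
 * Illustrative sketch (not part of the build): a hypothetical C equivalent of
 * the 64 x 64.32 fixed-point multiply below, assuming a compiler that provides
 * __uint128_t.  The middle 64 bits of the 128-bit product are the result; the
 * top 32 bits are overflow and the bottom 32 bits are the discarded fraction.
 *
 *      static uint64_t tmrCvt_c(uint64_t time, uint64_t conversion)
 *      {
 *              return (uint64_t)(((__uint128_t)time * conversion) >> 32);
 *      }
 */
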
        .globl  EXT(tmrCvt)
        .align  FALIGN

LEXT(tmrCvt)

        pushl   %ebp                    // Save a volatile
        movl    %esp,%ebp               // Get the parameters - 8
        pushl   %ebx                    // Save a volatile
        pushl   %esi                    // Save a volatile
        pushl   %edi                    // Save a volatile

        //      %ebp + 8        - low-order ts
        //      %ebp + 12       - high-order ts
        //      %ebp + 16       - low-order cvt
        //      %ebp + 20       - high-order cvt

        movl    8(%ebp),%eax            // Get low-order ts
        mull    16(%ebp)                // Multiply by low-order conversion
        movl    %edx,%edi               // Need to save only the high-order part

        movl    12(%ebp),%eax           // Get the high-order ts
        mull    16(%ebp)                // Multiply by low-order conversion
        addl    %eax,%edi               // Add in the overflow from the low x low calculation
        adcl    $0,%edx                 // Add in any overflow to the high high part
        movl    %edx,%esi               // Save the high high part

//      We now have the upper 64 bits of the 96-bit multiply of ts and the low half of cvt
//      in %esi:%edi

        movl    8(%ebp),%eax            // Get low-order ts
        mull    20(%ebp)                // Multiply by high-order conversion
        movl    %eax,%ebx               // Need to save the low-order part
        movl    %edx,%ecx               // Need to save the high-order part

        movl    12(%ebp),%eax           // Get the high-order ts
        mull    20(%ebp)                // Multiply by high-order conversion

//      Now have %ecx:%ebx as the low x high product and %edx:%eax as the high x high product
//      We don't care about the highest word since it is overflow

        addl    %edi,%ebx               // Add the low words
        adcl    %ecx,%esi               // Add in the high plus carry from low
        addl    %eax,%esi               // Add in the rest of the high

        movl    %ebx,%eax               // Pass back the low word
        movl    %esi,%edx               // and the high word

        popl    %edi                    // Restore a volatile
        popl    %esi                    // Restore a volatile
        popl    %ebx                    // Restore a volatile
        popl    %ebp                    // Restore a volatile

        ret                             // Leave...


/* void _rtc_nanotime_store(uint64_t       tsc,
 *                          uint64_t       nsec,
 *                          uint32_t       scale,
 *                          uint32_t       shift,
 *                          rtc_nanotime_t *dst);
 */
        .globl  EXT(_rtc_nanotime_store)
        .align  FALIGN

LEXT(_rtc_nanotime_store)
        push    %ebp
        movl    %esp,%ebp
        push    %esi

        mov     32(%ebp),%edx                   /* get ptr to rtc_nanotime_info */

        movl    RNT_GENERATION(%edx),%esi       /* get current generation */
        movl    $0,RNT_GENERATION(%edx)         /* flag data as being updated */

        mov     8(%ebp),%eax
        mov     %eax,RNT_TSC_BASE(%edx)
        mov     12(%ebp),%eax
        mov     %eax,RNT_TSC_BASE+4(%edx)

        mov     24(%ebp),%eax
        mov     %eax,RNT_SCALE(%edx)

        mov     28(%ebp),%eax
        mov     %eax,RNT_SHIFT(%edx)

        mov     16(%ebp),%eax
        mov     %eax,RNT_NS_BASE(%edx)
        mov     20(%ebp),%eax
        mov     %eax,RNT_NS_BASE+4(%edx)

        incl    %esi                            /* next generation */
        jnz     1f
        incl    %esi                            /* skip 0, which is a flag */
1:      movl    %esi,RNT_GENERATION(%edx)       /* update generation and make usable */

        pop     %esi
        pop     %ebp
        ret

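/*
 * Illustrative sketch (not part of the build): a hypothetical C rendering of
 * the update protocol above.  The field names follow the RNT_* offsets used
 * here and are assumptions about the underlying rtc_nanotime_t layout.
 *
 *      static void rtc_nanotime_store_c(uint64_t tsc, uint64_t nsec,
 *                                       uint32_t scale, uint32_t shift,
 *                                       rtc_nanotime_t *dst)
 *      {
 *              uint32_t next = dst->generation + 1;
 *
 *              dst->generation = 0;            // flag data as being updated
 *              dst->tsc_base   = tsc;
 *              dst->scale      = scale;
 *              dst->shift      = shift;
 *              dst->ns_base    = nsec;
 *              if (next == 0)
 *                      next = 1;               // skip 0, which is the "updating" flag
 *              dst->generation = next;         // publish; readers may now use the data
 *      }
 */
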

/* uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow);
 *
 * This is the same as the commpage nanotime routine, except that it uses the
 * kernel internal "rtc_nanotime_info" data instead of the commpage data.  The two
 * copies of data (one in the kernel and one in user space) are kept in sync by
 * rtc_clock_napped().
 *
 * Warning!  There is another copy of this code in osfmk/i386/locore.s.  The
 * two versions must be kept in sync with each other!
 *
 * There are actually two versions of the algorithm, one each for "slow" and "fast"
 * processors.  The more common "fast" algorithm is:
 *
 *      nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * Of course, the divide by 2**32 is a nop.  rnt_tsc_scale is a constant computed
 * during initialization:
 *
 *      rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
 *
 * The "slow" algorithm uses long division:
 *
 *      nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
 *
 * Since this routine is not synchronized and can be called in any context,
 * we use a generation count to guard against seeing partially updated data.
 * In addition, the _rtc_nanotime_store() routine -- just above -- zeroes the
 * generation before updating the data, and stores the nonzero generation only
 * after all other data has been stored.  Because IA32 guarantees that stores
 * by one processor must be seen in order by another, we can avoid using a lock.
 * We spin while the generation is zero.
 *
 * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
 */

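/*
 * Illustrative sketch (not part of the build): a hypothetical C rendering of the
 * "fast" algorithm and the generation-count retry loop described above.  The
 * field names are assumptions about the rtc_nanotime_t layout implied by the
 * RNT_* offsets; rdtsc64() stands in for the lfence/rdtsc/lfence sequence.
 *
 *      static uint64_t rtc_nanotime_read_fast_c(const rtc_nanotime_t *rntp)
 *      {
 *              uint32_t gen;
 *              uint64_t ns;
 *
 *              do {
 *                      while ((gen = rntp->generation) == 0)
 *                              ;                       // store in progress; spin
 *                      uint64_t delta = rdtsc64() - rntp->tsc_base;
 *                      ns = (uint64_t)(((__uint128_t)delta * rntp->scale) >> 32)
 *                           + rntp->ns_base;
 *              } while (gen != rntp->generation);      // retry if updated underneath us
 *              return ns;
 *      }
 */
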
        .globl  EXT(_rtc_nanotime_read)
        .align  FALIGN
LEXT(_rtc_nanotime_read)
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx
        movl    8(%ebp),%edi            /* get ptr to rtc_nanotime_info */
        movl    12(%ebp),%eax           /* get "slow" flag */
        testl   %eax,%eax
        jnz     Lslow

        /* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
        RTC_NANOTIME_READ_FAST()

        popl    %ebx
        popl    %edi
        popl    %esi
        popl    %ebp
        ret

        /* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
Lslow:
        movl    RNT_GENERATION(%edi),%esi       /* get generation (0 if being changed) */
        testl   %esi,%esi                       /* if being changed, loop until stable */
        jz      Lslow
        pushl   %esi                            /* save generation */
        pushl   RNT_SHIFT(%edi)                 /* save low 32 bits of tscFreq */

        lfence
        rdtsc                                   /* get TSC in %edx:%eax */
        lfence
        subl    RNT_TSC_BASE(%edi),%eax
        sbbl    RNT_TSC_BASE+4(%edi),%edx

        /*
         * Do the math to convert tsc ticks to nanoseconds.  We first
         * do a long multiply of 1 billion times the tsc.  Then we do
         * a long division by the tsc frequency.
         */
        mov     $1000000000, %ecx               /* number of nanoseconds in a second */
        mov     %edx, %ebx
        mul     %ecx
        mov     %edx, %edi
        mov     %eax, %esi
        mov     %ebx, %eax
        mul     %ecx
        add     %edi, %eax
        adc     $0, %edx                        /* result in edx:eax:esi */
        mov     %eax, %edi
        popl    %ecx                            /* get low 32 bits of tscFreq */
        xor     %eax, %eax
        xchg    %edx, %eax
        div     %ecx
        xor     %eax, %eax
        mov     %edi, %eax
        div     %ecx
        mov     %eax, %ebx
        mov     %esi, %eax
        div     %ecx
        mov     %ebx, %edx                      /* result in edx:eax */

        movl    8(%ebp),%edi                    /* recover ptr to rtc_nanotime_info */
        popl    %esi                            /* recover generation */

        addl    RNT_NS_BASE(%edi),%eax
        adcl    RNT_NS_BASE+4(%edi),%edx

        cmpl    RNT_GENERATION(%edi),%esi       /* have the parameters changed? */
        jne     Lslow                           /* yes, loop until stable */

        pop     %ebx
        pop     %edi
        pop     %esi
        pop     %ebp
        ret                                     /* result in edx:eax */

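/*
 * Illustrative sketch (not part of the build): a hypothetical C rendering of the
 * "slow" algorithm above, which long-multiplies the TSC delta by 10**9 and then
 * long-divides by the TSC frequency.  The 128-bit type is an assumption used to
 * stand in for the chained mul/div sequence; tsc_freq plays the role of the low
 * 32 bits of tscFreq saved from RNT_SHIFT above.
 *
 *      static uint64_t tsc_delta_to_ns_slow_c(uint64_t delta, uint32_t tsc_freq,
 *                                             uint64_t ns_base)
 *      {
 *              return (uint64_t)(((__uint128_t)delta * 1000000000ULL) / tsc_freq)
 *                     + ns_base;
 *      }
 */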