]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/machine_routines_asm.s
xnu-1486.2.11.tar.gz
[apple/xnu.git] / osfmk / i386 / machine_routines_asm.s
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <i386/asm.h>
30 #include <i386/rtclock.h>
31 #include <i386/proc_reg.h>
32 #include <i386/eflags.h>
33
34 #include <i386/postcode.h>
35 #include <i386/apic.h>
36 #include <assym.s>
37
38 /*
39 ** ml_get_timebase()
40 **
41 ** Entry - S_ARG0 (macro defined outside this file, presumably the first
42 ** stack argument) holds a pointer to a 64 bit structure.
43 ** Exit - 64 bit structure filled in from rdtsc: high 32 bits (%edx) at
44 ** offset 0, low 32 bits (%eax) at offset 4. Clobbers %eax, %ecx, %edx.
45 */
46 ENTRY(ml_get_timebase)
47
48 movl S_ARG0, %ecx /* %ecx = destination pointer */
49
50 lfence /* do not let rdtsc run ahead of earlier instructions */
51 rdtsc /* TSC -> %edx:%eax (high:low) */
52 lfence /* do not let later instructions start before rdtsc is done */
53
54 movl %edx, 0(%ecx) /* high word first; NOTE(review): not the little-endian uint64_t layout -- confirm callers expect this */
55 movl %eax, 4(%ecx) /* low word second */
56
57 ret
58
59 /*
60 * Convert between various timer units
61 *
62 * uint64_t tmrCvt(uint64_t time, uint64_t conversion)
63 *
64 * This code converts 64-bit time units to other units.
65 * For example, the TSC is converted to HPET units.
66 *
67 * Time is a 64-bit integer that is some number of ticks.
68 * Conversion is 64-bit fixed point number which is composed
69 * of a 32 bit integer and a 32 bit fraction.
70 *
71 * The time ticks are multiplied by the conversion factor. The
72 * calculations are done as a 128-bit value but both the high
73 * and low words are dropped. The high word is overflow and the
74 * low word is the fraction part of the result.
75 *
76 * We return a 64-bit value.
77 *
78 * Note that we can use this function to multiply 2 conversion factors.
79 * We do this in order to calculate the multiplier used to convert
80 * directly between any two units.
81 *
82 */
83
84 .globl EXT(tmrCvt)
85 .align FALIGN
86
87 LEXT(tmrCvt)
88
89 pushl %ebp // Save the caller's frame pointer
90 movl %esp,%ebp // Set up the frame: parameters start at 8(%ebp)
91 pushl %ebx // Save %ebx, used as scratch below
92 pushl %esi // Save %esi, used as scratch below
93 pushl %edi // Save %edi, used as scratch below
94
95 // %ebp + 8 - low-order ts
96 // %ebp + 12 - high-order ts
97 // %ebp + 16 - low-order cvt (the conversion factor is read by value from the stack)
98 // %ebp + 20 - high-order cvt
99
100 movl 8(%ebp),%eax // Get low-order ts
101 mull 16(%ebp) // %edx:%eax = low ts x low cvt
102 movl %edx,%edi // Keep only the high half; the low half (%eax) is dropped
103
104 movl 12(%ebp),%eax // Get the high-order ts
105 mull 16(%ebp) // %edx:%eax = high ts x low cvt
106 addl %eax,%edi // Add its low half to the high half of the low x low product
107 adcl $0,%edx // Propagate the carry of that add into the high half
108 movl %edx,%esi // Save the high half, carry included
109
110 // We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt
111 // in %esi:%edi
112
113 movl 8(%ebp),%eax // Get low-order ts
114 mull 20(%ebp) // %edx:%eax = low ts x high cvt
115 movl %eax,%ebx // Save its low half in %ebx
116 movl %edx,%ecx // Save its high half in %ecx
117
118 movl 12(%ebp),%eax // Get the high-order ts
119 mull 20(%ebp) // %edx:%eax = high ts x high cvt
120
121 // Now have %ecx:%ebx = low ts x high cvt and %edx:%eax = high ts x high cvt
122 // %edx is not used below: it is the highest word, which is overflow
123
124 addl %edi,%ebx // Low result word: sum of the two low parts
125 adcl %ecx,%esi // High result word: add high half of low x high, plus the carry from the line above
126 addl %eax,%esi // High result word: add low half of high x high; any carry out is lost as overflow
127
128 movl %ebx,%eax // Pass back low word in %eax
129 movl %esi,%edx // and the high word in %edx
130
131 popl %edi // Restore the saved %edi
132 popl %esi // Restore the saved %esi
133 popl %ebx // Restore the saved %ebx
134 popl %ebp // Restore the caller's frame pointer
135
136 ret // Return with the 64-bit result in %edx:%eax
137
138
139 /* void _rtc_nanotime_store(uint64_t tsc, -- written to RNT_TSC_BASE
140 uint64_t nsec, -- written to RNT_NS_BASE
141 uint32_t scale, -- written to RNT_SCALE
142 uint32_t shift, -- written to RNT_SHIFT
143 rtc_nanotime_t *dst) ; -- structure being updated; its generation is 0 during the update
144 */
145 .globl EXT(_rtc_nanotime_store)
146 .align FALIGN
147
148 LEXT(_rtc_nanotime_store)
149 push %ebp
150 movl %esp,%ebp /* set up the frame: arguments start at 8(%ebp) */
151 push %esi /* save %esi; it carries the generation below */
152
153 mov 32(%ebp),%edx /* get ptr to rtc_nanotime_info (the dst argument) */
154
155 movl RNT_GENERATION(%edx),%esi /* get current generation */
156 movl $0,RNT_GENERATION(%edx) /* flag data as being updated; must precede the data stores */
157
158 mov 8(%ebp),%eax /* tsc, low 32 bits */
159 mov %eax,RNT_TSC_BASE(%edx)
160 mov 12(%ebp),%eax /* tsc, high 32 bits */
161 mov %eax,RNT_TSC_BASE+4(%edx)
162
163 mov 24(%ebp),%eax /* scale */
164 mov %eax,RNT_SCALE(%edx)
165
166 mov 28(%ebp),%eax /* shift */
167 mov %eax,RNT_SHIFT(%edx)
168
169 mov 16(%ebp),%eax /* nsec, low 32 bits */
170 mov %eax,RNT_NS_BASE(%edx)
171 mov 20(%ebp),%eax /* nsec, high 32 bits */
172 mov %eax,RNT_NS_BASE+4(%edx)
173
174 incl %esi /* next generation */
175 jnz 1f /* nonzero: usable as is */
176 incl %esi /* wrapped around to 0: skip 0, which is a flag */
177 1: movl %esi,RNT_GENERATION(%edx) /* update generation and make usable; must follow the data stores */
178
179 pop %esi
180 pop %ebp
181 ret
182
183
184 /* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow );
185 *
186 * This is the same as the commpage nanotime routine, except that it uses the
187 * kernel internal "rtc_nanotime_info" data instead of the commpage data. The two copies
188 * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped().
189 *
190 * Warning! There is another copy of this code in osfmk/i386/locore.s. The
191 * two versions must be kept in sync with each other!
192 *
193 * There are actually two versions of the algorithm, one each for "slow" and "fast"
194 * processors. The more common "fast" algorithm is:
195 *
196 * nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
197 *
198 * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant computed during initialization:
199 *
200 * rnt_tsc_scale = (10**9 * 2**32) / tscFreq;
201 *
202 * The "slow" algorithm uses long division:
203 *
204 * nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base;
205 *
206 * Since this routine is not synchronized and can be called in any context,
207 * we use a generation count to guard against seeing partially updated data. In addition,
208 * the _rtc_nanotime_store() routine -- just above -- zeroes the generation before
209 * updating the data, and stores the nonzero generation only after all other data has been
210 * stored. Because IA32 guarantees that stores by one processor must be seen in order
211 * by another, we can avoid using a lock. We spin while the generation is zero.
212 *
213 * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax.
214 */
215
216 .globl EXT(_rtc_nanotime_read)
217 .align FALIGN
218 LEXT(_rtc_nanotime_read)
219 pushl %ebp
220 movl %esp,%ebp /* set up the frame: arguments start at 8(%ebp) */
221 pushl %esi /* save the registers used as scratch below */
222 pushl %edi
223 pushl %ebx
224 movl 8(%ebp),%edi /* get ptr to rtc_nanotime_info */
225 movl 12(%ebp),%eax /* get "slow" flag */
226 testl %eax,%eax
227 jnz Lslow /* nonzero flag: take the long-division path */
228
229 /* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD (macro body is defined outside this file) */
230 RTC_NANOTIME_READ_FAST()
231
232 popl %ebx
233 popl %edi
234 popl %esi
235 popl %ebp
236 ret
237
238 /* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */
239 Lslow:
240 movl RNT_GENERATION(%edi),%esi /* get generation (0 if being changed) */
241 testl %esi,%esi /* if being changed, loop until stable */
242 jz Lslow
243 pushl %esi /* save generation */
244 pushl RNT_SHIFT(%edi) /* save low 32 bits of tscFreq (read from the RNT_SHIFT field) */
245
246 lfence
247 rdtsc /* get TSC in %edx:%eax */
248 lfence
249 subl RNT_TSC_BASE(%edi),%eax /* %edx:%eax = TSC - tsc base, low word first */
250 sbbl RNT_TSC_BASE+4(%edi),%edx /* high word, with borrow */
251
252 /*
253 * Do the math to convert tsc ticks to nanoseconds. We first
254 * do long multiply of 1 billion times the tsc. Then we do
255 * long division by the tsc frequency
256 */
257 mov $1000000000, %ecx /* number of nanoseconds in a second */
258 mov %edx, %ebx /* set aside the high word of the tick delta */
259 mul %ecx /* %edx:%eax = low delta word * 1e9 */
260 mov %edx, %edi /* keep its high half (this overwrites the info pointer) */
261 mov %eax, %esi /* %esi = lowest word of the 96-bit product */
262 mov %ebx, %eax
263 mul %ecx /* %edx:%eax = high delta word * 1e9 */
264 add %edi, %eax /* fold in the high half of the first product */
265 adc $0, %edx /* result in edx:eax:esi */
266 mov %eax, %edi /* %edi = middle word of the product */
267 popl %ecx /* get low 32 tscFreq; NOTE(review): div below faults if this is 0 */
268 xor %eax, %eax
269 xchg %edx, %eax /* %edx:%eax = 0:top word of the product */
270 div %ecx /* quotient (top word of the result) is discarded; remainder stays in %edx */
271 xor %eax, %eax /* redundant: %eax is overwritten by the next instruction */
272 mov %edi, %eax /* %edx:%eax = remainder:middle word */
273 div %ecx
274 mov %eax, %ebx /* %ebx = high 32 bits of the returned quotient */
275 mov %esi, %eax /* %edx:%eax = remainder:lowest word */
276 div %ecx /* %eax = low 32 bits of the returned quotient */
277 mov %ebx, %edx /* result in edx:eax */
278
279 movl 8(%ebp),%edi /* recover ptr to rtc_nanotime_info */
280 popl %esi /* recover generation */
281
282 addl RNT_NS_BASE(%edi),%eax /* add the nanosecond base, low word first */
283 adcl RNT_NS_BASE+4(%edi),%edx /* high word, with carry */
284
285 cmpl RNT_GENERATION(%edi),%esi /* have the parameters changed? */
286 jne Lslow /* yes, loop until stable */
287
288 pop %ebx
289 pop %edi
290 pop %esi
291 pop %ebp
292 ret /* result in edx:eax */
293