]>
Commit | Line | Data |
---|---|---|
39236c6e A |
1 | /* |
2 | * Copyright (c) 2003-2007 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <sys/appleapiopts.h> | |
30 | #include <machine/cpu_capabilities.h> | |
31 | ||
32 | #if defined(__i386__) | |
33 | ||
34 | /* return mach_absolute_time in %edx:%eax | |
35 | * | |
36 | * The algorithm we use is: | |
37 | * | |
38 | * ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base; | |
39 | * | |
40 | * rnt_shift, a constant computed during initialization, is the smallest value for which: | |
41 | * | |
42 | * (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD | |
43 | * | |
44 | * Where SLOW_TSC_THRESHOLD is about 10e9. Since most processor's tscFreq is greater | |
45 | * than 1GHz, rnt_shift is usually 0. rnt_tsc_scale is also a 32-bit constant: | |
46 | * | |
47 | * rnt_tsc_scale = (10e9 * 2**32) / (tscFreq << rnt_shift); | |
48 | */ | |
49 | ||
50 | .globl _mach_absolute_time | |
51 | _mach_absolute_time: | |
52 | pushl %ebp | |
53 | movl %esp,%ebp | |
54 | pushl %esi | |
55 | pushl %ebx | |
56 | ||
57 | 0: | |
58 | movl _COMM_PAGE_NT_GENERATION,%esi /* get generation (0 if being changed) */ | |
59 | testl %esi,%esi /* if being updated, loop until stable */ | |
60 | jz 0b | |
61 | ||
62 | lfence | |
63 | rdtsc /* get TSC in %edx:%eax */ | |
64 | lfence | |
65 | ||
66 | subl _COMM_PAGE_NT_TSC_BASE,%eax | |
67 | sbbl _COMM_PAGE_NT_TSC_BASE+4,%edx | |
68 | ||
69 | /* | |
70 | * Prior to supporting "slow" processors, xnu always set _NT_SHIFT to 32. | |
71 | * Now it defaults to 0, unless the processor is slow. The shifts | |
72 | * below implicitly mask the count down to 5 bits, handling either default. | |
73 | */ | |
74 | movl _COMM_PAGE_NT_SHIFT,%ecx | |
75 | shldl %cl,%eax,%edx /* shift %edx left, filling in from %eax */ | |
76 | shll %cl,%eax /* finish shifting %edx:%eax left by _COMM_PAGE_NT_SHIFT bits */ | |
77 | ||
78 | movl _COMM_PAGE_NT_SCALE,%ecx | |
79 | ||
80 | movl %edx,%ebx | |
81 | mull %ecx | |
82 | movl %ebx,%eax | |
83 | movl %edx,%ebx | |
84 | mull %ecx | |
85 | addl %ebx,%eax | |
86 | adcl $0,%edx | |
87 | ||
88 | addl _COMM_PAGE_NT_NS_BASE,%eax | |
89 | adcl _COMM_PAGE_NT_NS_BASE+4,%edx | |
90 | ||
91 | cmpl _COMM_PAGE_NT_GENERATION,%esi /* have the parameters changed? */ | |
92 | jne 0b /* yes, loop until stable */ | |
93 | ||
94 | popl %ebx | |
95 | popl %esi | |
96 | popl %ebp | |
97 | ret | |
98 | ||
99 | #elif defined(__x86_64__) | |
100 | ||
101 | /* | |
102 | * 64-bit version _mach_absolute_time. We return the 64-bit nanotime in %rax. | |
103 | * | |
104 | * The algorithm we use is: | |
105 | * | |
106 | * ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base; | |
107 | * | |
108 | * rnt_shift, a constant computed during initialization, is the smallest value for which: | |
109 | * | |
110 | * tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD | |
111 | * | |
112 | * Where SLOW_TSC_THRESHOLD is about 10e9. Since most processor's tscFreqs are greater | |
113 | * than 1GHz, rnt_shift is usually 0. rnt_tsc_scale is also a 32-bit constant: | |
114 | * | |
115 | * rnt_tsc_scale = (10e9 * 2**32) / (tscFreq << rnt_shift); | |
116 | * | |
117 | */ | |
118 | .globl _mach_absolute_time | |
119 | _mach_absolute_time: | |
120 | pushq %rbp // set up a frame for backtraces | |
121 | movq %rsp,%rbp | |
122 | movq $(_COMM_PAGE_TIME_DATA_START),%rsi | |
123 | 1: | |
124 | movl _NT_GENERATION(%rsi),%r8d // get generation | |
125 | testl %r8d,%r8d // if 0, data is being changed... | |
126 | jz 1b // ...so loop until stable | |
127 | lfence | |
128 | rdtsc // edx:eax := tsc | |
129 | lfence | |
130 | shlq $32,%rdx // rax := ((edx << 32) | eax), ie 64-bit tsc | |
131 | orq %rdx,%rax | |
132 | ||
133 | /* | |
134 | * Prior to supporting "slow" processors, xnu always set _NT_SHIFT to 32. | |
135 | * Now it defaults to 0, unless the processor is slow. In order to maintain | |
136 | * compatibility with both old and new versions of xnu, we mask the shift | |
137 | * down to 0x1F, which maps the old default (32) into the new default (0). | |
138 | */ | |
139 | movl _NT_SHIFT(%rsi),%ecx | |
140 | andl $0x1F,%ecx // *** remove this line once 10.9 is GM *** | |
141 | subq _NT_TSC_BASE(%rsi), %rax // rax := (tsc - base_tsc) | |
142 | shlq %cl,%rax // rax := (tsc - base_tsc) << NT_SHIFT | |
143 | movl _NT_SCALE(%rsi),%ecx | |
144 | mulq %rcx // rdx:rax := ((tsc - base_tsc)<<shift) * scale | |
145 | shrdq $32,%rdx,%rax // divide by 2**32 | |
146 | addq _NT_NS_BASE(%rsi),%rax // (((tsc - base_tsc) * scale) >> 32) + ns_base | |
147 | ||
148 | cmpl _NT_GENERATION(%rsi),%r8d // did the data change during computation? | |
149 | jne 1b | |
150 | popq %rbp | |
151 | ret | |
152 | ||
5ba3f43e A |
153 | #elif defined(__arm__) |
154 | ||
155 | #include <mach/arm/syscall_sw.h> | |
156 | ||
157 | /* | |
158 | * If userspace access to the timebase is supported (indicated through the commpage), | |
159 | * directly reads the timebase and uses it and the current timebase offset (also in | |
160 | * the commpage, and updated whenever the system wakes from sleep) to construct the | |
161 | * current time value; otherwise, traps to the kernel to handle this. | |
162 | * | |
163 | * If we do this in user mode, there are two cases where we may need to redrive the | |
164 | * read. We do 3 reads (high-low-high) to the timebase, because we only have a | |
165 | * 32-bit interface to it (despite the use of mrrc). If the high bits change, we | |
166 | * need to reread the register (as our returned value could otherwise be off by | |
167 | * 2^32 mach absolute time units). | |
168 | * | |
169 | * We do two reads of the offset, before and after the register reads. If the offset | |
170 | * changes, we have gone to sleep in the midst of doing a read. This case should be | |
171 | * exceedingly rare, but could result in a terribly inaccurate result, so we need | |
172 | * to get a fresh timebase value. | |
173 | */ | |
174 | .text | |
175 | .align 2 | |
176 | .globl _mach_absolute_time | |
177 | _mach_absolute_time: | |
178 | movw ip, #((_COMM_PAGE_TIMEBASE_OFFSET) & 0x0000FFFF) | |
179 | movt ip, #(((_COMM_PAGE_TIMEBASE_OFFSET) >> 16) & 0x0000FFFF) | |
180 | ldrb r0, [ip, #((_COMM_PAGE_USER_TIMEBASE) - (_COMM_PAGE_TIMEBASE_OFFSET))] | |
181 | cmp r0, #0 // Are userspace reads supported? | |
182 | beq _mach_absolute_time_kernel // If not, go to the kernel | |
183 | isb // Prevent speculation on CNTPCT across calls | |
184 | // (see ARMV7C.b section B8.1.2, ARMv8 section D6.1.2) | |
185 | push {r4, r5, r7, lr} // Push a frame | |
186 | add r7, sp, #8 | |
187 | L_mach_absolute_time_user: | |
188 | ldr r4, [ip] // Load offset low bits | |
189 | ldr r5, [ip, #4] // Load offset high bits | |
190 | mrrc p15, 0, r3, r1, c14 // Read timebase high to r1 | |
191 | mrrc p15, 0, r0, r3, c14 // Read timebase low to r0 | |
192 | mrrc p15, 0, r3, r2, c14 // Read timebase high to r2 | |
193 | cmp r1, r2 // Did the high bits change? | |
194 | bne L_mach_absolute_time_user // Loop if timebase high changed | |
195 | ldr r2, [ip] // Load offset low bits | |
196 | ldr r3, [ip, #4] // Load offset high bits | |
197 | eor r4, r2 // Compare our offset values... | |
198 | eor r5, r3 | |
199 | orrs r5, r4 | |
200 | bne L_mach_absolute_time_user // If they changed, try again | |
201 | adds r0, r0, r2 // Construct mach_absolute_time | |
202 | adcs r1, r1, r3 | |
203 | pop {r4, r5, r7, pc} // Pop the frame | |
204 | ||
205 | .text | |
206 | .align 2 | |
207 | .globl _mach_absolute_time_kernel | |
208 | _mach_absolute_time_kernel: | |
209 | mov r12, #-3 // Load the magic MAT number | |
210 | swi #SWI_SYSCALL | |
211 | bx lr | |
212 | ||
213 | .text | |
214 | .align 2 | |
215 | .globl _mach_continuous_time_kernel | |
216 | _mach_continuous_time_kernel: | |
217 | mov r12, #-4 // Load the magic MCT number | |
218 | swi #SWI_SYSCALL | |
219 | bx lr | |
220 | ||
221 | #elif defined(__arm64__) | |
222 | ||
223 | #include <mach/arm/syscall_sw.h> | |
224 | ||
225 | /* | |
226 | * If userspace access to the timebase is supported (indicated through the commpage), | |
227 | * directly reads the timebase and uses it and the current timebase offset (also in | |
228 | * the commpage, and updated whenever the system wakes from sleep) to construct the | |
229 | * current time value; otherwise, traps to the kernel to handle this. | |
230 | * | |
231 | * If we do this in user mode, we do two reads of the offset, before and after we | |
232 | * read the register. If the offset changes, we have gone to sleep in the midst of | |
233 | * doing a read. This case should be exceedingly rare, but could result in a terribly | |
234 | * inaccurate result, so we need to get a fresh timebase value. | |
235 | * | |
236 | * Note that the commpage address construction expects our top 2 bytes to be 0xFFFF. | |
237 | * If this changes (i.e, we significantly relocate the commpage), this logic will need | |
238 | * to change as well (use 4 movk instructions rather than cheating with the movn). | |
239 | */ | |
240 | .text | |
241 | .align 2 | |
242 | .globl _mach_absolute_time | |
243 | _mach_absolute_time: | |
244 | movn x3, #(~((_COMM_PAGE_TIMEBASE_OFFSET) >> 32) & 0x000000000000FFFF), lsl #32 | |
245 | movk x3, #(((_COMM_PAGE_TIMEBASE_OFFSET) >> 16) & 0x000000000000FFFF), lsl #16 | |
246 | movk x3, #((_COMM_PAGE_TIMEBASE_OFFSET) & 0x000000000000FFFF) | |
247 | ldrb w2, [x3, #((_COMM_PAGE_USER_TIMEBASE) - (_COMM_PAGE_TIMEBASE_OFFSET))] | |
248 | cmp x2, #0 // Are userspace reads supported? | |
249 | b.eq _mach_absolute_time_kernel // If not, go to the kernel | |
250 | isb // Prevent speculation on CNTPCT across calls | |
251 | // (see ARMV7C.b section B8.1.2, ARMv8 section D6.1.2) | |
252 | L_mach_absolute_time_user: | |
253 | ldr x1, [x3] // Load the offset | |
254 | mrs x0, CNTPCT_EL0 // Read the timebase | |
255 | ldr x2, [x3] // Load the offset | |
256 | cmp x1, x2 // Compare our offset values... | |
257 | b.ne L_mach_absolute_time_user // If they changed, try again | |
258 | add x0, x0, x1 // Construct mach_absolute_time | |
259 | ret | |
260 | ||
261 | .text | |
262 | .align 2 | |
263 | .globl _mach_absolute_time_kernel | |
264 | _mach_absolute_time_kernel: | |
265 | mov w16, #-3 // Load the magic MAT number | |
266 | svc #SWI_SYSCALL | |
267 | ret | |
268 | ||
269 | .text | |
270 | .align 2 | |
271 | .globl _mach_continuous_time_kernel | |
272 | _mach_continuous_time_kernel: | |
273 | mov w16, #-4 // Load the magic MCT number | |
274 | svc #SWI_SYSCALL | |
275 | ret | |
276 | ||
39236c6e A |
277 | #else |
278 | #error Unsupported architecture | |
279 | #endif |