/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1

#include <stdint.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <kern/locks.h>
#include <kern/lock_stat.h>
#include <machine/machine_cpu.h>

#if defined(__x86_64__)
#include <i386/mp.h>
extern uint64_t LockTimeOutTSC;
#define TICKET_LOCK_PANIC_TIMEOUT LockTimeOutTSC
#endif

#if defined(__arm__) || defined(__arm64__)
extern uint64_t TLockTimeOut;
#define TICKET_LOCK_PANIC_TIMEOUT TLockTimeOut
#endif
/* "Ticket": A FIFO spinlock with constant backoff
 * cf. Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors
 * by Mellor-Crummey and Scott, 1991
 */

/* TODO: proportional back-off based on the desired-to-current ticket distance.
 * This has the potential to considerably reduce snoop traffic,
 * but must be tuned carefully.
 * TODO: UP implementation.
 * Currently only used within the scheduler, where it is acquired with
 * interrupts masked, and consequently doesn't require a uniprocessor
 * implementation.
 * TODO: Evaluate a bias towards the performant clusters on
 * asymmetric efficient/performant multi-cluster systems, while
 * retaining the starvation-free property. A small intra-cluster bias may
 * be profitable for overall throughput.
 */
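
/* For reference, a minimal sketch of the ticket discipline the comment above
 * describes, written with plain C11 atomics. This is illustrative only (the
 * "toy_" names are invented here) and omits the preemption, ownership, stats,
 * and timeout machinery of the real implementation below.
 */
#if 0
#include <stdatomic.h>

typedef struct {
	atomic_uchar cticket;           /* ticket currently being served */
	atomic_uchar nticket;           /* next ticket to hand out */
} toy_ticket_lock_t;

static void
toy_ticket_lock(toy_ticket_lock_t *l)
{
	/* Take a ticket: an unconditional fetch-add, so every contender
	 * obtains a unique, FIFO-ordered ticket. */
	uint8_t mine = atomic_fetch_add_explicit(&l->nticket, 1, memory_order_relaxed);
	/* Spin until the ticket being served equals ours; the acquire load
	 * orders the critical section after the prior holder's release. */
	while (atomic_load_explicit(&l->cticket, memory_order_acquire) != mine) {
		/* constant backoff / pause instruction would go here */
	}
}

static void
toy_ticket_unlock(toy_ticket_lock_t *l)
{
	/* Serve the next ticket; the release publishes the critical section */
	atomic_fetch_add_explicit(&l->cticket, 1, memory_order_release);
}
#endif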

void
lck_ticket_init(lck_ticket_t *tlock, lck_grp_t *grp)
{
	memset(tlock, 0, sizeof(*tlock));
	/* Current ticket size limit; tickets can be trivially expanded
	 * to 16 bits if needed
	 */
	static_assert(MAX_CPUS < 256);

	__assert_only lck_ticket_internal *tlocki = &tlock->tu;
	/* Verify alignment */
	__assert_only uintptr_t tcn = (uintptr_t) &tlocki->tcurnext;
	__assert_only uintptr_t tc = (uintptr_t) &tlocki->cticket;
	__assert_only uintptr_t tn = (uintptr_t) &tlocki->nticket;

	assert(((tcn & 3) == 0) && (tcn == tc) && (tn == (tc + 1)));

	if (grp) {
		lck_grp_reference(grp);
		lck_grp_lckcnt_incr(grp, LCK_TYPE_TICKET);
	}
}
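
/* The alignment assertions above imply a layout in which cticket and nticket
 * are adjacent bytes overlaid by the 16-bit tcurnext. A hedged sketch of that
 * assumed layout (the real definition lives in the locks headers):
 */
#if 0
typedef union {
	struct {
		uint8_t cticket;        /* low address: ticket now being served */
		uint8_t nticket;        /* next ticket to be granted */
	};
	uint16_t tcurnext;              /* both tickets, single atomic RMW target */
} lck_ticket_internal_sketch;
#endif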

static void
tlock_mark_owned(lck_ticket_t *tlock, thread_t cthread)
{
	assert(tlock->lck_owner == 0);
	/* There is a small preemption-disabled window (with interrupts also
	 * masked for the pset lock) between the acquisition of the lock and
	 * the population of the advisory 'owner' thread field.
	 * On architectures with a DCAS (ARM v8.1 or x86), we could conceivably
	 * populate the next ticket and the thread atomically, with possible
	 * overhead, potential loss of the micro-architectural forward-progress
	 * properties of an unconditional fetch-add, and a 16-byte alignment
	 * requirement.
	 */
	__c11_atomic_store((_Atomic thread_t *)&tlock->lck_owner, cthread, __ATOMIC_RELAXED);
}

#if __arm__ || __arm64__
__unused static uint8_t
load_exclusive_acquire8(uint8_t *target)
{
	uint8_t value;
#if __arm__
	value = __builtin_arm_ldrex(target);
	__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
#else
	value = __builtin_arm_ldaex(target);    // ldaxr
	/* "Compiler barrier"; no barrier instructions are emitted */
	atomic_signal_fence(memory_order_acquire);
#endif
	return value;
}
#endif

/* On contention, poll for ownership.
 * Returns when the current ticket is observed equal to "mt".
 */
static void __attribute__((noinline))
tlock_contended(uint8_t *tp, uint8_t mt, lck_ticket_t *tlock, thread_t cthread LCK_GRP_ARG(lck_grp_t *grp))
{
	uint8_t cticket;
	uint64_t etime = 0, ctime = 0, stime = 0;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_ticket_spin_enabled(tlock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	assertf(tlock->lck_owner != (uintptr_t) cthread, "Recursive ticket lock, owner: %p, current thread: %p", (void *) tlock->lck_owner, (void *) cthread);

	for (;;) {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
#if (__ARM_ENABLE_WFE_)
			if ((cticket = load_exclusive_acquire8(tp)) != mt) {
				wait_for_event();
			} else {
				/* Some micro-architectures may benefit
				 * from disarming the monitor.
				 * TODO: determine the specific micro-architectures
				 * that benefit; modern CPUs may not
				 */
				os_atomic_clear_exclusive();
				tlock_mark_owned(tlock, cthread);
#if CONFIG_DTRACE || LOCK_STATS
				lck_grp_ticket_update_miss(tlock LCK_GRP_ARG(grp));
				if (__improbable(stat_enabled)) {
					lck_grp_ticket_update_spin(tlock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
				}
#endif /* CONFIG_DTRACE || LOCK_STATS */
				return;
			}
#else /* !WFE */
#if defined(__x86_64__)
			__builtin_ia32_pause();
#endif /* x64 */
			if ((cticket = __c11_atomic_load((_Atomic uint8_t *) tp, __ATOMIC_SEQ_CST)) == mt) {
				tlock_mark_owned(tlock, cthread);
#if CONFIG_DTRACE || LOCK_STATS
				if (__improbable(stat_enabled)) {
					lck_grp_ticket_update_spin(tlock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
				}
				lck_grp_ticket_update_miss(tlock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
				return;
			}
#endif /* !WFE */
		}

		if (etime == 0) {
			stime = ml_get_timebase();
			etime = stime + TICKET_LOCK_PANIC_TIMEOUT;
		} else if ((ctime = ml_get_timebase()) >= etime) {
			break;
		}
	}
#if defined (__x86_64__)
	uintptr_t lowner = tlock->lck_owner;
	uint32_t ocpu = spinlock_timeout_NMI(lowner);
	panic("Ticket spinlock timeout; start: 0x%llx, end: 0x%llx, current: 0x%llx, lock: %p, *lock: 0x%x, waiting for 0x%x, pre-NMI owner: %p, current owner: %p, owner CPU: 0x%x", stime, etime, ctime, tp, *tp, mt, (void *) lowner, (void *) tlock->lck_owner, ocpu);
#else
	panic("Ticket spinlock timeout; start: 0x%llx, end: 0x%llx, current: 0x%llx, lock: %p, *lock: 0x%x, waiting for 0x%x, owner: %p", stime, etime, ctime, tp, *tp, mt, (void *) tlock->lck_owner);
#endif
}
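
/* A hedged sketch of the proportional back-off mentioned in the TODO block
 * above (after Mellor-Crummey and Scott): scale the pause with the caller's
 * distance in the ticket queue. Not part of this file; cpu_pause() stands in
 * for a hypothetical per-architecture pause/delay primitive.
 */
#if 0
static inline void
tlock_backoff_proportional(uint8_t cticket, uint8_t mt)
{
	/* Waiters ahead of us: distance from the ticket being served to
	 * ours, modulo the natural wrap of the 8-bit ticket. */
	uint8_t dist = (uint8_t)(mt - cticket);

	/* Pause roughly in proportion to that distance, reducing snoop
	 * traffic on the lock line while acquisition is impossible. */
	for (uint8_t i = 0; i < dist; i++) {
		cpu_pause();
	}
}
#endif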

void
(lck_ticket_lock)(lck_ticket_t * tlock LCK_GRP_ARG(lck_grp_t *grp))
{
	lck_ticket_internal *tlocki = &tlock->tu;
	thread_t cthread = current_thread();
	lck_ticket_internal tlocka;

	disable_preemption_for_thread(cthread);
	/* Atomically load both the current and next ticket, and increment the
	 * latter. Wrap of the ticket field is OK as long as the total
	 * number of contending CPUs is < the maximum ticket.
	 */
	tlocka.tcurnext = __c11_atomic_fetch_add((_Atomic uint16_t *)&tlocki->tcurnext, 1U << 8, __ATOMIC_ACQUIRE);

	/* On contention, branch to the out-of-line contended block */
	if (__improbable(tlocka.cticket != tlocka.nticket)) {
		return tlock_contended(&tlocki->cticket, tlocka.nticket, tlock, cthread LCK_GRP_ARG(grp));
	}

	tlock_mark_owned(tlock, cthread);

	lck_grp_ticket_update_held(tlock LCK_GRP_ARG(grp));
}
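
/* A worked example of the combined fetch-add above, assuming the little-endian
 * byte overlay sketched earlier (cticket in the low byte, nticket in the high
 * byte of tcurnext); fetch-add returns the pre-increment value:
 *
 *   old tcurnext == 0x0303: cticket == 3, nticket == 3. Uncontended; the
 *       caller holds ticket 3, proceeds directly, and leaves tcurnext 0x0403.
 *   old tcurnext == 0x0503: cticket == 3, nticket == 5. Ticket 3 holds the
 *       lock and ticket 4 is already waiting; the caller takes ticket 5 and
 *       spins in tlock_contended() until cticket reaches 5.
 */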

void
lck_ticket_unlock(lck_ticket_t *tlock)
{
	lck_ticket_internal *tlocki = &tlock->tu;

	assertf(tlock->lck_owner == (uintptr_t) current_thread(), "Ticket unlock non-owned, owner: %p", (void *) tlock->lck_owner);

	__c11_atomic_store((_Atomic uintptr_t *)&tlock->lck_owner, 0, __ATOMIC_RELAXED);

#if defined(__x86_64__)
	/* Communicate the desired release semantics to the compiler */
	__c11_atomic_thread_fence(__ATOMIC_RELEASE);
	/* The '+' constraint indicates a read-modify-write. */
	/* I have not yet located a C11 primitive which synthesizes an 'INC <MEM>',
	 * i.e. a specified-granule non-atomic memory read-modify-write.
	 */
	__asm__ volatile ("incb %0" : "+m"(tlocki->cticket) :: "cc");
#else /* !x86_64 */
	uint8_t cticket = __c11_atomic_load((_Atomic uint8_t *) &tlocki->cticket, __ATOMIC_RELAXED);
	cticket++;
	__c11_atomic_store((_Atomic uint8_t *) &tlocki->cticket, cticket, __ATOMIC_RELEASE);
#if __arm__
	set_event();
#endif // __arm__
#endif /* !x86_64 */
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_TICKET_LOCK_RELEASE, tlock);
#endif /* CONFIG_DTRACE */
	enable_preemption();
}

void
lck_ticket_assert_owned(__assert_only lck_ticket_t *tlock)
{
	assertf(__c11_atomic_load((_Atomic thread_t *)&tlock->lck_owner, __ATOMIC_RELAXED) == current_thread(), "lck_ticket_assert_owned: owner %p, current: %p", (void *) tlock->lck_owner, current_thread());
}
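
/* A hedged usage sketch from a hypothetical kernel client; the group/lock
 * names are illustrative, and the group argument to lck_ticket_lock() is
 * assumed to follow the LCK_GRP_ARG convention used above.
 */
#if 0
static lck_grp_t   *my_grp;
static lck_ticket_t my_tlock;

static void
my_subsystem_init(void)
{
	my_grp = lck_grp_alloc_init("my_subsystem", LCK_GRP_ATTR_NULL);
	lck_ticket_init(&my_tlock, my_grp);
}

static void
my_critical_section(void)
{
	lck_ticket_lock(&my_tlock, my_grp);     /* disables preemption; FIFO admission */
	/* ... short, non-blocking critical section ... */
	lck_ticket_unlock(&my_tlock);           /* re-enables preemption */
}
#endif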