/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1

#include <stdint.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <kern/locks.h>
#include <machine/machine_cpu.h>

#if defined(__x86_64__)
#include <i386/mp.h>
extern uint64_t LockTimeOutTSC;
#define TICKET_LOCK_PANIC_TIMEOUT LockTimeOutTSC
#endif

#if defined(__arm__) || defined(__arm64__)
extern uint64_t TLockTimeOut;
#define TICKET_LOCK_PANIC_TIMEOUT TLockTimeOut
#endif
/* "Ticket": A FIFO spinlock with constant backoff
 * cf. Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors
 * by Mellor-Crummey and Scott, 1991
 */
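
/* Illustrative summary of the scheme (the routines below layer preemption
 * control, an advisory owner field and panic timeouts on top of it; the
 * names here are purely for exposition):
 *
 *	acquire: my_ticket = fetch_add(&next_ticket, 1);
 *	         spin until current_ticket == my_ticket;
 *	release: current_ticket++;   // hands the lock to the next waiter, FIFO
 */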

/* TODO: proportional back-off based on desired-current ticket distance
 * This has the potential to considerably reduce snoop traffic
 * but must be tuned carefully
 * TODO: UP implementation.
 * Currently only used within the scheduler, where it is acquired with
 * interrupts masked, and consequently doesn't require a uniprocessor
 * implementation.
 * TODO: Evaluate a bias towards the performant clusters on
 * asymmetric efficient/performant multi-cluster systems, while
 * retaining the starvation-free property. A small intra-cluster bias may
 * be profitable for overall throughput
 */

void
lck_ticket_init(lck_ticket_t *tlock)
{
	memset(tlock, 0, sizeof(*tlock));
	/* Current ticket size limit--tickets can be trivially expanded
	 * to 16-bits if needed
	 */
	static_assert(MAX_CPUS < 256);

	__assert_only lck_ticket_internal *tlocki = &tlock->tu;
	/* Verify alignment */
	__assert_only uintptr_t tcn = (uintptr_t) &tlocki->tcurnext;
	__assert_only uintptr_t tc = (uintptr_t) &tlocki->cticket;
	__assert_only uintptr_t tn = (uintptr_t) &tlocki->nticket;

	assert(((tcn & 3) == 0) && (tcn == tc) && (tn == (tc + 1)));
}

static void
tlock_mark_owned(lck_ticket_t *tlock, thread_t cthread)
{
	assert(tlock->lck_owner == 0);
	/* There is a small pre-emption disabled window (also interrupts masked
	 * for the pset lock) between the acquisition of the lock and the
	 * population of the advisory 'owner' thread field
	 * On architectures with a DCAS (ARM v8.1 or x86), conceivably we could
	 * populate the next ticket and the thread atomically, with
	 * possible overhead, potential loss of micro-architectural fwd progress
	 * properties of an unconditional fetch-add, and a 16 byte alignment requirement.
	 */
	__c11_atomic_store((_Atomic thread_t *)&tlock->lck_owner, cthread, __ATOMIC_RELAXED);
}

#if __arm__ || __arm64__
__unused static uint8_t
load_exclusive_acquire8(uint8_t *target)
{
	uint8_t value;
#if __arm__
	value = __builtin_arm_ldrex(target);
	__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
#else
	value = __builtin_arm_ldaex(target);    // ldaxr
	/* "Compiler barrier", no barrier instructions are emitted */
	atomic_signal_fence(memory_order_acquire);
#endif
	return value;
}
#endif

/* On contention, poll for ownership
 * Returns when the current ticket is observed equal to "mt"
 */
static void __attribute__((noinline))
tlock_contended(uint8_t *tp, uint8_t mt, lck_ticket_t *tlock, thread_t cthread)
{
	uint8_t cticket;
	uint64_t etime = 0, ctime = 0, stime = 0;

	assertf(tlock->lck_owner != (uintptr_t) cthread, "Recursive ticket lock, owner: %p, current thread: %p", (void *) tlock->lck_owner, (void *) cthread);

	for (;;) {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
#if (__ARM_ENABLE_WFE_)
			if ((cticket = load_exclusive_acquire8(tp)) != mt) {
				wait_for_event();
			} else {
				/* Some micro-architectures may benefit
				 * from disarming the monitor.
				 * TODO: determine specific micro-architectures
				 * which benefit, modern CPUs may not
				 */
				os_atomic_clear_exclusive();
				tlock_mark_owned(tlock, cthread);
				return;
			}
#else /* !WFE */
#if defined(__x86_64__)
			__builtin_ia32_pause();
#endif /* x64 */
			if ((cticket = __c11_atomic_load((_Atomic uint8_t *) tp, __ATOMIC_SEQ_CST)) == mt) {
				tlock_mark_owned(tlock, cthread);
				return;
			}
#endif /* !WFE */
		}

		if (etime == 0) {
			stime = ml_get_timebase();
			etime = stime + TICKET_LOCK_PANIC_TIMEOUT;
		} else if ((ctime = ml_get_timebase()) >= etime) {
			break;
		}
	}
#if defined (__x86_64__)
	uintptr_t lowner = tlock->lck_owner;
	uint32_t ocpu = spinlock_timeout_NMI(lowner);
	panic("Ticket spinlock timeout; start: 0x%llx, end: 0x%llx, current: 0x%llx, lock: %p, *lock: 0x%x, waiting for 0x%x, pre-NMI owner: %p, current owner: %p, owner CPU: 0x%x", stime, etime, ctime, tp, *tp, mt, (void *) lowner, (void *) tlock->lck_owner, ocpu);
#else
	panic("Ticket spinlock timeout; start: 0x%llx, end: 0x%llx, current: 0x%llx, lock: %p, *lock: 0x%x, waiting for 0x%x, owner: %p", stime, etime, ctime, tp, *tp, mt, (void *) tlock->lck_owner);
#endif
}

void
lck_ticket_lock(lck_ticket_t *tlock)
{
	lck_ticket_internal *tlocki = &tlock->tu;
	thread_t cthread = current_thread();
	lck_ticket_internal tlocka;

	disable_preemption_for_thread(cthread);
	/* Atomically load both the current and next ticket, and increment the
	 * latter. Wrap of the ticket field is OK as long as the total
	 * number of contending CPUs is < maximum ticket
	 */
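	/* The fetch-add below relies on the layout asserted in
	 * lck_ticket_init(): cticket occupies the low-address byte of
	 * tcurnext and nticket the byte above it, so on the little-endian
	 * targets built here adding 1U << 8 increments nticket only, while
	 * the returned value snapshots both pre-increment tickets.
	 */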
	tlocka.tcurnext = __c11_atomic_fetch_add((_Atomic uint16_t *)&tlocki->tcurnext, 1U << 8, __ATOMIC_ACQUIRE);

	/* Contention? branch to out of line contended block */
	if (__improbable(tlocka.cticket != tlocka.nticket)) {
		return tlock_contended(&tlocki->cticket, tlocka.nticket, tlock, cthread);
	}

	tlock_mark_owned(tlock, cthread);
}

void
lck_ticket_unlock(lck_ticket_t *tlock)
{
	lck_ticket_internal *tlocki = &tlock->tu;

	assertf(tlock->lck_owner == (uintptr_t) current_thread(), "Ticket unlock non-owned, owner: %p", (void *) tlock->lck_owner);

	__c11_atomic_store((_Atomic uintptr_t *)&tlock->lck_owner, 0, __ATOMIC_RELAXED);

#if defined(__x86_64__)
	/* Communicate desired release semantics to the compiler */
	__c11_atomic_thread_fence(__ATOMIC_RELEASE);
202 | /* '+ constraint indicates a read modify write */ | |
	/* I have not yet located a c11 primitive which synthesizes an 'INC <MEM>',
	 * i.e. a specified-granule non-atomic memory read-modify-write.
	 */
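	/* A plain (non lock-prefixed) increment suffices here: only the lock
	 * holder modifies cticket while the lock is held, waiters merely read
	 * it, the single-byte store that publishes the new value is itself
	 * atomic, and the preceding fence supplies the release ordering.
	 */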
	__asm__ volatile ("incb %0" : "+m"(tlocki->cticket) :: "cc");
#else /* !x86_64 */
	uint8_t cticket = __c11_atomic_load((_Atomic uint8_t *) &tlocki->cticket, __ATOMIC_RELAXED);
	cticket++;
	__c11_atomic_store((_Atomic uint8_t *) &tlocki->cticket, cticket, __ATOMIC_RELEASE);
#if __arm__
	set_event();
#endif // __arm__
#endif /* !x86_64 */
	enable_preemption();
}

void
lck_ticket_assert_owned(__assert_only lck_ticket_t *tlock)
{
	assertf(__c11_atomic_load((_Atomic thread_t *)&tlock->lck_owner, __ATOMIC_RELAXED) == current_thread(), "lck_ticket_assert_owned: owner %p, current: %p", (void *) tlock->lck_owner, current_thread());
}
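
/* Illustrative usage sketch: how a caller might drive this API. The struct
 * and helper names below are hypothetical and exist only for exposition;
 * lck_ticket_lock() itself disables preemption, and callers such as the
 * scheduler additionally mask interrupts around the critical section.
 */
#if 0
struct example_queue {
	lck_ticket_t    eq_lock;	/* hypothetical embedded ticket lock */
	uint32_t        eq_count;
};

static void
example_queue_init(struct example_queue *eq)
{
	lck_ticket_init(&eq->eq_lock);
	eq->eq_count = 0;
}

static void
example_queue_bump(struct example_queue *eq)
{
	lck_ticket_lock(&eq->eq_lock);
	lck_ticket_assert_owned(&eq->eq_lock);
	eq->eq_count++;
	lck_ticket_unlock(&eq->eq_lock);
}
#endif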