osfmk/kern/tlock.c (xnu-7195.81.3)
/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1

#include <stdint.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <kern/locks.h>
#include <kern/lock_stat.h>
#include <machine/machine_cpu.h>

#if defined(__x86_64__)
#include <i386/mp.h>
extern uint64_t LockTimeOutTSC;
#define TICKET_LOCK_PANIC_TIMEOUT LockTimeOutTSC
#endif

#if defined(__arm__) || defined(__arm64__)
extern uint64_t TLockTimeOut;
#define TICKET_LOCK_PANIC_TIMEOUT TLockTimeOut
#endif
/* "Ticket": A FIFO spinlock with constant backoff
 * cf. Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors
 * by Mellor-Crummey and Scott, 1991
 */
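/* Illustrative sketch, assuming only <stdatomic.h>: the classic ticket lock
 * from the paper above. The names toy_ticket_t/toy_lock/toy_unlock are
 * hypothetical; the implementation in this file packs both counters into
 * lck_ticket_t and adds ownership tracking, statistics and timeout panics.
 *
 *	typedef struct { _Atomic uint8_t cur; _Atomic uint8_t next; } toy_ticket_t;
 *
 *	static void
 *	toy_lock(toy_ticket_t *t)
 *	{
 *		// Take a ticket; the acquire ordering is on the spin load below.
 *		uint8_t mine = atomic_fetch_add_explicit(&t->next, 1, memory_order_relaxed);
 *		while (atomic_load_explicit(&t->cur, memory_order_acquire) != mine) {
 *			;	// spin with constant back-off
 *		}
 *	}
 *
 *	static void
 *	toy_unlock(toy_ticket_t *t)
 *	{
 *		// Only the holder writes cur, so a plain load + release store suffices.
 *		uint8_t c = atomic_load_explicit(&t->cur, memory_order_relaxed);
 *		atomic_store_explicit(&t->cur, (uint8_t)(c + 1), memory_order_release);
 *	}
 */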

/* TODO: proportional back-off based on desired-current ticket distance
 * This has the potential to considerably reduce snoop traffic
 * but must be tuned carefully
 * TODO: UP implementation.
 * Currently only used within the scheduler, where it is acquired with
 * interrupts masked, and consequently doesn't require a uniprocessor
 * implementation.
 * TODO: Evaluate a bias towards the performant clusters on
 * asymmetric efficient/performant multi-cluster systems, while
 * retaining the starvation-free property. A small intra-cluster bias may
 * be profitable for overall throughput
 */
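
/* One possible shape for the proportional back-off mentioned above
 * (hypothetical and untested, not part of this file): scale the number of
 * pause iterations by how far this CPU's ticket is from the one currently
 * being served, e.g. in the contended loop
 *
 *	uint8_t dist = (uint8_t)(mt - cticket);		// waiters ahead of us
 *	for (unsigned i = 0; i < dist * BACKOFF_UNIT; i++) {
 *		cpu_relax_hint();	// hypothetical per-arch pause primitive
 *	}
 *
 * so the CPU next in line polls most aggressively while distant waiters
 * generate little coherence traffic. BACKOFF_UNIT would need careful tuning.
 */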

void
lck_ticket_init(lck_ticket_t *tlock, lck_grp_t *grp)
{
	memset(tlock, 0, sizeof(*tlock));
	/* Current ticket size limit--tickets can be trivially expanded
	 * to 16-bits if needed
	 */
	static_assert(MAX_CPUS < 256);

	__assert_only lck_ticket_internal *tlocki = &tlock->tu;
	/* Verify alignment */
	__assert_only uintptr_t tcn = (uintptr_t) &tlocki->tcurnext;
	__assert_only uintptr_t tc = (uintptr_t) &tlocki->cticket;
	__assert_only uintptr_t tn = (uintptr_t) &tlocki->nticket;

	assert(((tcn & 3) == 0) && (tcn == tc) && (tn == (tc + 1)));

	if (grp) {
		lck_grp_reference(grp);
		lck_grp_lckcnt_incr(grp, LCK_TYPE_TICKET);
	}
}
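
/* Hypothetical usage sketch (the caller names and attribution group are
 * illustrative, not taken from this file): a ticket lock is initialized
 * against a lock group, then acquired and released like any spinlock. With
 * lock statistics/DTrace configured, lck_ticket_lock() also carries the
 * group argument via the LCK_GRP_ARG() convention used below.
 *
 *	static lck_ticket_t example_lock;
 *
 *	void
 *	example_setup(lck_grp_t *example_grp)
 *	{
 *		lck_ticket_init(&example_lock, example_grp);
 *	}
 *
 *	void
 *	example_critical_section(void)
 *	{
 *		lck_ticket_lock(&example_lock);
 *		lck_ticket_assert_owned(&example_lock);
 *		// ... protected work; preemption stays disabled while held ...
 *		lck_ticket_unlock(&example_lock);
 *	}
 */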

static void
tlock_mark_owned(lck_ticket_t *tlock, thread_t cthread)
{
	assert(tlock->lck_owner == 0);
	/* There is a small pre-emption disabled window (also interrupts masked
	 * for the pset lock) between the acquisition of the lock and the
	 * population of the advisory 'owner' thread field.
	 * On architectures with a DCAS (ARM v8.1 or x86), conceivably we could
	 * populate the next ticket and the thread atomically, with
	 * possible overhead, potential loss of micro-architectural fwd progress
	 * properties of an unconditional fetch-add, and a 16 byte alignment requirement.
	 */
	__c11_atomic_store((_Atomic thread_t *)&tlock->lck_owner, cthread, __ATOMIC_RELAXED);
}
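
/* The relaxed store above is sufficient because lck_owner is advisory: it is
 * consulted only by assertions and the timeout panic below, never to decide
 * mutual exclusion, and it is written while the lock is already held with
 * preemption disabled.
 */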

#if __arm__ || __arm64__
__unused static uint8_t
load_exclusive_acquire8(uint8_t *target)
{
	uint8_t value;
#if __arm__
	value = __builtin_arm_ldrex(target);
	__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
#else
	value = __builtin_arm_ldaex(target);    // ldaxr
	/* "Compiler barrier", no barrier instructions are emitted */
	atomic_signal_fence(memory_order_acquire);
#endif
	return value;
}
#endif
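
/* On ARM, the exclusive load above also arms the exclusive monitor for the
 * loaded address. The wait_for_event() (WFE) in the contended loop below
 * relies on this: the waiting CPU can sleep until the monitor is cleared by
 * another CPU's store to cticket (or by an explicit SEV), rather than
 * continuously polling the line.
 */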

/* On contention, poll for ownership
 * Returns when the current ticket is observed equal to "mt"
 */
static void __attribute__((noinline))
tlock_contended(uint8_t *tp, uint8_t mt, lck_ticket_t *tlock, thread_t cthread LCK_GRP_ARG(lck_grp_t *grp))
{
	uint8_t cticket;
	uint64_t etime = 0, ctime = 0, stime = 0;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_ticket_spin_enabled(tlock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	assertf(tlock->lck_owner != (uintptr_t) cthread, "Recursive ticket lock, owner: %p, current thread: %p", (void *) tlock->lck_owner, (void *) cthread);

	for (;;) {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
#if (__ARM_ENABLE_WFE_)
			if ((cticket = load_exclusive_acquire8(tp)) != mt) {
				wait_for_event();
			} else {
				/* Some micro-architectures may benefit
				 * from disarming the monitor.
				 * TODO: determine specific micro-architectures
				 * which benefit, modern CPUs may not
				 */
				os_atomic_clear_exclusive();
				tlock_mark_owned(tlock, cthread);
#if CONFIG_DTRACE || LOCK_STATS
				lck_grp_ticket_update_miss(tlock LCK_GRP_ARG(grp));
				if (__improbable(stat_enabled)) {
					lck_grp_ticket_update_spin(tlock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
				}
#endif /* CONFIG_DTRACE || LOCK_STATS */
				return;
			}
#else /* !WFE */
#if defined(__x86_64__)
			__builtin_ia32_pause();
#endif /* x64 */
			if ((cticket = __c11_atomic_load((_Atomic uint8_t *) tp, __ATOMIC_SEQ_CST)) == mt) {
				tlock_mark_owned(tlock, cthread);
#if CONFIG_DTRACE || LOCK_STATS
				if (__improbable(stat_enabled)) {
					lck_grp_ticket_update_spin(tlock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
				}
				lck_grp_ticket_update_miss(tlock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
				return;
			}
#endif /* !WFE */
		}

		if (etime == 0) {
			stime = ml_get_timebase();
			etime = stime + TICKET_LOCK_PANIC_TIMEOUT;
		} else if ((ctime = ml_get_timebase()) >= etime) {
			break;
		}
	}
#if defined (__x86_64__)
	uintptr_t lowner = tlock->lck_owner;
	uint32_t ocpu = spinlock_timeout_NMI(lowner);
	panic("Ticket spinlock timeout; start: 0x%llx, end: 0x%llx, current: 0x%llx, lock: %p, *lock: 0x%x, waiting for 0x%x, pre-NMI owner: %p, current owner: %p, owner CPU: 0x%x", stime, etime, ctime, tp, *tp, mt, (void *) lowner, (void *) tlock->lck_owner, ocpu);
#else
	panic("Ticket spinlock timeout; start: 0x%llx, end: 0x%llx, current: 0x%llx, lock: %p, *lock: 0x%x, waiting for 0x%x, owner: %p", stime, etime, ctime, tp, *tp, mt, (void *) tlock->lck_owner);
#endif
}

void
(lck_ticket_lock)(lck_ticket_t * tlock LCK_GRP_ARG(lck_grp_t *grp))
{
	lck_ticket_internal *tlocki = &tlock->tu;
	thread_t cthread = current_thread();
	lck_ticket_internal tlocka;

	disable_preemption_for_thread(cthread);
	/* Atomically load both the current and next ticket, and increment the
	 * latter. Wrap of the ticket field is OK as long as the total
	 * number of contending CPUs is < maximum ticket
	 */
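	/* Layout note (derived from the asserts in lck_ticket_init above):
	 * cticket occupies the low byte of the 16-bit tcurnext and nticket the
	 * high byte, so on these little-endian targets a single 16-bit
	 * fetch-add of (1 << 8) reserves the next ticket while returning a
	 * consistent snapshot of both fields:
	 *
	 *	before: tcurnext == (nticket << 8) | cticket
	 *	after:  nticket incremented; the returned snapshot is the old value
	 *
	 * If the snapshot's cticket equals its nticket, the lock was
	 * uncontended and is now owned by this CPU.
	 */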
	tlocka.tcurnext = __c11_atomic_fetch_add((_Atomic uint16_t *)&tlocki->tcurnext, 1U << 8, __ATOMIC_ACQUIRE);

	/* Contention? branch to out of line contended block */
	if (__improbable(tlocka.cticket != tlocka.nticket)) {
		return tlock_contended(&tlocki->cticket, tlocka.nticket, tlock, cthread LCK_GRP_ARG(grp));
	}

	tlock_mark_owned(tlock, cthread);

	lck_grp_ticket_update_held(tlock LCK_GRP_ARG(grp));
}

void
lck_ticket_unlock(lck_ticket_t *tlock)
{
	lck_ticket_internal *tlocki = &tlock->tu;

	assertf(tlock->lck_owner == (uintptr_t) current_thread(), "Ticket unlock non-owned, owner: %p", (void *) tlock->lck_owner);

	__c11_atomic_store((_Atomic uintptr_t *)&tlock->lck_owner, 0, __ATOMIC_RELAXED);

#if defined(__x86_64__)
	/* Communicate desired release semantics to the compiler */
	__c11_atomic_thread_fence(__ATOMIC_RELEASE);
	/* The '+' constraint indicates a read-modify-write operand */
	/* I have not yet located a C11 primitive which synthesizes an 'INC <MEM>',
	 * i.e. a specified-granule non-atomic memory read-modify-write.
	 */
	__asm__ volatile ("incb %0" : "+m"(tlocki->cticket) :: "cc");
#else /* !x86_64 */
	uint8_t cticket = __c11_atomic_load((_Atomic uint8_t *) &tlocki->cticket, __ATOMIC_RELAXED);
	cticket++;
	__c11_atomic_store((_Atomic uint8_t *) &tlocki->cticket, cticket, __ATOMIC_RELEASE);
#if __arm__
	set_event();
#endif // __arm__
#endif /* !x86_64 */
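	/* The explicit set_event() (SEV) above, on 32-bit ARM, wakes any CPUs
	 * parked in wait_for_event() inside tlock_contended(). On arm64 the
	 * waiters' exclusive monitor (armed by load_exclusive_acquire8()) is
	 * cleared by the store to cticket, which is itself expected to
	 * generate the WFE wake-up event.
	 */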
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_TICKET_LOCK_RELEASE, tlock);
#endif /* CONFIG_DTRACE */
	enable_preemption();
}

void
lck_ticket_assert_owned(__assert_only lck_ticket_t *tlock)
{
	assertf(__c11_atomic_load((_Atomic thread_t *)&tlock->lck_owner, __ATOMIC_RELAXED) == current_thread(), "lck_ticket_assert_owned: owner %p, current: %p", (void *) tlock->lck_owner, current_thread());
}