/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1

#include <kern/thread.h>
#include <machine/atomic.h>
#include <kern/locks.h>
#include <kern/lock_stat.h>
#include <machine/machine_cpu.h>

#if defined(__x86_64__)
extern uint64_t LockTimeOutTSC;
#define TICKET_LOCK_PANIC_TIMEOUT LockTimeOutTSC
#endif /* defined(__x86_64__) */

#if defined(__arm__) || defined(__arm64__)
extern uint64_t TLockTimeOut;
#define TICKET_LOCK_PANIC_TIMEOUT TLockTimeOut
#endif /* defined(__arm__) || defined(__arm64__) */
48 /* "Ticket": A FIFO spinlock with constant backoff
49 * cf. Algorithms for Scalable Synchronization on Shared-Memory Multiprocessors
50 * by Mellor-Crumney and Scott, 1991

/* TODO: proportional back-off based on desired-current ticket distance
 * This has the potential to considerably reduce snoop traffic
 * but must be tuned carefully
 * TODO: UP implementation.
 * Currently only used within the scheduler, where it is acquired with
 * interrupts masked, and consequently doesn't require a uniprocessor
 * implementation.
 * TODO: Evaluate a bias towards the performant clusters on
 * asymmetric efficient/performant multi-cluster systems, while
 * retaining the starvation-free property. A small intra-cluster bias may
 * be profitable for overall throughput
 */
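
/*
 * Illustrative sketch (the authoritative definition of lck_ticket_internal
 * lives in the locks headers): the code below assumes the lock state is a
 * pair of one-byte tickets that can also be addressed as a single 16-bit
 * word, roughly:
 *
 *	union {
 *		struct {
 *			uint8_t cticket;	// ticket currently being served
 *			uint8_t nticket;	// next ticket to be handed out
 *		};
 *		uint16_t tcurnext;		// both bytes, for one atomic RMW
 *	};
 *
 * The alignment asserts in lck_ticket_init() encode these same layout
 * assumptions; lck_ticket_lock() relies on them to dispense a ticket and
 * observe the serving ticket with a single fetch-add.
 */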

void
lck_ticket_init(lck_ticket_t *tlock, lck_grp_t *grp)
{
	memset(tlock, 0, sizeof(*tlock));
	/* Current ticket size limit--tickets can be trivially expanded
	 * to 16-bits if needed
	 */
	static_assert(MAX_CPUS < 256);

	__assert_only lck_ticket_internal *tlocki = &tlock->tu;
	/* Verify alignment */
	__assert_only uintptr_t tcn = (uintptr_t) &tlocki->tcurnext;
	__assert_only uintptr_t tc = (uintptr_t) &tlocki->cticket;
	__assert_only uintptr_t tn = (uintptr_t) &tlocki->nticket;

	assert(((tcn & 3) == 0) && (tcn == tc) && (tn == (tc + 1)));

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_TICKET);
}
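
/*
 * Hypothetical usage sketch, not part of this file. The exact prototypes
 * (in particular whether the lock and statistics calls take the lck_grp_t
 * argument) are declared in the locks headers and depend on the lock
 * statistics configuration, so treat this only as an illustration of the
 * intended call pattern:
 *
 *	lck_ticket_init(&my_lock, &my_grp);	// once, at setup
 *	lck_ticket_lock(&my_lock);		// FIFO spin, preemption disabled
 *	...critical section...
 *	lck_ticket_unlock(&my_lock);		// re-enables preemption
 */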

static void
tlock_mark_owned(lck_ticket_t *tlock, thread_t cthread)
{
	assert(tlock->lck_owner == 0);
	/* There is a small pre-emption disabled window (also interrupts masked
	 * for the pset lock) between the acquisition of the lock and the
	 * population of the advisory 'owner' thread field
	 * On architectures with a DCAS (ARM v8.1 or x86), conceivably we could
	 * populate the next ticket and the thread atomically, with
	 * possible overhead, potential loss of micro-architectural fwd progress
	 * properties of an unconditional fetch-add, and a 16 byte alignment requirement.
	 */
	__c11_atomic_store((_Atomic thread_t *)&tlock->lck_owner, cthread, __ATOMIC_RELAXED);
}

#if __arm__ || __arm64__
__unused static uint8_t
load_exclusive_acquire8(uint8_t *target)
{
	uint8_t value;
#if __arm__
	value = __builtin_arm_ldrex(target);
	__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
#else
	value = __builtin_arm_ldaex(target);    // ldaxr
	/* "Compiler barrier", no barrier instructions are emitted */
	atomic_signal_fence(memory_order_acquire);
#endif
	return value;
}
#endif /* __arm__ || __arm64__ */
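
/*
 * Note (added for clarity): on the __ARM_ENABLE_WFE_ contended path below,
 * the load-exclusive performed by this helper arms the CPU's exclusive
 * monitor on the serving-ticket byte, so the wait-for-event that follows a
 * miss can sleep until that byte is written (or an event is signalled)
 * rather than re-reading it in a tight load loop.
 */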

/* On contention, poll for ownership
 * Returns when the current ticket is observed equal to "mt"
 */
static void __attribute__((noinline))
tlock_contended(uint8_t *tp, uint8_t mt, lck_ticket_t *tlock, thread_t cthread LCK_GRP_ARG(lck_grp_t *grp))
{
	uint8_t cticket;
	uint64_t etime = 0, ctime = 0, stime = 0;

#if CONFIG_DTRACE || LOCK_STATS
	uint64_t begin = 0;
	boolean_t stat_enabled = lck_grp_ticket_spin_enabled(tlock LCK_GRP_ARG(grp));

	if (__improbable(stat_enabled)) {
		begin = mach_absolute_time();
	}
#endif /* CONFIG_DTRACE || LOCK_STATS */

	assertf(tlock->lck_owner != (uintptr_t) cthread, "Recursive ticket lock, owner: %p, current thread: %p", (void *) tlock->lck_owner, (void *) cthread);

	for (;;) {
		for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
#if (__ARM_ENABLE_WFE_)
			if ((cticket = load_exclusive_acquire8(tp)) != mt) {
				wait_for_event();
			} else {
				/* Some micro-architectures may benefit
				 * from disarming the monitor.
				 * TODO: determine specific micro-architectures
				 * which benefit, modern CPUs may not
				 */
				os_atomic_clear_exclusive();
				tlock_mark_owned(tlock, cthread);
#if CONFIG_DTRACE || LOCK_STATS
				lck_grp_ticket_update_miss(tlock LCK_GRP_ARG(grp));
				if (__improbable(stat_enabled)) {
					lck_grp_ticket_update_spin(tlock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
				}
#endif /* CONFIG_DTRACE || LOCK_STATS */
				return;
			}
#else /* __ARM_ENABLE_WFE_ */
#if defined(__x86_64__)
			__builtin_ia32_pause();
#endif /* defined(__x86_64__) */
			if ((cticket = __c11_atomic_load((_Atomic uint8_t *) tp, __ATOMIC_SEQ_CST)) == mt) {
				tlock_mark_owned(tlock, cthread);
#if CONFIG_DTRACE || LOCK_STATS
				if (__improbable(stat_enabled)) {
					lck_grp_ticket_update_spin(tlock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
				}
				lck_grp_ticket_update_miss(tlock LCK_GRP_ARG(grp));
#endif /* CONFIG_DTRACE || LOCK_STATS */
				return;
			}
#endif /* __ARM_ENABLE_WFE_ */
		}

		if (etime == 0) {
			stime = ml_get_timebase();
			etime = stime + TICKET_LOCK_PANIC_TIMEOUT;
		} else if ((ctime = ml_get_timebase()) >= etime) {
#if defined (__x86_64__)
			uintptr_t lowner = tlock->lck_owner;
			uint32_t ocpu = spinlock_timeout_NMI(lowner);
			panic("Ticket spinlock timeout; start: 0x%llx, end: 0x%llx, current: 0x%llx, lock: %p, *lock: 0x%x, waiting for 0x%x, pre-NMI owner: %p, current owner: %p, owner CPU: 0x%x", stime, etime, ctime, tp, *tp, mt, (void *) lowner, (void *) tlock->lck_owner, ocpu);
#else /* defined (__x86_64__) */
			panic("Ticket spinlock timeout; start: 0x%llx, end: 0x%llx, current: 0x%llx, lock: %p, *lock: 0x%x, waiting for 0x%x, owner: %p", stime, etime, ctime, tp, *tp, mt, (void *) tlock->lck_owner);
#endif /* defined (__x86_64__) */
		}
	}
}
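
/*
 * Note (added for clarity): lck_ticket_lock() below dispenses a ticket and
 * samples the serving ticket with one atomic fetch-add on the 16-bit
 * tcurnext word. Adding (1U << 8) increments the high byte, which on the
 * little-endian targets this file builds for is nticket at offset 1, while
 * the returned pre-add value snapshots both bytes. As an illustration, if
 * tcurnext were 0x0303 (cticket == nticket == 3), the caller is handed
 * ticket 3, the word becomes 0x0403, and the fast path is taken; a second
 * contender would then see cticket 3 != nticket 4 and spin in
 * tlock_contended() until cticket reaches 4.
 */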

void
(lck_ticket_lock)(lck_ticket_t *tlock LCK_GRP_ARG(lck_grp_t *grp))
{
	lck_ticket_internal *tlocki = &tlock->tu;
	thread_t cthread = current_thread();
	lck_ticket_internal tlocka;

	disable_preemption_for_thread(cthread);
	/* Atomically load both the current and next ticket, and increment the
	 * latter. Wrap of the ticket field is OK as long as the total
	 * number of contending CPUs is < maximum ticket
	 */
	tlocka.tcurnext = __c11_atomic_fetch_add((_Atomic uint16_t *)&tlocki->tcurnext, 1U << 8, __ATOMIC_ACQUIRE);

	/* Contention? branch to out of line contended block */
	if (__improbable(tlocka.cticket != tlocka.nticket)) {
		return tlock_contended(&tlocki->cticket, tlocka.nticket, tlock, cthread LCK_GRP_ARG(grp));
	}

	tlock_mark_owned(tlock, cthread);

	lck_grp_ticket_update_held(tlock LCK_GRP_ARG(grp));
}
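
/*
 * Note (added for clarity): in lck_ticket_unlock() below, only the lock
 * holder ever writes cticket (waiters merely read it), so the increment
 * does not need to be an atomic read-modify-write; it only needs release
 * semantics so the critical section cannot drift past the ticket handoff.
 * On x86 the compiler-level release fence plus the plain 'incb' suffices,
 * because TSO already keeps earlier stores ordered before the byte store;
 * on other architectures an explicit release store is used instead.
 */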

void
lck_ticket_unlock(lck_ticket_t *tlock)
{
	lck_ticket_internal *tlocki = &tlock->tu;

	assertf(tlock->lck_owner == (uintptr_t) current_thread(), "Ticket unlock non-owned, owner: %p", (void *) tlock->lck_owner);

	__c11_atomic_store((_Atomic uintptr_t *)&tlock->lck_owner, 0, __ATOMIC_RELAXED);

#if defined(__x86_64__)
	/* Communicate desired release semantics to the compiler */
	__c11_atomic_thread_fence(__ATOMIC_RELEASE);
	/* '+' constraint indicates a read modify write */
	/* I have not yet located a c11 primitive which synthesizes an 'INC <MEM>',
	 * i.e. a specified-granule non-atomic memory read-modify-write.
	 */
	__asm__ volatile ("incb %0" : "+m"(tlocki->cticket) :: "cc");
#else /* defined(__x86_64__) */
	uint8_t cticket = __c11_atomic_load((_Atomic uint8_t *) &tlocki->cticket, __ATOMIC_RELAXED);
	cticket++;
	__c11_atomic_store((_Atomic uint8_t *) &tlocki->cticket, cticket, __ATOMIC_RELEASE);
#endif /* defined(__x86_64__) */
	enable_preemption();

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_TICKET_LOCK_RELEASE, tlock);
#endif /* CONFIG_DTRACE */
}

void
lck_ticket_assert_owned(__assert_only lck_ticket_t *tlock)
{
	assertf(__c11_atomic_load((_Atomic thread_t *)&tlock->lck_owner, __ATOMIC_RELAXED) == current_thread(), "lck_ticket_assert_owned: owner %p, current: %p", (void *) tlock->lck_owner, current_thread());
}