bsd/kern/sys_ulock.c (apple/xnu, xnu-7195.101.1)
1/*
2 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <machine/atomic.h>
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/ioctl.h>
34#include <sys/file_internal.h>
35#include <sys/proc_internal.h>
36#include <sys/kernel.h>
37#include <sys/guarded.h>
38#include <sys/stat.h>
39#include <sys/malloc.h>
40#include <sys/sysproto.h>
41#include <sys/pthread_shims.h>
42
43#include <mach/mach_types.h>
44
45#include <kern/cpu_data.h>
46#include <kern/mach_param.h>
47#include <kern/kern_types.h>
48#include <kern/assert.h>
49#include <kern/zalloc.h>
50#include <kern/thread.h>
51#include <kern/clock.h>
52#include <kern/ledger.h>
53#include <kern/policy_internal.h>
54#include <kern/task.h>
55#include <kern/telemetry.h>
56#include <kern/waitq.h>
57#include <kern/sched_prim.h>
58#include <kern/turnstile.h>
60#include <kern/debug.h>
61
62#include <pexpert/pexpert.h>
63
64#define XNU_TEST_BITMAP
65#include <kern/bits.h>
66
67#include <os/hash.h>
68#include <sys/ulock.h>
69
70/*
71 * How ulock promotion works:
72 *
73 * There's a requested policy field on every thread called 'promotions', which
74 * expresses which ulock promotions are happening to this thread.
75 * The promotion priority saturates until the promotion count goes to 0.
76 *
77 * We also track effective promotion qos, which is the qos before clamping.
78 * This value is used for promoting a thread that another thread is waiting on,
79 * so that the lock owner reinflates to the right priority after unclamping.
80 *
81 * This also works for non-QoS threads, which can donate base priority to QoS
82 * and non-QoS threads alike.
83 *
84 * ulock wait applies a promotion to the owner communicated through
85 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
86 * there is still an owner. In ulock wake, if the waker is still the owner,
87 * then it clears its ownership and drops the boost. It does NOT transfer
88 * ownership/priority boost to the new thread. Instead, it selects the
89 * waiting thread with the highest base priority to be woken next, and
90 * relies on that thread to carry the torch for the other waiting threads.
91 */
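/*
 * Illustrative sketch (not part of this file): roughly how a userspace
 * unfair lock can drive these syscalls. The __ulock_wait()/__ulock_wake()
 * wrappers are private and normally reached through system libraries such
 * as libplatform (os_unfair_lock); the waiter tracking, error handling and
 * thread-port caching below are simplified assumptions, not the real
 * os_unfair_lock implementation.
 *
 *	#include <mach/mach.h>		// mach_thread_self()
 *	#include <stdatomic.h>
 *	#include <sys/ulock.h>		// UL_UNFAIR_LOCK, ULF_NO_ERRNO (private)
 *
 *	static _Atomic uint32_t lock_word;	// 0 == unlocked, else owner value
 *
 *	static void
 *	example_lock(void)
 *	{
 *		uint32_t self = mach_thread_self();	// real code caches this
 *		uint32_t seen = 0;
 *		while (!atomic_compare_exchange_strong(&lock_word, &seen, self)) {
 *			// Contended: block until the word changes. Passing the
 *			// owner value just observed lets ulock_wait boost it.
 *			__ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO,
 *			    &lock_word, seen, 0);
 *			seen = 0;
 *		}
 *	}
 *
 *	static void
 *	example_unlock(void)
 *	{
 *		atomic_store(&lock_word, 0);
 *		// Wake (and let the kernel promote) the highest-priority waiter.
 *		__ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock_word, 0);
 *	}
 */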
92
93static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");
94
95typedef lck_spin_t ull_lock_t;
96#define ull_lock_init(ull) lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
97#define ull_lock_destroy(ull) lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
98#define ull_lock(ull) lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
99#define ull_unlock(ull) lck_spin_unlock(&ull->ull_lock)
100#define ull_assert_owned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
101#define ull_assert_notowned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)
102
103#define ULOCK_TO_EVENT(ull) ((event_t)ull)
104#define EVENT_TO_ULOCK(event) ((ull_t *)event)
105
106typedef enum {
107 ULK_INVALID = 0,
108 ULK_UADDR,
109 ULK_XPROC,
110} ulk_type;
111
112typedef struct {
113 union {
114 struct __attribute__((packed)) {
115 user_addr_t ulk_addr;
116 pid_t ulk_pid;
117 };
118 struct __attribute__((packed)) {
119 uint64_t ulk_object;
120 uint64_t ulk_offset;
121 };
122 };
123 ulk_type ulk_key_type;
124} ulk_t;
125
126#define ULK_UADDR_LEN (sizeof(user_addr_t) + sizeof(pid_t))
127#define ULK_XPROC_LEN (sizeof(uint64_t) + sizeof(uint64_t))
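/*
 * These explicit lengths (together with the packed layouts above) are what
 * ULL_INDEX() feeds to the hash below, so only the meaningful key bytes for
 * a given key type are hashed -- never ulk_key_type or trailing padding.
 */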
128
129inline static bool
130ull_key_match(ulk_t *a, ulk_t *b)
131{
132 if (a->ulk_key_type != b->ulk_key_type) {
133 return false;
134 }
135
136 if (a->ulk_key_type == ULK_UADDR) {
137 return (a->ulk_pid == b->ulk_pid) &&
138 (a->ulk_addr == b->ulk_addr);
139 }
140
141 assert(a->ulk_key_type == ULK_XPROC);
142 return (a->ulk_object == b->ulk_object) &&
143 (a->ulk_offset == b->ulk_offset);
144}
145
146typedef struct ull {
147 /*
148 * ull_owner is the most recent known value for the owner of this ulock
149 * i.e. it may be out of date WRT the real value in userspace.
150 */
151 thread_t ull_owner; /* holds +1 thread reference */
152 ulk_t ull_key;
153 ull_lock_t ull_lock;
154 uint ull_bucket_index;
155 int32_t ull_nwaiters;
156 int32_t ull_refcount;
157 uint8_t ull_opcode;
158 struct turnstile *ull_turnstile;
159 queue_chain_t ull_hash_link;
160} ull_t;
161
162extern void ulock_initialize(void);
163
164#define ULL_MUST_EXIST 0x0001
165static void ull_put(ull_t *);
166
167static uint32_t ulock_adaptive_spin_usecs = 20;
168
169SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
170 &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");
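/*
 * The spin window is a runtime tunable. A minimal sketch of reading it from
 * userspace (hypothetical tool code, not part of xnu):
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int usecs = 0;
 *		size_t len = sizeof(usecs);
 *		if (sysctlbyname("kern.ulock_adaptive_spin_usecs",
 *		    &usecs, &len, NULL, 0) == 0) {
 *			printf("ulock adaptive spin: %d usecs\n", usecs);
 *		}
 *		return 0;
 *	}
 */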
171
172#if DEVELOPMENT || DEBUG
173static int ull_simulate_copyin_fault = 0;
174
175static void
176ull_dump(ull_t *ull)
177{
178 kprintf("ull\t%p\n", ull);
179 switch (ull->ull_key.ulk_key_type) {
180 case ULK_UADDR:
181 kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
182 kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
183 kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
184 break;
185 case ULK_XPROC:
186 kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
187 kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
188 kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
189 break;
190 default:
191 kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
192 break;
193 }
194 kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
195 kprintf("ull_refcount\t%d\n", ull->ull_refcount);
196 kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
197 kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
198 kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
199}
200#endif
201
202typedef struct ull_bucket {
203 queue_head_t ulb_head;
204 lck_spin_t ulb_lock;
205} ull_bucket_t;
206
207static int ull_hash_buckets;
208static ull_bucket_t *ull_bucket;
209static uint32_t ull_nzalloc = 0;
210static ZONE_DECLARE(ull_zone, "ulocks", sizeof(ull_t), ZC_NOENCRYPT | ZC_CACHING);
211
212#define ull_bucket_lock(i) lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
213#define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)
214
215static __inline__ uint32_t
216ull_hash_index(const void *key, size_t length)
217{
218 uint32_t hash = os_hash_jenkins(key, length);
219
220 hash &= (ull_hash_buckets - 1);
221
222 return hash;
223}
224
225#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)
226
227void
228ulock_initialize(void)
229{
230 assert(thread_max > 16);
231 /* Size ull_hash_buckets based on thread_max.
232 * Round up to nearest power of 2, then divide by 4
233 */
234 ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));
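	/*
	 * Worked example (assuming bit_ceiling() returns the ceiling log2):
	 * with thread_max = 2500 the next power of two is 4096 (2^12), so
	 * ull_hash_buckets = 1 << (12 - 2) = 1024, which satisfies the
	 * thread_max / 4 (== 625) assertion below.
	 */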
235
236 kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
237 assert(ull_hash_buckets >= thread_max / 4);
238
239 ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
240 ZALIGN_PTR);
241 assert(ull_bucket != NULL);
242
243 for (int i = 0; i < ull_hash_buckets; i++) {
244 queue_init(&ull_bucket[i].ulb_head);
245 lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
246 }
247}
248
249#if DEVELOPMENT || DEBUG
250/* Count the number of hash entries for a given pid.
251 * If pid == 0, dump the whole table.
252 */
253static int
254ull_hash_dump(pid_t pid)
255{
256 int count = 0;
257 if (pid == 0) {
258 kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
259 kprintf("%s>BEGIN\n", __FUNCTION__);
260 }
261 for (int i = 0; i < ull_hash_buckets; i++) {
262 ull_bucket_lock(i);
263 if (!queue_empty(&ull_bucket[i].ulb_head)) {
264 ull_t *elem;
265 if (pid == 0) {
266 kprintf("%s>index %d:\n", __FUNCTION__, i);
267 }
268 qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
269 if ((pid == 0) || ((elem->ull_key.ulk_key_type == ULK_UADDR) && (pid == elem->ull_key.ulk_pid))) {
270 ull_dump(elem);
271 count++;
272 }
273 }
274 }
275 ull_bucket_unlock(i);
276 }
277 if (pid == 0) {
278 kprintf("%s>END\n", __FUNCTION__);
279 ull_nzalloc = 0;
280 }
281 return count;
282}
283#endif
284
285static ull_t *
286ull_alloc(ulk_t *key)
287{
288 ull_t *ull = (ull_t *)zalloc(ull_zone);
289 assert(ull != NULL);
290
291 ull->ull_refcount = 1;
292 ull->ull_key = *key;
293 ull->ull_bucket_index = ULL_INDEX(key);
294 ull->ull_nwaiters = 0;
295 ull->ull_opcode = 0;
296
297 ull->ull_owner = THREAD_NULL;
298 ull->ull_turnstile = TURNSTILE_NULL;
299
300 ull_lock_init(ull);
301
302 ull_nzalloc++;
303 return ull;
304}
305
306static void
307ull_free(ull_t *ull)
308{
309 assert(ull->ull_owner == THREAD_NULL);
310 assert(ull->ull_turnstile == TURNSTILE_NULL);
311
312	ull_assert_notowned(ull);
313
314 ull_lock_destroy(ull);
315
316 zfree(ull_zone, ull);
317}
318
319/* Finds an existing ulock structure (ull_t), or creates a new one.
320 * If the ULL_MUST_EXIST flag is set, returns NULL instead of creating a new one.
321 * The ulock structure is returned with ull_lock locked.
322 */
323static ull_t *
324ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
325{
326 ull_t *ull = NULL;
327 uint i = ULL_INDEX(key);
328 ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
329 ull_t *elem;
330
331 ull_bucket_lock(i);
332 qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
333 ull_lock(elem);
334 if (ull_key_match(&elem->ull_key, key)) {
335 ull = elem;
336 break;
337 } else {
338 ull_unlock(elem);
339 }
340 }
341 if (ull == NULL) {
342 if (flags & ULL_MUST_EXIST) {
343 /* Must already exist (called from wake) */
344 ull_bucket_unlock(i);
345 assert(new_ull == NULL);
346 assert(unused_ull == NULL);
347 return NULL;
348 }
349
350 if (new_ull == NULL) {
351 /* Alloc above failed */
352 ull_bucket_unlock(i);
353 return NULL;
354 }
355
356 ull = new_ull;
357 ull_lock(ull);
358 enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
359 } else if (!(flags & ULL_MUST_EXIST)) {
360 assert(new_ull);
361 assert(unused_ull);
362 assert(*unused_ull == NULL);
363 *unused_ull = new_ull;
364 }
365
366 ull->ull_refcount++;
367
368 ull_bucket_unlock(i);
369
370 return ull; /* still locked */
371}
372
373/*
374 * Must be called with ull_lock held
375 */
376static void
377ull_put(ull_t *ull)
378{
379 ull_assert_owned(ull);
380 int refcount = --ull->ull_refcount;
381 assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
382 ull_unlock(ull);
383
384 if (refcount > 0) {
385 return;
386 }
387
388 ull_bucket_lock(ull->ull_bucket_index);
389 remqueue(&ull->ull_hash_link);
390 ull_bucket_unlock(ull->ull_bucket_index);
391
392 ull_free(ull);
393}
394
395extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
396extern vm_map_t current_map(void);
397extern boolean_t machine_thread_on_core(thread_t thread);
398
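/*
 * Resolve a user address to a (VM object id, offset) pair. This is what lets
 * the *_SHARED opcodes build an ULK_XPROC key: two processes mapping the same
 * shared page arrive at the same ull_t even though their mapping addresses
 * differ.
 */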
399static int
400uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
401{
402 kern_return_t ret;
403 vm_page_info_basic_data_t info;
404 mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
405 ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
406 if (ret != KERN_SUCCESS) {
407 return EINVAL;
408 }
409
410 if (objectp != NULL) {
411 *objectp = (uint64_t)info.object_id;
412 }
413 if (offsetp != NULL) {
414 *offsetp = (uint64_t)info.offset;
415 }
416
417 return 0;
418}
419
420static void ulock_wait_continue(void *, wait_result_t);
421static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);
422
423inline static int
424wait_result_to_return_code(wait_result_t wr)
425{
426 int ret = 0;
427
428 switch (wr) {
429 case THREAD_AWAKENED:
430 break;
431 case THREAD_TIMED_OUT:
432 ret = ETIMEDOUT;
433 break;
434 case THREAD_INTERRUPTED:
435 case THREAD_RESTART:
436 default:
437 ret = EINTR;
438 break;
439 }
440
441 return ret;
442}
443
444static int
445ulock_resolve_owner(uint32_t value, thread_t *owner)
446{
447 mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);
448
449 *owner = port_name_to_thread(owner_name,
450 PORT_TO_THREAD_IN_CURRENT_TASK |
451 PORT_TO_THREAD_NOT_CURRENT_THREAD);
452 if (*owner == THREAD_NULL) {
453 /*
454 * Translation failed - even though the lock value is up to date,
455 * whatever was stored in the lock wasn't actually a thread port.
456 */
457 return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
458 }
459 return 0;
460}
461
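/*
 * Legacy entry point: ulock_wait() takes a 32-bit timeout in microseconds and
 * forwards to ulock_wait2(), which expresses the timeout in nanoseconds
 * (0 still means wait forever).
 */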
462int
463ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
464{
465 struct ulock_wait2_args args2;
466
467 args2.operation = args->operation;
468 args2.addr = args->addr;
469 args2.value = args->value;
470 args2.timeout = (uint64_t)(args->timeout) * NSEC_PER_USEC;
471 args2.value2 = 0;
472
473 return ulock_wait2(p, &args2, retval);
474}
475
476int
477ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
478{
479 uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
480 uint flags = args->operation & UL_FLAGS_MASK;
481
482 if (flags & ULF_WAIT_CANCEL_POINT) {
483 __pthread_testcancel(1);
484 }
485
486 int ret = 0;
487 thread_t self = current_thread();
488 ulk_t key;
489
490 /* involved threads - each variable holds +1 ref if not null */
491 thread_t owner_thread = THREAD_NULL;
492 thread_t old_owner = THREAD_NULL;
493
494 ull_t *unused_ull = NULL;
495
496 if ((flags & ULF_WAIT_MASK) != flags) {
497 ret = EINVAL;
498 goto munge_retval;
499 }
500
501 bool set_owner = false;
502 bool xproc = false;
503 size_t lock_size = sizeof(uint32_t);
504 int copy_ret;
505
506 switch (opcode) {
507 case UL_UNFAIR_LOCK:
508 set_owner = true;
509 break;
510 case UL_COMPARE_AND_WAIT:
511 break;
512 case UL_COMPARE_AND_WAIT64:
513 lock_size = sizeof(uint64_t);
514 break;
515 case UL_COMPARE_AND_WAIT_SHARED:
516 xproc = true;
517 break;
518 case UL_COMPARE_AND_WAIT64_SHARED:
519 xproc = true;
520 lock_size = sizeof(uint64_t);
521 break;
522 default:
523 ret = EINVAL;
524 goto munge_retval;
525 }
526
527 uint64_t value = 0;
528
529 if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
530 ret = EINVAL;
531 goto munge_retval;
532 }
533
534 if (xproc) {
535 uint64_t object = 0;
536 uint64_t offset = 0;
537
538 ret = uaddr_findobj(args->addr, &object, &offset);
539 if (ret) {
540 ret = EINVAL;
541 goto munge_retval;
542 }
543 key.ulk_key_type = ULK_XPROC;
544 key.ulk_object = object;
545 key.ulk_offset = offset;
546 } else {
547 key.ulk_key_type = ULK_UADDR;
548 key.ulk_pid = p->p_pid;
549 key.ulk_addr = args->addr;
550 }
551
552 if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
553 /*
554 * Attempt the copyin outside of the lock once,
555 *
556 * If it doesn't match (which is common), return right away.
557 *
558 * If it matches, resolve the current owner, and if it is on core,
559 * spin a bit waiting for the value to change. If the owner isn't on
560 * core, or if the value stays stable, then go on with the regular
561 * blocking code.
562 */
563 uint64_t end = 0;
564 uint32_t u32;
565
566 ret = copyin_atomic32(args->addr, &u32);
567 if (ret || u32 != args->value) {
568 goto munge_retval;
569 }
570 for (;;) {
571 if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
572 break;
573 }
574
575 /* owner_thread may have a +1 starting here */
576
577 if (!machine_thread_on_core(owner_thread)) {
578 break;
579 }
580 if (end == 0) {
581 clock_interval_to_deadline(ulock_adaptive_spin_usecs,
582 NSEC_PER_USEC, &end);
583 } else if (mach_absolute_time() > end) {
584 break;
585 }
586 if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
587 goto munge_retval;
588 }
589 }
590 }
591
592 ull_t *ull = ull_get(&key, 0, &unused_ull);
593 if (ull == NULL) {
594 ret = ENOMEM;
595 goto munge_retval;
596 }
597 /* ull is locked */
598
599 ull->ull_nwaiters++;
600
601 if (ull->ull_opcode == 0) {
602 ull->ull_opcode = opcode;
603 } else if (ull->ull_opcode != opcode) {
604 ret = EDOM;
605 goto out_locked;
606 }
607
608 /*
609 * We don't want this copyin to get wedged behind VM operations,
610 * but we have to read the userspace value under the ull lock for correctness.
611 *
612 * Until <rdar://problem/24999882> exists,
613 * holding the ull spinlock across copyin forces any
614 * vm_fault we encounter to fail.
615 */
616
617 /* copyin_atomicXX always checks alignment */
618
619 if (lock_size == 4) {
620 uint32_t u32;
621 copy_ret = copyin_atomic32(args->addr, &u32);
622 value = u32;
623 } else {
624 copy_ret = copyin_atomic64(args->addr, &value);
625 }
626
627#if DEVELOPMENT || DEBUG
628 /* Occasionally simulate copyin finding the user address paged out */
629 if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
630 static _Atomic int fault_inject = 0;
631 if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
632 copy_ret = EFAULT;
633 }
634 }
635#endif
636 if (copy_ret != 0) {
637 /* copyin() will return an error if the access to the user addr would have faulted,
638 * so just return and let the user level code fault it in.
639 */
640 ret = copy_ret;
641 goto out_locked;
642 }
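	/*
	 * Illustrative sketch of the userspace side of this contract (an
	 * assumption about typical callers, not code from libplatform): on a
	 * fault error the caller touches the word to page it back in, then
	 * retries the wait. Assumes <errno.h>, <stdatomic.h> and the private
	 * <sys/ulock.h> are available.
	 *
	 *	static _Atomic uint32_t word;	// hypothetical shared lock word
	 *
	 *	static void
	 *	wait_for_change(uint32_t expected)
	 *	{
	 *		for (;;) {
	 *			int rc = __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO,
	 *			    &word, expected, 0);
	 *			if (rc == -EFAULT) {
	 *				(void)atomic_load(&word);	// fault the page back in
	 *				continue;
	 *			}
	 *			break;	// woken, timed out, or value no longer matched
	 *		}
	 *	}
	 */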
643
644 if (value != args->value) {
645 /* Lock value has changed from expected so bail out */
646 goto out_locked;
647 }
648
649 if (set_owner) {
650 if (owner_thread == THREAD_NULL) {
651 ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
652 if (ret == EOWNERDEAD) {
653 /*
654 * Translation failed - even though the lock value is up to date,
655 * whatever was stored in the lock wasn't actually a thread port.
656 */
657 goto out_locked;
658 }
659 /* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
660 ret = 0;
661 }
662 /* owner_thread has a +1 reference */
663
664 /*
665 * At this point, I know:
666 * a) owner_thread is definitely the current owner, because I just read the value
667 * b) owner_thread is either:
668 * i) holding the user lock or
669 * ii) has just unlocked the user lock after I looked
670 * and is heading toward the kernel to call ull_wake.
671 * If so, it's going to have to wait for the ull mutex.
672 *
673 * Therefore, I can ask the turnstile to promote its priority, and I can rely
674 * on it to come by later to issue the wakeup and lose its promotion.
675 */
676
677 /* Return the +1 ref from the ull_owner field */
678 old_owner = ull->ull_owner;
679 ull->ull_owner = THREAD_NULL;
680
681 if (owner_thread != THREAD_NULL) {
682 /* The ull_owner field now owns a +1 ref on owner_thread */
683 thread_reference(owner_thread);
684 ull->ull_owner = owner_thread;
685 }
686 }
687
688 wait_result_t wr;
689 uint64_t timeout = args->timeout; /* nanoseconds */
690 uint64_t deadline = TIMEOUT_WAIT_FOREVER;
691 wait_interrupt_t interruptible = THREAD_ABORTSAFE;
692 struct turnstile *ts;
693
694 ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
695 TURNSTILE_NULL, TURNSTILE_ULOCK);
696 thread_set_pending_block_hint(self, kThreadWaitUserLock);
697
698 if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
699 interruptible |= THREAD_WAIT_NOREPORT;
700 }
701
702 if (timeout) {
703 nanoseconds_to_deadline(timeout, &deadline);
704 }
705
706 turnstile_update_inheritor(ts, owner_thread,
707 (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
708
709 wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
710 interruptible, deadline);
711
712 if (wr == THREAD_WAITING) {
713 uthread_t uthread = (uthread_t)get_bsdthread_info(self);
714 uthread->uu_save.uus_ulock_wait_data.ull = ull;
715 uthread->uu_save.uus_ulock_wait_data.retval = retval;
716 uthread->uu_save.uus_ulock_wait_data.flags = flags;
717 uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
718 uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
719 }
720
721 ull_unlock(ull);
722
723 if (unused_ull) {
724 ull_free(unused_ull);
725 unused_ull = NULL;
726 }
727
728 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
729
730 if (wr == THREAD_WAITING) {
731 if (set_owner && owner_thread != THREAD_NULL) {
732 thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
733 } else {
734 assert(owner_thread == THREAD_NULL);
735 thread_block_parameter(ulock_wait_continue, ull);
736 }
737 /* NOT REACHED */
738 }
739
740 ret = wait_result_to_return_code(wr);
741
742 ull_lock(ull);
743 turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
744
745out_locked:
746 ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
747 owner_thread = NULL;
748
749 if (unused_ull) {
750 ull_free(unused_ull);
751 unused_ull = NULL;
752 }
753
754 assert(*retval >= 0);
755
756munge_retval:
757 if (owner_thread) {
758 thread_deallocate(owner_thread);
759 }
760 if (ret == ESTALE) {
761 ret = 0;
762 }
763 if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
764 *retval = -ret;
765 ret = 0;
766 }
767 return ret;
768}
769
770/*
771 * Must be called with ull_lock held
772 */
773static void
774ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
775{
776 ull_assert_owned(ull);
777
778 thread_t old_lingering_owner = THREAD_NULL;
779
780 *retval = --ull->ull_nwaiters;
781 if (ull->ull_nwaiters == 0) {
782 /*
783 * If the wait was canceled early, we might need to
784 * clear out the lingering owner reference before
785 * freeing the ull.
786 */
787 old_lingering_owner = ull->ull_owner;
788 ull->ull_owner = THREAD_NULL;
789
790 memset(&ull->ull_key, 0, sizeof ull->ull_key);
791 ull->ull_refcount--;
792 assert(ull->ull_refcount > 0);
793 }
794 ull_put(ull);
795
796	/* Needs to be called after dropping the interlock */
797 turnstile_cleanup();
798
799 if (owner_thread != THREAD_NULL) {
800 thread_deallocate(owner_thread);
801 }
802
803 if (old_owner != THREAD_NULL) {
804 thread_deallocate(old_owner);
805 }
806
807 if (old_lingering_owner != THREAD_NULL) {
808 thread_deallocate(old_lingering_owner);
809 }
810
811 assert(*retval >= 0);
812}
813
814__attribute__((noreturn))
815static void
816ulock_wait_continue(__unused void * parameter, wait_result_t wr)
817{
818 thread_t self = current_thread();
819 uthread_t uthread = (uthread_t)get_bsdthread_info(self);
820 int ret = 0;
821
822 ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
823 int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
824 uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
825 thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
826 thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;
827
828 ret = wait_result_to_return_code(wr);
829
830 ull_lock(ull);
831 turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
832
833 ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
834
835 if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
836 *retval = -ret;
837 ret = 0;
838 }
839
840 unix_syscall_return(ret);
841}
842
843int
844ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
845{
846 uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
847 uint flags = args->operation & UL_FLAGS_MASK;
848 int ret = 0;
849 ulk_t key;
850
851 /* involved threads - each variable holds +1 ref if not null */
852 thread_t wake_thread = THREAD_NULL;
853
854#if DEVELOPMENT || DEBUG
855 if (opcode == UL_DEBUG_HASH_DUMP_PID) {
856 *retval = ull_hash_dump(p->p_pid);
857 return ret;
858 } else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
859 *retval = ull_hash_dump(0);
860 return ret;
861 } else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
862 ull_simulate_copyin_fault = (int)(args->wake_value);
863 return ret;
864 }
865#endif
866
867 bool set_owner = false;
868 bool allow_non_owner = false;
869 bool xproc = false;
870
871 switch (opcode) {
872 case UL_UNFAIR_LOCK:
873 set_owner = true;
874 break;
875 case UL_COMPARE_AND_WAIT:
876 case UL_COMPARE_AND_WAIT64:
877 break;
878 case UL_COMPARE_AND_WAIT_SHARED:
879 case UL_COMPARE_AND_WAIT64_SHARED:
880 xproc = true;
881 break;
882 default:
883 ret = EINVAL;
884 goto munge_retval;
885 }
886
887 if ((flags & ULF_WAKE_MASK) != flags) {
888 ret = EINVAL;
889 goto munge_retval;
890 }
891
892 if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
893 ret = EINVAL;
894 goto munge_retval;
895 }
896
897 if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
898 if (!set_owner) {
899 ret = EINVAL;
900 goto munge_retval;
901 }
902
903 allow_non_owner = true;
904 }
905
906 if (args->addr == 0) {
907 ret = EINVAL;
908 goto munge_retval;
909 }
910
911 if (xproc) {
912 uint64_t object = 0;
913 uint64_t offset = 0;
914
915 ret = uaddr_findobj(args->addr, &object, &offset);
916 if (ret) {
917 ret = EINVAL;
918 goto munge_retval;
919 }
920 key.ulk_key_type = ULK_XPROC;
921 key.ulk_object = object;
922 key.ulk_offset = offset;
923 } else {
924 key.ulk_key_type = ULK_UADDR;
925 key.ulk_pid = p->p_pid;
926 key.ulk_addr = args->addr;
927 }
928
929 if (flags & ULF_WAKE_THREAD) {
930 mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
931 wake_thread = port_name_to_thread(wake_thread_name,
932 PORT_TO_THREAD_IN_CURRENT_TASK |
933 PORT_TO_THREAD_NOT_CURRENT_THREAD);
934 if (wake_thread == THREAD_NULL) {
935 ret = ESRCH;
936 goto munge_retval;
937 }
938 }
939
940 ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
941 thread_t new_owner = THREAD_NULL;
942 struct turnstile *ts = TURNSTILE_NULL;
943 thread_t cleanup_thread = THREAD_NULL;
944
945 if (ull == NULL) {
946 ret = ENOENT;
947 goto munge_retval;
948 }
949 /* ull is locked */
950
951 if (opcode != ull->ull_opcode) {
952 ret = EDOM;
953 goto out_ull_put;
954 }
955
956 if (set_owner) {
957 if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
958 /*
959 * If the current thread isn't the known owner,
960 * then this wake call was late to the party,
961 * and the kernel already knows who owns the lock.
962 *
963			 * The current owner already knows the lock is contended
964			 * and will redrive wakes, so just bail out.
965 */
966 goto out_ull_put;
967 }
968 } else {
969 assert(ull->ull_owner == THREAD_NULL);
970 }
971
972 ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
973 TURNSTILE_NULL, TURNSTILE_ULOCK);
974 assert(ts != TURNSTILE_NULL);
975
976 if (flags & ULF_WAKE_THREAD) {
977 kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
978 CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
979 wake_thread, THREAD_AWAKENED);
980 if (kr != KERN_SUCCESS) {
981 assert(kr == KERN_NOT_WAITING);
982 ret = EALREADY;
983 }
984 } else if (flags & ULF_WAKE_ALL) {
985 if (set_owner) {
986 turnstile_update_inheritor(ts, THREAD_NULL,
987 TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
988 }
989 waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
990 THREAD_AWAKENED, 0);
991 } else if (set_owner) {
992 /*
993 * The turnstile waitq is priority ordered,
994 * and will wake up the highest priority waiter
995 * and set it as the inheritor for us.
996 */
997 new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
998 CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
999 THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
1000 } else {
1001 waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
1002 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1003 }
1004
1005 if (set_owner) {
1006 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1007 cleanup_thread = ull->ull_owner;
1008 ull->ull_owner = new_owner;
1009 }
1010
1011 turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);
1012
1013out_ull_put:
1014 ull_put(ull);
1015
1016 if (ts != TURNSTILE_NULL) {
1017		/* Needs to be called after dropping the interlock */
1018 turnstile_cleanup();
1019 }
1020
1021 if (cleanup_thread != THREAD_NULL) {
1022 thread_deallocate(cleanup_thread);
1023 }
1024
1025munge_retval:
1026 if (wake_thread != THREAD_NULL) {
1027 thread_deallocate(wake_thread);
1028 }
1029
1030 if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
1031 *retval = -ret;
1032 ret = 0;
1033 }
1034 return ret;
1035}
1036
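/*
 * Debugger (kdp/stackshot) callback: given the wait event a blocked thread is
 * parked on, report the owning thread (for owner-aware opcodes) and the user
 * address as the thread_waitinfo context.
 */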
1037void
1038kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
1039{
1040 ull_t *ull = EVENT_TO_ULOCK(event);
1041
1042 zone_require(ull_zone, ull);
1043
1044 switch (ull->ull_opcode) {
1045 case UL_UNFAIR_LOCK:
1046 case UL_UNFAIR_LOCK64_SHARED:
1047 waitinfo->owner = thread_tid(ull->ull_owner);
1048 waitinfo->context = ull->ull_key.ulk_addr;
1049 break;
1050 case UL_COMPARE_AND_WAIT:
1051 case UL_COMPARE_AND_WAIT64:
1052 case UL_COMPARE_AND_WAIT_SHARED:
1053 case UL_COMPARE_AND_WAIT64_SHARED:
1054 waitinfo->owner = 0;
1055 waitinfo->context = ull->ull_key.ulk_addr;
1056 break;
1057 default:
1058 panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
1059 break;
1060 }
1061 return;
1062}