/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/atomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/turnstile.h>
#include <kern/debug.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <os/hash.h>
#include <sys/ulock.h>

/*
 * How ulock promotion works:
 *
 * There’s a requested policy field on every thread called ‘promotions’, which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner. In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost. It does NOT transfer
 * ownership/priority boost to the new thread. Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
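
/*
 * Illustrative sketch (not built as part of this file): how userspace
 * typically reaches these syscalls. The __ulock_wait/__ulock_wake prototypes
 * are the ones exported through <sys/ulock.h>; treat the exact spelling as an
 * assumption if reading this outside XNU.
 *
 *   extern int __ulock_wait(uint32_t operation, void *addr,
 *       uint64_t value, uint32_t timeout);    // timeout in microseconds
 *   extern int __ulock_wake(uint32_t operation, void *addr,
 *       uint64_t wake_value);
 *
 *   // Waiter: block while the word still holds the expected value.
 *   while (os_atomic_load(&word, relaxed) == expected) {
 *       __ulock_wait(UL_COMPARE_AND_WAIT, &word, expected, 0);
 *   }
 *
 *   // Waker: publish the new value, then wake one blocked waiter.
 *   os_atomic_store(&word, newval, release);
 *   __ulock_wake(UL_COMPARE_AND_WAIT, &word, 0);
 */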

static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");

typedef lck_spin_t ull_lock_t;
#define ull_lock_init(ull)      lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
#define ull_lock_destroy(ull)   lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull)           lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull)         lck_spin_unlock(&ull->ull_lock)
#define ull_assert_owned(ull)   LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
#define ull_assert_notowned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)

#define ULOCK_TO_EVENT(ull)   ((event_t)ull)
#define EVENT_TO_ULOCK(event) ((ull_t *)event)

typedef enum {
    ULK_INVALID = 0,
    ULK_UADDR,
    ULK_XPROC,
} ulk_type;

typedef struct {
    union {
        struct __attribute__((packed)) {
            user_addr_t ulk_addr;
            pid_t       ulk_pid;
        };
        struct __attribute__((packed)) {
            uint64_t    ulk_object;
            uint64_t    ulk_offset;
        };
    };
    ulk_type ulk_key_type;
} ulk_t;

#define ULK_UADDR_LEN (sizeof(user_addr_t) + sizeof(pid_t))
#define ULK_XPROC_LEN (sizeof(uint64_t) + sizeof(uint64_t))
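
/*
 * Only the packed key bytes participate in hashing: ULK_UADDR_LEN and
 * ULK_XPROC_LEN deliberately exclude ulk_key_type and any trailing padding,
 * so ULL_INDEX() below hashes exactly the bytes that ull_key_match()
 * compares.
 */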

inline static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
    if (a->ulk_key_type != b->ulk_key_type) {
        return false;
    }

    if (a->ulk_key_type == ULK_UADDR) {
        return (a->ulk_pid == b->ulk_pid) &&
               (a->ulk_addr == b->ulk_addr);
    }

    assert(a->ulk_key_type == ULK_XPROC);
    return (a->ulk_object == b->ulk_object) &&
           (a->ulk_offset == b->ulk_offset);
}

typedef struct ull {
    /*
     * ull_owner is the most recent known value for the owner of this ulock
     * i.e. it may be out of date WRT the real value in userspace.
     */
    thread_t         ull_owner; /* holds +1 thread reference */
    ulk_t            ull_key;
    ull_lock_t       ull_lock;
    uint             ull_bucket_index;
    int32_t          ull_nwaiters;
    int32_t          ull_refcount;
    uint8_t          ull_opcode;
    struct turnstile *ull_turnstile;
    queue_chain_t    ull_hash_link;
} ull_t;
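
/*
 * Lifetime note: ull_refcount holds one reference per waiter plus one per
 * in-flight lookup. ull_get() takes a reference and returns the ull locked;
 * the matching ull_put() drops it, unhashing and freeing the ull once the
 * count reaches zero.
 */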

extern void ulock_initialize(void);

#define ULL_MUST_EXIST 0x0001
static void ull_put(ull_t *);

static uint32_t ulock_adaptive_spin_usecs = 20;

SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");

#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;

static void
ull_dump(ull_t *ull)
{
    kprintf("ull\t%p\n", ull);
    switch (ull->ull_key.ulk_key_type) {
    case ULK_UADDR:
        kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
        kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
        kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
        break;
    case ULK_XPROC:
        kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
        kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
        kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
        break;
    default:
        kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
        break;
    }
    kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
    kprintf("ull_refcount\t%d\n", ull->ull_refcount);
    kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
    kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
    kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
#endif

typedef struct ull_bucket {
    queue_head_t ulb_head;
    lck_spin_t   ulb_lock;
} ull_bucket_t;

static int ull_hash_buckets;
static ull_bucket_t *ull_bucket;
static uint32_t ull_nzalloc = 0;
static ZONE_DECLARE(ull_zone, "ulocks", sizeof(ull_t), ZC_NOENCRYPT | ZC_CACHING);

#define ull_bucket_lock(i)   lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)

static __inline__ uint32_t
ull_hash_index(const void *key, size_t length)
{
    uint32_t hash = os_hash_jenkins(key, length);

    hash &= (ull_hash_buckets - 1);

    return hash;
}
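
/*
 * The mask with (ull_hash_buckets - 1) is only a valid modulo because
 * ulock_initialize() below sizes the table to a power of two.
 */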

#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)

void
ulock_initialize(void)
{
    assert(thread_max > 16);
    /* Size ull_hash_buckets based on thread_max.
     * Round up to nearest power of 2, then divide by 4
     */
    ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

    kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
    assert(ull_hash_buckets >= thread_max / 4);

    ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
        ZALIGN_PTR);
    assert(ull_bucket != NULL);

    for (int i = 0; i < ull_hash_buckets; i++) {
        queue_init(&ull_bucket[i].ulb_head);
        lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
    }
}
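
/*
 * Worked example (illustrative numbers, not a fixed configuration): with
 * thread_max = 2500, the next power of two is 4096 = 1 << 12, so
 * ull_hash_buckets = 1 << (12 - 2) = 1024, which satisfies the assert above
 * (1024 >= 2500 / 4).
 */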

#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given pid.
 * if pid==0, dump the whole table.
 */
static int
ull_hash_dump(pid_t pid)
{
    int count = 0;
    if (pid == 0) {
        kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
        kprintf("%s>BEGIN\n", __FUNCTION__);
    }
    for (int i = 0; i < ull_hash_buckets; i++) {
        ull_bucket_lock(i);
        if (!queue_empty(&ull_bucket[i].ulb_head)) {
            ull_t *elem;
            if (pid == 0) {
                kprintf("%s>index %d:\n", __FUNCTION__, i);
            }
            qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
                if ((pid == 0) || ((elem->ull_key.ulk_key_type == ULK_UADDR) && (pid == elem->ull_key.ulk_pid))) {
                    ull_dump(elem);
                    count++;
                }
            }
        }
        ull_bucket_unlock(i);
    }
    if (pid == 0) {
        kprintf("%s>END\n", __FUNCTION__);
        ull_nzalloc = 0;
    }
    return count;
}
#endif

static ull_t *
ull_alloc(ulk_t *key)
{
    ull_t *ull = (ull_t *)zalloc(ull_zone);
    assert(ull != NULL);

    ull->ull_refcount = 1;
    ull->ull_key = *key;
    ull->ull_bucket_index = ULL_INDEX(key);
    ull->ull_nwaiters = 0;
    ull->ull_opcode = 0;

    ull->ull_owner = THREAD_NULL;
    ull->ull_turnstile = TURNSTILE_NULL;

    ull_lock_init(ull);

    ull_nzalloc++;
    return ull;
}

static void
ull_free(ull_t *ull)
{
    assert(ull->ull_owner == THREAD_NULL);
    assert(ull->ull_turnstile == TURNSTILE_NULL);

    ull_assert_notowned(ull);

    ull_lock_destroy(ull);

    zfree(ull_zone, ull);
}

/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
    ull_t *ull = NULL;
    uint i = ULL_INDEX(key);
    ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
    ull_t *elem;

    ull_bucket_lock(i);
    qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
        ull_lock(elem);
        if (ull_key_match(&elem->ull_key, key)) {
            ull = elem;
            break;
        } else {
            ull_unlock(elem);
        }
    }
    if (ull == NULL) {
        if (flags & ULL_MUST_EXIST) {
            /* Must already exist (called from wake) */
            ull_bucket_unlock(i);
            assert(new_ull == NULL);
            assert(unused_ull == NULL);
            return NULL;
        }

        if (new_ull == NULL) {
            /* Alloc above failed */
            ull_bucket_unlock(i);
            return NULL;
        }

        ull = new_ull;
        ull_lock(ull);
        enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
    } else if (!(flags & ULL_MUST_EXIST)) {
        assert(new_ull);
        assert(unused_ull);
        assert(*unused_ull == NULL);
        *unused_ull = new_ull;
    }

    ull->ull_refcount++;

    ull_bucket_unlock(i);

    return ull; /* still locked */
}
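
/*
 * Lock ordering in ull_get() is bucket lock first, then ull lock; the bucket
 * lock is dropped before returning, so a successful caller holds only the
 * ull lock plus the +1 reference, and must eventually call ull_put().
 */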

/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
    ull_assert_owned(ull);
    int refcount = --ull->ull_refcount;
    assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
    ull_unlock(ull);

    if (refcount > 0) {
        return;
    }

    ull_bucket_lock(ull->ull_bucket_index);
    remqueue(&ull->ull_hash_link);
    ull_bucket_unlock(ull->ull_bucket_index);

    ull_free(ull);
}

extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
extern vm_map_t current_map(void);
extern boolean_t machine_thread_on_core(thread_t thread);

static int
uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
    kern_return_t ret;
    vm_page_info_basic_data_t info;
    mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
    ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
    if (ret != KERN_SUCCESS) {
        return EINVAL;
    }

    if (objectp != NULL) {
        *objectp = (uint64_t)info.object_id;
    }
    if (offsetp != NULL) {
        *offsetp = (uint64_t)info.offset;
    }

    return 0;
}
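
/*
 * The (object_id, offset) pair returned above is what makes the UL_*_SHARED
 * opcodes work across processes: two mappings of the same shared page
 * resolve to the same ULK_XPROC key even though their user addresses differ.
 */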

static void ulock_wait_continue(void *, wait_result_t);
static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);

inline static int
wait_result_to_return_code(wait_result_t wr)
{
    int ret = 0;

    switch (wr) {
    case THREAD_AWAKENED:
        break;
    case THREAD_TIMED_OUT:
        ret = ETIMEDOUT;
        break;
    case THREAD_INTERRUPTED:
    case THREAD_RESTART:
    default:
        ret = EINTR;
        break;
    }

    return ret;
}

static int
ulock_resolve_owner(uint32_t value, thread_t *owner)
{
    mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);

    *owner = port_name_to_thread(owner_name,
        PORT_TO_THREAD_IN_CURRENT_TASK |
        PORT_TO_THREAD_NOT_CURRENT_THREAD);
    if (*owner == THREAD_NULL) {
        /*
         * Translation failed - even though the lock value is up to date,
         * whatever was stored in the lock wasn't actually a thread port.
         */
        return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
    }
    return 0;
}
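
/*
 * For UL_UNFAIR_LOCK, userspace stores the owner's mach thread port name in
 * the lock word, which is how the kernel can recover a thread_t from the
 * value alone: ulock_owner_value_to_port_name() converts the lock value back
 * into a port name, and port_name_to_thread() takes a +1 reference on the
 * named thread. Per the code above, a MACH_PORT_DEAD name yields ESRCH and
 * any other untranslatable value yields EOWNERDEAD.
 */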

int
ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
    struct ulock_wait2_args args2;

    args2.operation = args->operation;
    args2.addr = args->addr;
    args2.value = args->value;
    args2.timeout = (uint64_t)(args->timeout) * NSEC_PER_USEC;
    args2.value2 = 0;

    return ulock_wait2(p, &args2, retval);
}
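
/*
 * ulock_wait() above is a compatibility shim: it widens the legacy 32-bit
 * microsecond timeout into ulock_wait2()'s 64-bit nanosecond timeout and
 * passes a zero value2, so all of the real work happens in ulock_wait2().
 */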

int
ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
{
    uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
    uint flags = args->operation & UL_FLAGS_MASK;

    if (flags & ULF_WAIT_CANCEL_POINT) {
        __pthread_testcancel(1);
    }

    int ret = 0;
    thread_t self = current_thread();
    ulk_t key;

    /* involved threads - each variable holds +1 ref if not null */
    thread_t owner_thread = THREAD_NULL;
    thread_t old_owner = THREAD_NULL;

    ull_t *unused_ull = NULL;

    if ((flags & ULF_WAIT_MASK) != flags) {
        ret = EINVAL;
        goto munge_retval;
    }

    bool set_owner = false;
    bool xproc = false;
    size_t lock_size = sizeof(uint32_t);
    int copy_ret;

    switch (opcode) {
    case UL_UNFAIR_LOCK:
        set_owner = true;
        break;
    case UL_COMPARE_AND_WAIT:
        break;
    case UL_COMPARE_AND_WAIT64:
        lock_size = sizeof(uint64_t);
        break;
    case UL_COMPARE_AND_WAIT_SHARED:
        xproc = true;
        break;
    case UL_COMPARE_AND_WAIT64_SHARED:
        xproc = true;
        lock_size = sizeof(uint64_t);
        break;
    default:
        ret = EINVAL;
        goto munge_retval;
    }

    uint64_t value = 0;

    if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
        ret = EINVAL;
        goto munge_retval;
    }

    if (xproc) {
        uint64_t object = 0;
        uint64_t offset = 0;

        ret = uaddr_findobj(args->addr, &object, &offset);
        if (ret) {
            ret = EINVAL;
            goto munge_retval;
        }
        key.ulk_key_type = ULK_XPROC;
        key.ulk_object = object;
        key.ulk_offset = offset;
    } else {
        key.ulk_key_type = ULK_UADDR;
        key.ulk_pid = p->p_pid;
        key.ulk_addr = args->addr;
    }

    if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
        /*
         * Attempt the copyin outside of the lock once.
         *
         * If it doesn't match (which is common), return right away.
         *
         * If it matches, resolve the current owner, and if it is on core,
         * spin a bit waiting for the value to change. If the owner isn't on
         * core, or if the value stays stable, then go on with the regular
         * blocking code.
         */
        uint64_t end = 0;
        uint32_t u32;

        ret = copyin_atomic32(args->addr, &u32);
        if (ret || u32 != args->value) {
            goto munge_retval;
        }
        for (;;) {
            if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
                break;
            }

            /* owner_thread may have a +1 starting here */

            if (!machine_thread_on_core(owner_thread)) {
                break;
            }
            if (end == 0) {
                clock_interval_to_deadline(ulock_adaptive_spin_usecs,
                    NSEC_PER_USEC, &end);
            } else if (mach_absolute_time() > end) {
                break;
            }
            if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
                goto munge_retval;
            }
        }
    }
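
    /*
     * Both early exits above return through munge_retval without blocking:
     * either the lock word no longer matched (ret == 0, so userspace simply
     * re-evaluates the lock) or the copyin failed. The spin is purely an
     * optimization, so giving up and falling back to userspace is always
     * safe; note that owner_thread may carry a +1 reference into
     * munge_retval, which drops it.
     */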

    ull_t *ull = ull_get(&key, 0, &unused_ull);
    if (ull == NULL) {
        ret = ENOMEM;
        goto munge_retval;
    }
    /* ull is locked */

    ull->ull_nwaiters++;

    if (ull->ull_opcode == 0) {
        ull->ull_opcode = opcode;
    } else if (ull->ull_opcode != opcode) {
        ret = EDOM;
        goto out_locked;
    }

    /*
     * We don't want this copyin to get wedged behind VM operations,
     * but we have to read the userspace value under the ull lock for correctness.
     *
     * Until <rdar://problem/24999882> exists,
     * holding the ull spinlock across copyin forces any
     * vm_fault we encounter to fail.
     */

    /* copyin_atomicXX always checks alignment */

    if (lock_size == 4) {
        uint32_t u32;
        copy_ret = copyin_atomic32(args->addr, &u32);
        value = u32;
    } else {
        copy_ret = copyin_atomic64(args->addr, &value);
    }

#if DEVELOPMENT || DEBUG
    /* Occasionally simulate copyin finding the user address paged out */
    if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
        static _Atomic int fault_inject = 0;
        if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
            copy_ret = EFAULT;
        }
    }
#endif
    if (copy_ret != 0) {
        /* copyin() will return an error if the access to the user addr would have faulted,
         * so just return and let the user level code fault it in.
         */
        ret = copy_ret;
        goto out_locked;
    }

    if (value != args->value) {
        /* Lock value has changed from expected so bail out */
        goto out_locked;
    }

    if (set_owner) {
        if (owner_thread == THREAD_NULL) {
            ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
            if (ret == EOWNERDEAD) {
                /*
                 * Translation failed - even though the lock value is up to date,
                 * whatever was stored in the lock wasn't actually a thread port.
                 */
                goto out_locked;
            }
            /* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
            ret = 0;
        }
        /* owner_thread has a +1 reference */

        /*
         * At this point, I know:
         * a) owner_thread is definitely the current owner, because I just read the value
         * b) owner_thread is either:
         *    i) holding the user lock or
         *    ii) has just unlocked the user lock after I looked
         *        and is heading toward the kernel to call ull_wake.
         *        If so, it's going to have to wait for the ull mutex.
         *
         * Therefore, I can ask the turnstile to promote its priority, and I can rely
         * on it to come by later to issue the wakeup and lose its promotion.
         */

        /* Return the +1 ref from the ull_owner field */
        old_owner = ull->ull_owner;
        ull->ull_owner = THREAD_NULL;

        if (owner_thread != THREAD_NULL) {
            /* The ull_owner field now owns a +1 ref on owner_thread */
            thread_reference(owner_thread);
            ull->ull_owner = owner_thread;
        }
    }

    wait_result_t wr;
    uint64_t timeout = args->timeout; /* nanoseconds */
    uint64_t deadline = TIMEOUT_WAIT_FOREVER;
    wait_interrupt_t interruptible = THREAD_ABORTSAFE;
    struct turnstile *ts;

    ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
        TURNSTILE_NULL, TURNSTILE_ULOCK);
    thread_set_pending_block_hint(self, kThreadWaitUserLock);

    if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
        interruptible |= THREAD_WAIT_NOREPORT;
    }

    if (timeout) {
        nanoseconds_to_deadline(timeout, &deadline);
    }

    turnstile_update_inheritor(ts, owner_thread,
        (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

    wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
        interruptible, deadline);

    if (wr == THREAD_WAITING) {
        uthread_t uthread = (uthread_t)get_bsdthread_info(self);
        uthread->uu_save.uus_ulock_wait_data.ull = ull;
        uthread->uu_save.uus_ulock_wait_data.retval = retval;
        uthread->uu_save.uus_ulock_wait_data.flags = flags;
        uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
        uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
    }

    ull_unlock(ull);

    if (unused_ull) {
        ull_free(unused_ull);
        unused_ull = NULL;
    }

    turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

    if (wr == THREAD_WAITING) {
        if (set_owner && owner_thread != THREAD_NULL) {
            thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
        } else {
            assert(owner_thread == THREAD_NULL);
            thread_block_parameter(ulock_wait_continue, ull);
        }
        /* NOT REACHED */
    }

    ret = wait_result_to_return_code(wr);

    ull_lock(ull);
    turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_locked:
    ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
    owner_thread = NULL;

    if (unused_ull) {
        ull_free(unused_ull);
        unused_ull = NULL;
    }

    assert(*retval >= 0);

munge_retval:
    if (owner_thread) {
        thread_deallocate(owner_thread);
    }
    if (ret == ESTALE) {
        ret = 0;
    }
    if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
        *retval = -ret;
        ret = 0;
    }
    return ret;
}

/*
 * Must be called with ull_lock held
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
    ull_assert_owned(ull);

    thread_t old_lingering_owner = THREAD_NULL;

    *retval = --ull->ull_nwaiters;
    if (ull->ull_nwaiters == 0) {
        /*
         * If the wait was canceled early, we might need to
         * clear out the lingering owner reference before
         * freeing the ull.
         */
        old_lingering_owner = ull->ull_owner;
        ull->ull_owner = THREAD_NULL;

        memset(&ull->ull_key, 0, sizeof ull->ull_key);
        ull->ull_refcount--;
        assert(ull->ull_refcount > 0);
    }
    ull_put(ull);

    /* Needs to be called after dropping the interlock */
    turnstile_cleanup();

    if (owner_thread != THREAD_NULL) {
        thread_deallocate(owner_thread);
    }

    if (old_owner != THREAD_NULL) {
        thread_deallocate(old_owner);
    }

    if (old_lingering_owner != THREAD_NULL) {
        thread_deallocate(old_lingering_owner);
    }

    assert(*retval >= 0);
}

__attribute__((noreturn))
static void
ulock_wait_continue(__unused void *parameter, wait_result_t wr)
{
    thread_t self = current_thread();
    uthread_t uthread = (uthread_t)get_bsdthread_info(self);
    int ret = 0;

    ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
    int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
    uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
    thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
    thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

    ret = wait_result_to_return_code(wr);

    ull_lock(ull);
    turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

    ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

    if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
        *retval = -ret;
        ret = 0;
    }

    unix_syscall_return(ret);
}

int
ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
{
    uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
    uint flags = args->operation & UL_FLAGS_MASK;
    int ret = 0;
    ulk_t key;

    /* involved threads - each variable holds +1 ref if not null */
    thread_t wake_thread = THREAD_NULL;

#if DEVELOPMENT || DEBUG
    if (opcode == UL_DEBUG_HASH_DUMP_PID) {
        *retval = ull_hash_dump(p->p_pid);
        return ret;
    } else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
        *retval = ull_hash_dump(0);
        return ret;
    } else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
        ull_simulate_copyin_fault = (int)(args->wake_value);
        return ret;
    }
#endif

    bool set_owner = false;
    bool allow_non_owner = false;
    bool xproc = false;

    switch (opcode) {
    case UL_UNFAIR_LOCK:
        set_owner = true;
        break;
    case UL_COMPARE_AND_WAIT:
    case UL_COMPARE_AND_WAIT64:
        break;
    case UL_COMPARE_AND_WAIT_SHARED:
    case UL_COMPARE_AND_WAIT64_SHARED:
        xproc = true;
        break;
    default:
        ret = EINVAL;
        goto munge_retval;
    }

    if ((flags & ULF_WAKE_MASK) != flags) {
        ret = EINVAL;
        goto munge_retval;
    }

    if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
        ret = EINVAL;
        goto munge_retval;
    }

    if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
        if (!set_owner) {
            ret = EINVAL;
            goto munge_retval;
        }

        allow_non_owner = true;
    }

    if (args->addr == 0) {
        ret = EINVAL;
        goto munge_retval;
    }

    if (xproc) {
        uint64_t object = 0;
        uint64_t offset = 0;

        ret = uaddr_findobj(args->addr, &object, &offset);
        if (ret) {
            ret = EINVAL;
            goto munge_retval;
        }
        key.ulk_key_type = ULK_XPROC;
        key.ulk_object = object;
        key.ulk_offset = offset;
    } else {
        key.ulk_key_type = ULK_UADDR;
        key.ulk_pid = p->p_pid;
        key.ulk_addr = args->addr;
    }

    if (flags & ULF_WAKE_THREAD) {
        mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
        wake_thread = port_name_to_thread(wake_thread_name,
            PORT_TO_THREAD_IN_CURRENT_TASK |
            PORT_TO_THREAD_NOT_CURRENT_THREAD);
        if (wake_thread == THREAD_NULL) {
            ret = ESRCH;
            goto munge_retval;
        }
    }

    ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
    thread_t new_owner = THREAD_NULL;
    struct turnstile *ts = TURNSTILE_NULL;
    thread_t cleanup_thread = THREAD_NULL;

    if (ull == NULL) {
        ret = ENOENT;
        goto munge_retval;
    }
    /* ull is locked */

    if (opcode != ull->ull_opcode) {
        ret = EDOM;
        goto out_ull_put;
    }

    if (set_owner) {
        if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
            /*
             * If the current thread isn't the known owner,
             * then this wake call was late to the party,
             * and the kernel already knows who owns the lock.
             *
             * This current owner already knows the lock is contended
             * and will redrive wakes; just bail out.
             */
            goto out_ull_put;
        }
    } else {
        assert(ull->ull_owner == THREAD_NULL);
    }

    ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
        TURNSTILE_NULL, TURNSTILE_ULOCK);
    assert(ts != TURNSTILE_NULL);

    if (flags & ULF_WAKE_THREAD) {
        kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
            CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
            wake_thread, THREAD_AWAKENED);
        if (kr != KERN_SUCCESS) {
            assert(kr == KERN_NOT_WAITING);
            ret = EALREADY;
        }
    } else if (flags & ULF_WAKE_ALL) {
        if (set_owner) {
            turnstile_update_inheritor(ts, THREAD_NULL,
                TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
        }
        waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
            THREAD_AWAKENED, 0);
    } else if (set_owner) {
        /*
         * The turnstile waitq is priority ordered,
         * and will wake up the highest priority waiter
         * and set it as the inheritor for us.
         */
        new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
            CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
            THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
    } else {
        waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
            THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
    }

    if (set_owner) {
        turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
        cleanup_thread = ull->ull_owner;
        ull->ull_owner = new_owner;
    }

    turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_ull_put:
    ull_put(ull);

    if (ts != TURNSTILE_NULL) {
        /* Needs to be called after dropping the interlock */
        turnstile_cleanup();
    }

    if (cleanup_thread != THREAD_NULL) {
        thread_deallocate(cleanup_thread);
    }

munge_retval:
    if (wake_thread != THREAD_NULL) {
        thread_deallocate(wake_thread);
    }

    if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
        *retval = -ret;
        ret = 0;
    }
    return ret;
}

void
kdp_ulock_find_owner(__unused struct waitq *waitq, event64_t event, thread_waitinfo_t *waitinfo)
{
    ull_t *ull = EVENT_TO_ULOCK(event);

    zone_require(ull_zone, ull);

    switch (ull->ull_opcode) {
    case UL_UNFAIR_LOCK:
    case UL_UNFAIR_LOCK64_SHARED:
        waitinfo->owner = thread_tid(ull->ull_owner);
        waitinfo->context = ull->ull_key.ulk_addr;
        break;
    case UL_COMPARE_AND_WAIT:
    case UL_COMPARE_AND_WAIT64:
    case UL_COMPARE_AND_WAIT_SHARED:
    case UL_COMPARE_AND_WAIT64_SHARED:
        waitinfo->owner = 0;
        waitinfo->context = ull->ull_key.ulk_addr;
        break;
    default:
        panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void *)ull);
        break;
    }
    return;
}