[apple/xnu.git] / osfmk / kern / locks.c (xnu-6153.81.5)
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define LOCK_PRIVATE 1
58
59 #include <mach_ldebug.h>
60 #include <debug.h>
61
62 #include <mach/kern_return.h>
63 #include <mach/mach_host_server.h>
64 #include <mach_debug/lockgroup_info.h>
65
66 #include <kern/lock_stat.h>
67 #include <kern/locks.h>
68 #include <kern/misc_protos.h>
69 #include <kern/kalloc.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
73 #include <kern/debug.h>
74 #include <libkern/section_keywords.h>
75 #include <machine/atomic.h>
76 #include <machine/machine_cpu.h>
77 #include <string.h>
78
79 #include <sys/kdebug.h>
80
81 #define LCK_MTX_SLEEP_CODE 0
82 #define LCK_MTX_SLEEP_DEADLINE_CODE 1
83 #define LCK_MTX_LCK_WAIT_CODE 2
84 #define LCK_MTX_UNLCK_WAKEUP_CODE 3
85
86 #if MACH_LDEBUG
87 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
88 #else
89 #define ALIGN_TEST(p, t) do{}while(0)
90 #endif
91
92 #define NOINLINE __attribute__((noinline))
93
94 #define ordered_load_hw(lock) os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
95 #define ordered_store_hw(lock, value) os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
96
97
98 queue_head_t lck_grp_queue;
99 unsigned int lck_grp_cnt;
100
101 decl_lck_mtx_data(, lck_grp_lock);
102 static lck_mtx_ext_t lck_grp_lock_ext;
103
104 SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
105
106 lck_grp_attr_t LockDefaultGroupAttr;
107 lck_grp_t LockCompatGroup;
108 lck_attr_t LockDefaultLckAttr;
109
110 #if CONFIG_DTRACE && __SMP__
111 #if defined (__x86_64__)
112 uint64_t dtrace_spin_threshold = 500; // 500ns
113 #elif defined(__arm__) || defined(__arm64__)
114 uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // 500ns
115 #endif
116 #endif
117
118 uintptr_t
119 unslide_for_kdebug(void* object)
120 {
121 if (__improbable(kdebug_enable)) {
122 return VM_KERNEL_UNSLIDE_OR_PERM(object);
123 } else {
124 return 0;
125 }
126 }
127
128 /*
129 * Routine: lck_mod_init
130 */
131
132 void
133 lck_mod_init(
134 void)
135 {
136 /*
137 * Obtain "lcks" options: this currently controls lock statistics
138 */
139 if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof(LcksOpts))) {
140 LcksOpts = 0;
141 }
142
143
144 #if (DEVELOPMENT || DEBUG) && defined(__x86_64__)
145 if (!PE_parse_boot_argn("-disable_mtx_chk", &LckDisablePreemptCheck, sizeof(LckDisablePreemptCheck))) {
146 LckDisablePreemptCheck = 0;
147 }
148 #endif /* (DEVELOPMENT || DEBUG) && defined(__x86_64__) */
149
150 queue_init(&lck_grp_queue);
151
152 /*
153 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
154 * grabbing the lck_grp_lock before it is initialized.
155 */
156
157 bzero(&LockCompatGroup, sizeof(lck_grp_t));
158 (void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);
159
160 LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;
161 if (LcksOpts & enaLkStat) {
162 LockCompatGroup.lck_grp_attr |= LCK_GRP_ATTR_STAT;
163 }
164 if (LcksOpts & enaLkTimeStat) {
165 LockCompatGroup.lck_grp_attr |= LCK_GRP_ATTR_TIME_STAT;
166 }
167
168 os_ref_init(&LockCompatGroup.lck_grp_refcnt, NULL);
169
170 enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
171 lck_grp_cnt = 1;
172
173 lck_grp_attr_setdefault(&LockDefaultGroupAttr);
174 lck_attr_setdefault(&LockDefaultLckAttr);
175
176 lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
177 }
178
179 /*
180 * Routine: lck_grp_attr_alloc_init
181 */
182
183 lck_grp_attr_t *
184 lck_grp_attr_alloc_init(
185 void)
186 {
187 lck_grp_attr_t *attr;
188
189 if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0) {
190 lck_grp_attr_setdefault(attr);
191 }
192
193 return attr;
194 }
195
196
197 /*
198 * Routine: lck_grp_attr_setdefault
199 */
200
201 void
202 lck_grp_attr_setdefault(
203 lck_grp_attr_t *attr)
204 {
205 if (LcksOpts & enaLkStat) {
206 attr->grp_attr_val = LCK_GRP_ATTR_STAT;
207 } else {
208 attr->grp_attr_val = 0;
209 }
210 }
211
212
213 /*
214 * Routine: lck_grp_attr_setstat
215 */
216
217 void
218 lck_grp_attr_setstat(
219 lck_grp_attr_t *attr)
220 {
221 os_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT, relaxed);
222 }
223
224
225 /*
226 * Routine: lck_grp_attr_free
227 */
228
229 void
230 lck_grp_attr_free(
231 lck_grp_attr_t *attr)
232 {
233 kfree(attr, sizeof(lck_grp_attr_t));
234 }
235
236
237 /*
238 * Routine: lck_grp_alloc_init
239 */
240
241 lck_grp_t *
242 lck_grp_alloc_init(
243 const char* grp_name,
244 lck_grp_attr_t *attr)
245 {
246 lck_grp_t *grp;
247
248 if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0) {
249 lck_grp_init(grp, grp_name, attr);
250 }
251
252 return grp;
253 }
254
255 /*
256 * Routine: lck_grp_init
257 */
258
259 void
260 lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
261 {
262 /* make sure locking infrastructure has been initialized */
263 assert(lck_grp_cnt > 0);
264
265 bzero((void *)grp, sizeof(lck_grp_t));
266
267 (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
268
269 if (attr != LCK_GRP_ATTR_NULL) {
270 grp->lck_grp_attr = attr->grp_attr_val;
271 } else {
272 grp->lck_grp_attr = 0;
273 if (LcksOpts & enaLkStat) {
274 grp->lck_grp_attr |= LCK_GRP_ATTR_STAT;
275 }
276 if (LcksOpts & enaLkTimeStat) {
277 grp->lck_grp_attr |= LCK_GRP_ATTR_TIME_STAT;
278 }
279 }
280
281 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
282 lck_grp_stats_t *stats = &grp->lck_grp_stats;
283
284 #if LOCK_STATS
285 lck_grp_stat_enable(&stats->lgss_spin_held);
286 lck_grp_stat_enable(&stats->lgss_spin_miss);
287 #endif /* LOCK_STATS */
288
289 lck_grp_stat_enable(&stats->lgss_mtx_held);
290 lck_grp_stat_enable(&stats->lgss_mtx_miss);
291 lck_grp_stat_enable(&stats->lgss_mtx_direct_wait);
292 }
293 if (grp->lck_grp_attr & LCK_GRP_ATTR_TIME_STAT) {
294 #if LOCK_STATS
295 lck_grp_stats_t *stats = &grp->lck_grp_stats;
296 lck_grp_stat_enable(&stats->lgss_spin_spin);
297 #endif /* LOCK_STATS */
298 }
299
300 os_ref_init(&grp->lck_grp_refcnt, NULL);
301
302 lck_mtx_lock(&lck_grp_lock);
303 enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
304 lck_grp_cnt++;
305 lck_mtx_unlock(&lck_grp_lock);
306 }
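/*
 * Example (illustrative sketch of a hypothetical client; foo_lck_grp and
 * foo_lock are made-up names): a subsystem typically allocates one group at
 * init time and tags every lock it creates with it, so that the per-group
 * counters and statistics maintained here are attributed to that subsystem.
 *
 *	static lck_grp_t	*foo_lck_grp;
 *	static lck_mtx_t	*foo_lock;
 *
 *	void
 *	foo_locks_init(void)
 *	{
 *		lck_grp_attr_t *grp_attr = lck_grp_attr_alloc_init();
 *		foo_lck_grp = lck_grp_alloc_init("foo", grp_attr);
 *		lck_grp_attr_free(grp_attr);
 *		foo_lock = lck_mtx_alloc_init(foo_lck_grp, LCK_ATTR_NULL);
 *	}
 */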
307
308 /*
309 * Routine: lck_grp_free
310 */
311
312 void
313 lck_grp_free(
314 lck_grp_t *grp)
315 {
316 lck_mtx_lock(&lck_grp_lock);
317 lck_grp_cnt--;
318 (void)remque((queue_entry_t)grp);
319 lck_mtx_unlock(&lck_grp_lock);
320 lck_grp_deallocate(grp);
321 }
322
323
324 /*
325 * Routine: lck_grp_reference
326 */
327
328 void
329 lck_grp_reference(
330 lck_grp_t *grp)
331 {
332 os_ref_retain(&grp->lck_grp_refcnt);
333 }
334
335
336 /*
337 * Routine: lck_grp_deallocate
338 */
339
340 void
341 lck_grp_deallocate(
342 lck_grp_t *grp)
343 {
344 if (os_ref_release(&grp->lck_grp_refcnt) != 0) {
345 return;
346 }
347
348 kfree(grp, sizeof(lck_grp_t));
349 }
350
351 /*
352 * Routine: lck_grp_lckcnt_incr
353 */
354
355 void
356 lck_grp_lckcnt_incr(
357 lck_grp_t *grp,
358 lck_type_t lck_type)
359 {
360 unsigned int *lckcnt;
361
362 switch (lck_type) {
363 case LCK_TYPE_SPIN:
364 lckcnt = &grp->lck_grp_spincnt;
365 break;
366 case LCK_TYPE_MTX:
367 lckcnt = &grp->lck_grp_mtxcnt;
368 break;
369 case LCK_TYPE_RW:
370 lckcnt = &grp->lck_grp_rwcnt;
371 break;
372 default:
373 return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
374 }
375
376 os_atomic_inc(lckcnt, relaxed);
377 }
378
379 /*
380 * Routine: lck_grp_lckcnt_decr
381 */
382
383 void
384 lck_grp_lckcnt_decr(
385 lck_grp_t *grp,
386 lck_type_t lck_type)
387 {
388 unsigned int *lckcnt;
389 int updated;
390
391 switch (lck_type) {
392 case LCK_TYPE_SPIN:
393 lckcnt = &grp->lck_grp_spincnt;
394 break;
395 case LCK_TYPE_MTX:
396 lckcnt = &grp->lck_grp_mtxcnt;
397 break;
398 case LCK_TYPE_RW:
399 lckcnt = &grp->lck_grp_rwcnt;
400 break;
401 default:
402 panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
403 return;
404 }
405
406 updated = os_atomic_dec(lckcnt, relaxed);
407 assert(updated >= 0);
408 }
409
410 /*
411 * Routine: lck_attr_alloc_init
412 */
413
414 lck_attr_t *
415 lck_attr_alloc_init(
416 void)
417 {
418 lck_attr_t *attr;
419
420 if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0) {
421 lck_attr_setdefault(attr);
422 }
423
424 return attr;
425 }
426
427
428 /*
429 * Routine: lck_attr_setdefault
430 */
431
432 void
433 lck_attr_setdefault(
434 lck_attr_t *attr)
435 {
436 #if __arm__ || __arm64__
437 /* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
438 attr->lck_attr_val = LCK_ATTR_NONE;
439 #elif __i386__ || __x86_64__
440 #if !DEBUG
441 if (LcksOpts & enaLkDeb) {
442 attr->lck_attr_val = LCK_ATTR_DEBUG;
443 } else {
444 attr->lck_attr_val = LCK_ATTR_NONE;
445 }
446 #else
447 attr->lck_attr_val = LCK_ATTR_DEBUG;
448 #endif /* !DEBUG */
449 #else
450 #error Unknown architecture.
451 #endif /* __arm__ */
452 }
453
454
455 /*
456 * Routine: lck_attr_setdebug
457 */
458 void
459 lck_attr_setdebug(
460 lck_attr_t *attr)
461 {
462 os_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG, relaxed);
463 }
464
465 /*
466 * Routine: lck_attr_cleardebug
467 */
468 void
469 lck_attr_cleardebug(
470 lck_attr_t *attr)
471 {
472 os_atomic_andnot(&attr->lck_attr_val, LCK_ATTR_DEBUG, relaxed);
473 }
474
475
476 /*
477 * Routine: lck_attr_rw_shared_priority
478 */
479 void
480 lck_attr_rw_shared_priority(
481 lck_attr_t *attr)
482 {
483 os_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY, relaxed);
484 }
485
486
487 /*
488 * Routine: lck_attr_free
489 */
490 void
491 lck_attr_free(
492 lck_attr_t *attr)
493 {
494 kfree(attr, sizeof(lck_attr_t));
495 }
496
497 /*
498 * Routine: hw_lock_init
499 *
500 * Initialize a hardware lock.
501 */
502 void
503 hw_lock_init(hw_lock_t lock)
504 {
505 ordered_store_hw(lock, 0);
506 }
507
508 #if __SMP__
509 static inline bool
510 hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
511 {
512 #if OS_ATOMIC_USE_LLSC
513 uintptr_t oldval;
514 os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
515 if (oldval != 0) {
516 wait_for_event(); // clears the monitor so we don't need give_up()
517 return false;
518 }
519 });
520 return true;
521 #else // !OS_ATOMIC_USE_LLSC
522 #if OS_ATOMIC_HAS_LLSC
523 uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
524 if (oldval != 0) {
525 wait_for_event(); // clears the monitor so we don't need give_up()
526 return false;
527 }
528 #endif // OS_ATOMIC_HAS_LLSC
529 return os_atomic_cmpxchg(&lock->lock_data, 0, newval, acquire);
530 #endif // !OS_ATOMIC_USE_LLSC
531 }
532
533 /*
534 * Routine: hw_lock_lock_contended
535 *
536 * Spin until lock is acquired or timeout expires.
537 * timeout is in mach_absolute_time ticks. Called with
538 * preemption disabled.
539 */
540 static unsigned int NOINLINE
541 hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic LCK_GRP_ARG(lck_grp_t *grp))
542 {
543 uint64_t end = 0;
544 uintptr_t holder = lock->lock_data;
545 int i;
546
547 if (timeout == 0) {
548 timeout = LOCK_PANIC_TIMEOUT;
549 }
550 #if CONFIG_DTRACE || LOCK_STATS
551 uint64_t begin = 0;
552 boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
553 #endif /* CONFIG_DTRACE || LOCK_STATS */
554
555 #if LOCK_STATS || CONFIG_DTRACE
556 if (__improbable(stat_enabled)) {
557 begin = mach_absolute_time();
558 }
559 #endif /* LOCK_STATS || CONFIG_DTRACE */
560 for (;;) {
561 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
562 cpu_pause();
563 #if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
564 holder = ordered_load_hw(lock);
565 if (holder != 0) {
566 continue;
567 }
568 #endif
569 if (hw_lock_trylock_contended(lock, data)) {
570 #if CONFIG_DTRACE || LOCK_STATS
571 if (__improbable(stat_enabled)) {
572 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
573 }
574 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
575 #endif /* CONFIG_DTRACE || LOCK_STATS */
576 return 1;
577 }
578 }
579 if (end == 0) {
580 end = ml_get_timebase() + timeout;
581 } else if (ml_get_timebase() >= end) {
582 break;
583 }
584 }
585 if (do_panic) {
586 // Capture the actual time spent blocked, which may be higher than the timeout
587 // if a misbehaving interrupt stole this thread's CPU time.
588 panic("Spinlock timeout after %llu ticks, %p = %lx",
589 (ml_get_timebase() - end + timeout), lock, holder);
590 }
591 return 0;
592 }
593 #endif // __SMP__
594
595 void *
596 hw_wait_while_equals(void **address, void *current)
597 {
598 #if __SMP__
599 void *v;
600 uint64_t end = 0;
601
602 for (;;) {
603 for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
604 cpu_pause();
605 #if OS_ATOMIC_HAS_LLSC
606 v = os_atomic_load_exclusive(address, relaxed);
607 if (__probable(v != current)) {
608 os_atomic_clear_exclusive();
609 return v;
610 }
611 wait_for_event();
612 #else
613 v = os_atomic_load(address, relaxed);
614 if (__probable(v != current)) {
615 return v;
616 }
617 #endif // OS_ATOMIC_HAS_LLSC
618 }
619 if (end == 0) {
620 end = ml_get_timebase() + LOCK_PANIC_TIMEOUT;
621 } else if (ml_get_timebase() >= end) {
622 panic("Wait while equals timeout @ *%p == %p", address, v);
623 }
624 }
625 #else // !__SMP__
626 panic("Value at %p is %p", address, current);
627 __builtin_unreachable();
628 #endif // !__SMP__
629 }
630
631 static inline void
632 hw_lock_lock_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
633 {
634 uintptr_t state;
635
636 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
637 #if __SMP__
638 #if LOCK_PRETEST
639 if (ordered_load_hw(lock)) {
640 goto contended;
641 }
642 #endif // LOCK_PRETEST
643 if (hw_lock_trylock_contended(lock, state)) {
644 goto end;
645 }
646 #if LOCK_PRETEST
647 contended:
648 #endif // LOCK_PRETEST
649 hw_lock_lock_contended(lock, state, 0, spinlock_timeout_panic LCK_GRP_ARG(grp));
650 end:
651 #else // __SMP__
652 if (lock->lock_data) {
653 panic("Spinlock held %p", lock);
654 }
655 lock->lock_data = state;
656 #endif // __SMP__
657 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
658
659 return;
660 }
661
662 /*
663 * Routine: hw_lock_lock
664 *
665 * Acquire lock, spinning until it becomes available,
666 * return with preemption disabled.
667 */
668 void
669 (hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
670 {
671 thread_t thread = current_thread();
672 disable_preemption_for_thread(thread);
673 hw_lock_lock_internal(lock, thread LCK_GRP_ARG(grp));
674 }
675
676 /*
677 * Routine: hw_lock_lock_nopreempt
678 *
679 * Acquire lock, spinning until it becomes available.
680 */
681 void
682 (hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
683 {
684 thread_t thread = current_thread();
685 if (__improbable(!preemption_disabled_for_thread(thread))) {
686 panic("Attempt to take no-preempt spinlock %p in preemptible context", lock);
687 }
688 hw_lock_lock_internal(lock, thread LCK_GRP_ARG(grp));
689 }
690
691 /*
692 * Routine: hw_lock_to
693 *
694 * Acquire lock, spinning until it becomes available or timeout.
695 * Timeout is in mach_absolute_time ticks, return with
696 * preemption disabled.
697 */
698 unsigned
699 int
700 (hw_lock_to)(hw_lock_t lock, uint64_t timeout LCK_GRP_ARG(lck_grp_t *grp))
701 {
702 thread_t thread;
703 uintptr_t state;
704 unsigned int success = 0;
705
706 thread = current_thread();
707 disable_preemption_for_thread(thread);
708 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
709 #if __SMP__
710 #if LOCK_PRETEST
711 if (ordered_load_hw(lock)) {
712 goto contended;
713 }
714 #endif // LOCK_PRETEST
715 if (hw_lock_trylock_contended(lock, state)) {
716 success = 1;
717 goto end;
718 }
719 #if LOCK_PRETEST
720 contended:
721 #endif // LOCK_PRETEST
722 success = hw_lock_lock_contended(lock, state, timeout, FALSE LCK_GRP_ARG(grp));
723 end:
724 #else // __SMP__
725 (void)timeout;
726 if (ordered_load_hw(lock) == 0) {
727 ordered_store_hw(lock, state);
728 success = 1;
729 }
730 #endif // __SMP__
731 if (success) {
732 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
733 }
734 return success;
735 }
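/*
 * Example (illustrative sketch of a hypothetical caller; foo_hw_lock and
 * foo_grp are made-up names, and the group-taking hw_lock_to() macro form
 * from kern/locks.h is assumed): bound the spin to roughly 100us instead of
 * spinning until the panic timeout.
 *
 *	uint64_t timeout_ticks;
 *
 *	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &timeout_ticks);
 *	if (hw_lock_to(&foo_hw_lock, timeout_ticks, &foo_grp)) {
 *		// ... critical section, preemption disabled ...
 *		hw_lock_unlock(&foo_hw_lock);
 *	} else {
 *		// timed out without acquiring the lock
 *	}
 */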
736
737 /*
738 * Routine: hw_lock_try
739 *
740 * returns with preemption disabled on success.
741 */
742 static inline unsigned int
743 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
744 {
745 int success = 0;
746
747 #if __SMP__
748 #if LOCK_PRETEST
749 if (ordered_load_hw(lock)) {
750 goto failed;
751 }
752 #endif // LOCK_PRETEST
753 success = os_atomic_cmpxchg(&lock->lock_data, 0,
754 LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK, acquire);
755 #else
756 if (lock->lock_data == 0) {
757 lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
758 success = 1;
759 }
760 #endif // __SMP__
761
762 #if LOCK_PRETEST
763 failed:
764 #endif // LOCK_PRETEST
765 if (success) {
766 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
767 }
768 return success;
769 }
770
771 unsigned
772 int
773 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
774 {
775 thread_t thread = current_thread();
776 disable_preemption_for_thread(thread);
777 unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
778 if (!success) {
779 enable_preemption();
780 }
781 return success;
782 }
783
784 unsigned
785 int
786 (hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
787 {
788 thread_t thread = current_thread();
789 if (__improbable(!preemption_disabled_for_thread(thread))) {
790 panic("Attempt to test no-preempt spinlock %p in preemptible context", lock);
791 }
792 return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
793 }
794
795 /*
796 * Routine: hw_lock_unlock
797 *
798 * Unconditionally release lock, release preemption level.
799 */
800 static inline void
801 hw_lock_unlock_internal(hw_lock_t lock)
802 {
803 os_atomic_store(&lock->lock_data, 0, release);
804 #if __arm__ || __arm64__
805 // ARM tests are only for open-source exclusion
806 set_event();
807 #endif // __arm__ || __arm64__
808 #if CONFIG_DTRACE
809 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
810 #endif /* CONFIG_DTRACE */
811 }
812
813 void
814 (hw_lock_unlock)(hw_lock_t lock)
815 {
816 hw_lock_unlock_internal(lock);
817 enable_preemption();
818 }
819
820 void
821 (hw_lock_unlock_nopreempt)(hw_lock_t lock)
822 {
823 if (__improbable(!preemption_disabled_for_thread(current_thread()))) {
824 panic("Attempt to release no-preempt spinlock %p in preemptible context", lock);
825 }
826 hw_lock_unlock_internal(lock);
827 }
828
829 /*
830 * Routine hw_lock_held, doesn't change preemption state.
831 * N.B. Racy, of course.
832 */
833 unsigned int
834 hw_lock_held(hw_lock_t lock)
835 {
836 return ordered_load_hw(lock) != 0;
837 }
838
839 #if __SMP__
840 static unsigned int
841 hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp));
842 #endif
843
844 static inline unsigned int
845 hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
846 {
847 unsigned int success = 0;
848 uint32_t mask = (1 << bit);
849 #if !__SMP__
850 uint32_t state;
851 #endif
852
853 #if __SMP__
854 if (__improbable(!hw_atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE))) {
855 success = hw_lock_bit_to_contended(lock, mask, timeout LCK_GRP_ARG(grp));
856 } else {
857 success = 1;
858 }
859 #else // __SMP__
860 (void)timeout;
861 state = ordered_load_bit(lock);
862 if (!(mask & state)) {
863 ordered_store_bit(lock, state | mask);
864 success = 1;
865 }
866 #endif // __SMP__
867
868 if (success) {
869 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
870 }
871
872 return success;
873 }
874
875 unsigned
876 int
877 (hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
878 {
879 _disable_preemption();
880 return hw_lock_bit_to_internal(lock, bit, timeout LCK_GRP_ARG(grp));
881 }
882
883 #if __SMP__
884 static unsigned int NOINLINE
885 hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
886 {
887 uint64_t end = 0;
888 int i;
889 #if CONFIG_DTRACE || LOCK_STATS
890 uint64_t begin = 0;
891 boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
892 #endif /* CONFIG_DTRACE || LOCK_STATS */
893
894 #if LOCK_STATS || CONFIG_DTRACE
895 if (__improbable(stat_enabled)) {
896 begin = mach_absolute_time();
897 }
898 #endif /* LOCK_STATS || CONFIG_DTRACE */
899 for (;;) {
900 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
901 // Always load-exclusive before wfe
902 // This grabs the monitor and wakes up on a release event
903 if (hw_atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
904 goto end;
905 }
906 }
907 if (end == 0) {
908 end = ml_get_timebase() + timeout;
909 } else if (ml_get_timebase() >= end) {
910 break;
911 }
912 }
913 return 0;
914 end:
915 #if CONFIG_DTRACE || LOCK_STATS
916 if (__improbable(stat_enabled)) {
917 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
918 }
919 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
920 #endif /* CONFIG_DTRACE || LOCK_STATS */
921
922 return 1;
923 }
924 #endif // __SMP__
925
926 void
927 (hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
928 {
929 if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT, LCK_GRP_PROBEARG(grp))) {
930 return;
931 }
932 #if __SMP__
933 panic("hw_lock_bit(): timed out (%p)", lock);
934 #else
935 panic("hw_lock_bit(): interlock held (%p)", lock);
936 #endif
937 }
938
939 void
940 (hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
941 {
942 if (__improbable(get_preemption_level() == 0)) {
943 panic("Attempt to take no-preempt bitlock %p in preemptible context", lock);
944 }
945 if (hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT LCK_GRP_ARG(grp))) {
946 return;
947 }
948 #if __SMP__
949 panic("hw_lock_bit_nopreempt(): timed out (%p)", lock);
950 #else
951 panic("hw_lock_bit_nopreempt(): interlock held (%p)", lock);
952 #endif
953 }
954
955 unsigned
956 int
957 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
958 {
959 uint32_t mask = (1 << bit);
960 #if !__SMP__
961 uint32_t state;
962 #endif
963 boolean_t success = FALSE;
964
965 _disable_preemption();
966 #if __SMP__
967 // TODO: consider weak (non-looping) atomic test-and-set
968 success = hw_atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
969 #else
970 state = ordered_load_bit(lock);
971 if (!(mask & state)) {
972 ordered_store_bit(lock, state | mask);
973 success = TRUE;
974 }
975 #endif // __SMP__
976 if (!success) {
977 _enable_preemption();
978 }
979
980 if (success) {
981 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
982 }
983
984 return success;
985 }
986
987 static inline void
988 hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
989 {
990 uint32_t mask = (1 << bit);
991 #if !__SMP__
992 uint32_t state;
993 #endif
994
995 #if __SMP__
996 os_atomic_andnot(lock, mask, release);
997 #if __arm__
998 set_event();
999 #endif
1000 #else // __SMP__
1001 state = ordered_load_bit(lock);
1002 ordered_store_bit(lock, state & ~mask);
1003 #endif // __SMP__
1004 #if CONFIG_DTRACE
1005 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
1006 #endif
1007 }
1008
1009 /*
1010 * Routine: hw_unlock_bit
1011 *
1012 * Release spin-lock. The second parameter is the bit number to test and set.
1013 * Decrement the preemption level.
1014 */
1015 void
1016 hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
1017 {
1018 hw_unlock_bit_internal(lock, bit);
1019 _enable_preemption();
1020 }
1021
1022 void
1023 hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
1024 {
1025 if (__improbable(get_preemption_level() == 0)) {
1026 panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
1027 }
1028 hw_unlock_bit_internal(lock, bit);
1029 }
1030
1031 /*
1032 * Routine: lck_spin_sleep
1033 */
1034 wait_result_t
1035 lck_spin_sleep_grp(
1036 lck_spin_t *lck,
1037 lck_sleep_action_t lck_sleep_action,
1038 event_t event,
1039 wait_interrupt_t interruptible,
1040 lck_grp_t *grp)
1041 {
1042 wait_result_t res;
1043
1044 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1045 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1046 }
1047
1048 res = assert_wait(event, interruptible);
1049 if (res == THREAD_WAITING) {
1050 lck_spin_unlock(lck);
1051 res = thread_block(THREAD_CONTINUE_NULL);
1052 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1053 lck_spin_lock_grp(lck, grp);
1054 }
1055 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1056 lck_spin_unlock(lck);
1057 }
1058
1059 return res;
1060 }
1061
1062 wait_result_t
1063 lck_spin_sleep(
1064 lck_spin_t *lck,
1065 lck_sleep_action_t lck_sleep_action,
1066 event_t event,
1067 wait_interrupt_t interruptible)
1068 {
1069 return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
1070 }
1071
1072 /*
1073 * Routine: lck_spin_sleep_deadline
1074 */
1075 wait_result_t
1076 lck_spin_sleep_deadline(
1077 lck_spin_t *lck,
1078 lck_sleep_action_t lck_sleep_action,
1079 event_t event,
1080 wait_interrupt_t interruptible,
1081 uint64_t deadline)
1082 {
1083 wait_result_t res;
1084
1085 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1086 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1087 }
1088
1089 res = assert_wait_deadline(event, interruptible, deadline);
1090 if (res == THREAD_WAITING) {
1091 lck_spin_unlock(lck);
1092 res = thread_block(THREAD_CONTINUE_NULL);
1093 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1094 lck_spin_lock(lck);
1095 }
1096 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1097 lck_spin_unlock(lck);
1098 }
1099
1100 return res;
1101 }
1102
1103 /*
1104 * Routine: lck_mtx_sleep
1105 */
1106 wait_result_t
1107 lck_mtx_sleep(
1108 lck_mtx_t *lck,
1109 lck_sleep_action_t lck_sleep_action,
1110 event_t event,
1111 wait_interrupt_t interruptible)
1112 {
1113 wait_result_t res;
1114 thread_t thread = current_thread();
1115
1116 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
1117 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1118
1119 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1120 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1121 }
1122
1123 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1124 /*
1125 * We overload the RW lock promotion to give us a priority ceiling
1126 * during the time that this thread is asleep, so that when it
1127 * is re-awakened (and not yet contending on the mutex), it is
1128 * runnable at a reasonably high priority.
1129 */
1130 thread->rwlock_count++;
1131 }
1132
1133 res = assert_wait(event, interruptible);
1134 if (res == THREAD_WAITING) {
1135 lck_mtx_unlock(lck);
1136 res = thread_block(THREAD_CONTINUE_NULL);
1137 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1138 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1139 lck_mtx_lock_spin(lck);
1140 } else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
1141 lck_mtx_lock_spin_always(lck);
1142 } else {
1143 lck_mtx_lock(lck);
1144 }
1145 }
1146 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1147 lck_mtx_unlock(lck);
1148 }
1149
1150 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1151 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1152 /* sched_flags checked without lock, but will be rechecked while clearing */
1153 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1154 }
1155 }
1156
1157 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1158
1159 return res;
1160 }
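/*
 * Example (illustrative sketch of a hypothetical caller; foo_lock and
 * foo_ready are made-up names): the usual pattern is a condition re-check
 * loop, since the condition may already have been consumed by another thread
 * by the time the mutex is reacquired.
 *
 *	lck_mtx_lock(&foo_lock);
 *	while (!foo_ready) {
 *		(void) lck_mtx_sleep(&foo_lock, LCK_SLEEP_DEFAULT,
 *		    (event_t)&foo_ready, THREAD_UNINT);
 *	}
 *	// consume the condition while still holding foo_lock
 *	lck_mtx_unlock(&foo_lock);
 *
 * with the producer setting foo_ready under foo_lock and calling
 * thread_wakeup((event_t)&foo_ready).
 */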
1161
1162
1163 /*
1164 * Routine: lck_mtx_sleep_deadline
1165 */
1166 wait_result_t
1167 lck_mtx_sleep_deadline(
1168 lck_mtx_t *lck,
1169 lck_sleep_action_t lck_sleep_action,
1170 event_t event,
1171 wait_interrupt_t interruptible,
1172 uint64_t deadline)
1173 {
1174 wait_result_t res;
1175 thread_t thread = current_thread();
1176
1177 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1178 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1179
1180 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1181 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1182 }
1183
1184 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1185 /*
1186 * See lck_mtx_sleep().
1187 */
1188 thread->rwlock_count++;
1189 }
1190
1191 res = assert_wait_deadline(event, interruptible, deadline);
1192 if (res == THREAD_WAITING) {
1193 lck_mtx_unlock(lck);
1194 res = thread_block(THREAD_CONTINUE_NULL);
1195 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1196 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1197 lck_mtx_lock_spin(lck);
1198 } else {
1199 lck_mtx_lock(lck);
1200 }
1201 }
1202 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1203 lck_mtx_unlock(lck);
1204 }
1205
1206 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1207 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1208 /* sched_flags checked without lock, but will be rechecked while clearing */
1209 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1210 }
1211 }
1212
1213 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1214
1215 return res;
1216 }
1217
1218 /*
1219 * Lock Boosting Invariants:
1220 *
1221 * The lock owner is always promoted to the max priority of all its waiters.
1222 * Max priority is capped at MAXPRI_PROMOTE.
1223 *
1224 * The last waiter is not given a promotion when it wakes up or acquires the lock.
1225 * When the last waiter is waking up, a new contender can always come in and
1226 * steal the lock without having to wait for the last waiter to make forward progress.
1227 */
1228
1229 /*
1230 * Routine: lck_mtx_lock_wait
1231 *
1232 * Invoked in order to wait on contention.
1233 *
1234 * Called with the interlock locked and
1235 * returns it unlocked.
1236 *
1237 * Always aggressively sets the owning thread to promoted,
1238 * even if it's the same or higher priority
1239 * This prevents it from lowering its own priority while holding a lock
1240 *
1241 * TODO: Come up with a more efficient way to handle same-priority promotions
1242 * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
1243 */
1244 void
1245 lck_mtx_lock_wait(
1246 lck_mtx_t *lck,
1247 thread_t holder,
1248 struct turnstile **ts)
1249 {
1250 thread_t thread = current_thread();
1251 lck_mtx_t *mutex;
1252 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1253
1254 #if CONFIG_DTRACE
1255 uint64_t sleep_start = 0;
1256
1257 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
1258 sleep_start = mach_absolute_time();
1259 }
1260 #endif
1261
1262 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1263 mutex = lck;
1264 } else {
1265 mutex = &lck->lck_mtx_ptr->lck_mtx;
1266 }
1267
1268 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
1269 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1270
1271 assert(thread->waiting_for_mutex == NULL);
1272 thread->waiting_for_mutex = mutex;
1273 mutex->lck_mtx_waiters++;
1274
1275 if (*ts == NULL) {
1276 *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1277 }
1278
1279 struct turnstile *turnstile = *ts;
1280 thread_set_pending_block_hint(thread, kThreadWaitKernelMutex);
1281 turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1282
1283 waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
1284
1285 lck_mtx_ilk_unlock(mutex);
1286
1287 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
1288
1289 thread_block(THREAD_CONTINUE_NULL);
1290
1291 thread->waiting_for_mutex = NULL;
1292
1293 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1294 #if CONFIG_DTRACE
1295 /*
1296 * Record the DTrace lockstat probe for blocking, block time
1297 * measured from when we were entered.
1298 */
1299 if (sleep_start) {
1300 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1301 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
1302 mach_absolute_time() - sleep_start);
1303 } else {
1304 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
1305 mach_absolute_time() - sleep_start);
1306 }
1307 }
1308 #endif
1309 }
1310
1311 /*
1312 * Routine: lck_mtx_lock_acquire
1313 *
1314 * Invoked on acquiring the mutex when there is
1315 * contention.
1316 *
1317 * Returns the current number of waiters.
1318 *
1319 * Called with the interlock locked.
1320 */
1321 int
1322 lck_mtx_lock_acquire(
1323 lck_mtx_t *lck,
1324 struct turnstile *ts)
1325 {
1326 thread_t thread = current_thread();
1327 lck_mtx_t *mutex;
1328
1329 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1330 mutex = lck;
1331 } else {
1332 mutex = &lck->lck_mtx_ptr->lck_mtx;
1333 }
1334
1335 assert(thread->waiting_for_mutex == NULL);
1336
1337 if (mutex->lck_mtx_waiters > 0) {
1338 if (ts == NULL) {
1339 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1340 }
1341
1342 turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1343 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1344 }
1345
1346 if (ts != NULL) {
1347 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1348 }
1349
1350 return mutex->lck_mtx_waiters;
1351 }
1352
1353 /*
1354 * Routine: lck_mtx_unlock_wakeup
1355 *
1356 * Invoked on unlock when there is contention.
1357 *
1358 * Called with the interlock locked.
1359 *
1360 * NOTE: callers should call turnstile_cleanup after
1361 * dropping the interlock.
1362 */
1363 boolean_t
1364 lck_mtx_unlock_wakeup(
1365 lck_mtx_t *lck,
1366 thread_t holder)
1367 {
1368 thread_t thread = current_thread();
1369 lck_mtx_t *mutex;
1370 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1371 struct turnstile *ts;
1372 kern_return_t did_wake;
1373
1374 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1375 mutex = lck;
1376 } else {
1377 mutex = &lck->lck_mtx_ptr->lck_mtx;
1378 }
1379
1380 if (thread != holder) {
1381 panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
1382 }
1383
1384 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
1385 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1386
1387 assert(mutex->lck_mtx_waiters > 0);
1388 assert(thread->waiting_for_mutex == NULL);
1389
1390 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1391
1392 if (mutex->lck_mtx_waiters > 1) {
1393 /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the wokenup thread */
1394 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
1395 } else {
1396 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1397 turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
1398 }
1399 assert(did_wake == KERN_SUCCESS);
1400
1401 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1402 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1403
1404 mutex->lck_mtx_waiters--;
1405
1406 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1407
1408 return mutex->lck_mtx_waiters > 0;
1409 }
1410
1411 /*
1412 * Routine: mutex_pause
1413 *
1414 * Called by former callers of simple_lock_pause().
1415 */
1416 #define MAX_COLLISION_COUNTS 32
1417 #define MAX_COLLISION 8
1418
1419 unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1420
1421 uint32_t collision_backoffs[MAX_COLLISION] = {
1422 10, 50, 100, 200, 400, 600, 800, 1000
1423 };
1424
1425
1426 void
1427 mutex_pause(uint32_t collisions)
1428 {
1429 wait_result_t wait_result;
1430 uint32_t back_off;
1431
1432 if (collisions >= MAX_COLLISION_COUNTS) {
1433 collisions = MAX_COLLISION_COUNTS - 1;
1434 }
1435 max_collision_count[collisions]++;
1436
1437 if (collisions >= MAX_COLLISION) {
1438 collisions = MAX_COLLISION - 1;
1439 }
1440 back_off = collision_backoffs[collisions];
1441
1442 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1443 assert(wait_result == THREAD_WAITING);
1444
1445 wait_result = thread_block(THREAD_CONTINUE_NULL);
1446 assert(wait_result == THREAD_TIMED_OUT);
1447 }
1448
1449
1450 unsigned int mutex_yield_wait = 0;
1451 unsigned int mutex_yield_no_wait = 0;
1452
1453 void
1454 lck_mtx_yield(
1455 lck_mtx_t *lck)
1456 {
1457 int waiters;
1458
1459 #if DEBUG
1460 lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
1461 #endif /* DEBUG */
1462
1463 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) {
1464 waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1465 } else {
1466 waiters = lck->lck_mtx_waiters;
1467 }
1468
1469 if (!waiters) {
1470 mutex_yield_no_wait++;
1471 } else {
1472 mutex_yield_wait++;
1473 lck_mtx_unlock(lck);
1474 mutex_pause(0);
1475 lck_mtx_lock(lck);
1476 }
1477 }
1478
1479
1480 /*
1481 * Routine: lck_rw_sleep
1482 */
1483 wait_result_t
1484 lck_rw_sleep(
1485 lck_rw_t *lck,
1486 lck_sleep_action_t lck_sleep_action,
1487 event_t event,
1488 wait_interrupt_t interruptible)
1489 {
1490 wait_result_t res;
1491 lck_rw_type_t lck_rw_type;
1492 thread_t thread = current_thread();
1493
1494 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1495 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1496 }
1497
1498 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1499 /*
1500 * Although we are dropping the RW lock, the intent in most cases
1501 * is that this thread remains as an observer, since it may hold
1502 * some secondary resource, but must yield to avoid deadlock. In
1503 * this situation, make sure that the thread is boosted to the
1504 * RW lock ceiling while blocked, so that it can re-acquire the
1505 * RW lock at that priority.
1506 */
1507 thread->rwlock_count++;
1508 }
1509
1510 res = assert_wait(event, interruptible);
1511 if (res == THREAD_WAITING) {
1512 lck_rw_type = lck_rw_done(lck);
1513 res = thread_block(THREAD_CONTINUE_NULL);
1514 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1515 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1516 lck_rw_lock(lck, lck_rw_type);
1517 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1518 lck_rw_lock_exclusive(lck);
1519 } else {
1520 lck_rw_lock_shared(lck);
1521 }
1522 }
1523 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1524 (void)lck_rw_done(lck);
1525 }
1526
1527 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1528 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1529 /* sched_flags checked without lock, but will be rechecked while clearing */
1530
1531 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1532 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1533
1534 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1535 }
1536 }
1537
1538 return res;
1539 }
1540
1541
1542 /*
1543 * Routine: lck_rw_sleep_deadline
1544 */
1545 wait_result_t
1546 lck_rw_sleep_deadline(
1547 lck_rw_t *lck,
1548 lck_sleep_action_t lck_sleep_action,
1549 event_t event,
1550 wait_interrupt_t interruptible,
1551 uint64_t deadline)
1552 {
1553 wait_result_t res;
1554 lck_rw_type_t lck_rw_type;
1555 thread_t thread = current_thread();
1556
1557 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1558 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1559 }
1560
1561 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1562 thread->rwlock_count++;
1563 }
1564
1565 res = assert_wait_deadline(event, interruptible, deadline);
1566 if (res == THREAD_WAITING) {
1567 lck_rw_type = lck_rw_done(lck);
1568 res = thread_block(THREAD_CONTINUE_NULL);
1569 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1570 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1571 lck_rw_lock(lck, lck_rw_type);
1572 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1573 lck_rw_lock_exclusive(lck);
1574 } else {
1575 lck_rw_lock_shared(lck);
1576 }
1577 }
1578 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1579 (void)lck_rw_done(lck);
1580 }
1581
1582 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1583 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1584 /* sched_flags checked without lock, but will be rechecked while clearing */
1585
1586 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1587 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1588
1589 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1590 }
1591 }
1592
1593 return res;
1594 }
1595
1596 /*
1597 * Reader-writer lock promotion
1598 *
1599 * We support a limited form of reader-writer
1600 * lock promotion whose effects are:
1601 *
1602 * * Qualifying threads have decay disabled
1603 * Scheduler priority is reset to a floor of
1604 * their statically assigned priority
1605 * or MINPRI_RWLOCK
1606 *
1607 * The rationale is that lck_rw_ts do not have
1608 * a single owner, so we cannot apply a directed
1609 * priority boost from all waiting threads
1610 * to all holding threads without maintaining
1611 * lists of all shared owners and all waiting
1612 * threads for every lock.
1613 *
1614 * Instead (and to preserve the uncontended fast-
1615 * path), acquiring (or attempting to acquire)
1616 * a RW lock in shared or exclusive mode increments
1617 * a per-thread counter. Only if that thread stops
1618 * making forward progress (for instance blocking
1619 * on a mutex, or being preempted) do we consult
1620 * the counter and apply the priority floor.
1621 * When the thread becomes runnable again (or in
1622 * the case of preemption it never stopped being
1623 * runnable), it has the priority boost and should
1624 * be in a good position to run on the CPU and
1625 * release all RW locks (at which point the priority
1626 * boost is cleared).
1627 *
1628 * Care must be taken to ensure that priority
1629 * boosts are not retained indefinitely, since unlike
1630 * mutex priority boosts (where the boost is tied
1631 * to the mutex lifecycle), the boost is tied
1632 * to the thread and independent of any particular
1633 * lck_rw_t. Assertions are in place on return
1634 * to userspace so that the boost is not held
1635 * indefinitely.
1636 *
1637 * The routines that increment/decrement the
1638 * per-thread counter should err on the side of
1639 * incrementing any time a preemption is possible
1640 * and the lock would be visible to the rest of the
1641 * system as held (so it should be incremented before
1642 * interlocks are dropped/preemption is enabled, or
1643 * before a CAS is executed to acquire the lock).
1644 *
1645 */
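/*
 * Sketch of the counter discipline described above (the real code lives in
 * the per-architecture lck_rw_* paths; this is a simplified illustration,
 * not a verbatim excerpt):
 *
 *	// acquire, shared or exclusive: count the hold before it becomes
 *	// visible to the rest of the system
 *	current_thread()->rwlock_count++;
 *	// ... CAS / wait to take the lock ...
 *
 *	// release:
 *	thread_t thread = current_thread();
 *	// ... CAS to drop the lock ...
 *	if ((thread->rwlock_count-- == 1) &&
 *	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
 *		lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
 *	}
 */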
1646
1647 /*
1648 * lck_rw_clear_promotion: Undo priority promotions when the last RW
1649 * lock is released by a thread (if a promotion was active)
1650 */
1651 void
1652 lck_rw_clear_promotion(thread_t thread, uintptr_t trace_obj)
1653 {
1654 assert(thread->rwlock_count == 0);
1655
1656 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
1657 spl_t s = splsched();
1658 thread_lock(thread);
1659
1660 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
1661 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj);
1662 }
1663
1664 thread_unlock(thread);
1665 splx(s);
1666 }
1667
1668 /*
1669 * Callout from context switch if the thread goes
1670 * off core with a positive rwlock_count
1671 *
1672 * Called at splsched with the thread locked
1673 */
1674 void
1675 lck_rw_set_promotion_locked(thread_t thread)
1676 {
1677 if (LcksOpts & disLkRWPrio) {
1678 return;
1679 }
1680
1681 assert(thread->rwlock_count > 0);
1682
1683 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1684 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
1685 }
1686 }
1687
1688 kern_return_t
1689 host_lockgroup_info(
1690 host_t host,
1691 lockgroup_info_array_t *lockgroup_infop,
1692 mach_msg_type_number_t *lockgroup_infoCntp)
1693 {
1694 lockgroup_info_t *lockgroup_info_base;
1695 lockgroup_info_t *lockgroup_info;
1696 vm_offset_t lockgroup_info_addr;
1697 vm_size_t lockgroup_info_size;
1698 vm_size_t lockgroup_info_vmsize;
1699 lck_grp_t *lck_grp;
1700 unsigned int i;
1701 vm_map_copy_t copy;
1702 kern_return_t kr;
1703
1704 if (host == HOST_NULL) {
1705 return KERN_INVALID_HOST;
1706 }
1707
1708 lck_mtx_lock(&lck_grp_lock);
1709
1710 lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
1711 lockgroup_info_vmsize = round_page(lockgroup_info_size);
1712 kr = kmem_alloc_pageable(ipc_kernel_map,
1713 &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
1714 if (kr != KERN_SUCCESS) {
1715 lck_mtx_unlock(&lck_grp_lock);
1716 return kr;
1717 }
1718
1719 lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
1720 lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
1721 lockgroup_info = lockgroup_info_base;
1722
1723 for (i = 0; i < lck_grp_cnt; i++) {
1724 lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
1725 lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
1726 lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
1727
1728 #if LOCK_STATS
1729 lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stats.lgss_spin_held.lgs_count;
1730 lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stats.lgss_spin_miss.lgs_count;
1731 #endif /* LOCK_STATS */
1732
1733 // Historically on x86, held was used for "direct wait" and util for "held"
1734 lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stats.lgss_mtx_held.lgs_count;
1735 lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stats.lgss_mtx_direct_wait.lgs_count;
1736 lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stats.lgss_mtx_miss.lgs_count;
1737 lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stats.lgss_mtx_wait.lgs_count;
1738
1739 (void) strncpy(lockgroup_info->lockgroup_name, lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);
1740
1741 lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
1742 lockgroup_info++;
1743 }
1744
1745 *lockgroup_infoCntp = lck_grp_cnt;
1746 lck_mtx_unlock(&lck_grp_lock);
1747
1748 if (lockgroup_info_size != lockgroup_info_vmsize) {
1749 bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
1750 }
1751
1752 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
1753 (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
1754 assert(kr == KERN_SUCCESS);
1755
1756 *lockgroup_infop = (lockgroup_info_t *) copy;
1757
1758 return KERN_SUCCESS;
1759 }
1760
1761 /*
1762 * sleep_with_inheritor and wakeup_with_inheritor KPI
1763 *
1764 * Functions that allow to sleep on an event and use turnstile to propagate the priority of the sleeping threads to
1765 * the latest thread specified as inheritor.
1766 *
1767 * The inheritor management is delegated to the caller, the caller needs to store a thread identifier to provide to this functions to specified upon whom
1768 * direct the push. The inheritor cannot run in user space while holding a push from an event. Therefore is the caller responsibility to call a
1769 * wakeup_with_inheritor from inheritor before running in userspace or specify another inheritor before letting the old inheritor run in userspace.
1770 *
1771 * sleep_with_inheritor requires to hold a locking primitive while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
1772 *
1773 * Turnstile requires a non blocking primitive as interlock to synchronize the turnstile data structure manipulation, threfore sleep_with_inheritor, change_sleep_inheritor and
1774 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
1775 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), an handoff to a non blocking primitive is required before
1776 * invoking any turnstile operation.
1777 *
1778 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the the turnstile kernel hash table bucket
1779 * spinlock as the turnstile interlock. Because we do not want to hold interrupt disabled while holding the bucket interlock a new turnstile kernel hash table
1780 * is instantiated for this KPI to manage the hash without interrupt disabled.
1781 * Also:
1782 * - all events on the system that hash on the same bucket will contend on the same spinlock.
1783 * - every event will have a dedicated wait_queue.
1784 *
1785 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
1786 * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
1787 */
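/*
 * Example (illustrative sketch of a hypothetical caller; foo, foo_lock and
 * the owner field are made-up names, not part of this KPI): a waiter parks on
 * an event and pushes on the current owner, and the owner later hands the
 * push to whichever waiter it wakes.
 *
 *	// waiter, with foo_lock (a lck_mtx_t) held:
 *	while (foo->owner != THREAD_NULL) {
 *		(void) lck_mtx_sleep_with_inheritor(&foo_lock, LCK_SLEEP_DEFAULT,
 *		    (event_t)foo, foo->owner, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 *	}
 *	foo->owner = current_thread();
 *	lck_mtx_unlock(&foo_lock);
 *
 *	// previous owner, when releasing ownership:
 *	(void) wakeup_one_with_inheritor((event_t)foo, THREAD_AWAKENED,
 *	    LCK_WAKE_DEFAULT, NULL);
 */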
1788
1789 kern_return_t
1790 wakeup_with_inheritor_and_turnstile_type(event_t event, turnstile_type_t type, wait_result_t result, bool wake_one, lck_wake_action_t action, thread_t *thread_wokenup)
1791 {
1792 uint32_t index;
1793 struct turnstile *ts = NULL;
1794 kern_return_t ret = KERN_NOT_WAITING;
1795 int priority;
1796 thread_t wokeup;
1797
1798 /*
1799 * the hash bucket spinlock is used as turnstile interlock
1800 */
1801 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1802
1803 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1804
1805 if (wake_one) {
1806 if (action == LCK_WAKE_DEFAULT) {
1807 priority = WAITQ_PROMOTE_ON_WAKE;
1808 } else {
1809 assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
1810 priority = WAITQ_ALL_PRIORITIES;
1811 }
1812
1813 /*
1814 * WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor
1815 * if it finds a thread
1816 */
1817 wokeup = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(event), result, priority);
1818 if (wokeup != NULL) {
1819 if (thread_wokenup != NULL) {
1820 *thread_wokenup = wokeup;
1821 } else {
1822 thread_deallocate_safe(wokeup);
1823 }
1824 ret = KERN_SUCCESS;
1825 if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
1826 goto complete;
1827 }
1828 } else {
1829 if (thread_wokenup != NULL) {
1830 *thread_wokenup = NULL;
1831 }
1832 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
1833 ret = KERN_NOT_WAITING;
1834 }
1835 } else {
1836 ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
1837 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
1838 }
1839
1840 /*
1841 * turnstile_update_inheritor_complete could be called while holding the interlock.
1842 * In this case the new inheritor either is NULL or is a thread that has just been woken up
1843 * and has not blocked yet, because it is racing with the same interlock used here
1844 * after the wait.
1845 * So there is no chain to update for the new inheritor.
1846 *
1847 * However, unless the current thread is the old inheritor,
1848 * the old inheritor can be blocked and requires a chain update.
1849 *
1850 * The chain should be short because kernel turnstiles cannot have user turnstiles
1851 * chained after them.
1852 *
1853 * We could optimize this by asking the turnstile to tell us
1854 * whether the old inheritor needs an update, and drop the lock
1855 * only in that case.
1856 */
1857 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1858
1859 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1860
1861 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1862
1863 complete:
1864 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1865
1866 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1867
1868 turnstile_cleanup();
1869
1870 return ret;
1871 }
1872
1873 static wait_result_t
1874 sleep_with_inheritor_and_turnstile_type(event_t event,
1875 thread_t inheritor,
1876 wait_interrupt_t interruptible,
1877 uint64_t deadline,
1878 turnstile_type_t type,
1879 void (^primitive_lock)(void),
1880 void (^primitive_unlock)(void))
1881 {
1882 wait_result_t ret;
1883 uint32_t index;
1884 struct turnstile *ts = NULL;
1885
1886 /*
1887 * the hash bucket spinlock is used as turnstile interlock,
1888 * lock it before releasing the primitive lock
1889 */
1890 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1891
1892 primitive_unlock();
1893
1894 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1895
1896 thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
1897 /*
1898 * We need TURNSTILE_DELAYED_UPDATE because we will call
1899 * waitq_assert_wait64 after.
1900 */
1901 turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1902
1903 ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);
1904
1905 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1906
1907 /*
1908 * Update new and old inheritor chains outside the interlock;
1909 */
1910 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1911
1912 if (ret == THREAD_WAITING) {
1913 ret = thread_block(THREAD_CONTINUE_NULL);
1914 }
1915
1916 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1917
1918 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1919
1920 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1921
1922 turnstile_cleanup();
1923
1924 primitive_lock();
1925
1926 return ret;
1927 }
1928
1929 kern_return_t
1930 change_sleep_inheritor_and_turnstile_type(event_t event,
1931 thread_t inheritor,
1932 turnstile_type_t type)
1933 {
1934 uint32_t index;
1935 struct turnstile *ts = NULL;
1936 kern_return_t ret = KERN_SUCCESS;
1937 /*
1938 * the hash bucket spinlock is used as turnstile interlock
1939 */
1940 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1941
1942 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1943
1944 if (!turnstile_has_waiters(ts)) {
1945 ret = KERN_NOT_WAITING;
1946 }
1947
1948 /*
1949 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
1950 */
1951 turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1952
1953 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1954
1955 /*
1956 * update the chains outside the interlock
1957 */
1958 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1959
1960 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1961
1962 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1963
1964 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1965
1966 turnstile_cleanup();
1967
1968 return ret;
1969 }
1970
1971 typedef void (^void_block_void)(void);
1972
1973 /*
1974 * sleep_with_inheritor functions with lck_mtx_t as locking primitive.
1975 */
1976
1977 wait_result_t
1978 lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
1979 {
1980 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1981
1982 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1983 return sleep_with_inheritor_and_turnstile_type(event,
1984 inheritor,
1985 interruptible,
1986 deadline,
1987 type,
1988 ^{;},
1989 ^{lck_mtx_unlock(lock);});
1990 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1991 return sleep_with_inheritor_and_turnstile_type(event,
1992 inheritor,
1993 interruptible,
1994 deadline,
1995 type,
1996 ^{lck_mtx_lock_spin(lock);},
1997 ^{lck_mtx_unlock(lock);});
1998 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1999 return sleep_with_inheritor_and_turnstile_type(event,
2000 inheritor,
2001 interruptible,
2002 deadline,
2003 type,
2004 ^{lck_mtx_lock_spin_always(lock);},
2005 ^{lck_mtx_unlock(lock);});
2006 } else {
2007 return sleep_with_inheritor_and_turnstile_type(event,
2008 inheritor,
2009 interruptible,
2010 deadline,
2011 type,
2012 ^{lck_mtx_lock(lock);},
2013 ^{lck_mtx_unlock(lock);});
2014 }
2015 }
2016
2017 /*
2018 * Name: lck_spin_sleep_with_inheritor
2019 *
2020 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2021 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2022 * be directed to the inheritor specified.
2023 * An interruptible mode and deadline can be specified to return earlier from the wait.
2024 *
2025 * Args:
2026 * Arg1: lck_spin_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2027 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
2028 * Arg3: event to wait on.
2029 * Arg4: thread to propagate the event push to.
2030 * Arg5: interruptible flag for wait.
2031 * Arg6: deadline for wait.
2032 *
2033 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2034 * Lock will be dropped while waiting.
2035 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2036 * wakeup for the event is called.
2037 *
2038 * Returns: result of the wait.
2039 */
2040 wait_result_t
2041 lck_spin_sleep_with_inheritor(
2042 lck_spin_t *lock,
2043 lck_sleep_action_t lck_sleep_action,
2044 event_t event,
2045 thread_t inheritor,
2046 wait_interrupt_t interruptible,
2047 uint64_t deadline)
2048 {
2049 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2050 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
2051 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
2052 ^{}, ^{ lck_spin_unlock(lock); });
2053 } else {
2054 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
2055 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
2056 ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
2057 }
2058 }
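
/*
 * Example (editorial sketch, not part of the original source): a minimal
 * caller of lck_spin_sleep_with_inheritor() using LCK_SLEEP_UNLOCK, so the
 * spin lock is dropped for the sleep and not reacquired on return. The lock,
 * event and owner variables below are hypothetical names for illustration.
 */
#if 0
static lck_spin_t example_spinlock;     /* assumed initialized elsewhere */
static thread_t   example_owner;        /* thread currently doing the work */
static bool       example_done;

static wait_result_t
example_wait_done(void)
{
        lck_spin_lock(&example_spinlock);
        if (example_done) {
                lck_spin_unlock(&example_spinlock);
                return THREAD_AWAKENED;
        }
        /* Sleep; our priority pushes on example_owner; return unlocked. */
        return lck_spin_sleep_with_inheritor(&example_spinlock,
            LCK_SLEEP_UNLOCK, (event_t)&example_done, example_owner,
            THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
}
#endif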
2059
2060 /*
2061 * Name: lck_mtx_sleep_with_inheritor
2062 *
2063 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2064 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2065 * be directed to the inheritor specified.
2066 * An interruptible mode and deadline can be specified to return earlier from the wait.
2067 *
2068 * Args:
2069 * Arg1: lck_mtx_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2070 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
2071 * Arg3: event to wait on.
2072 * Arg4: thread to propagate the event push to.
2073 * Arg5: interruptible flag for wait.
2074 * Arg6: deadline for wait.
2075 *
2076 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2077 * Lock will be dropped while waiting.
2078 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2079 * wakeup for the event is called.
2080 *
2081 * Returns: result of the wait.
2082 */
2083 wait_result_t
2084 lck_mtx_sleep_with_inheritor(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
2085 {
2086 return lck_mtx_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
2087 }
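
/*
 * Example (editorial sketch, not part of the original source): a typical
 * pairing of lck_mtx_sleep_with_inheritor() with wakeup_one_with_inheritor()
 * (defined later in this file) for an owned resource. The lock, flag and
 * owner tracking below are hypothetical names introduced for illustration.
 */
#if 0
static lck_mtx_t example_mtx;           /* assumed initialized elsewhere */
static thread_t  example_owner;         /* current owner of the resource */
static bool      example_busy;

static void
example_acquire(void)
{
        lck_mtx_lock(&example_mtx);
        while (example_busy) {
                /* Sleep; our priority pushes on the current owner. */
                (void) lck_mtx_sleep_with_inheritor(&example_mtx,
                    LCK_SLEEP_DEFAULT, (event_t)&example_busy, example_owner,
                    THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
        }
        example_busy = true;
        example_owner = current_thread();
        lck_mtx_unlock(&example_mtx);
}

static void
example_release(void)
{
        thread_t woken = NULL;

        lck_mtx_lock(&example_mtx);
        example_busy = false;
        example_owner = NULL;
        /* Transfer the push to the highest-priority waiter, if any. */
        if (wakeup_one_with_inheritor((event_t)&example_busy, THREAD_AWAKENED,
            LCK_WAKE_DEFAULT, &woken) == KERN_SUCCESS) {
                thread_deallocate(woken);       /* drop the returned reference */
        }
        lck_mtx_unlock(&example_mtx);
}
#endif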
2088
2089 /*
2090 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
2091 */
2092
2093 wait_result_t
2094 lck_rw_sleep_with_inheritor_and_turnstile_type(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
2095 {
2096 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2097
2098 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2099
2100 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2101 return sleep_with_inheritor_and_turnstile_type(event,
2102 inheritor,
2103 interruptible,
2104 deadline,
2105 type,
2106 ^{;},
2107 ^{lck_rw_type = lck_rw_done(lock);});
2108 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2109 return sleep_with_inheritor_and_turnstile_type(event,
2110 inheritor,
2111 interruptible,
2112 deadline,
2113 type,
2114 ^{lck_rw_lock(lock, lck_rw_type);},
2115 ^{lck_rw_type = lck_rw_done(lock);});
2116 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2117 return sleep_with_inheritor_and_turnstile_type(event,
2118 inheritor,
2119 interruptible,
2120 deadline,
2121 type,
2122 ^{lck_rw_lock_exclusive(lock);},
2123 ^{lck_rw_type = lck_rw_done(lock);});
2124 } else {
2125 return sleep_with_inheritor_and_turnstile_type(event,
2126 inheritor,
2127 interruptible,
2128 deadline,
2129 type,
2130 ^{lck_rw_lock_shared(lock);},
2131 ^{lck_rw_type = lck_rw_done(lock);});
2132 }
2133 }
2134
2135 /*
2136 * Name: lck_rw_sleep_with_inheritor
2137 *
2138 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2139 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2140 * be directed to the inheritor specified.
2141 * An interruptible mode and deadline can be specified to return earlier from the wait.
2142 *
2143 * Args:
2144 * Arg1: lck_rw_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2145 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
2146 * Arg3: event to wait on.
2147 * Arg4: thread to propagate the event push to.
2148 * Arg5: interruptible flag for wait.
2149 * Arg6: deadline for wait.
2150 *
2151 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2152 * Lock will be dropped while waiting.
2153 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2154 * wakeup for the event is called.
2155 *
2156 * Returns: result of the wait.
2157 */
2158 wait_result_t
2159 lck_rw_sleep_with_inheritor(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
2160 {
2161 return lck_rw_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
2162 }
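
/*
 * Example (editorial sketch, not part of the original source): waiting under
 * an exclusively held rw lock with a deadline and an interruptible sleep,
 * then handling the possible wait results. All names are hypothetical.
 */
#if 0
static lck_rw_t  example_rw_lock;       /* assumed initialized elsewhere */
static thread_t  example_worker;        /* thread expected to post the event */
static bool      example_ready;

static bool
example_wait_ready(void)
{
        uint64_t      deadline;
        wait_result_t res;
        bool          ready;

        clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);

        lck_rw_lock_exclusive(&example_rw_lock);
        while (!example_ready) {
                /* Lock is dropped for the sleep and reacquired exclusive. */
                res = lck_rw_sleep_with_inheritor(&example_rw_lock,
                    LCK_SLEEP_DEFAULT, (event_t)&example_ready, example_worker,
                    THREAD_INTERRUPTIBLE, deadline);
                if (res == THREAD_TIMED_OUT || res == THREAD_INTERRUPTED) {
                        break;
                }
        }
        ready = example_ready;
        lck_rw_unlock_exclusive(&example_rw_lock);
        return ready;
}
#endif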
2163
2164 /*
2165 * wakeup_with_inheritor functions are independent from the locking primitive.
2166 */
2167
2168 /*
2169 * Name: wakeup_one_with_inheritor
2170 *
2171 * Description: wake up one waiter for event if any. The thread woken up will be the one with the highest sched priority waiting on event.
2172 * The push for the event will be transferred from the last inheritor to the woken up thread if LCK_WAKE_DEFAULT is specified.
2173 * If LCK_WAKE_DO_NOT_TRANSFER_PUSH is specified the push will not be transferred.
2174 *
2175 * Args:
2176 * Arg1: event to wake from.
2177 * Arg2: wait result to pass to the woken up thread.
2178 * Arg3: wake flag. LCK_WAKE_DEFAULT or LCK_WAKE_DO_NOT_TRANSFER_PUSH.
2179 * Arg4: pointer for storing the thread woken up.
2180 *
2181 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2182 *
2183 * Conditions: The new inheritor woken up cannot run in user space until another inheritor is specified for the event or a
2184 * wakeup for the event is called.
2185 * A reference for the woken-up thread is acquired.
2186 * NOTE: this cannot be called from interrupt context.
2187 */
2188 kern_return_t
2189 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
2190 {
2191 return wakeup_with_inheritor_and_turnstile_type(event,
2192 TURNSTILE_SLEEP_INHERITOR,
2193 result,
2194 TRUE,
2195 action,
2196 thread_wokenup);
2197 }
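
/*
 * Example (editorial sketch, not part of the original source): waking a
 * single waiter and releasing the thread reference that
 * wakeup_one_with_inheritor() acquires for the woken-up thread.
 * example_event is a hypothetical event address.
 */
#if 0
static void
example_wake_one(event_t example_event)
{
        thread_t      woken = NULL;
        kern_return_t kr;

        kr = wakeup_one_with_inheritor(example_event, THREAD_AWAKENED,
            LCK_WAKE_DEFAULT, &woken);
        if (kr == KERN_SUCCESS) {
                /* The woken thread is now the inheritor; drop our reference. */
                thread_deallocate(woken);
        }
        /* kr == KERN_NOT_WAITING means nobody was waiting on the event. */
}
#endif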
2198
2199 /*
2200 * Name: wakeup_all_with_inheritor
2201 *
2202 * Description: wake up all waiters waiting for event. The old inheritor will lose the push.
2203 *
2204 * Args:
2205 * Arg1: event to wake from.
2206 * Arg2: wait result to pass to the woken up threads.
2207 *
2208 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2209 *
2210 * Conditions: NOTE: this cannot be called from interrupt context.
2211 */
2212 kern_return_t
2213 wakeup_all_with_inheritor(event_t event, wait_result_t result)
2214 {
2215 return wakeup_with_inheritor_and_turnstile_type(event,
2216 TURNSTILE_SLEEP_INHERITOR,
2217 result,
2218 FALSE,
2219 0,
2220 NULL);
2221 }
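
/*
 * Example (editorial sketch, not part of the original source): broadcasting
 * to every waiter, typically when the protected state changes for all of
 * them; the previous inheritor simply loses the push. example_event is a
 * hypothetical event address.
 */
#if 0
static void
example_wake_all(event_t example_event)
{
        /* KERN_NOT_WAITING means no threads were sleeping on example_event. */
        (void) wakeup_all_with_inheritor(example_event, THREAD_AWAKENED);
}
#endif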
2222
2223 /*
2224 * change_sleep_inheritor is independent from the locking primitive.
2225 */
2226
2227 /*
2228 * Name: change_sleep_inheritor
2229 *
2230 * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
2231 *
2232 * Args:
2233 * Arg1: event to redirect the push.
2234 * Arg2: new inheritor for event.
2235 *
2236 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2237 *
2238 * Conditions: In case of success, the new inheritor cannot run in user space until another inheritor is specified for the event or a
2239 * wakeup for the event is called.
2240 * NOTE: this cannot be called from interrupt context.
2241 */
2242 kern_return_t
2243 change_sleep_inheritor(event_t event, thread_t inheritor)
2244 {
2245 return change_sleep_inheritor_and_turnstile_type(event,
2246 inheritor,
2247 TURNSTILE_SLEEP_INHERITOR);
2248 }
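
/*
 * Example (editorial sketch, not part of the original source): redirecting
 * the push of the waiters when ownership of the protected resource migrates
 * to another thread without waking anyone up. Names are hypothetical.
 */
#if 0
static void
example_transfer_ownership(event_t example_event, thread_t new_owner)
{
        /* Waiters now boost new_owner instead of the previous inheritor. */
        if (change_sleep_inheritor(example_event, new_owner) == KERN_NOT_WAITING) {
                /* Nobody is currently waiting; nothing was redirected. */
        }
}
#endif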
2249
2250 void
2251 kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
2252 {
2253 assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
2254 assert(waitq_is_turnstile_queue(waitq));
2255 waitinfo->owner = 0;
2256 waitinfo->context = 0;
2257
2258 if (waitq_held(waitq)) {
2259 return;
2260 }
2261
2262 struct turnstile *turnstile = waitq_to_turnstile(waitq);
2263 assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
2264 waitinfo->owner = thread_tid(turnstile->ts_inheritor);
2265 }
2266
2267 typedef void (*void_func_void)(void);
2268
2269 static kern_return_t
2270 gate_try_close(gate_t *gate)
2271 {
2272 uintptr_t state;
2273 thread_t holder;
2274 kern_return_t ret;
2275 __assert_only bool waiters;
2276 thread_t thread = current_thread();
2277
2278 if (os_atomic_cmpxchg(&gate->gate_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2279 return KERN_SUCCESS;
2280 }
2281
2282 gate_ilock(gate);
2283 state = ordered_load_gate(gate);
2284 holder = GATE_STATE_TO_THREAD(state);
2285
2286 if (holder == NULL) {
2287 waiters = gate_has_waiters(state);
2288 assert(waiters == FALSE);
2289
2290 state = GATE_THREAD_TO_STATE(current_thread());
2291 state |= GATE_ILOCK;
2292 ordered_store_gate(gate, state);
2293 ret = KERN_SUCCESS;
2294 } else {
2295 if (holder == current_thread()) {
2296 panic("Trying to close a gate already owned by current thread %p", current_thread());
2297 }
2298 ret = KERN_FAILURE;
2299 }
2300
2301 gate_iunlock(gate);
2302 return ret;
2303 }
2304
2305 static void
2306 gate_close(gate_t* gate)
2307 {
2308 uintptr_t state;
2309 thread_t holder;
2310 __assert_only bool waiters;
2311 thread_t thread = current_thread();
2312
2313 if (os_atomic_cmpxchg(&gate->gate_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2314 return;
2315 }
2316
2317 gate_ilock(gate);
2318 state = ordered_load_gate(gate);
2319 holder = GATE_STATE_TO_THREAD(state);
2320
2321 if (holder != NULL) {
2322 panic("Closing a gate already owned by %p from current thread %p", holder, current_thread());
2323 }
2324
2325 waiters = gate_has_waiters(state);
2326 assert(waiters == FALSE);
2327
2328 state = GATE_THREAD_TO_STATE(thread);
2329 state |= GATE_ILOCK;
2330 ordered_store_gate(gate, state);
2331
2332 gate_iunlock(gate);
2333 }
2334
2335 static void
2336 gate_open_turnstile(gate_t *gate)
2337 {
2338 struct turnstile *ts = NULL;
2339
2340 ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2341 waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
2342 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2343 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2344 turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2345 /*
2346 * We can do the cleanup while holding the interlock.
2347 * It is ok because:
2348 * 1. current_thread is the previous inheritor and it is running
2349 * 2. new inheritor is NULL.
2350 * => No chain of turnstiles needs to be updated.
2351 */
2352 turnstile_cleanup();
2353 }
2354
2355 static void
2356 gate_open(gate_t *gate)
2357 {
2358 uintptr_t state;
2359 thread_t holder;
2360 bool waiters;
2361 thread_t thread = current_thread();
2362
2363 if (os_atomic_cmpxchg(&gate->gate_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2364 return;
2365 }
2366
2367 gate_ilock(gate);
2368 state = ordered_load_gate(gate);
2369 holder = GATE_STATE_TO_THREAD(state);
2370 waiters = gate_has_waiters(state);
2371
2372 if (holder != thread) {
2373 panic("Opening gate owned by %p from current thread %p", holder, thread);
2374 }
2375
2376 if (waiters) {
2377 gate_open_turnstile(gate);
2378 }
2379
2380 state = GATE_ILOCK;
2381 ordered_store_gate(gate, state);
2382
2383 gate_iunlock(gate);
2384 }
2385
2386 static kern_return_t
2387 gate_handoff_turnstile(gate_t *gate,
2388 int flags,
2389 thread_t *thread_woken_up,
2390 bool *waiters)
2391 {
2392 struct turnstile *ts = NULL;
2393 kern_return_t ret = KERN_FAILURE;
2394 thread_t hp_thread;
2395
2396 ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2397 /*
2398 * Wake up the highest priority thread waiting on the gate
2399 */
2400 hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
2401
2402 if (hp_thread != NULL) {
2403 /*
2404 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
2405 */
2406 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2407 *thread_woken_up = hp_thread;
2408 *waiters = turnstile_has_waiters(ts);
2409 /*
2410 * Note: hp_thread is the new holder and the new inheritor.
2411 * In case there are no more waiters, it doesn't need to be the inheritor,
2412 * and it should no longer be by the time it finishes the wait, so that its next open or
2413 * handoff can go through the fast path.
2414 * We could set the inheritor to NULL here, or the new holder itself can set it
2415 * on its way back from the sleep. In the latter case there are more chances that
2416 * new waiters will come by, avoiding the operation altogether.
2417 */
2418 ret = KERN_SUCCESS;
2419 } else {
2420 /*
2421 * Waiters may have been woken up by an interrupt and still not
2422 * have updated gate->waiters, so we couldn't find them on the waitq.
2423 * Update the inheritor to NULL here, so that the current thread can return to userspace
2424 * independently of when the interrupted waiters finish the wait.
2425 */
2426 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2427 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2428 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2429 }
2430 // there are no waiters.
2431 ret = KERN_NOT_WAITING;
2432 }
2433
2434 turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2435
2436 /*
2437 * We can do the cleanup while holding the interlock.
2438 * It is ok because:
2439 * 1. current_thread is the previous inheritor and it is running
2440 * 2. new inheritor is NULL or it is a just-woken-up thread that will race to acquire the lock
2441 * of the gate before trying to sleep.
2442 * => No chain of turnstiles needs to be updated.
2443 */
2444 turnstile_cleanup();
2445
2446 return ret;
2447 }
2448
2449 static kern_return_t
2450 gate_handoff(gate_t *gate,
2451 int flags)
2452 {
2453 kern_return_t ret;
2454 thread_t new_holder = NULL;
2455 uintptr_t state;
2456 thread_t holder;
2457 bool waiters;
2458 thread_t thread = current_thread();
2459
2460 assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2461
2462 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2463 if (os_atomic_cmpxchg(&gate->gate_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2464 //gate opened but there were no waiters, so return KERN_NOT_WAITING.
2465 return KERN_NOT_WAITING;
2466 }
2467 }
2468
2469 gate_ilock(gate);
2470 state = ordered_load_gate(gate);
2471 holder = GATE_STATE_TO_THREAD(state);
2472 waiters = gate_has_waiters(state);
2473
2474 if (holder != current_thread()) {
2475 panic("Handing off gate owned by %p from current thread %p", holder, current_thread());
2476 }
2477
2478 if (waiters) {
2479 ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2480 if (ret == KERN_SUCCESS) {
2481 state = GATE_THREAD_TO_STATE(new_holder);
2482 if (waiters) {
2483 state |= GATE_WAITERS;
2484 }
2485 } else {
2486 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2487 state = 0;
2488 }
2489 }
2490 } else {
2491 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2492 state = 0;
2493 }
2494 ret = KERN_NOT_WAITING;
2495 }
2496 state |= GATE_ILOCK;
2497 ordered_store_gate(gate, state);
2498
2499 gate_iunlock(gate);
2500
2501 if (new_holder) {
2502 thread_deallocate(new_holder);
2503 }
2504 return ret;
2505 }
2506
2507 static void_func_void
2508 gate_steal_turnstile(gate_t *gate,
2509 thread_t new_inheritor)
2510 {
2511 struct turnstile *ts = NULL;
2512
2513 ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2514
2515 turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
2516 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2517 turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2518
2519 /*
2520 * turnstile_cleanup might need to update the chain of the old holder.
2521 * This operation should happen without the turnstile interlock held.
2522 */
2523 return turnstile_cleanup;
2524 }
2525
2526 static void
2527 gate_steal(gate_t *gate)
2528 {
2529 uintptr_t state;
2530 thread_t holder;
2531 thread_t thread = current_thread();
2532 bool waiters;
2533
2534 void_func_void func_after_interlock_unlock;
2535
2536 gate_ilock(gate);
2537 state = ordered_load_gate(gate);
2538 holder = GATE_STATE_TO_THREAD(state);
2539 waiters = gate_has_waiters(state);
2540
2541 assert(holder != NULL);
2542 state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
2543 if (waiters) {
2544 state |= GATE_WAITERS;
2545 ordered_store_gate(gate, state);
2546 func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
2547 gate_iunlock(gate);
2548
2549 func_after_interlock_unlock();
2550 } else {
2551 ordered_store_gate(gate, state);
2552 gate_iunlock(gate);
2553 }
2554 }
2555
2556 static void_func_void
2557 gate_wait_turnstile(gate_t *gate,
2558 wait_interrupt_t interruptible,
2559 uint64_t deadline,
2560 thread_t holder,
2561 wait_result_t* wait,
2562 bool* waiters)
2563 {
2564 struct turnstile *ts;
2565 uintptr_t state;
2566
2567 ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2568
2569 turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
2570 waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);
2571
2572 gate_iunlock(gate);
2573
2574 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2575
2576 *wait = thread_block(THREAD_CONTINUE_NULL);
2577
2578 gate_ilock(gate);
2579
2580 *waiters = turnstile_has_waiters(ts);
2581
2582 if (!*waiters) {
2583 /*
2584 * We want to enable the fast path as soon as we see that there are no more waiters.
2585 * On the fast path the holder will not do any turnstile operations.
2586 * Set the inheritor as NULL here.
2587 *
2588 * NOTE: if it was an open operation that woke this thread up, the inheritor has
2589 * already been set to NULL.
2590 */
2591 state = ordered_load_gate(gate);
2592 holder = GATE_STATE_TO_THREAD(state);
2593 if (holder &&
2594 ((*wait != THREAD_AWAKENED) || // thread interrupted or timed out
2595 holder == current_thread())) { // thread was woken up and it is the new holder
2596 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2597 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2598 }
2599 }
2600
2601 turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2602
2603 /*
2604 * turnstile_cleanup might need to update the chain of the old holder.
2605 * This operation should happen without the turnstile primitive interlock held.
2606 */
2607 return turnstile_cleanup;
2608 }
2609
2610 static gate_wait_result_t
2611 gate_wait(gate_t* gate,
2612 wait_interrupt_t interruptible,
2613 uint64_t deadline,
2614 void (^primitive_unlock)(void),
2615 void (^primitive_lock)(void))
2616 {
2617 gate_wait_result_t ret;
2618 void_func_void func_after_interlock_unlock;
2619 wait_result_t wait_result;
2620 uintptr_t state;
2621 thread_t holder;
2622 bool waiters;
2623
2624
2625 gate_ilock(gate);
2626 state = ordered_load_gate(gate);
2627 holder = GATE_STATE_TO_THREAD(state);
2628
2629 if (holder == NULL) {
2630 panic("Trying to wait on open gate thread %p gate %p", current_thread(), gate);
2631 }
2632
2633 state |= GATE_WAITERS;
2634 ordered_store_gate(gate, state);
2635
2636 /*
2637 * Release the primitive lock before any
2638 * turnstile operation. Turnstile
2639 * does not support a blocking primitive as
2640 * interlock.
2641 *
2642 * In this way, concurrent threads will be
2643 * able to acquire the primitive lock
2644 * but will still wait for me through the
2645 * gate interlock.
2646 */
2647 primitive_unlock();
2648
2649 func_after_interlock_unlock = gate_wait_turnstile( gate,
2650 interruptible,
2651 deadline,
2652 holder,
2653 &wait_result,
2654 &waiters);
2655
2656 state = ordered_load_gate(gate);
2657 holder = GATE_STATE_TO_THREAD(state);
2658
2659 switch (wait_result) {
2660 case THREAD_INTERRUPTED:
2661 case THREAD_TIMED_OUT:
2662 assert(holder != current_thread());
2663
2664 if (waiters) {
2665 state |= GATE_WAITERS;
2666 } else {
2667 state &= ~GATE_WAITERS;
2668 }
2669 ordered_store_gate(gate, state);
2670
2671 if (wait_result == THREAD_INTERRUPTED) {
2672 ret = GATE_INTERRUPTED;
2673 } else {
2674 ret = GATE_TIMED_OUT;
2675 }
2676 break;
2677 default:
2678 /*
2679 * Note that it is possible that even if the gate was handed off to
2680 * me, someone called gate_steal() before I woke up.
2681 *
2682 * It is also possible that the gate was opened, but someone
2683 * closed it while I was waking up.
2684 *
2685 * In both cases we return GATE_OPENED: the gate was opened to me
2686 * at one point, and it is the caller's responsibility to check again
2687 * whether the gate is open.
2688 */
2689 if (holder == current_thread()) {
2690 ret = GATE_HANDOFF;
2691 } else {
2692 ret = GATE_OPENED;
2693 }
2694 break;
2695 }
2696
2697 gate_iunlock(gate);
2698
2699 /*
2700 * turnstile func that needs to be executed without
2701 * holding the primitive interlock
2702 */
2703 func_after_interlock_unlock();
2704
2705 primitive_lock();
2706
2707 return ret;
2708 }
2709 static void
2710 gate_assert(gate_t *gate, int flags)
2711 {
2712 uintptr_t state;
2713 thread_t holder;
2714
2715 gate_ilock(gate);
2716 state = ordered_load_gate(gate);
2717 holder = GATE_STATE_TO_THREAD(state);
2718
2719 switch (flags) {
2720 case GATE_ASSERT_CLOSED:
2721 assert(holder != NULL);
2722 break;
2723 case GATE_ASSERT_OPEN:
2724 assert(holder == NULL);
2725 break;
2726 case GATE_ASSERT_HELD:
2727 assert(holder == current_thread());
2728 break;
2729 default:
2730 panic("invalid %s flag %d", __func__, flags);
2731 }
2732
2733 gate_iunlock(gate);
2734 }
2735
2736 static void
2737 gate_init(gate_t *gate)
2738 {
2739 gate->gate_data = 0;
2740 gate->turnstile = NULL;
2741 }
2742
2743 static void
2744 gate_destroy(__assert_only gate_t *gate)
2745 {
2746 assert(gate->gate_data == 0);
2747 assert(gate->turnstile == NULL);
2748 }
2749
2750 /*
2751 * Name: lck_rw_gate_init
2752 *
2753 * Description: initializes a variable declared with decl_lck_rw_gate_data.
2754 *
2755 * Args:
2756 * Arg1: lck_rw_t lock used to protect the gate.
2757 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2758 */
2759 void
2760 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2761 {
2762 (void) lock;
2763 gate_init(gate);
2764 }
2765
2766 /*
2767 * Name: lck_rw_gate_destroy
2768 *
2769 * Description: destroys a variable previously initialized.
2770 *
2771 * Args:
2772 * Arg1: lck_rw_t lock used to protect the gate.
2773 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2774 */
2775 void
2776 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2777 {
2778 (void) lock;
2779 gate_destroy(gate);
2780 }
2781
2782 /*
2783 * Name: lck_rw_gate_try_close
2784 *
2785 * Description: Tries to close the gate.
2786 * In case of success the current thread will be set as
2787 * the holder of the gate.
2788 *
2789 * Args:
2790 * Arg1: lck_rw_t lock used to protect the gate.
2791 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2792 *
2793 * Conditions: Lock must be held. Returns with the lock held.
2794 *
2795 * Returns:
2796 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2797 * of the gate.
2798 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2799 * to wake up possible waiters on the gate before returning to userspace.
2800 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2801 * between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2802 *
2803 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2804 * lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2805 * The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2806 * be done without dropping the lock that is protecting the gate in between.
2807 */
2808 int
2809 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2810 {
2811 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2812
2813 return gate_try_close(gate);
2814 }
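
/*
 * Example (editorial sketch, not part of the original source): the
 * probe-then-wait pattern described above, with the rw lock never dropped
 * between lck_rw_gate_try_close() and lck_rw_gate_wait(). example_lock and
 * example_gate are hypothetical and assumed initialized with
 * lck_rw_gate_init() elsewhere.
 */
#if 0
static void
example_close_gate(lck_rw_t *example_lock, gate_t *example_gate)
{
        lck_rw_lock_exclusive(example_lock);

        while (lck_rw_gate_try_close(example_lock, example_gate) != KERN_SUCCESS) {
                /*
                 * Gate already closed: wait without dropping the lock between
                 * the probe and the wait. GATE_HANDOFF means we now hold it;
                 * on GATE_OPENED we simply probe again.
                 */
                if (lck_rw_gate_wait(example_lock, example_gate,
                    LCK_SLEEP_DEFAULT, THREAD_UNINT,
                    TIMEOUT_WAIT_FOREVER) == GATE_HANDOFF) {
                        break;
                }
        }

        /* We are now the holder; a later lck_rw_gate_open() must follow. */
        lck_rw_unlock_exclusive(example_lock);
}
#endif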
2815
2816 /*
2817 * Name: lck_rw_gate_close
2818 *
2819 * Description: Closes the gate. The current thread will be set as
2820 * the holder of the gate. Will panic if the gate is already closed.
2821 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2822 * to wake up possible waiters on the gate before returning to userspace.
2823 *
2824 * Args:
2825 * Arg1: lck_rw_t lock used to protect the gate.
2826 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2827 *
2828 * Conditions: Lock must be held. Returns with the lock held.
2829 * The gate must be open.
2830 *
2831 */
2832 void
2833 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2834 {
2835 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2836
2837 return gate_close(gate);
2838 }
2839
2840 /*
2841 * Name: lck_rw_gate_open
2842 *
2843 * Description: Opens the gate and wakes up possible waiters.
2844 *
2845 * Args:
2846 * Arg1: lck_rw_t lock used to protect the gate.
2847 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2848 *
2849 * Conditions: Lock must be held. Returns with the lock held.
2850 * The current thread must be the holder of the gate.
2851 *
2852 */
2853 void
2854 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2855 {
2856 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2857
2858 gate_open(gate);
2859 }
2860
2861 /*
2862 * Name: lck_rw_gate_handoff
2863 *
2864 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2865 * priority will be selected as the new holder of the gate, and woken up,
2866 * with the gate remaining in the closed state throughout.
2867 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2868 * will be returned.
2869 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2870 * case no waiters were found.
2871 *
2872 *
2873 * Args:
2874 * Arg1: lck_rw_t lock used to protect the gate.
2875 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2876 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2877 *
2878 * Conditions: Lock must be held. Returns with the lock held.
2879 * The current thread must be the holder of the gate.
2880 *
2881 * Returns:
2882 * KERN_SUCCESS in case one of the waiters became the new holder.
2883 * KERN_NOT_WAITING in case there were no waiters.
2884 *
2885 */
2886 kern_return_t
2887 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, int flags)
2888 {
2889 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2890
2891 return gate_handoff(gate, flags);
2892 }
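
/*
 * Example (editorial sketch, not part of the original source): the current
 * gate holder hands ownership to the highest-priority waiter, opening the
 * gate instead if nobody is waiting. Names are hypothetical; the gate is
 * assumed initialized elsewhere.
 */
#if 0
static void
example_pass_gate(lck_rw_t *example_lock, gate_t *example_gate)
{
        kern_return_t kr;

        lck_rw_lock_exclusive(example_lock);
        lck_rw_gate_assert(example_lock, example_gate, GATE_ASSERT_HELD);

        kr = lck_rw_gate_handoff(example_lock, example_gate,
            GATE_HANDOFF_OPEN_IF_NO_WAITERS);
        /*
         * KERN_SUCCESS: a waiter is the new holder and has been woken up.
         * KERN_NOT_WAITING: nobody was waiting, so the gate is now open.
         */
        (void) kr;

        lck_rw_unlock_exclusive(example_lock);
}
#endif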
2893
2894 /*
2895 * Name: lck_rw_gate_steal
2896 *
2897 * Description: Steals the ownership of the gate. It sets the current thread as the
2898 * new holder of the gate.
2899 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2900 * to wake up possible waiters on the gate before returning to userspace.
2901 * NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2902 * anymore.
2903 *
2904 *
2905 * Args:
2906 * Arg1: lck_rw_t lock used to protect the gate.
2907 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2908 *
2909 * Conditions: Lock must be held. Returns with the lock held.
2910 * The gate must be closed and the current thread must not already be the holder.
2911 *
2912 */
2913 void
2914 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2915 {
2916 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2917
2918 gate_steal(gate);
2919 }
2920
2921 /*
2922 * Name: lck_rw_gate_wait
2923 *
2924 * Description: Waits for the current thread to become the holder of the gate or for the
2925 * gate to become open. An interruptible mode and deadline can be specified
2926 * to return earlier from the wait.
2927 *
2928 * Args:
2929 * Arg1: lck_rw_t lock used to protect the gate.
2930 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2931 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
2932 * Arg4: interruptible flag for wait.
2933 * Arg5: deadline for wait.
2934 *
2935 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2936 * Lock will be dropped while waiting.
2937 * The gate must be closed.
2938 *
2939 * Returns: Reason why the thread was woken up.
2940 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2941 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2942 * to wake up possible waiters on the gate before returning to userspace.
2943 * GATE_OPENED - the gate was opened by the holder.
2944 * GATE_TIMED_OUT - the thread was woken up by a timeout.
2945 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
2946 *
2947 */
2948 gate_wait_result_t
2949 lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
2950 {
2951 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2952
2953 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2954
2955 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2956 return gate_wait(gate,
2957 interruptible,
2958 deadline,
2959 ^{lck_rw_type = lck_rw_done(lock);},
2960 ^{;});
2961 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2962 return gate_wait(gate,
2963 interruptible,
2964 deadline,
2965 ^{lck_rw_type = lck_rw_done(lock);},
2966 ^{lck_rw_lock(lock, lck_rw_type);});
2967 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2968 return gate_wait(gate,
2969 interruptible,
2970 deadline,
2971 ^{lck_rw_type = lck_rw_done(lock);},
2972 ^{lck_rw_lock_exclusive(lock);});
2973 } else {
2974 return gate_wait(gate,
2975 interruptible,
2976 deadline,
2977 ^{lck_rw_type = lck_rw_done(lock);},
2978 ^{lck_rw_lock_shared(lock);});
2979 }
2980 }
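
/*
 * Example (editorial sketch, not part of the original source): handling each
 * gate_wait_result_t value returned by lck_rw_gate_wait(). Names are
 * hypothetical; the gate is assumed to have been closed by another thread.
 */
#if 0
static bool
example_wait_for_gate(lck_rw_t *example_lock, gate_t *example_gate,
    uint64_t deadline)
{
        gate_wait_result_t res;

        lck_rw_lock_exclusive(example_lock);
        lck_rw_gate_assert(example_lock, example_gate, GATE_ASSERT_CLOSED);

        res = lck_rw_gate_wait(example_lock, example_gate, LCK_SLEEP_DEFAULT,
            THREAD_INTERRUPTIBLE, deadline);

        switch (res) {
        case GATE_HANDOFF:
                /* We are the new holder and must open or hand off the gate. */
                lck_rw_gate_open(example_lock, example_gate);
                break;
        case GATE_OPENED:
                /* Gate was opened; it may have been closed again since. */
                break;
        case GATE_TIMED_OUT:
        case GATE_INTERRUPTED:
                break;
        }

        lck_rw_unlock_exclusive(example_lock);
        return res == GATE_HANDOFF || res == GATE_OPENED;
}
#endif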
2981
2982 /*
2983 * Name: lck_rw_gate_assert
2984 *
2985 * Description: asserts that the gate is in the specified state.
2986 *
2987 * Args:
2988 * Arg1: lck_rw_t lock used to protect the gate.
2989 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2990 * Arg3: flags to specify the assert type.
2991 * GATE_ASSERT_CLOSED - the gate is currently closed
2992 * GATE_ASSERT_OPEN - the gate is currently opened
2993 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2994 */
2995 void
2996 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, int flags)
2997 {
2998 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2999
3000 gate_assert(gate, flags);
3001 return;
3002 }
3003
3004 /*
3005 * Name: lck_mtx_gate_init
3006 *
3007 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
3008 *
3009 * Args:
3010 * Arg1: lck_mtx_t lock used to protect the gate.
3011 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3012 */
3013 void
3014 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
3015 {
3016 (void) lock;
3017 gate_init(gate);
3018 }
3019
3020 /*
3021 * Name: lck_mtx_gate_destroy
3022 *
3023 * Description: destroys a variable previously initialized
3024 *
3025 * Args:
3026 * Arg1: lck_mtx_t lock used to protect the gate.
3027 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3028 */
3029 void
3030 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
3031 {
3032 (void) lock;
3033 gate_destroy(gate);
3034 }
3035
3036 /*
3037 * Name: lck_mtx_gate_try_close
3038 *
3039 * Description: Tries to close the gate.
3040 * In case of success the current thread will be set as
3041 * the holder of the gate.
3042 *
3043 * Args:
3044 * Arg1: lck_mtx_t lock used to protect the gate.
3045 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3046 *
3047 * Conditions: Lock must be held. Returns with the lock held.
3048 *
3049 * Returns:
3050 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3051 * of the gate.
3052 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3053 * to wake up possible waiters on the gate before returning to userspace.
3054 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3055 * between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3056 *
3057 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3058 * lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3059 * The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3060 * be done without dropping the lock that is protecting the gate in between.
3061 */
3062 int
3063 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3064 {
3065 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3066
3067 return gate_try_close(gate);
3068 }
3069
3070 /*
3071 * Name: lck_mtx_gate_close
3072 *
3073 * Description: Closes the gate. The current thread will be set as
3074 * the holder of the gate. Will panic if the gate is already closed.
3075 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3076 * to wake up possible waiters on the gate before returning to userspace.
3077 *
3078 * Args:
3079 * Arg1: lck_mtx_t lock used to protect the gate.
3080 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3081 *
3082 * Conditions: Lock must be held. Returns with the lock held.
3083 * The gate must be open.
3084 *
3085 */
3086 void
3087 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3088 {
3089 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3090
3091 return gate_close(gate);
3092 }
3093
3094 /*
3095 * Name: lck_mtx_gate_open
3096 *
3097 * Description: Opens the gate and wakes up possible waiters.
3098 *
3099 * Args:
3100 * Arg1: lck_mtx_t lock used to protect the gate.
3101 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3102 *
3103 * Conditions: Lock must be held. Returns with the lock held.
3104 * The current thread must be the holder of the gate.
3105 *
3106 */
3107 void
3108 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3109 {
3110 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3111
3112 gate_open(gate);
3113 }
3114
3115 /*
3116 * Name: lck_mtx_gate_handoff
3117 *
3118 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3119 * priority will be selected as the new holder of the gate, and woken up,
3120 * with the gate remaining in the closed state throughout.
3121 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3122 * will be returned.
3123 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3124 * case no waiters were found.
3125 *
3126 *
3127 * Args:
3128 * Arg1: lck_mtx_t lock used to protect the gate.
3129 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3130 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3131 *
3132 * Conditions: Lock must be held. Returns with the lock held.
3133 * The current thread must be the holder of the gate.
3134 *
3135 * Returns:
3136 * KERN_SUCCESS in case one of the waiters became the new holder.
3137 * KERN_NOT_WAITING in case there were no waiters.
3138 *
3139 */
3140 kern_return_t
3141 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, int flags)
3142 {
3143 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3144
3145 return gate_handoff(gate, flags);
3146 }
3147
3148 /*
3149 * Name: lck_mtx_gate_steal
3150 *
3151 * Description: Steals the ownership of the gate. It sets the current thread as the
3152 * new holder of the gate.
3153 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3154 * to wake up possible waiters on the gate before returning to userspace.
3155 * NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3156 * anymore.
3157 *
3158 *
3159 * Args:
3160 * Arg1: lck_mtx_t lock used to protect the gate.
3161 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3162 *
3163 * Conditions: Lock must be held. Returns with the lock held.
3164 * The gate must be closed and the current thread must not already be the holder.
3165 *
3166 */
3167 void
3168 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3169 {
3170 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3171
3172 gate_steal(gate);
3173 }
3174
3175 /*
3176 * Name: lck_mtx_gate_wait
3177 *
3178 * Description: Waits for the current thread to become the holder of the gate or for the
3179 * gate to become open. An interruptible mode and deadline can be specified
3180 * to return earlier from the wait.
3181 *
3182 * Args:
3183 * Arg1: lck_mtx_t lock used to protect the gate.
3184 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3185 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
3186 * Arg4: interruptible flag for wait.
3187 * Arg5: deadline for wait.
3188 *
3189 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3190 * Lock will be dropped while waiting.
3191 * The gate must be closed.
3192 *
3193 * Returns: Reason why the thread was woken up.
3194 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3195 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3196 * to wake up possible waiters on the gate before returning to userspace.
3197 * GATE_OPENED - the gate was opened by the holder.
3198 * GATE_TIMED_OUT - the thread was woken up by a timeout.
3199 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
3200 *
3201 */
3202 gate_wait_result_t
3203 lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
3204 {
3205 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3206
3207 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
3208 return gate_wait(gate,
3209 interruptible,
3210 deadline,
3211 ^{lck_mtx_unlock(lock);},
3212 ^{;});
3213 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
3214 return gate_wait(gate,
3215 interruptible,
3216 deadline,
3217 ^{lck_mtx_unlock(lock);},
3218 ^{lck_mtx_lock_spin(lock);});
3219 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
3220 return gate_wait(gate,
3221 interruptible,
3222 deadline,
3223 ^{lck_mtx_unlock(lock);},
3224 ^{lck_mtx_lock_spin_always(lock);});
3225 } else {
3226 return gate_wait(gate,
3227 interruptible,
3228 deadline,
3229 ^{lck_mtx_unlock(lock);},
3230 ^{lck_mtx_lock(lock);});
3231 }
3232 }
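
/*
 * Example (editorial sketch, not part of the original source): a one-time
 * setup protected by a mutex gate. The first thread closes the gate, does
 * the setup with the mutex dropped, then opens the gate; later threads wait
 * for it to open. All names are hypothetical, and example_do_setup() is a
 * hypothetical helper; the lock and gate are assumed initialized elsewhere.
 */
#if 0
static lck_mtx_t example_mtx;           /* assumed initialized elsewhere */
static gate_t    example_gate;          /* assumed lck_mtx_gate_init()'ed */
static bool      example_initialized;

static void
example_init_once(void)
{
        lck_mtx_lock(&example_mtx);

        while (!example_initialized) {
                if (lck_mtx_gate_try_close(&example_mtx, &example_gate) ==
                    KERN_SUCCESS) {
                        /* We won the race: set up with the mutex dropped. */
                        lck_mtx_unlock(&example_mtx);
                        example_do_setup();             /* hypothetical helper */
                        lck_mtx_lock(&example_mtx);
                        example_initialized = true;
                        lck_mtx_gate_open(&example_mtx, &example_gate);
                } else {
                        /*
                         * Mutex is dropped and reacquired while waiting.
                         * Only gate_open() is used here, so no handoff result
                         * needs special handling; just re-check the flag.
                         */
                        (void) lck_mtx_gate_wait(&example_mtx, &example_gate,
                            LCK_SLEEP_DEFAULT, THREAD_UNINT,
                            TIMEOUT_WAIT_FOREVER);
                }
        }

        lck_mtx_unlock(&example_mtx);
}
#endif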
3233
3234 /*
3235 * Name: lck_mtx_gate_assert
3236 *
3237 * Description: asserts that the gate is in the specified state.
3238 *
3239 * Args:
3240 * Arg1: lck_mtx_t lock used to protect the gate.
3241 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3242 * Arg3: flags to specify the assert type.
3243 * GATE_ASSERT_CLOSED - the gate is currently closed
3244 * GATE_ASSERT_OPEN - the gate is currently opened
3245 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3246 */
3247 void
3248 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, int flags)
3249 {
3250 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3251
3252 gate_assert(gate, flags);
3253 }