osfmk/kern/locks.c (xnu-7195.101.1)
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #define LOCK_PRIVATE 1
58
59 #include <mach_ldebug.h>
60 #include <debug.h>
61
62 #include <mach/kern_return.h>
63 #include <mach/mach_host_server.h>
64 #include <mach_debug/lockgroup_info.h>
65
66 #include <kern/lock_stat.h>
67 #include <kern/locks.h>
68 #include <kern/misc_protos.h>
69 #include <kern/zalloc.h>
70 #include <kern/thread.h>
71 #include <kern/processor.h>
72 #include <kern/sched_prim.h>
73 #include <kern/debug.h>
74 #include <libkern/section_keywords.h>
75 #include <machine/atomic.h>
76 #include <machine/machine_cpu.h>
77 #include <string.h>
78
79 #include <sys/kdebug.h>
80
81 #define LCK_MTX_SLEEP_CODE 0
82 #define LCK_MTX_SLEEP_DEADLINE_CODE 1
83 #define LCK_MTX_LCK_WAIT_CODE 2
84 #define LCK_MTX_UNLCK_WAKEUP_CODE 3
85
86 #if MACH_LDEBUG
87 #define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
88 #else
89 #define ALIGN_TEST(p, t) do{}while(0)
90 #endif
91
92 #define NOINLINE __attribute__((noinline))
93
94 #define ordered_load_hw(lock) os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
95 #define ordered_store_hw(lock, value) os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
96
97
98 queue_head_t lck_grp_queue;
99 unsigned int lck_grp_cnt;
100
101 decl_lck_mtx_data(, lck_grp_lock);
102 static lck_mtx_ext_t lck_grp_lock_ext;
103
104 SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
105
106 /* Obtain "lcks" options: this currently controls lock statistics */
107 TUNABLE(uint32_t, LcksOpts, "lcks", 0);
108
109 ZONE_VIEW_DEFINE(ZV_LCK_GRP_ATTR, "lck_grp_attr",
110 KHEAP_ID_DEFAULT, sizeof(lck_grp_attr_t));
111
112 ZONE_VIEW_DEFINE(ZV_LCK_GRP, "lck_grp",
113 KHEAP_ID_DEFAULT, sizeof(lck_grp_t));
114
115 ZONE_VIEW_DEFINE(ZV_LCK_ATTR, "lck_attr",
116 KHEAP_ID_DEFAULT, sizeof(lck_attr_t));
117
118 lck_grp_attr_t LockDefaultGroupAttr;
119 lck_grp_t LockCompatGroup;
120 lck_attr_t LockDefaultLckAttr;
121
122 #if CONFIG_DTRACE
123 #if defined (__x86_64__)
124 uint64_t dtrace_spin_threshold = 500; // 500ns
125 #elif defined(__arm__) || defined(__arm64__)
126 uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // 500ns
127 #endif
128 #endif
129
130 uintptr_t
131 unslide_for_kdebug(void* object)
132 {
133 if (__improbable(kdebug_enable)) {
134 return VM_KERNEL_UNSLIDE_OR_PERM(object);
135 } else {
136 return 0;
137 }
138 }
139
140 __startup_func
141 static void
142 lck_mod_init(void)
143 {
144 queue_init(&lck_grp_queue);
145
146 /*
147 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
148 * grabbing the lck_grp_lock before it is initialized.
149 */
150
151 bzero(&LockCompatGroup, sizeof(lck_grp_t));
152 (void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);
153
154 LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;
155
156 if (LcksOpts & enaLkStat) {
157 LockCompatGroup.lck_grp_attr |= LCK_GRP_ATTR_STAT;
158 }
159 if (LcksOpts & enaLkTimeStat) {
160 LockCompatGroup.lck_grp_attr |= LCK_GRP_ATTR_TIME_STAT;
161 }
162
163 os_ref_init(&LockCompatGroup.lck_grp_refcnt, NULL);
164
165 enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
166 lck_grp_cnt = 1;
167
168 lck_grp_attr_setdefault(&LockDefaultGroupAttr);
169 lck_attr_setdefault(&LockDefaultLckAttr);
170
171 lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
172 }
173 STARTUP(LOCKS_EARLY, STARTUP_RANK_FIRST, lck_mod_init);
174
175 /*
176 * Routine: lck_grp_attr_alloc_init
177 */
178
179 lck_grp_attr_t *
180 lck_grp_attr_alloc_init(
181 void)
182 {
183 lck_grp_attr_t *attr;
184
185 attr = zalloc(ZV_LCK_GRP_ATTR);
186 lck_grp_attr_setdefault(attr);
187 return attr;
188 }
189
190
191 /*
192 * Routine: lck_grp_attr_setdefault
193 */
194
195 void
196 lck_grp_attr_setdefault(
197 lck_grp_attr_t *attr)
198 {
199 if (LcksOpts & enaLkStat) {
200 attr->grp_attr_val = LCK_GRP_ATTR_STAT;
201 } else {
202 attr->grp_attr_val = 0;
203 }
204 }
205
206
207 /*
208 * Routine: lck_grp_attr_setstat
209 */
210
211 void
212 lck_grp_attr_setstat(
213 lck_grp_attr_t *attr)
214 {
215 #pragma unused(attr)
216 os_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT, relaxed);
217 }
218
219
220 /*
221 * Routine: lck_grp_attr_free
222 */
223
224 void
225 lck_grp_attr_free(
226 lck_grp_attr_t *attr)
227 {
228 zfree(ZV_LCK_GRP_ATTR, attr);
229 }
230
231
232 /*
233 * Routine: lck_grp_alloc_init
234 */
235
236 lck_grp_t *
237 lck_grp_alloc_init(
238 const char* grp_name,
239 lck_grp_attr_t *attr)
240 {
241 lck_grp_t *grp;
242
243 grp = zalloc(ZV_LCK_GRP);
244 lck_grp_init(grp, grp_name, attr);
245 return grp;
246 }
247
248 /*
249 * Routine: lck_grp_init
250 */
251
252 void
253 lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
254 {
255 /* make sure locking infrastructure has been initialized */
256 assert(lck_grp_cnt > 0);
257
258 bzero((void *)grp, sizeof(lck_grp_t));
259
260 (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
261
262 if (attr != LCK_GRP_ATTR_NULL) {
263 grp->lck_grp_attr = attr->grp_attr_val;
264 } else {
265 grp->lck_grp_attr = 0;
266 if (LcksOpts & enaLkStat) {
267 grp->lck_grp_attr |= LCK_GRP_ATTR_STAT;
268 }
269 if (LcksOpts & enaLkTimeStat) {
270 grp->lck_grp_attr |= LCK_GRP_ATTR_TIME_STAT;
271 }
272 }
273
274 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
275 lck_grp_stats_t *stats = &grp->lck_grp_stats;
276
277 #if LOCK_STATS
278 lck_grp_stat_enable(&stats->lgss_spin_held);
279 lck_grp_stat_enable(&stats->lgss_spin_miss);
280 #endif /* LOCK_STATS */
281
282 lck_grp_stat_enable(&stats->lgss_mtx_held);
283 lck_grp_stat_enable(&stats->lgss_mtx_miss);
284 lck_grp_stat_enable(&stats->lgss_mtx_direct_wait);
285 lck_grp_stat_enable(&stats->lgss_mtx_wait);
286 }
287 if (grp->lck_grp_attr & LCK_GRP_ATTR_TIME_STAT) {
288 #if LOCK_STATS
289 lck_grp_stats_t *stats = &grp->lck_grp_stats;
290 lck_grp_stat_enable(&stats->lgss_spin_spin);
291 #endif /* LOCK_STATS */
292 }
293
294 os_ref_init(&grp->lck_grp_refcnt, NULL);
295
296 lck_mtx_lock(&lck_grp_lock);
297 enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
298 lck_grp_cnt++;
299 lck_mtx_unlock(&lck_grp_lock);
300 }
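/*
 * Illustrative usage sketch (editor's example, not part of the original source):
 * the typical lifecycle of a lock group and its attributes when a subsystem sets
 * up a mutex. The my_* names are hypothetical; the calls are assumed to be the
 * public lck_* allocation routines declared in kern/locks.h.
 *
 *	lck_grp_attr_t *my_grp_attr = lck_grp_attr_alloc_init();
 *	lck_grp_attr_setstat(my_grp_attr);              request per-group statistics
 *	lck_grp_t *my_grp = lck_grp_alloc_init("com.example.mysubsystem", my_grp_attr);
 *	lck_attr_t *my_attr = lck_attr_alloc_init();
 *	lck_mtx_t *my_mtx = lck_mtx_alloc_init(my_grp, my_attr);
 *
 *	... use my_mtx ...
 *
 *	lck_mtx_free(my_mtx, my_grp);
 *	lck_attr_free(my_attr);
 *	lck_grp_attr_free(my_grp_attr);
 *	lck_grp_free(my_grp);
 */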
301
302 /*
303 * Routine: lck_grp_free
304 */
305
306 void
307 lck_grp_free(
308 lck_grp_t *grp)
309 {
310 lck_mtx_lock(&lck_grp_lock);
311 lck_grp_cnt--;
312 (void)remque((queue_entry_t)grp);
313 lck_mtx_unlock(&lck_grp_lock);
314 lck_grp_deallocate(grp);
315 }
316
317
318 /*
319 * Routine: lck_grp_reference
320 */
321
322 void
323 lck_grp_reference(
324 lck_grp_t *grp)
325 {
326 os_ref_retain(&grp->lck_grp_refcnt);
327 }
328
329
330 /*
331 * Routine: lck_grp_deallocate
332 */
333
334 void
335 lck_grp_deallocate(
336 lck_grp_t *grp)
337 {
338 if (os_ref_release(&grp->lck_grp_refcnt) != 0) {
339 return;
340 }
341
342 zfree(ZV_LCK_GRP, grp);
343 }
344
345 /*
346 * Routine: lck_grp_lckcnt_incr
347 */
348
349 void
350 lck_grp_lckcnt_incr(
351 lck_grp_t *grp,
352 lck_type_t lck_type)
353 {
354 unsigned int *lckcnt;
355
356 switch (lck_type) {
357 case LCK_TYPE_SPIN:
358 lckcnt = &grp->lck_grp_spincnt;
359 break;
360 case LCK_TYPE_MTX:
361 lckcnt = &grp->lck_grp_mtxcnt;
362 break;
363 case LCK_TYPE_RW:
364 lckcnt = &grp->lck_grp_rwcnt;
365 break;
366 case LCK_TYPE_TICKET:
367 lckcnt = &grp->lck_grp_ticketcnt;
368 break;
369 default:
370 return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
371 }
372
373 os_atomic_inc(lckcnt, relaxed);
374 }
375
376 /*
377 * Routine: lck_grp_lckcnt_decr
378 */
379
380 void
381 lck_grp_lckcnt_decr(
382 lck_grp_t *grp,
383 lck_type_t lck_type)
384 {
385 unsigned int *lckcnt;
386 int updated;
387
388 switch (lck_type) {
389 case LCK_TYPE_SPIN:
390 lckcnt = &grp->lck_grp_spincnt;
391 break;
392 case LCK_TYPE_MTX:
393 lckcnt = &grp->lck_grp_mtxcnt;
394 break;
395 case LCK_TYPE_RW:
396 lckcnt = &grp->lck_grp_rwcnt;
397 break;
398 case LCK_TYPE_TICKET:
399 lckcnt = &grp->lck_grp_ticketcnt;
400 break;
401 default:
402 panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
403 return;
404 }
405
406 updated = os_atomic_dec(lckcnt, relaxed);
407 assert(updated >= 0);
408 }
409
410 /*
411 * Routine: lck_attr_alloc_init
412 */
413
414 lck_attr_t *
415 lck_attr_alloc_init(
416 void)
417 {
418 lck_attr_t *attr;
419
420 attr = zalloc(ZV_LCK_ATTR);
421 lck_attr_setdefault(attr);
422 return attr;
423 }
424
425
426 /*
427 * Routine: lck_attr_setdefault
428 */
429
430 void
431 lck_attr_setdefault(
432 lck_attr_t *attr)
433 {
434 #if __arm__ || __arm64__
435 /* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
436 attr->lck_attr_val = LCK_ATTR_NONE;
437 #elif __i386__ || __x86_64__
438 #if !DEBUG
439 if (LcksOpts & enaLkDeb) {
440 attr->lck_attr_val = LCK_ATTR_DEBUG;
441 } else {
442 attr->lck_attr_val = LCK_ATTR_NONE;
443 }
444 #else
445 attr->lck_attr_val = LCK_ATTR_DEBUG;
446 #endif /* !DEBUG */
447 #else
448 #error Unknown architecture.
449 #endif /* __arm__ || __arm64__ */
450 }
451
452
453 /*
454 * Routine: lck_attr_setdebug
455 */
456 void
457 lck_attr_setdebug(
458 lck_attr_t *attr)
459 {
460 os_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG, relaxed);
461 }
462
463 /*
464 * Routine: lck_attr_cleardebug
465 */
466 void
467 lck_attr_cleardebug(
468 lck_attr_t *attr)
469 {
470 os_atomic_andnot(&attr->lck_attr_val, LCK_ATTR_DEBUG, relaxed);
471 }
472
473
474 /*
475 * Routine: lck_attr_rw_shared_priority
476 */
477 void
478 lck_attr_rw_shared_priority(
479 lck_attr_t *attr)
480 {
481 os_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY, relaxed);
482 }
483
484
485 /*
486 * Routine: lck_attr_free
487 */
488 void
489 lck_attr_free(
490 lck_attr_t *attr)
491 {
492 zfree(ZV_LCK_ATTR, attr);
493 }
494
495 /*
496 * Routine: hw_lock_init
497 *
498 * Initialize a hardware lock.
499 */
500 MARK_AS_HIBERNATE_TEXT void
501 hw_lock_init(hw_lock_t lock)
502 {
503 ordered_store_hw(lock, 0);
504 }
505
506 static inline bool
507 hw_lock_trylock_contended(hw_lock_t lock, uintptr_t newval)
508 {
509 #if OS_ATOMIC_USE_LLSC
510 uintptr_t oldval;
511 os_atomic_rmw_loop(&lock->lock_data, oldval, newval, acquire, {
512 if (oldval != 0) {
513 wait_for_event(); // clears the monitor so we don't need give_up()
514 return false;
515 }
516 });
517 return true;
518 #else // !OS_ATOMIC_USE_LLSC
519 #if OS_ATOMIC_HAS_LLSC
520 uintptr_t oldval = os_atomic_load_exclusive(&lock->lock_data, relaxed);
521 if (oldval != 0) {
522 wait_for_event(); // clears the monitor so we don't need give_up()
523 return false;
524 }
525 #endif // OS_ATOMIC_HAS_LLSC
526 return os_atomic_cmpxchg(&lock->lock_data, 0, newval, acquire);
527 #endif // !OS_ATOMIC_USE_LLSC
528 }
529
530 /*
531 * Routine: hw_lock_lock_contended
532 *
533 * Spin until lock is acquired or timeout expires.
534 * timeout is in mach_absolute_time ticks. Called with
535 * preemption disabled.
536 */
537 static unsigned int NOINLINE
538 hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic LCK_GRP_ARG(lck_grp_t *grp))
539 {
540 uint64_t end = 0;
541 uintptr_t holder = lock->lock_data;
542 int i;
543
544 if (timeout == 0) {
545 timeout = LOCK_PANIC_TIMEOUT;
546 }
547 #if CONFIG_DTRACE || LOCK_STATS
548 uint64_t begin = 0;
549 boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
550 #endif /* CONFIG_DTRACE || LOCK_STATS */
551
552 #if LOCK_STATS || CONFIG_DTRACE
553 if (__improbable(stat_enabled)) {
554 begin = mach_absolute_time();
555 }
556 #endif /* LOCK_STATS || CONFIG_DTRACE */
557 for (;;) {
558 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
559 cpu_pause();
560 #if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
561 holder = ordered_load_hw(lock);
562 if (holder != 0) {
563 continue;
564 }
565 #endif
566 if (hw_lock_trylock_contended(lock, data)) {
567 #if CONFIG_DTRACE || LOCK_STATS
568 if (__improbable(stat_enabled)) {
569 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
570 }
571 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
572 #endif /* CONFIG_DTRACE || LOCK_STATS */
573 return 1;
574 }
575 }
576 if (end == 0) {
577 end = ml_get_timebase() + timeout;
578 } else if (ml_get_timebase() >= end) {
579 break;
580 }
581 }
582 if (do_panic) {
583 // Capture the actual time spent blocked, which may be higher than the timeout
584 // if a misbehaving interrupt stole this thread's CPU time.
585 panic("Spinlock timeout after %llu ticks, %p = %lx",
586 (ml_get_timebase() - end + timeout), lock, holder);
587 }
588 return 0;
589 }
590
591 void *
592 hw_wait_while_equals(void **address, void *current)
593 {
594 void *v;
595 uint64_t end = 0;
596
597 for (;;) {
598 for (int i = 0; i < LOCK_SNOOP_SPINS; i++) {
599 cpu_pause();
600 #if OS_ATOMIC_HAS_LLSC
601 v = os_atomic_load_exclusive(address, relaxed);
602 if (__probable(v != current)) {
603 os_atomic_clear_exclusive();
604 return v;
605 }
606 wait_for_event();
607 #else
608 v = os_atomic_load(address, relaxed);
609 if (__probable(v != current)) {
610 return v;
611 }
612 #endif // OS_ATOMIC_HAS_LLSC
613 }
614 if (end == 0) {
615 end = ml_get_timebase() + LOCK_PANIC_TIMEOUT;
616 } else if (ml_get_timebase() >= end) {
617 panic("Wait while equals timeout @ *%p == %p", address, v);
618 }
619 }
620 }
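/*
 * Illustrative sketch (editor's example, not part of the original source):
 * hw_wait_while_equals() spins, using the monitor/wfe path where available,
 * until the value at *address differs from the snapshot passed in, and panics
 * after LOCK_PANIC_TIMEOUT. A caller waiting for another CPU to publish a
 * pointer might use it as below; my_slot is hypothetical.
 *
 *	void *observed = os_atomic_load(&my_slot, relaxed);
 *	if (observed == NULL) {
 *		observed = hw_wait_while_equals(&my_slot, NULL);
 *	}
 *	... observed now points at the published object ...
 */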
621
622 static inline void
623 hw_lock_lock_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
624 {
625 uintptr_t state;
626
627 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
628 #if LOCK_PRETEST
629 if (ordered_load_hw(lock)) {
630 goto contended;
631 }
632 #endif // LOCK_PRETEST
633 if (hw_lock_trylock_contended(lock, state)) {
634 goto end;
635 }
636 #if LOCK_PRETEST
637 contended:
638 #endif // LOCK_PRETEST
639 hw_lock_lock_contended(lock, state, 0, spinlock_timeout_panic LCK_GRP_ARG(grp));
640 end:
641 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
642
643 return;
644 }
645
646 /*
647 * Routine: hw_lock_lock
648 *
649 * Acquire lock, spinning until it becomes available,
650 * return with preemption disabled.
651 */
652 void
653 (hw_lock_lock)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
654 {
655 thread_t thread = current_thread();
656 disable_preemption_for_thread(thread);
657 hw_lock_lock_internal(lock, thread LCK_GRP_ARG(grp));
658 }
659
660 /*
661 * Routine: hw_lock_lock_nopreempt
662 *
663 * Acquire lock, spinning until it becomes available.
664 */
665 void
666 (hw_lock_lock_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
667 {
668 thread_t thread = current_thread();
669 if (__improbable(!preemption_disabled_for_thread(thread))) {
670 panic("Attempt to take no-preempt spinlock %p in preemptible context", lock);
671 }
672 hw_lock_lock_internal(lock, thread LCK_GRP_ARG(grp));
673 }
674
675 static inline unsigned int
676 hw_lock_to_internal(hw_lock_t lock, uint64_t timeout, thread_t thread
677 LCK_GRP_ARG(lck_grp_t *grp))
678 {
679 uintptr_t state;
680 unsigned int success = 0;
681
682 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
683 #if LOCK_PRETEST
684 if (ordered_load_hw(lock)) {
685 goto contended;
686 }
687 #endif // LOCK_PRETEST
688 if (hw_lock_trylock_contended(lock, state)) {
689 success = 1;
690 goto end;
691 }
692 #if LOCK_PRETEST
693 contended:
694 #endif // LOCK_PRETEST
695 success = hw_lock_lock_contended(lock, state, timeout, FALSE LCK_GRP_ARG(grp));
696 end:
697 if (success) {
698 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
699 }
700 return success;
701 }
702
703 /*
704 * Routine: hw_lock_to
705 *
706 * Acquire lock, spinning until it becomes available or timeout.
707 * Timeout is in mach_absolute_time ticks, return with
708 * preemption disabled.
709 */
710 unsigned
711 int
712 (hw_lock_to)(hw_lock_t lock, uint64_t timeout LCK_GRP_ARG(lck_grp_t *grp))
713 {
714 thread_t thread = current_thread();
715 disable_preemption_for_thread(thread);
716 return hw_lock_to_internal(lock, timeout, thread LCK_GRP_ARG(grp));
717 }
718
719 /*
720 * Routine: hw_lock_to_nopreempt
721 *
722 * Acquire lock, spinning until it becomes available or timeout.
723 * Timeout is in mach_absolute_time ticks, called and return with
724 * preemption disabled.
725 */
726 unsigned
727 int
728 (hw_lock_to_nopreempt)(hw_lock_t lock, uint64_t timeout LCK_GRP_ARG(lck_grp_t *grp))
729 {
730 thread_t thread = current_thread();
731 if (__improbable(!preemption_disabled_for_thread(thread))) {
732 panic("Attempt to test no-preempt spinlock %p in preemptible context", lock);
733 }
734 return hw_lock_to_internal(lock, timeout, thread LCK_GRP_ARG(grp));
735 }
736
737 /*
738 * Routine: hw_lock_try
739 *
740 * returns with preemption disabled on success.
741 */
742 static inline unsigned int
743 hw_lock_try_internal(hw_lock_t lock, thread_t thread LCK_GRP_ARG(lck_grp_t *grp))
744 {
745 int success = 0;
746
747 #if LOCK_PRETEST
748 if (ordered_load_hw(lock)) {
749 goto failed;
750 }
751 #endif // LOCK_PRETEST
752 success = os_atomic_cmpxchg(&lock->lock_data, 0,
753 LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK, acquire);
754
755 #if LOCK_PRETEST
756 failed:
757 #endif // LOCK_PRETEST
758 if (success) {
759 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
760 }
761 return success;
762 }
763
764 unsigned
765 int
766 (hw_lock_try)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
767 {
768 thread_t thread = current_thread();
769 disable_preemption_for_thread(thread);
770 unsigned int success = hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
771 if (!success) {
772 enable_preemption();
773 }
774 return success;
775 }
776
777 unsigned
778 int
779 (hw_lock_try_nopreempt)(hw_lock_t lock LCK_GRP_ARG(lck_grp_t *grp))
780 {
781 thread_t thread = current_thread();
782 if (__improbable(!preemption_disabled_for_thread(thread))) {
783 panic("Attempt to test no-preempt spinlock %p in preemptible context", lock);
784 }
785 return hw_lock_try_internal(lock, thread LCK_GRP_ARG(grp));
786 }
787
788 /*
789 * Routine: hw_lock_unlock
790 *
791 * Unconditionally release lock, release preemption level.
792 */
793 static inline void
794 hw_lock_unlock_internal(hw_lock_t lock)
795 {
796 os_atomic_store(&lock->lock_data, 0, release);
797 #if __arm__ || __arm64__
798 // ARM tests are only for open-source exclusion
799 set_event();
800 #endif // __arm__ || __arm64__
801 #if CONFIG_DTRACE
802 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
803 #endif /* CONFIG_DTRACE */
804 }
805
806 void
807 (hw_lock_unlock)(hw_lock_t lock)
808 {
809 hw_lock_unlock_internal(lock);
810 enable_preemption();
811 }
812
813 void
814 (hw_lock_unlock_nopreempt)(hw_lock_t lock)
815 {
816 if (__improbable(!preemption_disabled_for_thread(current_thread()))) {
817 panic("Attempt to release no-preempt spinlock %p in preemptible context", lock);
818 }
819 hw_lock_unlock_internal(lock);
820 }
821
822 /*
823 * Routine: hw_lock_held, doesn't change preemption state.
824 * N.B. Racy, of course.
825 */
826 unsigned int
827 hw_lock_held(hw_lock_t lock)
828 {
829 return ordered_load_hw(lock) != 0;
830 }
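/*
 * Illustrative usage sketch (editor's example, not part of the original source):
 * the low-level hw_lock API pairs acquisition with preemption disablement:
 * hw_lock_lock() disables preemption and spins, hw_lock_unlock() releases and
 * re-enables preemption, and the *_nopreempt variants expect the caller to
 * manage preemption itself. my_hw_lock is hypothetical, and depending on the
 * build configuration these calls may also take an lck_grp_t argument.
 *
 *	hw_lock_data_t my_hw_lock;
 *
 *	hw_lock_init(&my_hw_lock);
 *
 *	hw_lock_lock(&my_hw_lock);              spins; returns with preemption off
 *	... short critical section ...
 *	hw_lock_unlock(&my_hw_lock);            releases; re-enables preemption
 *
 *	if (hw_lock_try(&my_hw_lock)) {         non-blocking attempt
 *		... critical section ...
 *		hw_lock_unlock(&my_hw_lock);
 *	}
 */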
831
832 static unsigned int
833 hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp));
834
835 static inline unsigned int
836 hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
837 {
838 unsigned int success = 0;
839 uint32_t mask = (1 << bit);
840
841 if (__improbable(!hw_atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE))) {
842 success = hw_lock_bit_to_contended(lock, mask, timeout LCK_GRP_ARG(grp));
843 } else {
844 success = 1;
845 }
846
847 if (success) {
848 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
849 }
850
851 return success;
852 }
853
854 unsigned
855 int
856 (hw_lock_bit_to)(hw_lock_bit_t * lock, unsigned int bit, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
857 {
858 _disable_preemption();
859 return hw_lock_bit_to_internal(lock, bit, timeout LCK_GRP_ARG(grp));
860 }
861
862 static unsigned int NOINLINE
863 hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout LCK_GRP_ARG(lck_grp_t *grp))
864 {
865 uint64_t end = 0;
866 int i;
867 #if CONFIG_DTRACE || LOCK_STATS
868 uint64_t begin = 0;
869 boolean_t stat_enabled = lck_grp_spin_spin_enabled(lock LCK_GRP_ARG(grp));
870 #endif /* CONFIG_DTRACE || LOCK_STATS */
871
872 #if LOCK_STATS || CONFIG_DTRACE
873 if (__improbable(stat_enabled)) {
874 begin = mach_absolute_time();
875 }
876 #endif /* LOCK_STATS || CONFIG_DTRACE */
877 for (;;) {
878 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
879 // Always load-exclusive before wfe
880 // This grabs the monitor and wakes up on a release event
881 if (hw_atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
882 goto end;
883 }
884 }
885 if (end == 0) {
886 end = ml_get_timebase() + timeout;
887 } else if (ml_get_timebase() >= end) {
888 break;
889 }
890 }
891 return 0;
892 end:
893 #if CONFIG_DTRACE || LOCK_STATS
894 if (__improbable(stat_enabled)) {
895 lck_grp_spin_update_spin(lock LCK_GRP_ARG(grp), mach_absolute_time() - begin);
896 }
897 lck_grp_spin_update_miss(lock LCK_GRP_ARG(grp));
898 #endif /* CONFIG_DTRACE || LOCK_STATS */
899
900 return 1;
901 }
902
903 void
904 (hw_lock_bit)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
905 {
906 if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT, LCK_GRP_PROBEARG(grp))) {
907 return;
908 }
909 panic("hw_lock_bit(): timed out (%p)", lock);
910 }
911
912 void
913 (hw_lock_bit_nopreempt)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
914 {
915 if (__improbable(get_preemption_level() == 0)) {
916 panic("Attempt to take no-preempt bitlock %p in preemptible context", lock);
917 }
918 if (hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT LCK_GRP_ARG(grp))) {
919 return;
920 }
921 panic("hw_lock_bit_nopreempt(): timed out (%p)", lock);
922 }
923
924 unsigned
925 int
926 (hw_lock_bit_try)(hw_lock_bit_t * lock, unsigned int bit LCK_GRP_ARG(lck_grp_t *grp))
927 {
928 uint32_t mask = (1 << bit);
929 boolean_t success = FALSE;
930
931 _disable_preemption();
932 // TODO: consider weak (non-looping) atomic test-and-set
933 success = hw_atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
934 if (!success) {
935 _enable_preemption();
936 }
937
938 if (success) {
939 lck_grp_spin_update_held(lock LCK_GRP_ARG(grp));
940 }
941
942 return success;
943 }
944
945 static inline void
946 hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
947 {
948 uint32_t mask = (1 << bit);
949
950 os_atomic_andnot(lock, mask, release);
951 #if __arm__
952 set_event();
953 #endif
954 #if CONFIG_DTRACE
955 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
956 #endif
957 }
958
959 /*
960 * Routine: hw_unlock_bit
961 *
962 * Release spin-lock. The second parameter is the bit number to clear.
963 * Decrement the preemption level.
964 */
965 void
966 hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
967 {
968 hw_unlock_bit_internal(lock, bit);
969 _enable_preemption();
970 }
971
972 void
973 hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
974 {
975 if (__improbable(get_preemption_level() == 0)) {
976 panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
977 }
978 hw_unlock_bit_internal(lock, bit);
979 }
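/*
 * Illustrative sketch (editor's example, not part of the original source):
 * a bit lock turns one bit of a 32-bit word into a spinlock, leaving the other
 * bits free for flags protected by it. my_flags and the choice of bit 0 are
 * hypothetical; as above, an lck_grp_t argument may be required depending on
 * the build configuration.
 *
 *	hw_lock_bit_t my_flags = 0;
 *
 *	hw_lock_bit(&my_flags, 0);              spin until bit 0 is acquired
 *	... read or update the remaining bits of my_flags ...
 *	hw_unlock_bit(&my_flags, 0);            clear bit 0 and release
 */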
980
981 /*
982 * Routine: lck_spin_sleep
983 */
984 wait_result_t
985 lck_spin_sleep_grp(
986 lck_spin_t *lck,
987 lck_sleep_action_t lck_sleep_action,
988 event_t event,
989 wait_interrupt_t interruptible,
990 lck_grp_t *grp)
991 {
992 wait_result_t res;
993
994 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
995 panic("Invalid lock sleep action %x\n", lck_sleep_action);
996 }
997
998 res = assert_wait(event, interruptible);
999 if (res == THREAD_WAITING) {
1000 lck_spin_unlock(lck);
1001 res = thread_block(THREAD_CONTINUE_NULL);
1002 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1003 lck_spin_lock_grp(lck, grp);
1004 }
1005 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1006 lck_spin_unlock(lck);
1007 }
1008
1009 return res;
1010 }
1011
1012 wait_result_t
1013 lck_spin_sleep(
1014 lck_spin_t *lck,
1015 lck_sleep_action_t lck_sleep_action,
1016 event_t event,
1017 wait_interrupt_t interruptible)
1018 {
1019 return lck_spin_sleep_grp(lck, lck_sleep_action, event, interruptible, LCK_GRP_NULL);
1020 }
1021
1022 /*
1023 * Routine: lck_spin_sleep_deadline
1024 */
1025 wait_result_t
1026 lck_spin_sleep_deadline(
1027 lck_spin_t *lck,
1028 lck_sleep_action_t lck_sleep_action,
1029 event_t event,
1030 wait_interrupt_t interruptible,
1031 uint64_t deadline)
1032 {
1033 wait_result_t res;
1034
1035 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1036 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1037 }
1038
1039 res = assert_wait_deadline(event, interruptible, deadline);
1040 if (res == THREAD_WAITING) {
1041 lck_spin_unlock(lck);
1042 res = thread_block(THREAD_CONTINUE_NULL);
1043 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1044 lck_spin_lock(lck);
1045 }
1046 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1047 lck_spin_unlock(lck);
1048 }
1049
1050 return res;
1051 }
1052
1053 /*
1054 * Routine: lck_mtx_sleep
1055 */
1056 wait_result_t
1057 lck_mtx_sleep(
1058 lck_mtx_t *lck,
1059 lck_sleep_action_t lck_sleep_action,
1060 event_t event,
1061 wait_interrupt_t interruptible)
1062 {
1063 wait_result_t res;
1064 thread_t thread = current_thread();
1065
1066 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
1067 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1068
1069 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1070 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1071 }
1072
1073 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1074 /*
1075 * We overload the RW lock promotion to give us a priority ceiling
1076 * during the time that this thread is asleep, so that when it
1077 * is re-awakened (and not yet contending on the mutex), it is
1078 * runnable at a reasonably high priority.
1079 */
1080 thread->rwlock_count++;
1081 }
1082
1083 res = assert_wait(event, interruptible);
1084 if (res == THREAD_WAITING) {
1085 lck_mtx_unlock(lck);
1086 res = thread_block(THREAD_CONTINUE_NULL);
1087 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1088 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1089 lck_mtx_lock_spin(lck);
1090 } else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS)) {
1091 lck_mtx_lock_spin_always(lck);
1092 } else {
1093 lck_mtx_lock(lck);
1094 }
1095 }
1096 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1097 lck_mtx_unlock(lck);
1098 }
1099
1100 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1101 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1102 /* sched_flags checked without lock, but will be rechecked while clearing */
1103 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1104 }
1105 }
1106
1107 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1108
1109 return res;
1110 }
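/*
 * Illustrative sketch (editor's example, not part of the original source):
 * lck_mtx_sleep() is normally used in a condition-wait loop, with the mutex
 * protecting the predicate and the event identifying the wakeup channel.
 * my_lock and my_data_ready are hypothetical.
 *
 *	lck_mtx_lock(&my_lock);
 *	while (!my_data_ready) {
 *		(void) lck_mtx_sleep(&my_lock, LCK_SLEEP_DEFAULT,
 *		    (event_t)&my_data_ready, THREAD_UNINT);
 *	}
 *	... consume the data while still holding my_lock ...
 *	lck_mtx_unlock(&my_lock);
 *
 * The producer sets my_data_ready under my_lock and then calls
 * thread_wakeup((event_t)&my_data_ready).
 */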
1111
1112
1113 /*
1114 * Routine: lck_mtx_sleep_deadline
1115 */
1116 wait_result_t
1117 lck_mtx_sleep_deadline(
1118 lck_mtx_t *lck,
1119 lck_sleep_action_t lck_sleep_action,
1120 event_t event,
1121 wait_interrupt_t interruptible,
1122 uint64_t deadline)
1123 {
1124 wait_result_t res;
1125 thread_t thread = current_thread();
1126
1127 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
1128 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
1129
1130 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1131 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1132 }
1133
1134 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1135 /*
1136 * See lck_mtx_sleep().
1137 */
1138 thread->rwlock_count++;
1139 }
1140
1141 res = assert_wait_deadline(event, interruptible, deadline);
1142 if (res == THREAD_WAITING) {
1143 lck_mtx_unlock(lck);
1144 res = thread_block(THREAD_CONTINUE_NULL);
1145 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1146 if ((lck_sleep_action & LCK_SLEEP_SPIN)) {
1147 lck_mtx_lock_spin(lck);
1148 } else {
1149 lck_mtx_lock(lck);
1150 }
1151 }
1152 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1153 lck_mtx_unlock(lck);
1154 }
1155
1156 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1157 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1158 /* sched_flags checked without lock, but will be rechecked while clearing */
1159 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1160 }
1161 }
1162
1163 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
1164
1165 return res;
1166 }
1167
1168 /*
1169 * Lock Boosting Invariants:
1170 *
1171 * The lock owner is always promoted to the max priority of all its waiters.
1172 * Max priority is capped at MAXPRI_PROMOTE.
1173 *
1174 * The last waiter is not given a promotion when it wakes up or acquires the lock.
1175 * When the last waiter is waking up, a new contender can always come in and
1176 * steal the lock without having to wait for the last waiter to make forward progress.
1177 */
1178
1179 /*
1180 * Routine: lck_mtx_lock_wait
1181 *
1182 * Invoked in order to wait on contention.
1183 *
1184 * Called with the interlock locked and
1185 * returns it unlocked.
1186 *
1187 * Always aggressively sets the owning thread to promoted,
1188 * even if it's the same or higher priority
1189 * This prevents it from lowering its own priority while holding a lock
1190 *
1191 * TODO: Come up with a more efficient way to handle same-priority promotions
1192 * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
1193 */
1194 void
1195 lck_mtx_lock_wait(
1196 lck_mtx_t *lck,
1197 thread_t holder,
1198 struct turnstile **ts)
1199 {
1200 thread_t thread = current_thread();
1201 lck_mtx_t *mutex;
1202 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1203
1204 #if CONFIG_DTRACE
1205 uint64_t sleep_start = 0;
1206
1207 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
1208 sleep_start = mach_absolute_time();
1209 }
1210 #endif
1211
1212 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1213 mutex = lck;
1214 } else {
1215 mutex = &lck->lck_mtx_ptr->lck_mtx;
1216 }
1217
1218 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
1219 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1220
1221 assert(thread->waiting_for_mutex == NULL);
1222 thread->waiting_for_mutex = mutex;
1223 mutex->lck_mtx_waiters++;
1224
1225 if (*ts == NULL) {
1226 *ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1227 }
1228
1229 struct turnstile *turnstile = *ts;
1230 thread_set_pending_block_hint(thread, kThreadWaitKernelMutex);
1231 turnstile_update_inheritor(turnstile, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1232
1233 waitq_assert_wait64(&turnstile->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);
1234
1235 lck_mtx_ilk_unlock(mutex);
1236
1237 turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
1238
1239 thread_block(THREAD_CONTINUE_NULL);
1240
1241 thread->waiting_for_mutex = NULL;
1242
1243 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1244 #if CONFIG_DTRACE
1245 /*
1246 * Record the DTrace lockstat probe for blocking, block time
1247 * measured from when we were entered.
1248 */
1249 if (sleep_start) {
1250 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1251 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
1252 mach_absolute_time() - sleep_start);
1253 } else {
1254 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
1255 mach_absolute_time() - sleep_start);
1256 }
1257 }
1258 #endif
1259 }
1260
1261 /*
1262 * Routine: lck_mtx_lock_acquire
1263 *
1264 * Invoked on acquiring the mutex when there is
1265 * contention.
1266 *
1267 * Returns the current number of waiters.
1268 *
1269 * Called with the interlock locked.
1270 */
1271 int
1272 lck_mtx_lock_acquire(
1273 lck_mtx_t *lck,
1274 struct turnstile *ts)
1275 {
1276 thread_t thread = current_thread();
1277 lck_mtx_t *mutex;
1278
1279 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1280 mutex = lck;
1281 } else {
1282 mutex = &lck->lck_mtx_ptr->lck_mtx;
1283 }
1284
1285 assert(thread->waiting_for_mutex == NULL);
1286
1287 if (mutex->lck_mtx_waiters > 0) {
1288 if (ts == NULL) {
1289 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1290 }
1291
1292 turnstile_update_inheritor(ts, thread, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1293 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1294 }
1295
1296 if (ts != NULL) {
1297 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1298 }
1299
1300 return mutex->lck_mtx_waiters;
1301 }
1302
1303 /*
1304 * Routine: lck_mtx_unlock_wakeup
1305 *
1306 * Invoked on unlock when there is contention.
1307 *
1308 * Called with the interlock locked.
1309 *
1310 * NOTE: callers should call turnstile_cleanup after
1311 * dropping the interlock.
1312 */
1313 boolean_t
1314 lck_mtx_unlock_wakeup(
1315 lck_mtx_t *lck,
1316 thread_t holder)
1317 {
1318 thread_t thread = current_thread();
1319 lck_mtx_t *mutex;
1320 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1321 struct turnstile *ts;
1322 kern_return_t did_wake;
1323
1324 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1325 mutex = lck;
1326 } else {
1327 mutex = &lck->lck_mtx_ptr->lck_mtx;
1328 }
1329
1330 if (thread != holder) {
1331 panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
1332 }
1333
1334 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
1335 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1336
1337 assert(mutex->lck_mtx_waiters > 0);
1338 assert(thread->waiting_for_mutex == NULL);
1339
1340 ts = turnstile_prepare((uintptr_t)mutex, NULL, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
1341
1342 if (mutex->lck_mtx_waiters > 1) {
1343 /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the woken-up thread */
1344 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
1345 } else {
1346 did_wake = waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(LCK_MTX_EVENT(mutex)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1347 turnstile_update_inheritor(ts, NULL, TURNSTILE_IMMEDIATE_UPDATE);
1348 }
1349 assert(did_wake == KERN_SUCCESS);
1350
1351 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
1352 turnstile_complete((uintptr_t)mutex, NULL, NULL, TURNSTILE_KERNEL_MUTEX);
1353
1354 mutex->lck_mtx_waiters--;
1355
1356 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1357
1358 return mutex->lck_mtx_waiters > 0;
1359 }
1360
1361 /*
1362 * Routine: mutex_pause
1363 *
1364 * Called by former callers of simple_lock_pause().
1365 */
1366 #define MAX_COLLISION_COUNTS 32
1367 #define MAX_COLLISION 8
1368
1369 unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1370
1371 uint32_t collision_backoffs[MAX_COLLISION] = {
1372 10, 50, 100, 200, 400, 600, 800, 1000
1373 };
1374
1375
1376 void
1377 mutex_pause(uint32_t collisions)
1378 {
1379 wait_result_t wait_result;
1380 uint32_t back_off;
1381
1382 if (collisions >= MAX_COLLISION_COUNTS) {
1383 collisions = MAX_COLLISION_COUNTS - 1;
1384 }
1385 max_collision_count[collisions]++;
1386
1387 if (collisions >= MAX_COLLISION) {
1388 collisions = MAX_COLLISION - 1;
1389 }
1390 back_off = collision_backoffs[collisions];
1391
1392 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1393 assert(wait_result == THREAD_WAITING);
1394
1395 wait_result = thread_block(THREAD_CONTINUE_NULL);
1396 assert(wait_result == THREAD_TIMED_OUT);
1397 }
1398
1399
1400 unsigned int mutex_yield_wait = 0;
1401 unsigned int mutex_yield_no_wait = 0;
1402
1403 void
1404 lck_mtx_yield(
1405 lck_mtx_t *lck)
1406 {
1407 int waiters;
1408
1409 #if DEBUG
1410 lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
1411 #endif /* DEBUG */
1412
1413 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) {
1414 waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1415 } else {
1416 waiters = lck->lck_mtx_waiters;
1417 }
1418
1419 if (!waiters) {
1420 mutex_yield_no_wait++;
1421 } else {
1422 mutex_yield_wait++;
1423 lck_mtx_unlock(lck);
1424 mutex_pause(0);
1425 lck_mtx_lock(lck);
1426 }
1427 }
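/*
 * Illustrative sketch (editor's example, not part of the original source):
 * a long-running loop that holds a mutex can call lck_mtx_yield() periodically
 * so that waiters get a chance to run; because the mutex may be dropped and
 * re-taken inside the call, any state read under the lock must be revalidated
 * afterwards. my_lock and the helpers are hypothetical.
 *
 *	lck_mtx_lock(&my_lock);
 *	while (more_work_to_do()) {
 *		do_one_unit_of_work();
 *		lck_mtx_yield(&my_lock);        may unlock, pause, and relock
 *	}
 *	lck_mtx_unlock(&my_lock);
 */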
1428
1429
1430 /*
1431 * Routine: lck_rw_sleep
1432 */
1433 wait_result_t
1434 lck_rw_sleep(
1435 lck_rw_t *lck,
1436 lck_sleep_action_t lck_sleep_action,
1437 event_t event,
1438 wait_interrupt_t interruptible)
1439 {
1440 wait_result_t res;
1441 lck_rw_type_t lck_rw_type;
1442 thread_t thread = current_thread();
1443
1444 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1445 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1446 }
1447
1448 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1449 /*
1450 * Although we are dropping the RW lock, the intent in most cases
1451 * is that this thread remains as an observer, since it may hold
1452 * some secondary resource, but must yield to avoid deadlock. In
1453 * this situation, make sure that the thread is boosted to the
1454 * RW lock ceiling while blocked, so that it can re-acquire the
1455 * RW lock at that priority.
1456 */
1457 thread->rwlock_count++;
1458 }
1459
1460 res = assert_wait(event, interruptible);
1461 if (res == THREAD_WAITING) {
1462 lck_rw_type = lck_rw_done(lck);
1463 res = thread_block(THREAD_CONTINUE_NULL);
1464 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1465 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1466 lck_rw_lock(lck, lck_rw_type);
1467 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1468 lck_rw_lock_exclusive(lck);
1469 } else {
1470 lck_rw_lock_shared(lck);
1471 }
1472 }
1473 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1474 (void)lck_rw_done(lck);
1475 }
1476
1477 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1478 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1479 /* sched_flags checked without lock, but will be rechecked while clearing */
1480
1481 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1482 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1483
1484 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1485 }
1486 }
1487
1488 return res;
1489 }
1490
1491
1492 /*
1493 * Routine: lck_rw_sleep_deadline
1494 */
1495 wait_result_t
1496 lck_rw_sleep_deadline(
1497 lck_rw_t *lck,
1498 lck_sleep_action_t lck_sleep_action,
1499 event_t event,
1500 wait_interrupt_t interruptible,
1501 uint64_t deadline)
1502 {
1503 wait_result_t res;
1504 lck_rw_type_t lck_rw_type;
1505 thread_t thread = current_thread();
1506
1507 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
1508 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1509 }
1510
1511 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1512 thread->rwlock_count++;
1513 }
1514
1515 res = assert_wait_deadline(event, interruptible, deadline);
1516 if (res == THREAD_WAITING) {
1517 lck_rw_type = lck_rw_done(lck);
1518 res = thread_block(THREAD_CONTINUE_NULL);
1519 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1520 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
1521 lck_rw_lock(lck, lck_rw_type);
1522 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
1523 lck_rw_lock_exclusive(lck);
1524 } else {
1525 lck_rw_lock_shared(lck);
1526 }
1527 }
1528 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1529 (void)lck_rw_done(lck);
1530 }
1531
1532 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1533 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1534 /* sched_flags checked without lock, but will be rechecked while clearing */
1535
1536 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1537 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1538
1539 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1540 }
1541 }
1542
1543 return res;
1544 }
1545
1546 /*
1547 * Reader-writer lock promotion
1548 *
1549 * We support a limited form of reader-writer
1550 * lock promotion whose effects are:
1551 *
1552 * * Qualifying threads have decay disabled
1553 * * Scheduler priority is reset to a floor
1554 * of their statically assigned priority
1555 * or MINPRI_RWLOCK
1556 *
1557 * The rationale is that lck_rw_ts do not have
1558 * a single owner, so we cannot apply a directed
1559 * priority boost from all waiting threads
1560 * to all holding threads without maintaining
1561 * lists of all shared owners and all waiting
1562 * threads for every lock.
1563 *
1564 * Instead (and to preserve the uncontended fast-
1565 * path), acquiring (or attempting to acquire)
1566 * a RW lock in shared or exclusive mode increments
1567 * a per-thread counter. Only if that thread stops
1568 * making forward progress (for instance blocking
1569 * on a mutex, or being preempted) do we consult
1570 * the counter and apply the priority floor.
1571 * When the thread becomes runnable again (or in
1572 * the case of preemption it never stopped being
1573 * runnable), it has the priority boost and should
1574 * be in a good position to run on the CPU and
1575 * release all RW locks (at which point the priority
1576 * boost is cleared).
1577 *
1578 * Care must be taken to ensure that priority
1579 * boosts are not retained indefinitely, since unlike
1580 * mutex priority boosts (where the boost is tied
1581 * to the mutex lifecycle), the boost is tied
1582 * to the thread and independent of any particular
1583 * lck_rw_t. Assertions are in place on return
1584 * to userspace so that the boost is not held
1585 * indefinitely.
1586 *
1587 * The routines that increment/decrement the
1588 * per-thread counter should err on the side of
1589 * incrementing any time a preemption is possible
1590 * and the lock would be visible to the rest of the
1591 * system as held (so it should be incremented before
1592 * interlocks are dropped/preemption is enabled, or
1593 * before a CAS is executed to acquire the lock).
1594 *
1595 */
1596
1597 /*
1598 * lck_rw_clear_promotion: Undo priority promotions when the last RW
1599 * lock is released by a thread (if a promotion was active)
1600 */
1601 void
1602 lck_rw_clear_promotion(thread_t thread, uintptr_t trace_obj)
1603 {
1604 assert(thread->rwlock_count == 0);
1605
1606 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
1607 spl_t s = splsched();
1608 thread_lock(thread);
1609
1610 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
1611 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj);
1612 }
1613
1614 thread_unlock(thread);
1615 splx(s);
1616 }
1617
1618 /*
1619 * Callout from context switch if the thread goes
1620 * off core with a positive rwlock_count
1621 *
1622 * Called at splsched with the thread locked
1623 */
1624 void
1625 lck_rw_set_promotion_locked(thread_t thread)
1626 {
1627 if (LcksOpts & disLkRWPrio) {
1628 return;
1629 }
1630
1631 assert(thread->rwlock_count > 0);
1632
1633 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1634 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
1635 }
1636 }
1637
1638 kern_return_t
1639 host_lockgroup_info(
1640 host_t host,
1641 lockgroup_info_array_t *lockgroup_infop,
1642 mach_msg_type_number_t *lockgroup_infoCntp)
1643 {
1644 lockgroup_info_t *lockgroup_info_base;
1645 lockgroup_info_t *lockgroup_info;
1646 vm_offset_t lockgroup_info_addr;
1647 vm_size_t lockgroup_info_size;
1648 vm_size_t lockgroup_info_vmsize;
1649 lck_grp_t *lck_grp;
1650 unsigned int i;
1651 vm_map_copy_t copy;
1652 kern_return_t kr;
1653
1654 if (host == HOST_NULL) {
1655 return KERN_INVALID_HOST;
1656 }
1657
1658 lck_mtx_lock(&lck_grp_lock);
1659
1660 lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
1661 lockgroup_info_vmsize = round_page(lockgroup_info_size);
1662 kr = kmem_alloc_pageable(ipc_kernel_map,
1663 &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
1664 if (kr != KERN_SUCCESS) {
1665 lck_mtx_unlock(&lck_grp_lock);
1666 return kr;
1667 }
1668
1669 lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
1670 lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
1671 lockgroup_info = lockgroup_info_base;
1672
1673 for (i = 0; i < lck_grp_cnt; i++) {
1674 lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
1675 lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
1676 lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
1677
1678 #if LOCK_STATS
1679 lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stats.lgss_spin_held.lgs_count;
1680 lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stats.lgss_spin_miss.lgs_count;
1681 #endif /* LOCK_STATS */
1682
1683 // Historically on x86, held was used for "direct wait" and util for "held"
1684 lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stats.lgss_mtx_held.lgs_count;
1685 lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stats.lgss_mtx_direct_wait.lgs_count;
1686 lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stats.lgss_mtx_miss.lgs_count;
1687 lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stats.lgss_mtx_wait.lgs_count;
1688
1689 (void) strncpy(lockgroup_info->lockgroup_name, lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);
1690
1691 lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
1692 lockgroup_info++;
1693 }
1694
1695 *lockgroup_infoCntp = lck_grp_cnt;
1696 lck_mtx_unlock(&lck_grp_lock);
1697
1698 if (lockgroup_info_size != lockgroup_info_vmsize) {
1699 bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
1700 }
1701
1702 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
1703 (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
1704 assert(kr == KERN_SUCCESS);
1705
1706 *lockgroup_infop = (lockgroup_info_t *) copy;
1707
1708 return KERN_SUCCESS;
1709 }
1710
1711 /*
1712 * sleep_with_inheritor and wakeup_with_inheritor KPI
1713 *
1714 * Functions that allow a thread to sleep on an event and use a turnstile to propagate the priority of the sleeping threads to
1715 * the latest thread specified as inheritor.
1716 *
1717 * Inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions to specify
1718 * to whom the push should be directed. The inheritor cannot run in user space while holding a push from an event. It is therefore the caller's
1719 * responsibility to call wakeup_with_inheritor from the inheritor before it runs in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
1720 *
1721 * sleep_with_inheritor requires a locking primitive to be held when invoked, but wakeup_with_inheritor and change_sleep_inheritor do not.
1722 *
1723 * Turnstiles require a non-blocking primitive as interlock to synchronize manipulation of the turnstile data structure; therefore sleep_with_inheritor, change_sleep_inheritor and
1724 * wakeup_with_inheritor require the same interlock to manipulate turnstiles.
1725 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
1726 * invoking any turnstile operation.
1727 *
1728 * All functions save the turnstile associated with the event in the turnstile kernel hash table and use the turnstile kernel hash table bucket
1729 * spinlock as the turnstile interlock. Because we do not want to keep interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
1730 * is instantiated for this KPI to manage the hash without disabling interrupts.
1731 * Also:
1732 * - all events on the system that hash to the same bucket will contend on the same spinlock.
1733 * - every event will have a dedicated wait_queue.
1734 *
1735 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() blocks are provided to
1736 * sleep_with_inheritor_and_turnstile_type to perform the handoff with the bucket spinlock.
1737 */
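/*
 * Illustrative sketch (editor's example, not part of the original source):
 * a minimal single-owner handoff built on this KPI, assuming the public
 * wrappers lck_mtx_sleep_with_inheritor() and wakeup_one_with_inheritor()
 * declared in kern/locks.h. my_lock and my_owner are hypothetical.
 *
 * Waiter (blocks and pushes its priority onto the current owner):
 *
 *	lck_mtx_lock(&my_lock);
 *	while (my_owner != THREAD_NULL) {
 *		lck_mtx_sleep_with_inheritor(&my_lock, LCK_SLEEP_DEFAULT,
 *		    (event_t)&my_owner, my_owner, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 *	}
 *	my_owner = current_thread();
 *	lck_mtx_unlock(&my_lock);
 *
 * Owner, when done (clears ownership and hands the push to one woken waiter):
 *
 *	lck_mtx_lock(&my_lock);
 *	my_owner = THREAD_NULL;
 *	wakeup_one_with_inheritor((event_t)&my_owner, THREAD_AWAKENED,
 *	    LCK_WAKE_DEFAULT, NULL);
 *	lck_mtx_unlock(&my_lock);
 */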
1738
1739 kern_return_t
1740 wakeup_with_inheritor_and_turnstile_type(event_t event, turnstile_type_t type, wait_result_t result, bool wake_one, lck_wake_action_t action, thread_t *thread_wokenup)
1741 {
1742 uint32_t index;
1743 struct turnstile *ts = NULL;
1744 kern_return_t ret = KERN_NOT_WAITING;
1745 int priority;
1746 thread_t wokeup;
1747
1748 /*
1749 * the hash bucket spinlock is used as turnstile interlock
1750 */
1751 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1752
1753 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1754
1755 if (wake_one) {
1756 if (action == LCK_WAKE_DEFAULT) {
1757 priority = WAITQ_PROMOTE_ON_WAKE;
1758 } else {
1759 assert(action == LCK_WAKE_DO_NOT_TRANSFER_PUSH);
1760 priority = WAITQ_ALL_PRIORITIES;
1761 }
1762
1763 /*
1764 * WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor
1765 * if it finds a thread
1766 */
1767 wokeup = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(event), result, priority);
1768 if (wokeup != NULL) {
1769 if (thread_wokenup != NULL) {
1770 *thread_wokenup = wokeup;
1771 } else {
1772 thread_deallocate_safe(wokeup);
1773 }
1774 ret = KERN_SUCCESS;
1775 if (action == LCK_WAKE_DO_NOT_TRANSFER_PUSH) {
1776 goto complete;
1777 }
1778 } else {
1779 if (thread_wokenup != NULL) {
1780 *thread_wokenup = NULL;
1781 }
1782 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
1783 ret = KERN_NOT_WAITING;
1784 }
1785 } else {
1786 ret = waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(event), result, WAITQ_ALL_PRIORITIES);
1787 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
1788 }
1789
1790 /*
1791 * turnstile_update_inheritor_complete could be called while holding the interlock.
1792 * In this case the new inheritor is either NULL or a thread that has just been woken up
1793 * and has not blocked, because it is racing with the same interlock used here
1794 * after the wait.
1795 * So there is no chain to update for the new inheritor.
1796 *
1797 * However, unless the current thread is the old inheritor,
1798 * the old inheritor can be blocked and require a chain update.
1799 *
1800 * The chain should be short because kernel turnstiles cannot have user turnstiles
1801 * chained after them.
1802 *
1803 * This could be optimized by asking the turnstile to tell us
1804 * whether the old inheritor needs an update, and dropping the lock
1805 * only in that case.
1806 */
1807 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1808
1809 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1810
1811 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1812
1813 complete:
1814 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1815
1816 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1817
1818 turnstile_cleanup();
1819
1820 return ret;
1821 }
1822
1823 static wait_result_t
1824 sleep_with_inheritor_and_turnstile_type(event_t event,
1825 thread_t inheritor,
1826 wait_interrupt_t interruptible,
1827 uint64_t deadline,
1828 turnstile_type_t type,
1829 void (^primitive_lock)(void),
1830 void (^primitive_unlock)(void))
1831 {
1832 wait_result_t ret;
1833 uint32_t index;
1834 struct turnstile *ts = NULL;
1835
1836 /*
1837 * the hash bucket spinlock is used as turnstile interlock,
1838 * lock it before releasing the primitive lock
1839 */
1840 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1841
1842 primitive_unlock();
1843
1844 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1845
1846 thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor);
1847 /*
1848 * We need TURNSTILE_DELAYED_UPDATE because we will call
1849 * waitq_assert_wait64 after.
1850 */
1851 turnstile_update_inheritor(ts, inheritor, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
1852
1853 ret = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(event), interruptible, deadline);
1854
1855 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1856
1857 /*
1858 * Update new and old inheritor chains outside the interlock;
1859 */
1860 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1861
1862 if (ret == THREAD_WAITING) {
1863 ret = thread_block(THREAD_CONTINUE_NULL);
1864 }
1865
1866 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1867
1868 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1869
1870 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1871
1872 turnstile_cleanup();
1873
1874 primitive_lock();
1875
1876 return ret;
1877 }
1878
1879 kern_return_t
1880 change_sleep_inheritor_and_turnstile_type(event_t event,
1881 thread_t inheritor,
1882 turnstile_type_t type)
1883 {
1884 uint32_t index;
1885 struct turnstile *ts = NULL;
1886 kern_return_t ret = KERN_SUCCESS;
1887 /*
1888 * the hash bucket spinlock is used as turnstile interlock
1889 */
1890 turnstile_hash_bucket_lock((uintptr_t)event, &index, type);
1891
1892 ts = turnstile_prepare((uintptr_t)event, NULL, TURNSTILE_NULL, type);
1893
1894 if (!turnstile_has_waiters(ts)) {
1895 ret = KERN_NOT_WAITING;
1896 }
1897
1898 /*
1899 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
1900 */
1901 turnstile_update_inheritor(ts, inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
1902
1903 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1904
1905 /*
1906 * update the chains outside the interlock
1907 */
1908 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
1909
1910 turnstile_hash_bucket_lock((uintptr_t)NULL, &index, type);
1911
1912 turnstile_complete((uintptr_t)event, NULL, NULL, type);
1913
1914 turnstile_hash_bucket_unlock((uintptr_t)NULL, &index, type, 0);
1915
1916 turnstile_cleanup();
1917
1918 return ret;
1919 }
1920
1921 typedef void (^void_block_void)(void);
1922
1923 /*
1924 * sleep_with_inheritor functions with lck_mtx_t as locking primitive.
1925 */
1926
1927 wait_result_t
1928 lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
1929 {
1930 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
1931
1932 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
1933 return sleep_with_inheritor_and_turnstile_type(event,
1934 inheritor,
1935 interruptible,
1936 deadline,
1937 type,
1938 ^{;},
1939 ^{lck_mtx_unlock(lock);});
1940 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
1941 return sleep_with_inheritor_and_turnstile_type(event,
1942 inheritor,
1943 interruptible,
1944 deadline,
1945 type,
1946 ^{lck_mtx_lock_spin(lock);},
1947 ^{lck_mtx_unlock(lock);});
1948 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
1949 return sleep_with_inheritor_and_turnstile_type(event,
1950 inheritor,
1951 interruptible,
1952 deadline,
1953 type,
1954 ^{lck_mtx_lock_spin_always(lock);},
1955 ^{lck_mtx_unlock(lock);});
1956 } else {
1957 return sleep_with_inheritor_and_turnstile_type(event,
1958 inheritor,
1959 interruptible,
1960 deadline,
1961 type,
1962 ^{lck_mtx_lock(lock);},
1963 ^{lck_mtx_unlock(lock);});
1964 }
1965 }
1966
1967 /*
1968 * Name: lck_spin_sleep_with_inheritor
1969 *
1970 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1971 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1972 * be directed to the inheritor specified.
1973 * An interruptible mode and deadline can be specified to return earlier from the wait.
1974 *
1975 * Args:
1976 * Arg1: lck_spin_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1977 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1978 * Arg3: event to wait on.
1979 * Arg4: thread to propagate the event push to.
1980 * Arg5: interruptible flag for wait.
1981 * Arg6: deadline for wait.
1982 *
1983 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1984 * Lock will be dropped while waiting.
1985 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1986 * wakeup for the event is called.
1987 *
1988 * Returns: result of the wait.
1989 */
1990 wait_result_t
1991 lck_spin_sleep_with_inheritor(
1992 lck_spin_t *lock,
1993 lck_sleep_action_t lck_sleep_action,
1994 event_t event,
1995 thread_t inheritor,
1996 wait_interrupt_t interruptible,
1997 uint64_t deadline)
1998 {
1999 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2000 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
2001 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
2002 ^{}, ^{ lck_spin_unlock(lock); });
2003 } else {
2004 return sleep_with_inheritor_and_turnstile_type(event, inheritor,
2005 interruptible, deadline, TURNSTILE_SLEEP_INHERITOR,
2006 ^{ lck_spin_lock(lock); }, ^{ lck_spin_unlock(lock); });
2007 }
2008 }
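
/*
 * Example usage (illustrative sketch, not part of this file; `obj', `busy'
 * and `owner' are hypothetical fields protected by the spin lock). The
 * waiter sleeps on &obj->busy and pushes its priority to obj->owner:
 *
 *	lck_spin_lock(&obj->lock);
 *	while (obj->busy) {
 *		(void)lck_spin_sleep_with_inheritor(&obj->lock,
 *		    LCK_SLEEP_DEFAULT, (event_t)&obj->busy,
 *		    obj->owner, THREAD_UNINT, 0); // deadline 0 = no timeout
 *	}
 *	lck_spin_unlock(&obj->lock);
 */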
2009
2010 /*
2011 * Name: lck_mtx_sleep_with_inheritor
2012 *
2013 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2014 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2015 * be directed to the inheritor specified.
2016 * An interruptible mode and deadline can be specified to return earlier from the wait.
2017 *
2018 * Args:
2019 * Arg1: lck_mtx_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2020 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
2021 * Arg3: event to wait on.
2022 * Arg4: thread to propagate the event push to.
2023 * Arg5: interruptible flag for wait.
2024 * Arg6: deadline for wait.
2025 *
2026 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2027 * Lock will be dropped while waiting.
2028 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2029 * wakeup for the event is called.
2030 *
2031 * Returns: result of the wait.
2032 */
2033 wait_result_t
2034 lck_mtx_sleep_with_inheritor(lck_mtx_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
2035 {
2036 return lck_mtx_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
2037 }
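
/*
 * Example usage (illustrative sketch, not part of this file; `request',
 * `done' and `server' are hypothetical). The waiter blocks on
 * &request->done and donates its priority to the servicing thread:
 *
 *	lck_mtx_lock(&request->lock);
 *	while (!request->done) {
 *		(void)lck_mtx_sleep_with_inheritor(&request->lock,
 *		    LCK_SLEEP_DEFAULT, (event_t)&request->done,
 *		    request->server, THREAD_UNINT, 0); // deadline 0 = no timeout
 *	}
 *	lck_mtx_unlock(&request->lock);
 *
 * The matching wakeup side is sketched after wakeup_one_with_inheritor() below.
 */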
2038
2039 /*
2040 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
2041 */
2042
2043 wait_result_t
2044 lck_rw_sleep_with_inheritor_and_turnstile_type(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline, turnstile_type_t type)
2045 {
2046 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2047
2048 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2049
2050 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2051 return sleep_with_inheritor_and_turnstile_type(event,
2052 inheritor,
2053 interruptible,
2054 deadline,
2055 type,
2056 ^{;},
2057 ^{lck_rw_type = lck_rw_done(lock);});
2058 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2059 return sleep_with_inheritor_and_turnstile_type(event,
2060 inheritor,
2061 interruptible,
2062 deadline,
2063 type,
2064 ^{lck_rw_lock(lock, lck_rw_type);},
2065 ^{lck_rw_type = lck_rw_done(lock);});
2066 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2067 return sleep_with_inheritor_and_turnstile_type(event,
2068 inheritor,
2069 interruptible,
2070 deadline,
2071 type,
2072 ^{lck_rw_lock_exclusive(lock);},
2073 ^{lck_rw_type = lck_rw_done(lock);});
2074 } else {
2075 return sleep_with_inheritor_and_turnstile_type(event,
2076 inheritor,
2077 interruptible,
2078 deadline,
2079 type,
2080 ^{lck_rw_lock_shared(lock);},
2081 ^{lck_rw_type = lck_rw_done(lock);});
2082 }
2083 }
2084
2085 /*
2086 * Name: lck_rw_sleep_with_inheritor
2087 *
2088 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2089 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2090 * be directed to the inheritor specified.
2091 * An interruptible mode and deadline can be specified to return earlier from the wait.
2092 *
2093 * Args:
2094 * Arg1: lck_rw_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2095 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
2096 * Arg3: event to wait on.
2097 * Arg4: thread to propagate the event push to.
2098 * Arg5: interruptible flag for wait.
2099 * Arg6: deadline for wait.
2100 *
2101 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2102 * Lock will be dropped while waiting.
2103 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2104 * wakeup for the event is called.
2105 *
2106 * Returns: result of the wait.
2107 */
2108 wait_result_t
2109 lck_rw_sleep_with_inheritor(lck_rw_t *lock, lck_sleep_action_t lck_sleep_action, event_t event, thread_t inheritor, wait_interrupt_t interruptible, uint64_t deadline)
2110 {
2111 return lck_rw_sleep_with_inheritor_and_turnstile_type(lock, lck_sleep_action, event, inheritor, interruptible, deadline, TURNSTILE_SLEEP_INHERITOR);
2112 }
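
/*
 * Example usage (illustrative sketch, not part of this file; names are
 * hypothetical). A reader sleeps and reacquires the lock shared on wakeup:
 *
 *	lck_rw_lock_shared(&obj->rwlock);
 *	while (!obj->ready) {
 *		(void)lck_rw_sleep_with_inheritor(&obj->rwlock,
 *		    LCK_SLEEP_SHARED, (event_t)&obj->ready,
 *		    obj->initializer, THREAD_UNINT, 0); // deadline 0 = no timeout
 *	}
 *	lck_rw_unlock_shared(&obj->rwlock);
 */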
2113
2114 /*
2115 * wakeup_with_inheritor functions are independent from the locking primitive.
2116 */
2117
2118 /*
2119 * Name: wakeup_one_with_inheritor
2120 *
2121 * Description: wake up one waiter for event if any. The thread woken up will be the one with the highest sched priority waiting on the event.
2122 * The push for the event will be transferred from the last inheritor to the woken up thread if LCK_WAKE_DEFAULT is specified.
2123 * If LCK_WAKE_DO_NOT_TRANSFER_PUSH is specified the push will not be transferred.
2124 *
2125 * Args:
2126 * Arg1: event to wake from.
2127 * Arg2: wait result to pass to the woken up thread.
2128 * Arg3: wake flag. LCK_WAKE_DEFAULT or LCK_WAKE_DO_NOT_TRANSFER_PUSH.
2129 * Arg4: pointer for storing the thread woken up.
2130 *
2131 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2132 *
2133 * Conditions: The new inheritor woken up cannot run in user space until another inheritor is specified for the event or a
2134 * wakeup for the event is called.
2135 * A reference for the woken-up thread is acquired.
2136 * NOTE: this cannot be called from interrupt context.
2137 */
2138 kern_return_t
2139 wakeup_one_with_inheritor(event_t event, wait_result_t result, lck_wake_action_t action, thread_t *thread_wokenup)
2140 {
2141 return wakeup_with_inheritor_and_turnstile_type(event,
2142 TURNSTILE_SLEEP_INHERITOR,
2143 result,
2144 TRUE,
2145 action,
2146 thread_wokenup);
2147 }
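
/*
 * Example usage (illustrative sketch, not part of this file; names are
 * hypothetical). The servicing thread completes the work, transfers the
 * push to the highest priority waiter and drops the acquired reference:
 *
 *	thread_t woken = NULL;
 *	lck_mtx_lock(&request->lock);
 *	request->done = TRUE;
 *	kern_return_t kr = wakeup_one_with_inheritor((event_t)&request->done,
 *	    THREAD_AWAKENED, LCK_WAKE_DEFAULT, &woken);
 *	lck_mtx_unlock(&request->lock);
 *	if (kr == KERN_SUCCESS) {
 *		thread_deallocate(woken);	// drop the reference acquired for us
 *	}
 */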
2148
2149 /*
2150 * Name: wakeup_all_with_inheritor
2151 *
2152 * Description: wake up all waiters waiting for event. The old inheritor will lose the push.
2153 *
2154 * Args:
2155 * Arg1: event to wake from.
2156 * Arg2: wait result to pass to the woken up threads.
2157 *
2158 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2159 *
2160 * Conditions: NOTE: this cannot be called from interrupt context.
2161 */
2162 kern_return_t
2163 wakeup_all_with_inheritor(event_t event, wait_result_t result)
2164 {
2165 return wakeup_with_inheritor_and_turnstile_type(event,
2166 TURNSTILE_SLEEP_INHERITOR,
2167 result,
2168 FALSE,
2169 0,
2170 NULL);
2171 }
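
/*
 * Example usage (illustrative sketch, not part of this file; names are
 * hypothetical). Broadcast to every waiter; no thread reference is returned
 * and the old inheritor simply loses the push:
 *
 *	lck_mtx_lock(&request->lock);
 *	request->done = TRUE;
 *	(void)wakeup_all_with_inheritor((event_t)&request->done, THREAD_AWAKENED);
 *	lck_mtx_unlock(&request->lock);
 */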
2172
2173 /*
2174 * change_sleep_inheritor is independent from the locking primitive.
2175 */
2176
2177 /*
2178 * Name: change_sleep_inheritor
2179 *
2180 * Description: Redirects the push of the threads waiting on the event to the new inheritor specified.
2181 *
2182 * Args:
2183 * Arg1: event to redirect the push.
2184 * Arg2: new inheritor for event.
2185 *
2186 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2187 *
2188 * Conditions: In case of success, the new inheritor cannot run in user space until another inheritor is specified for the event or a
2189 * wakeup for the event is called.
2190 * NOTE: this cannot be called from interrupt context.
2191 */
2192 kern_return_t
2193 change_sleep_inheritor(event_t event, thread_t inheritor)
2194 {
2195 return change_sleep_inheritor_and_turnstile_type(event,
2196 inheritor,
2197 TURNSTILE_SLEEP_INHERITOR);
2198 }
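
/*
 * Example usage (illustrative sketch, not part of this file; `new_worker'
 * is a hypothetical thread taking over the work). Redirect the waiters'
 * push without waking anybody up:
 *
 *	if (change_sleep_inheritor((event_t)&request->done, new_worker) ==
 *	    KERN_NOT_WAITING) {
 *		// nobody is waiting, so nothing was redirected
 *	}
 */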
2199
2200 void
2201 kdp_sleep_with_inheritor_find_owner(struct waitq * waitq, __unused event64_t event, thread_waitinfo_t * waitinfo)
2202 {
2203 assert(waitinfo->wait_type == kThreadWaitSleepWithInheritor);
2204 assert(waitq_is_turnstile_queue(waitq));
2205 waitinfo->owner = 0;
2206 waitinfo->context = 0;
2207
2208 if (waitq_held(waitq)) {
2209 return;
2210 }
2211
2212 struct turnstile *turnstile = waitq_to_turnstile(waitq);
2213 assert(turnstile->ts_inheritor_flags & TURNSTILE_INHERITOR_THREAD);
2214 waitinfo->owner = thread_tid(turnstile->ts_inheritor);
2215 }
2216
2217 typedef void (*void_func_void)(void);
2218
2219 static kern_return_t
2220 gate_try_close(gate_t *gate)
2221 {
2222 uintptr_t state;
2223 thread_t holder;
2224 kern_return_t ret;
2225 __assert_only bool waiters;
2226 thread_t thread = current_thread();
2227
2228 if (os_atomic_cmpxchg(&gate->gate_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2229 return KERN_SUCCESS;
2230 }
2231
2232 gate_ilock(gate);
2233 state = ordered_load_gate(gate);
2234 holder = GATE_STATE_TO_THREAD(state);
2235
2236 if (holder == NULL) {
2237 waiters = gate_has_waiters(state);
2238 assert(waiters == FALSE);
2239
2240 state = GATE_THREAD_TO_STATE(current_thread());
2241 state |= GATE_ILOCK;
2242 ordered_store_gate(gate, state);
2243 ret = KERN_SUCCESS;
2244 } else {
2245 if (holder == current_thread()) {
2246 panic("Trying to close a gate already owned by current thread %p", current_thread());
2247 }
2248 ret = KERN_FAILURE;
2249 }
2250
2251 gate_iunlock(gate);
2252 return ret;
2253 }
2254
2255 static void
2256 gate_close(gate_t* gate)
2257 {
2258 uintptr_t state;
2259 thread_t holder;
2260 __assert_only bool waiters;
2261 thread_t thread = current_thread();
2262
2263 if (os_atomic_cmpxchg(&gate->gate_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
2264 return;
2265 }
2266
2267 gate_ilock(gate);
2268 state = ordered_load_gate(gate);
2269 holder = GATE_STATE_TO_THREAD(state);
2270
2271 if (holder != NULL) {
2272 panic("Closing a gate already owned by %p from current thread %p", holder, current_thread());
2273 }
2274
2275 waiters = gate_has_waiters(state);
2276 assert(waiters == FALSE);
2277
2278 state = GATE_THREAD_TO_STATE(thread);
2279 state |= GATE_ILOCK;
2280 ordered_store_gate(gate, state);
2281
2282 gate_iunlock(gate);
2283 }
2284
2285 static void
2286 gate_open_turnstile(gate_t *gate)
2287 {
2288 struct turnstile *ts = NULL;
2289
2290 ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2291 waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
2292 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2293 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2294 turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2295 /*
2296 * We can do the cleanup while holding the interlock.
2297 * It is ok because:
2298 * 1. current_thread is the previous inheritor and it is running
2299 * 2. new inheritor is NULL.
2300 * => No chain of turnstiles needs to be updated.
2301 */
2302 turnstile_cleanup();
2303 }
2304
2305 static void
2306 gate_open(gate_t *gate)
2307 {
2308 uintptr_t state;
2309 thread_t holder;
2310 bool waiters;
2311 thread_t thread = current_thread();
2312
2313 if (os_atomic_cmpxchg(&gate->gate_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2314 return;
2315 }
2316
2317 gate_ilock(gate);
2318 state = ordered_load_gate(gate);
2319 holder = GATE_STATE_TO_THREAD(state);
2320 waiters = gate_has_waiters(state);
2321
2322 if (holder != thread) {
2323 panic("Opening gate owned by %p from current thread %p", holder, thread);
2324 }
2325
2326 if (waiters) {
2327 gate_open_turnstile(gate);
2328 }
2329
2330 state = GATE_ILOCK;
2331 ordered_store_gate(gate, state);
2332
2333 gate_iunlock(gate);
2334 }
2335
2336 static kern_return_t
2337 gate_handoff_turnstile(gate_t *gate,
2338 int flags,
2339 thread_t *thread_woken_up,
2340 bool *waiters)
2341 {
2342 struct turnstile *ts = NULL;
2343 kern_return_t ret = KERN_FAILURE;
2344 thread_t hp_thread;
2345
2346 ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2347 /*
2348 * Wake up the highest priority thread waiting on the gate
2349 */
2350 hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);
2351
2352 if (hp_thread != NULL) {
2353 /*
2354 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
2355 */
2356 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2357 *thread_woken_up = hp_thread;
2358 *waiters = turnstile_has_waiters(ts);
2359 /*
2360 * Note: hp_thread is the new holder and the new inheritor.
2361 * If there are no more waiters, it does not need to remain the inheritor,
2362 * and it should no longer be the inheritor by the time it finishes the wait,
2363 * so that its next open or handoff can go through the fast path.
2364 * We could set the inheritor to NULL here, or the new holder itself can do it
2365 * on its way back from the sleep. In the latter case there is a better chance
2366 * that new waiters will arrive in the meantime, avoiding the operation altogether.
2367 */
2368 ret = KERN_SUCCESS;
2369 } else {
2370 /*
2371 * Waiters may have been woken up by an interrupt and still not
2372 * have updated gate->waiters, so we could not find them on the waitq.
2373 * Update the inheritor to NULL here, so that the current thread can return to userspace
2374 * independently of when the interrupted waiters finish their wait.
2375 */
2376 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2377 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2378 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2379 }
2380 // there are no waiters.
2381 ret = KERN_NOT_WAITING;
2382 }
2383
2384 turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2385
2386 /*
2387 * We can do the cleanup while holding the interlock.
2388 * It is ok because:
2389 * 1. current_thread is the previous inheritor and it is running
2390 * 2. new inheritor is NULL or it is a just wokenup thread that will race acquiring the lock
2391 * of the gate before trying to sleep.
2392 * => No chain of turnstiles needs to be updated.
2393 */
2394 turnstile_cleanup();
2395
2396 return ret;
2397 }
2398
2399 static kern_return_t
2400 gate_handoff(gate_t *gate,
2401 int flags)
2402 {
2403 kern_return_t ret;
2404 thread_t new_holder = NULL;
2405 uintptr_t state;
2406 thread_t holder;
2407 bool waiters;
2408 thread_t thread = current_thread();
2409
2410 assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);
2411
2412 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2413 if (os_atomic_cmpxchg(&gate->gate_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
2414 //gate opened but there were no waiters, so return KERN_NOT_WAITING.
2415 return KERN_NOT_WAITING;
2416 }
2417 }
2418
2419 gate_ilock(gate);
2420 state = ordered_load_gate(gate);
2421 holder = GATE_STATE_TO_THREAD(state);
2422 waiters = gate_has_waiters(state);
2423
2424 if (holder != current_thread()) {
2425 panic("Handing off gate owned by %p from current thread %p", holder, current_thread());
2426 }
2427
2428 if (waiters) {
2429 ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
2430 if (ret == KERN_SUCCESS) {
2431 state = GATE_THREAD_TO_STATE(new_holder);
2432 if (waiters) {
2433 state |= GATE_WAITERS;
2434 }
2435 } else {
2436 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2437 state = 0;
2438 }
2439 }
2440 } else {
2441 if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
2442 state = 0;
2443 }
2444 ret = KERN_NOT_WAITING;
2445 }
2446 state |= GATE_ILOCK;
2447 ordered_store_gate(gate, state);
2448
2449 gate_iunlock(gate);
2450
2451 if (new_holder) {
2452 thread_deallocate(new_holder);
2453 }
2454 return ret;
2455 }
2456
2457 static void_func_void
2458 gate_steal_turnstile(gate_t *gate,
2459 thread_t new_inheritor)
2460 {
2461 struct turnstile *ts = NULL;
2462
2463 ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2464
2465 turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
2466 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
2467 turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2468
2469 /*
2470 * turnstile_cleanup might need to update the chain of the old holder.
2471 * This operation should happen without the turnstile interlock held.
2472 */
2473 return turnstile_cleanup;
2474 }
2475
2476 static void
2477 gate_steal(gate_t *gate)
2478 {
2479 uintptr_t state;
2480 thread_t holder;
2481 thread_t thread = current_thread();
2482 bool waiters;
2483
2484 void_func_void func_after_interlock_unlock;
2485
2486 gate_ilock(gate);
2487 state = ordered_load_gate(gate);
2488 holder = GATE_STATE_TO_THREAD(state);
2489 waiters = gate_has_waiters(state);
2490
2491 assert(holder != NULL);
2492 state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
2493 if (waiters) {
2494 state |= GATE_WAITERS;
2495 ordered_store_gate(gate, state);
2496 func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
2497 gate_iunlock(gate);
2498
2499 func_after_interlock_unlock();
2500 } else {
2501 ordered_store_gate(gate, state);
2502 gate_iunlock(gate);
2503 }
2504 }
2505
2506 static void_func_void
2507 gate_wait_turnstile(gate_t *gate,
2508 wait_interrupt_t interruptible,
2509 uint64_t deadline,
2510 thread_t holder,
2511 wait_result_t* wait,
2512 bool* waiters)
2513 {
2514 struct turnstile *ts;
2515 uintptr_t state;
2516
2517 ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
2518
2519 turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
2520 waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);
2521
2522 gate_iunlock(gate);
2523
2524 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2525
2526 *wait = thread_block(THREAD_CONTINUE_NULL);
2527
2528 gate_ilock(gate);
2529
2530 *waiters = turnstile_has_waiters(ts);
2531
2532 if (!*waiters) {
2533 /*
2534 * We want to enable the fast path as soon as we see that there are no more waiters.
2535 * On the fast path the holder will not do any turnstile operations.
2536 * Set the inheritor as NULL here.
2537 *
2538 * NOTE: if it was an open operation that woke this thread up, the inheritor has
2539 * already been set to NULL.
2540 */
2541 state = ordered_load_gate(gate);
2542 holder = GATE_STATE_TO_THREAD(state);
2543 if (holder &&
2544 ((*wait != THREAD_AWAKENED) || // thread interrupted or timed out
2545 holder == current_thread())) { // thread was woken up and it is the new holder
2546 turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
2547 turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
2548 }
2549 }
2550
2551 turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
2552
2553 /*
2554 * turnstile_cleanup might need to update the chain of the old holder.
2555 * This operation should happen without the turnstile primitive interlock held.
2556 */
2557 return turnstile_cleanup;
2558 }
2559
2560 static gate_wait_result_t
2561 gate_wait(gate_t* gate,
2562 wait_interrupt_t interruptible,
2563 uint64_t deadline,
2564 void (^primitive_unlock)(void),
2565 void (^primitive_lock)(void))
2566 {
2567 gate_wait_result_t ret;
2568 void_func_void func_after_interlock_unlock;
2569 wait_result_t wait_result;
2570 uintptr_t state;
2571 thread_t holder;
2572 bool waiters;
2573
2574
2575 gate_ilock(gate);
2576 state = ordered_load_gate(gate);
2577 holder = GATE_STATE_TO_THREAD(state);
2578
2579 if (holder == NULL) {
2580 panic("Trying to wait on open gate thread %p gate %p", current_thread(), gate);
2581 }
2582
2583 state |= GATE_WAITERS;
2584 ordered_store_gate(gate, state);
2585
2586 /*
2587 * Release the primitive lock before any
2588 * turnstile operation. Turnstile
2589 * does not support a blocking primitive as
2590 * interlock.
2591 *
2592 * In this way, concurrent threads will be
2593 * able to acquire the primitive lock
2594 * but will still wait for this thread
2595 * through the gate interlock.
2596 */
2597 primitive_unlock();
2598
2599 func_after_interlock_unlock = gate_wait_turnstile( gate,
2600 interruptible,
2601 deadline,
2602 holder,
2603 &wait_result,
2604 &waiters);
2605
2606 state = ordered_load_gate(gate);
2607 holder = GATE_STATE_TO_THREAD(state);
2608
2609 switch (wait_result) {
2610 case THREAD_INTERRUPTED:
2611 case THREAD_TIMED_OUT:
2612 assert(holder != current_thread());
2613
2614 if (waiters) {
2615 state |= GATE_WAITERS;
2616 } else {
2617 state &= ~GATE_WAITERS;
2618 }
2619 ordered_store_gate(gate, state);
2620
2621 if (wait_result == THREAD_INTERRUPTED) {
2622 ret = GATE_INTERRUPTED;
2623 } else {
2624 ret = GATE_TIMED_OUT;
2625 }
2626 break;
2627 default:
2628 /*
2629 * Note it is possible that even if the gate was handed off to
2630 * me, someone called gate_steal() before I woke up.
2631 *
2632 * It is also possible that the gate was opened, but someone
2633 * closed it while I was waking up.
2634 *
2635 * In both cases we return GATE_OPENED: the gate was opened to me
2636 * at one point, and it is the caller's responsibility to check again
2637 * whether the gate is open.
2638 */
2639 if (holder == current_thread()) {
2640 ret = GATE_HANDOFF;
2641 } else {
2642 ret = GATE_OPENED;
2643 }
2644 break;
2645 }
2646
2647 gate_iunlock(gate);
2648
2649 /*
2650 * turnstile func that needs to be executed without
2651 * holding the primitive interlock
2652 */
2653 func_after_interlock_unlock();
2654
2655 primitive_lock();
2656
2657 return ret;
2658 }
2659 static void
2660 gate_assert(gate_t *gate, int flags)
2661 {
2662 uintptr_t state;
2663 thread_t holder;
2664
2665 gate_ilock(gate);
2666 state = ordered_load_gate(gate);
2667 holder = GATE_STATE_TO_THREAD(state);
2668
2669 switch (flags) {
2670 case GATE_ASSERT_CLOSED:
2671 assert(holder != NULL);
2672 break;
2673 case GATE_ASSERT_OPEN:
2674 assert(holder == NULL);
2675 break;
2676 case GATE_ASSERT_HELD:
2677 assert(holder == current_thread());
2678 break;
2679 default:
2680 panic("invalid %s flag %d", __func__, flags);
2681 }
2682
2683 gate_iunlock(gate);
2684 }
2685
2686 static void
2687 gate_init(gate_t *gate)
2688 {
2689 gate->gate_data = 0;
2690 gate->turnstile = NULL;
2691 }
2692
2693 static void
2694 gate_destroy(__assert_only gate_t *gate)
2695 {
2696 assert(gate->gate_data == 0);
2697 assert(gate->turnstile == NULL);
2698 }
2699
2700 /*
2701 * Name: lck_rw_gate_init
2702 *
2703 * Description: initializes a variable declared with decl_lck_rw_gate_data.
2704 *
2705 * Args:
2706 * Arg1: lck_rw_t lock used to protect the gate.
2707 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2708 */
2709 void
2710 lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
2711 {
2712 (void) lock;
2713 gate_init(gate);
2714 }
2715
2716 /*
2717 * Name: lck_rw_gate_destroy
2718 *
2719 * Description: destroys a variable previously initialized.
2720 *
2721 * Args:
2722 * Arg1: lck_rw_t lock used to protect the gate.
2723 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2724 */
2725 void
2726 lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
2727 {
2728 (void) lock;
2729 gate_destroy(gate);
2730 }
2731
2732 /*
2733 * Name: lck_rw_gate_try_close
2734 *
2735 * Description: Tries to close the gate.
2736 * In case of success the current thread will be set as
2737 * the holder of the gate.
2738 *
2739 * Args:
2740 * Arg1: lck_rw_t lock used to protect the gate.
2741 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2742 *
2743 * Conditions: Lock must be held. Returns with the lock held.
2744 *
2745 * Returns:
2746 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
2747 * of the gate.
2748 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2749 * to wake up possible waiters on the gate before returning to userspace.
2750 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
2751 * between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
2752 *
2753 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
2754 * lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
2755 * The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
2756 * be done without dropping the lock that is protecting the gate in between.
2757 */
2758 int
2759 lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
2760 {
2761 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2762
2763 return gate_try_close(gate);
2764 }
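
/*
 * Example usage (illustrative sketch, not part of this file; names are
 * hypothetical). Probe the gate and only wait if it is already closed,
 * without dropping the lock in between:
 *
 *	lck_rw_lock_exclusive(&obj->rwlock);
 *	if (lck_rw_gate_try_close(&obj->rwlock, &obj->gate) == KERN_SUCCESS) {
 *		// we are the holder: do the work, then open or hand off
 *	} else {
 *		// already closed: see the wait loop after lck_rw_gate_wait()
 *	}
 */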
2765
2766 /*
2767 * Name: lck_rw_gate_close
2768 *
2769 * Description: Closes the gate. The current thread will be set as
2770 * the holder of the gate. Will panic if the gate is already closed.
2771 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2772 * to wake up possible waiters on the gate before returning to userspace.
2773 *
2774 * Args:
2775 * Arg1: lck_rw_t lock used to protect the gate.
2776 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2777 *
2778 * Conditions: Lock must be held. Returns with the lock held.
2779 * The gate must be open.
2780 *
2781 */
2782 void
2783 lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
2784 {
2785 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2786
2787 return gate_close(gate);
2788 }
2789
2790 /*
2791 * Name: lck_rw_gate_open
2792 *
2793 * Description: Opens the gate and wakes up possible waiters.
2794 *
2795 * Args:
2796 * Arg1: lck_rw_t lock used to protect the gate.
2797 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2798 *
2799 * Conditions: Lock must be held. Returns with the lock held.
2800 * The current thread must be the holder of the gate.
2801 *
2802 */
2803 void
2804 lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
2805 {
2806 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2807
2808 gate_open(gate);
2809 }
2810
2811 /*
2812 * Name: lck_rw_gate_handoff
2813 *
2814 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
2815 * priority will be selected as the new holder of the gate, and woken up,
2816 * with the gate remaining in the closed state throughout.
2817 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
2818 * will be returned.
2819 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
2820 * case no waiters were found.
2821 *
2822 *
2823 * Args:
2824 * Arg1: lck_rw_t lock used to protect the gate.
2825 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2826 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
2827 *
2828 * Conditions: Lock must be held. Returns with the lock held.
2829 * The current thread must be the holder of the gate.
2830 *
2831 * Returns:
2832 * KERN_SUCCESS in case one of the waiters became the new holder.
2833 * KERN_NOT_WAITING in case there were no waiters.
2834 *
2835 */
2836 kern_return_t
2837 lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, int flags)
2838 {
2839 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2840
2841 return gate_handoff(gate, flags);
2842 }
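
/*
 * Example usage (illustrative sketch, not part of this file; names are
 * hypothetical). The current holder passes ownership to the highest
 * priority waiter, opening the gate if nobody is waiting:
 *
 *	if (lck_rw_gate_handoff(&obj->rwlock, &obj->gate,
 *	    GATE_HANDOFF_OPEN_IF_NO_WAITERS) == KERN_NOT_WAITING) {
 *		// no waiters: the gate is now open
 *	}
 */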
2843
2844 /*
2845 * Name: lck_rw_gate_steal
2846 *
2847 * Description: Steals the ownership of the gate. It sets the current thread as the
2848 * new holder of the gate.
2849 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2850 * to wake up possible waiters on the gate before returning to userspace.
2851 * NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
2852 * anymore.
2853 *
2854 *
2855 * Args:
2856 * Arg1: lck_rw_t lock used to protect the gate.
2857 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2858 *
2859 * Conditions: Lock must be held. Returns with the lock held.
2860 * The gate must be closed and the current thread must not already be the holder.
2861 *
2862 */
2863 void
2864 lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
2865 {
2866 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2867
2868 gate_steal(gate);
2869 }
2870
2871 /*
2872 * Name: lck_rw_gate_wait
2873 *
2874 * Description: Waits for the current thread to become the holder of the gate or for the
2875 * gate to become open. An interruptible mode and deadline can be specified
2876 * to return earlier from the wait.
2877 *
2878 * Args:
2879 * Arg1: lck_rw_t lock used to protect the gate.
2880 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2881 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
2882 * Arg4: interruptible flag for wait.
2883 * Arg5: deadline for wait.
2884 *
2885 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2886 * Lock will be dropped while waiting.
2887 * The gate must be closed.
2888 *
2889 * Returns: Reason why the thread was woken up.
2890 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
2891 * A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
2892 * to wake up possible waiters on the gate before returning to userspace.
2893 * GATE_OPENED - the gate was opened by the holder.
2894 * GATE_TIMED_OUT - the thread was woken up by a timeout.
2895 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
2896 *
2897 */
2898 gate_wait_result_t
2899 lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
2900 {
2901 __block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
2902
2903 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2904
2905 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2906 return gate_wait(gate,
2907 interruptible,
2908 deadline,
2909 ^{lck_rw_type = lck_rw_done(lock);},
2910 ^{;});
2911 } else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2912 return gate_wait(gate,
2913 interruptible,
2914 deadline,
2915 ^{lck_rw_type = lck_rw_done(lock);},
2916 ^{lck_rw_lock(lock, lck_rw_type);});
2917 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2918 return gate_wait(gate,
2919 interruptible,
2920 deadline,
2921 ^{lck_rw_type = lck_rw_done(lock);},
2922 ^{lck_rw_lock_exclusive(lock);});
2923 } else {
2924 return gate_wait(gate,
2925 interruptible,
2926 deadline,
2927 ^{lck_rw_type = lck_rw_done(lock);},
2928 ^{lck_rw_lock_shared(lock);});
2929 }
2930 }
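
/*
 * Example usage (illustrative sketch, not part of this file; names are
 * hypothetical). Typical close-or-wait loop combined with
 * lck_rw_gate_try_close(); with THREAD_UNINT and no deadline the wait
 * should only end in GATE_HANDOFF or GATE_OPENED:
 *
 *	lck_rw_lock_exclusive(&obj->rwlock);
 *	while (lck_rw_gate_try_close(&obj->rwlock, &obj->gate) != KERN_SUCCESS) {
 *		if (lck_rw_gate_wait(&obj->rwlock, &obj->gate, LCK_SLEEP_DEFAULT,
 *		    THREAD_UNINT, 0) == GATE_HANDOFF) {
 *			break;	// ownership was handed to us
 *		}
 *		// GATE_OPENED: the gate was opened, try to close it again
 *	}
 *	// ... exclusive section as the gate holder ...
 *	lck_rw_gate_open(&obj->rwlock, &obj->gate);
 *	lck_rw_unlock_exclusive(&obj->rwlock);
 */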
2931
2932 /*
2933 * Name: lck_rw_gate_assert
2934 *
2935 * Description: asserts that the gate is in the specified state.
2936 *
2937 * Args:
2938 * Arg1: lck_rw_t lock used to protect the gate.
2939 * Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
2940 * Arg3: flags to specify the assert type.
2941 * GATE_ASSERT_CLOSED - the gate is currently closed
2942 * GATE_ASSERT_OPEN - the gate is currently opened
2943 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
2944 */
2945 void
2946 lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, int flags)
2947 {
2948 LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);
2949
2950 gate_assert(gate, flags);
2951 return;
2952 }
2953
2954 /*
2955 * Name: lck_mtx_gate_init
2956 *
2957 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
2958 *
2959 * Args:
2960 * Arg1: lck_mtx_t lock used to protect the gate.
2961 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2962 */
2963 void
2964 lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
2965 {
2966 (void) lock;
2967 gate_init(gate);
2968 }
2969
2970 /*
2971 * Name: lck_mtx_gate_destroy
2972 *
2973 * Description: destroys a variable previously initialized
2974 *
2975 * Args:
2976 * Arg1: lck_mtx_t lock used to protect the gate.
2977 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2978 */
2979 void
2980 lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
2981 {
2982 (void) lock;
2983 gate_destroy(gate);
2984 }
2985
2986 /*
2987 * Name: lck_mtx_gate_try_close
2988 *
2989 * Description: Tries to close the gate.
2990 * In case of success the current thread will be set as
2991 * the holder of the gate.
2992 *
2993 * Args:
2994 * Arg1: lck_mtx_t lock used to protect the gate.
2995 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
2996 *
2997 * Conditions: Lock must be held. Returns with the lock held.
2998 *
2999 * Returns:
3000 * KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
3001 * of the gate.
3002 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3003 * to wake up possible waiters on the gate before returning to userspace.
3004 * If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
3005 * between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
3006 *
3007 * KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
3008 * lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
3009 * The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
3010 * be done without dropping the lock that is protecting the gate in between.
3011 */
3012 int
3013 lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3014 {
3015 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3016
3017 return gate_try_close(gate);
3018 }
3019
3020 /*
3021 * Name: lck_mtx_gate_close
3022 *
3023 * Description: Closes the gate. The current thread will be set as
3024 * the holder of the gate. Will panic if the gate is already closed.
3025 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3026 * to wake up possible waiters on the gate before returning to userspace.
3027 *
3028 * Args:
3029 * Arg1: lck_mtx_t lock used to protect the gate.
3030 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3031 *
3032 * Conditions: Lock must be held. Returns with the lock held.
3033 * The gate must be open.
3034 *
3035 */
3036 void
3037 lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
3038 {
3039 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3040
3041 return gate_close(gate);
3042 }
3043
3044 /*
3045 * Name: lck_mtx_gate_open
3046 *
3047 * Description: Opens the gate and wakes up possible waiters.
3048 *
3049 * Args:
3050 * Arg1: lck_mtx_t lock used to protect the gate.
3051 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3052 *
3053 * Conditions: Lock must be held. Returns with the lock held.
3054 * The current thread must be the holder of the gate.
3055 *
3056 */
3057 void
3058 lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
3059 {
3060 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3061
3062 gate_open(gate);
3063 }
3064
3065 /*
3066 * Name: lck_mtx_gate_handoff
3067 *
3068 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
3069 * priority will be selected as the new holder of the gate, and woken up,
3070 * with the gate remaining in the closed state throughout.
3071 * If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
3072 * will be returned.
3073 * GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
3074 * case no waiters were found.
3075 *
3076 *
3077 * Args:
3078 * Arg1: lck_mtx_t lock used to protect the gate.
3079 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3080 * Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
3081 *
3082 * Conditions: Lock must be held. Returns with the lock held.
3083 * The current thread must be the holder of the gate.
3084 *
3085 * Returns:
3086 * KERN_SUCCESS in case one of the waiters became the new holder.
3087 * KERN_NOT_WAITING in case there were no waiters.
3088 *
3089 */
3090 kern_return_t
3091 lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, int flags)
3092 {
3093 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3094
3095 return gate_handoff(gate, flags);
3096 }
3097
3098 /*
3099 * Name: lck_mtx_gate_steal
3100 *
3101 * Description: Steals the ownership of the gate. It sets the current thread as the
3102 * new holder of the gate.
3103 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3104 * to wake up possible waiters on the gate before returning to userspace.
3105 * NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
3106 * anymore.
3107 *
3108 *
3109 * Args:
3110 * Arg1: lck_mtx_t lock used to protect the gate.
3111 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3112 *
3113 * Conditions: Lock must be held. Returns with the lock held.
3114 * The gate must be closed and the current thread must not already be the holder.
3115 *
3116 */
3117 void
3118 lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
3119 {
3120 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3121
3122 gate_steal(gate);
3123 }
3124
3125 /*
3126 * Name: lck_mtx_gate_wait
3127 *
3128 * Description: Waits for the current thread to become the holder of the gate or for the
3129 * gate to become open. An interruptible mode and deadline can be specified
3130 * to return earlier from the wait.
3131 *
3132 * Args:
3133 * Arg1: lck_mtx_t lock used to protect the gate.
3134 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3135 * Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
3136 * Arg4: interruptible flag for wait.
3137 * Arg5: deadline for wait.
3138 *
3139 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
3140 * Lock will be dropped while waiting.
3141 * The gate must be closed.
3142 *
3143 * Returns: Reason why the thread was woken up.
3144 * GATE_HANDOFF - the current thread was handed off the ownership of the gate.
3145 * A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
3146 * to wake up possible waiters on the gate before returning to userspace.
3147 * GATE_OPENED - the gate was opened by the holder.
3148 * GATE_TIMED_OUT - the thread was woken up by a timeout.
3149 * GATE_INTERRUPTED - the thread was interrupted while sleeping.
3150 *
3151 */
3152 gate_wait_result_t
3153 lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
3154 {
3155 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3156
3157 if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
3158 return gate_wait(gate,
3159 interruptible,
3160 deadline,
3161 ^{lck_mtx_unlock(lock);},
3162 ^{;});
3163 } else if (lck_sleep_action & LCK_SLEEP_SPIN) {
3164 return gate_wait(gate,
3165 interruptible,
3166 deadline,
3167 ^{lck_mtx_unlock(lock);},
3168 ^{lck_mtx_lock_spin(lock);});
3169 } else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
3170 return gate_wait(gate,
3171 interruptible,
3172 deadline,
3173 ^{lck_mtx_unlock(lock);},
3174 ^{lck_mtx_lock_spin_always(lock);});
3175 } else {
3176 return gate_wait(gate,
3177 interruptible,
3178 deadline,
3179 ^{lck_mtx_unlock(lock);},
3180 ^{lck_mtx_lock(lock);});
3181 }
3182 }
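
/*
 * Example usage (illustrative sketch, not part of this file; names are
 * hypothetical). Same pattern as the lck_rw variant, with a mutex-protected
 * gate that is handed off to the next waiter when the work is done:
 *
 *	lck_mtx_lock(&obj->mtx);
 *	while (lck_mtx_gate_try_close(&obj->mtx, &obj->gate) != KERN_SUCCESS) {
 *		if (lck_mtx_gate_wait(&obj->mtx, &obj->gate, LCK_SLEEP_DEFAULT,
 *		    THREAD_UNINT, 0) == GATE_HANDOFF) {
 *			break;	// we are the new holder
 *		}
 *	}
 *	// ... exclusive section as the gate holder ...
 *	if (lck_mtx_gate_handoff(&obj->mtx, &obj->gate,
 *	    GATE_HANDOFF_OPEN_IF_NO_WAITERS) == KERN_NOT_WAITING) {
 *		// no waiters: the gate is now open
 *	}
 *	lck_mtx_unlock(&obj->mtx);
 */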
3183
3184 /*
3185 * Name: lck_mtx_gate_assert
3186 *
3187 * Description: asserts that the gate is in the specified state.
3188 *
3189 * Args:
3190 * Arg1: lck_mtx_t lock used to protect the gate.
3191 * Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
3192 * Arg3: flags to specify the assert type.
3193 * GATE_ASSERT_CLOSED - the gate is currently closed
3194 * GATE_ASSERT_OPEN - the gate is currently opened
3195 * GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
3196 */
3197 void
3198 lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, int flags)
3199 {
3200 LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);
3201
3202 gate_assert(gate, flags);
3203 }
3204
3205 #pragma mark - LCK_*_DECLARE support
3206
3207 __startup_func
3208 void
3209 lck_grp_attr_startup_init(struct lck_grp_attr_startup_spec *sp)
3210 {
3211 lck_grp_attr_t *attr = sp->grp_attr;
3212 lck_grp_attr_setdefault(attr);
3213 attr->grp_attr_val |= sp->grp_attr_set_flags;
3214 attr->grp_attr_val &= ~sp->grp_attr_clear_flags;
3215 }
3216
3217 __startup_func
3218 void
3219 lck_grp_startup_init(struct lck_grp_startup_spec *sp)
3220 {
3221 lck_grp_init(sp->grp, sp->grp_name, sp->grp_attr);
3222 }
3223
3224 __startup_func
3225 void
3226 lck_attr_startup_init(struct lck_attr_startup_spec *sp)
3227 {
3228 lck_attr_t *attr = sp->lck_attr;
3229 lck_attr_setdefault(attr);
3230 attr->lck_attr_val |= sp->lck_attr_set_flags;
3231 attr->lck_attr_val &= ~sp->lck_attr_clear_flags;
3232 }
3233
3234 __startup_func
3235 void
3236 lck_spin_startup_init(struct lck_spin_startup_spec *sp)
3237 {
3238 lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
3239 }
3240
3241 __startup_func
3242 void
3243 lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
3244 {
3245 if (sp->lck_ext) {
3246 lck_mtx_init_ext(sp->lck, sp->lck_ext, sp->lck_grp, sp->lck_attr);
3247 } else {
3248 lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
3249 }
3250 }
3251
3252 __startup_func
3253 void
3254 lck_rw_startup_init(struct lck_rw_startup_spec *sp)
3255 {
3256 lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
3257 }
3258
3259 __startup_func
3260 void
3261 usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
3262 {
3263 simple_lock_init(sp->lck, sp->lck_init_arg);
3264 }
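
/*
 * Example usage (illustrative sketch; the `my_subsystem' names are
 * hypothetical, and this assumes the LCK_GRP_DECLARE / LCK_MTX_DECLARE /
 * LCK_RW_DECLARE macros from <kern/locks.h>, which expand into the startup
 * specs consumed by the functions above):
 *
 *	LCK_GRP_DECLARE(my_subsystem_grp, "my_subsystem");
 *	LCK_MTX_DECLARE(my_subsystem_mtx, &my_subsystem_grp);
 *	LCK_RW_DECLARE(my_subsystem_rwlock, &my_subsystem_grp);
 *
 * The declared locks are then initialized during early boot by the
 * corresponding *_startup_init() routines, before first use.
 */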