/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach_debug/lockgroup_info.h>

#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <libkern/section_keywords.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <sys/kdebug.h>

#define LCK_MTX_SLEEP_CODE              0
#define LCK_MTX_SLEEP_DEADLINE_CODE     1
#define LCK_MTX_LCK_WAIT_CODE           2
#define LCK_MTX_UNLCK_WAKEUP_CODE       3

#if MACH_LDEBUG
#define ALIGN_TEST(p, t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
#else /* MACH_LDEBUG */
#define ALIGN_TEST(p, t) do{}while(0)
#endif /* MACH_LDEBUG */

#define NOINLINE                __attribute__((noinline))

#define ordered_load_hw(lock)           os_atomic_load(&(lock)->lock_data, compiler_acq_rel)
#define ordered_store_hw(lock, value)   os_atomic_store(&(lock)->lock_data, (value), compiler_acq_rel)
queue_head_t            lck_grp_queue;
unsigned int            lck_grp_cnt;

decl_lck_mtx_data(, lck_grp_lock);
static lck_mtx_ext_t    lck_grp_lock_ext;
SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;

/* Obtain "lcks" options: this currently controls lock statistics */
TUNABLE(uint32_t, LcksOpts, "lcks", 0);

ZONE_VIEW_DEFINE(ZV_LCK_GRP_ATTR, "lck_grp_attr",
    KHEAP_ID_DEFAULT, sizeof(lck_grp_attr_t));

ZONE_VIEW_DEFINE(ZV_LCK_GRP, "lck_grp",
    KHEAP_ID_DEFAULT, sizeof(lck_grp_t));

ZONE_VIEW_DEFINE(ZV_LCK_ATTR, "lck_attr",
    KHEAP_ID_DEFAULT, sizeof(lck_attr_t));

lck_grp_attr_t          LockDefaultGroupAttr;
lck_grp_t               LockCompatGroup;
lck_attr_t              LockDefaultLckAttr;

#if defined (__x86_64__)
uint64_t dtrace_spin_threshold = 500; // 500ns
#elif defined(__arm__) || defined(__arm64__)
uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // 500ns
#endif
static uintptr_t
unslide_for_kdebug(void* object)
{
	if (__improbable(kdebug_enable)) {
		return VM_KERNEL_UNSLIDE_OR_PERM(object);
	} else {
		return 0;
	}
}

__startup_func
static void
lck_mod_init(void)
{
	queue_init(&lck_grp_queue);

	/*
	 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
	 * grabbing the lck_grp_lock before it is initialized.
	 */
	bzero(&LockCompatGroup, sizeof(lck_grp_t));
	(void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);

	LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;

	if (LcksOpts & enaLkStat) {
		LockCompatGroup.lck_grp_attr |= LCK_GRP_ATTR_STAT;
	}
	if (LcksOpts & enaLkTimeStat) {
		LockCompatGroup.lck_grp_attr |= LCK_GRP_ATTR_TIME_STAT;
	}

	os_ref_init(&LockCompatGroup.lck_grp_refcnt, NULL);

	enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
	lck_grp_cnt = 1;

	lck_grp_attr_setdefault(&LockDefaultGroupAttr);
	lck_attr_setdefault(&LockDefaultLckAttr);

	lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
}
STARTUP(LOCKS_EARLY, STARTUP_RANK_FIRST, lck_mod_init);
/*
 * Routine:	lck_grp_attr_alloc_init
 */
lck_grp_attr_t *
lck_grp_attr_alloc_init(
	void)
{
	lck_grp_attr_t  *attr;

	attr = zalloc(ZV_LCK_GRP_ATTR);
	lck_grp_attr_setdefault(attr);
	return attr;
}

/*
 * Routine:	lck_grp_attr_setdefault
 */
void
lck_grp_attr_setdefault(
	lck_grp_attr_t  *attr)
{
	if (LcksOpts & enaLkStat) {
		attr->grp_attr_val = LCK_GRP_ATTR_STAT;
	} else {
		attr->grp_attr_val = 0;
	}
}

/*
 * Routine:	lck_grp_attr_setstat
 */
void
lck_grp_attr_setstat(
	lck_grp_attr_t  *attr)
{
	os_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT, relaxed);
}

/*
 * Routine:	lck_grp_attr_free
 */
void
lck_grp_attr_free(
	lck_grp_attr_t  *attr)
{
	zfree(ZV_LCK_GRP_ATTR, attr);
}
/*
 * Routine:	lck_grp_alloc_init
 */
lck_grp_t *
lck_grp_alloc_init(
	const char*     grp_name,
	lck_grp_attr_t  *attr)
{
	lck_grp_t       *grp;

	grp = zalloc(ZV_LCK_GRP);
	lck_grp_init(grp, grp_name, attr);
	return grp;
}

/*
 * Routine:	lck_grp_init
 */
void
lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
{
	/* make sure locking infrastructure has been initialized */
	assert(lck_grp_cnt > 0);

	bzero((void *)grp, sizeof(lck_grp_t));

	(void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);

	if (attr != LCK_GRP_ATTR_NULL) {
		grp->lck_grp_attr = attr->grp_attr_val;
	} else {
		grp->lck_grp_attr = 0;
		if (LcksOpts & enaLkStat) {
			grp->lck_grp_attr |= LCK_GRP_ATTR_STAT;
		}
		if (LcksOpts & enaLkTimeStat) {
			grp->lck_grp_attr |= LCK_GRP_ATTR_TIME_STAT;
		}
	}

	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) {
		lck_grp_stats_t *stats = &grp->lck_grp_stats;

#if LOCK_STATS
		lck_grp_stat_enable(&stats->lgss_spin_held);
		lck_grp_stat_enable(&stats->lgss_spin_miss);
#endif /* LOCK_STATS */

		lck_grp_stat_enable(&stats->lgss_mtx_held);
		lck_grp_stat_enable(&stats->lgss_mtx_miss);
		lck_grp_stat_enable(&stats->lgss_mtx_direct_wait);
		lck_grp_stat_enable(&stats->lgss_mtx_wait);
	}
	if (grp->lck_grp_attr & LCK_GRP_ATTR_TIME_STAT) {
#if LOCK_STATS
		lck_grp_stats_t *stats = &grp->lck_grp_stats;
		lck_grp_stat_enable(&stats->lgss_spin_spin);
#endif /* LOCK_STATS */
	}

	os_ref_init(&grp->lck_grp_refcnt, NULL);

	lck_mtx_lock(&lck_grp_lock);
	enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
	lck_grp_cnt++;
	lck_mtx_unlock(&lck_grp_lock);
}
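
/*
 * Typical usage of the lock-group KPI above: a subsystem allocates one
 * attribute and one named group at init time, then tags every lock it creates
 * with that group so counts and statistics are reported per group (see
 * host_lockgroup_info() below). This is a minimal sketch, not compiled here;
 * the "com.example.mydrv" name and the init hook are hypothetical.
 */
#if 0 /* illustrative example only */
static lck_grp_attr_t   *mydrv_grp_attr;
static lck_grp_t        *mydrv_grp;

static void
mydrv_locks_init(void)
{
	mydrv_grp_attr = lck_grp_attr_alloc_init();
	lck_grp_attr_setstat(mydrv_grp_attr);   /* opt into per-group statistics */
	mydrv_grp = lck_grp_alloc_init("com.example.mydrv", mydrv_grp_attr);
}
#endif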
/*
 * Routine:	lck_grp_free
 */
void
lck_grp_free(
	lck_grp_t       *grp)
{
	lck_mtx_lock(&lck_grp_lock);
	lck_grp_cnt--;
	(void)remque((queue_entry_t)grp);
	lck_mtx_unlock(&lck_grp_lock);
	lck_grp_deallocate(grp);
}

/*
 * Routine:	lck_grp_reference
 */
void
lck_grp_reference(
	lck_grp_t       *grp)
{
	os_ref_retain(&grp->lck_grp_refcnt);
}

/*
 * Routine:	lck_grp_deallocate
 */
void
lck_grp_deallocate(
	lck_grp_t       *grp)
{
	if (os_ref_release(&grp->lck_grp_refcnt) != 0) {
		return;
	}

	zfree(ZV_LCK_GRP, grp);
}
/*
 * Routine:	lck_grp_lckcnt_incr
 */
void
lck_grp_lckcnt_incr(
	lck_grp_t       *grp,
	lck_type_t      lck_type)
{
	unsigned int    *lckcnt;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	case LCK_TYPE_TICKET:
		lckcnt = &grp->lck_grp_ticketcnt;
		break;
	default:
		return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
	}

	os_atomic_inc(lckcnt, relaxed);
}

/*
 * Routine:	lck_grp_lckcnt_decr
 */
void
lck_grp_lckcnt_decr(
	lck_grp_t       *grp,
	lck_type_t      lck_type)
{
	unsigned int    *lckcnt;
	int             updated;

	switch (lck_type) {
	case LCK_TYPE_SPIN:
		lckcnt = &grp->lck_grp_spincnt;
		break;
	case LCK_TYPE_MTX:
		lckcnt = &grp->lck_grp_mtxcnt;
		break;
	case LCK_TYPE_RW:
		lckcnt = &grp->lck_grp_rwcnt;
		break;
	case LCK_TYPE_TICKET:
		lckcnt = &grp->lck_grp_ticketcnt;
		break;
	default:
		panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
		return;
	}

	updated = os_atomic_dec(lckcnt, relaxed);
	assert(updated >= 0);
}
/*
 * Routine:	lck_attr_alloc_init
 */
lck_attr_t *
lck_attr_alloc_init(
	void)
{
	lck_attr_t      *attr;

	attr = zalloc(ZV_LCK_ATTR);
	lck_attr_setdefault(attr);
	return attr;
}

/*
 * Routine:	lck_attr_setdefault
 */
void
lck_attr_setdefault(
	lck_attr_t      *attr)
{
#if __arm__ || __arm64__
	/* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
	attr->lck_attr_val = LCK_ATTR_NONE;
#elif __i386__ || __x86_64__
#if     !DEBUG
	if (LcksOpts & enaLkDeb) {
		attr->lck_attr_val = LCK_ATTR_DEBUG;
	} else {
		attr->lck_attr_val = LCK_ATTR_NONE;
	}
#else   /* !DEBUG */
	attr->lck_attr_val = LCK_ATTR_DEBUG;
#endif  /* !DEBUG */
#else
#error Unknown architecture.
#endif  /* __arm__ */
}

/*
 * Routine:	lck_attr_setdebug
 */
void
lck_attr_setdebug(
	lck_attr_t      *attr)
{
	os_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG, relaxed);
}

/*
 * Routine:	lck_attr_cleardebug
 */
void
lck_attr_cleardebug(
	lck_attr_t      *attr)
{
	os_atomic_andnot(&attr->lck_attr_val, LCK_ATTR_DEBUG, relaxed);
}

/*
 * Routine:	lck_attr_rw_shared_priority
 */
void
lck_attr_rw_shared_priority(
	lck_attr_t      *attr)
{
	os_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY, relaxed);
}

/*
 * Routine:	lck_attr_free
 */
void
lck_attr_free(
	lck_attr_t      *attr)
{
	zfree(ZV_LCK_ATTR, attr);
}
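
/*
 * Sketch of how the attribute KPI above is typically combined with a lock
 * constructor: allocate an attribute, optionally enable debug checking, and
 * pass it (together with a group) to an initializer such as lck_mtx_init()
 * from kern/locks.h. Illustrative only; the "mydrv" names are hypothetical and
 * assume the group allocated in the earlier example.
 */
#if 0 /* illustrative example only */
static lck_mtx_t        mydrv_mtx;

static void
mydrv_mutex_init(void)
{
	lck_attr_t *attr = lck_attr_alloc_init();

	lck_attr_setdebug(attr);                /* optional: extra debug checking */
	lck_mtx_init(&mydrv_mtx, mydrv_grp, attr);
	lck_attr_free(attr);                    /* settings are captured at init time */
}
#endif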
496 * Routine: hw_lock_init
498 * Initialize a hardware lock.
501 hw_lock_init(hw_lock_t lock
)
503 ordered_store_hw(lock
, 0);
507 hw_lock_trylock_contended(hw_lock_t lock
, uintptr_t newval
)
509 #if OS_ATOMIC_USE_LLSC
511 os_atomic_rmw_loop(&lock
->lock_data
, oldval
, newval
, acquire
, {
513 wait_for_event(); // clears the monitor so we don't need give_up()
518 #else // !OS_ATOMIC_USE_LLSC
519 #if OS_ATOMIC_HAS_LLSC
520 uintptr_t oldval
= os_atomic_load_exclusive(&lock
->lock_data
, relaxed
);
522 wait_for_event(); // clears the monitor so we don't need give_up()
525 #endif // OS_ATOMIC_HAS_LLSC
526 return os_atomic_cmpxchg(&lock
->lock_data
, 0, newval
, acquire
);
527 #endif // !OS_ATOMIC_USE_LLSC
531 * Routine: hw_lock_lock_contended
533 * Spin until lock is acquired or timeout expires.
534 * timeout is in mach_absolute_time ticks. Called with
535 * preemption disabled.
537 static unsigned int NOINLINE
538 hw_lock_lock_contended(hw_lock_t lock
, uintptr_t data
, uint64_t timeout
, boolean_t do_panic
LCK_GRP_ARG(lck_grp_t
*grp
))
541 uintptr_t holder
= lock
->lock_data
;
545 timeout
= LOCK_PANIC_TIMEOUT
;
547 #if CONFIG_DTRACE || LOCK_STATS
549 boolean_t stat_enabled
= lck_grp_spin_spin_enabled(lock
LCK_GRP_ARG(grp
));
550 #endif /* CONFIG_DTRACE || LOCK_STATS */
552 #if LOCK_STATS || CONFIG_DTRACE
553 if (__improbable(stat_enabled
)) {
554 begin
= mach_absolute_time();
556 #endif /* LOCK_STATS || CONFIG_DTRACE */
558 for (i
= 0; i
< LOCK_SNOOP_SPINS
; i
++) {
560 #if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
561 holder
= ordered_load_hw(lock
);
566 if (hw_lock_trylock_contended(lock
, data
)) {
567 #if CONFIG_DTRACE || LOCK_STATS
568 if (__improbable(stat_enabled
)) {
569 lck_grp_spin_update_spin(lock
LCK_GRP_ARG(grp
), mach_absolute_time() - begin
);
571 lck_grp_spin_update_miss(lock
LCK_GRP_ARG(grp
));
572 #endif /* CONFIG_DTRACE || LOCK_STATS */
577 end
= ml_get_timebase() + timeout
;
578 } else if (ml_get_timebase() >= end
) {
583 // Capture the actual time spent blocked, which may be higher than the timeout
584 // if a misbehaving interrupt stole this thread's CPU time.
585 panic("Spinlock timeout after %llu ticks, %p = %lx",
586 (ml_get_timebase() - end
+ timeout
), lock
, holder
);
592 hw_wait_while_equals(void **address
, void *current
)
598 for (int i
= 0; i
< LOCK_SNOOP_SPINS
; i
++) {
600 #if OS_ATOMIC_HAS_LLSC
601 v
= os_atomic_load_exclusive(address
, relaxed
);
602 if (__probable(v
!= current
)) {
603 os_atomic_clear_exclusive();
608 v
= os_atomic_load(address
, relaxed
);
609 if (__probable(v
!= current
)) {
612 #endif // OS_ATOMIC_HAS_LLSC
615 end
= ml_get_timebase() + LOCK_PANIC_TIMEOUT
;
616 } else if (ml_get_timebase() >= end
) {
617 panic("Wait while equals timeout @ *%p == %p", address
, v
);
623 hw_lock_lock_internal(hw_lock_t lock
, thread_t thread
LCK_GRP_ARG(lck_grp_t
*grp
))
627 state
= LCK_MTX_THREAD_TO_STATE(thread
) | PLATFORM_LCK_ILOCK
;
629 if (ordered_load_hw(lock
)) {
632 #endif // LOCK_PRETEST
633 if (hw_lock_trylock_contended(lock
, state
)) {
638 #endif // LOCK_PRETEST
639 hw_lock_lock_contended(lock
, state
, 0, spinlock_timeout_panic
LCK_GRP_ARG(grp
));
641 lck_grp_spin_update_held(lock
LCK_GRP_ARG(grp
));
647 * Routine: hw_lock_lock
649 * Acquire lock, spinning until it becomes available,
650 * return with preemption disabled.
653 (hw_lock_lock
)(hw_lock_t lock
LCK_GRP_ARG(lck_grp_t
*grp
))
655 thread_t thread
= current_thread();
656 disable_preemption_for_thread(thread
);
657 hw_lock_lock_internal(lock
, thread
LCK_GRP_ARG(grp
));
661 * Routine: hw_lock_lock_nopreempt
663 * Acquire lock, spinning until it becomes available.
666 (hw_lock_lock_nopreempt
)(hw_lock_t lock
LCK_GRP_ARG(lck_grp_t
*grp
))
668 thread_t thread
= current_thread();
669 if (__improbable(!preemption_disabled_for_thread(thread
))) {
670 panic("Attempt to take no-preempt spinlock %p in preemptible context", lock
);
672 hw_lock_lock_internal(lock
, thread
LCK_GRP_ARG(grp
));
676 * Routine: hw_lock_to
678 * Acquire lock, spinning until it becomes available or timeout.
679 * Timeout is in mach_absolute_time ticks, return with
680 * preemption disabled.
684 (hw_lock_to
)(hw_lock_t lock
, uint64_t timeout
LCK_GRP_ARG(lck_grp_t
*grp
))
688 unsigned int success
= 0;
690 thread
= current_thread();
691 disable_preemption_for_thread(thread
);
692 state
= LCK_MTX_THREAD_TO_STATE(thread
) | PLATFORM_LCK_ILOCK
;
694 if (ordered_load_hw(lock
)) {
697 #endif // LOCK_PRETEST
698 if (hw_lock_trylock_contended(lock
, state
)) {
704 #endif // LOCK_PRETEST
705 success
= hw_lock_lock_contended(lock
, state
, timeout
, FALSE
LCK_GRP_ARG(grp
));
708 lck_grp_spin_update_held(lock
LCK_GRP_ARG(grp
));
714 * Routine: hw_lock_try
716 * returns with preemption disabled on success.
718 static inline unsigned int
719 hw_lock_try_internal(hw_lock_t lock
, thread_t thread
LCK_GRP_ARG(lck_grp_t
*grp
))
724 if (ordered_load_hw(lock
)) {
727 #endif // LOCK_PRETEST
728 success
= os_atomic_cmpxchg(&lock
->lock_data
, 0,
729 LCK_MTX_THREAD_TO_STATE(thread
) | PLATFORM_LCK_ILOCK
, acquire
);
733 #endif // LOCK_PRETEST
735 lck_grp_spin_update_held(lock
LCK_GRP_ARG(grp
));
742 (hw_lock_try
)(hw_lock_t lock
LCK_GRP_ARG(lck_grp_t
*grp
))
744 thread_t thread
= current_thread();
745 disable_preemption_for_thread(thread
);
746 unsigned int success
= hw_lock_try_internal(lock
, thread
LCK_GRP_ARG(grp
));
755 (hw_lock_try_nopreempt
)(hw_lock_t lock
LCK_GRP_ARG(lck_grp_t
*grp
))
757 thread_t thread
= current_thread();
758 if (__improbable(!preemption_disabled_for_thread(thread
))) {
759 panic("Attempt to test no-preempt spinlock %p in preemptible context", lock
);
761 return hw_lock_try_internal(lock
, thread
LCK_GRP_ARG(grp
));
765 * Routine: hw_lock_unlock
767 * Unconditionally release lock, release preemption level.
770 hw_lock_unlock_internal(hw_lock_t lock
)
772 os_atomic_store(&lock
->lock_data
, 0, release
);
773 #if __arm__ || __arm64__
774 // ARM tests are only for open-source exclusion
776 #endif // __arm__ || __arm64__
778 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE
, lock
, 0);
779 #endif /* CONFIG_DTRACE */
783 (hw_lock_unlock
)(hw_lock_t lock
)
785 hw_lock_unlock_internal(lock
);
790 (hw_lock_unlock_nopreempt
)(hw_lock_t lock
)
792 if (__improbable(!preemption_disabled_for_thread(current_thread()))) {
793 panic("Attempt to release no-preempt spinlock %p in preemptible context", lock
);
795 hw_lock_unlock_internal(lock
);
/*
 * Routine hw_lock_held, doesn't change preemption state.
 * N.B.  Racy, of course.
 */
unsigned int
hw_lock_held(hw_lock_t lock)
{
	return ordered_load_hw(lock) != 0;
}
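
/*
 * Sketch of the basic hw_lock_t usage defined above: hw_lock_lock() spins and
 * returns with preemption disabled, hw_lock_unlock() releases the lock and
 * re-enables preemption, so the critical section must never block.
 * Illustrative only; the data structure is hypothetical, and depending on the
 * build these calls may also take an lck_grp_t argument (see LCK_GRP_ARG),
 * which the sketch omits.
 */
#if 0 /* illustrative example only */
struct mydrv_state {
	hw_lock_data_t  lock;           /* initialized once with hw_lock_init() */
	uint32_t        counter;
};

static void
mydrv_bump(struct mydrv_state *st)
{
	hw_lock_lock(&st->lock);        /* spins, returns with preemption disabled */
	st->counter++;                  /* short, non-blocking critical section */
	hw_lock_unlock(&st->lock);      /* releases and re-enables preemption */
}
#endif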
809 hw_lock_bit_to_contended(hw_lock_bit_t
*lock
, uint32_t mask
, uint32_t timeout
LCK_GRP_ARG(lck_grp_t
*grp
));
811 static inline unsigned int
812 hw_lock_bit_to_internal(hw_lock_bit_t
*lock
, unsigned int bit
, uint32_t timeout
LCK_GRP_ARG(lck_grp_t
*grp
))
814 unsigned int success
= 0;
815 uint32_t mask
= (1 << bit
);
817 if (__improbable(!hw_atomic_test_and_set32(lock
, mask
, mask
, memory_order_acquire
, FALSE
))) {
818 success
= hw_lock_bit_to_contended(lock
, mask
, timeout
LCK_GRP_ARG(grp
));
824 lck_grp_spin_update_held(lock
LCK_GRP_ARG(grp
));
832 (hw_lock_bit_to
)(hw_lock_bit_t
* lock
, unsigned int bit
, uint32_t timeout
LCK_GRP_ARG(lck_grp_t
*grp
))
834 _disable_preemption();
835 return hw_lock_bit_to_internal(lock
, bit
, timeout
LCK_GRP_ARG(grp
));
838 static unsigned int NOINLINE
839 hw_lock_bit_to_contended(hw_lock_bit_t
*lock
, uint32_t mask
, uint32_t timeout
LCK_GRP_ARG(lck_grp_t
*grp
))
843 #if CONFIG_DTRACE || LOCK_STATS
845 boolean_t stat_enabled
= lck_grp_spin_spin_enabled(lock
LCK_GRP_ARG(grp
));
846 #endif /* CONFIG_DTRACE || LOCK_STATS */
848 #if LOCK_STATS || CONFIG_DTRACE
849 if (__improbable(stat_enabled
)) {
850 begin
= mach_absolute_time();
852 #endif /* LOCK_STATS || CONFIG_DTRACE */
854 for (i
= 0; i
< LOCK_SNOOP_SPINS
; i
++) {
855 // Always load-exclusive before wfe
856 // This grabs the monitor and wakes up on a release event
857 if (hw_atomic_test_and_set32(lock
, mask
, mask
, memory_order_acquire
, TRUE
)) {
862 end
= ml_get_timebase() + timeout
;
863 } else if (ml_get_timebase() >= end
) {
869 #if CONFIG_DTRACE || LOCK_STATS
870 if (__improbable(stat_enabled
)) {
871 lck_grp_spin_update_spin(lock
LCK_GRP_ARG(grp
), mach_absolute_time() - begin
);
873 lck_grp_spin_update_miss(lock
LCK_GRP_ARG(grp
));
874 #endif /* CONFIG_DTRACE || LCK_GRP_STAT */
880 (hw_lock_bit
)(hw_lock_bit_t
* lock
, unsigned int bit
LCK_GRP_ARG(lck_grp_t
*grp
))
882 if (hw_lock_bit_to(lock
, bit
, LOCK_PANIC_TIMEOUT
, LCK_GRP_PROBEARG(grp
))) {
885 panic("hw_lock_bit(): timed out (%p)", lock
);
889 (hw_lock_bit_nopreempt
)(hw_lock_bit_t
* lock
, unsigned int bit
LCK_GRP_ARG(lck_grp_t
*grp
))
891 if (__improbable(get_preemption_level() == 0)) {
892 panic("Attempt to take no-preempt bitlock %p in preemptible context", lock
);
894 if (hw_lock_bit_to_internal(lock
, bit
, LOCK_PANIC_TIMEOUT
LCK_GRP_ARG(grp
))) {
897 panic("hw_lock_bit_nopreempt(): timed out (%p)", lock
);
902 (hw_lock_bit_try
)(hw_lock_bit_t
* lock
, unsigned int bit
LCK_GRP_ARG(lck_grp_t
*grp
))
904 uint32_t mask
= (1 << bit
);
905 boolean_t success
= FALSE
;
907 _disable_preemption();
908 // TODO: consider weak (non-looping) atomic test-and-set
909 success
= hw_atomic_test_and_set32(lock
, mask
, mask
, memory_order_acquire
, FALSE
);
911 _enable_preemption();
915 lck_grp_spin_update_held(lock
LCK_GRP_ARG(grp
));
922 hw_unlock_bit_internal(hw_lock_bit_t
*lock
, unsigned int bit
)
924 uint32_t mask
= (1 << bit
);
926 os_atomic_andnot(lock
, mask
, release
);
931 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE
, lock
, bit
);
/*
 * Routine:	hw_unlock_bit
 *
 *	Release spin-lock. The second parameter is the bit number to test and set.
 *	Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t * lock, unsigned int bit)
{
	hw_unlock_bit_internal(lock, bit);
	_enable_preemption();
}

void
hw_unlock_bit_nopreempt(hw_lock_bit_t * lock, unsigned int bit)
{
	if (__improbable(get_preemption_level() == 0)) {
		panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
	}
	hw_unlock_bit_internal(lock, bit);
}
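
/*
 * Sketch of the bit-lock KPI above: a single bit of a 32-bit word acts as the
 * spin lock, hw_lock_bit() returns with preemption disabled and
 * hw_unlock_bit() re-enables it. Illustrative only; the structure and the bit
 * number are hypothetical, and stats builds may pass an lck_grp_t as well.
 */
#if 0 /* illustrative example only */
#define MYDRV_LOCK_BIT  0               /* hypothetical: bit 0 of the word is the lock */

struct mydrv_obj {
	hw_lock_bit_t   bits;           /* bit 0 used as a spin lock */
	uint32_t        count;          /* protected by that bit */
};

static void
mydrv_obj_bump(struct mydrv_obj *obj)
{
	hw_lock_bit(&obj->bits, MYDRV_LOCK_BIT);        /* spins, preemption disabled */
	obj->count++;
	hw_unlock_bit(&obj->bits, MYDRV_LOCK_BIT);
}
#endif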
958 * Routine: lck_spin_sleep
963 lck_sleep_action_t lck_sleep_action
,
965 wait_interrupt_t interruptible
,
970 if ((lck_sleep_action
& ~LCK_SLEEP_MASK
) != 0) {
971 panic("Invalid lock sleep action %x\n", lck_sleep_action
);
974 res
= assert_wait(event
, interruptible
);
975 if (res
== THREAD_WAITING
) {
976 lck_spin_unlock(lck
);
977 res
= thread_block(THREAD_CONTINUE_NULL
);
978 if (!(lck_sleep_action
& LCK_SLEEP_UNLOCK
)) {
979 lck_spin_lock_grp(lck
, grp
);
981 } else if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
982 lck_spin_unlock(lck
);
991 lck_sleep_action_t lck_sleep_action
,
993 wait_interrupt_t interruptible
)
995 return lck_spin_sleep_grp(lck
, lck_sleep_action
, event
, interruptible
, LCK_GRP_NULL
);
999 * Routine: lck_spin_sleep_deadline
1002 lck_spin_sleep_deadline(
1004 lck_sleep_action_t lck_sleep_action
,
1006 wait_interrupt_t interruptible
,
1011 if ((lck_sleep_action
& ~LCK_SLEEP_MASK
) != 0) {
1012 panic("Invalid lock sleep action %x\n", lck_sleep_action
);
1015 res
= assert_wait_deadline(event
, interruptible
, deadline
);
1016 if (res
== THREAD_WAITING
) {
1017 lck_spin_unlock(lck
);
1018 res
= thread_block(THREAD_CONTINUE_NULL
);
1019 if (!(lck_sleep_action
& LCK_SLEEP_UNLOCK
)) {
1022 } else if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
1023 lck_spin_unlock(lck
);
1030 * Routine: lck_mtx_sleep
1035 lck_sleep_action_t lck_sleep_action
,
1037 wait_interrupt_t interruptible
)
1040 thread_t thread
= current_thread();
1042 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_SLEEP_CODE
) | DBG_FUNC_START
,
1043 VM_KERNEL_UNSLIDE_OR_PERM(lck
), (int)lck_sleep_action
, VM_KERNEL_UNSLIDE_OR_PERM(event
), (int)interruptible
, 0);
1045 if ((lck_sleep_action
& ~LCK_SLEEP_MASK
) != 0) {
1046 panic("Invalid lock sleep action %x\n", lck_sleep_action
);
1049 if (lck_sleep_action
& LCK_SLEEP_PROMOTED_PRI
) {
1051 * We overload the RW lock promotion to give us a priority ceiling
1052 * during the time that this thread is asleep, so that when it
1053 * is re-awakened (and not yet contending on the mutex), it is
1054 * runnable at a reasonably high priority.
1056 thread
->rwlock_count
++;
1059 res
= assert_wait(event
, interruptible
);
1060 if (res
== THREAD_WAITING
) {
1061 lck_mtx_unlock(lck
);
1062 res
= thread_block(THREAD_CONTINUE_NULL
);
1063 if (!(lck_sleep_action
& LCK_SLEEP_UNLOCK
)) {
1064 if ((lck_sleep_action
& LCK_SLEEP_SPIN
)) {
1065 lck_mtx_lock_spin(lck
);
1066 } else if ((lck_sleep_action
& LCK_SLEEP_SPIN_ALWAYS
)) {
1067 lck_mtx_lock_spin_always(lck
);
1072 } else if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
1073 lck_mtx_unlock(lck
);
1076 if (lck_sleep_action
& LCK_SLEEP_PROMOTED_PRI
) {
1077 if ((thread
->rwlock_count
-- == 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1078 /* sched_flags checked without lock, but will be rechecked while clearing */
1079 lck_rw_clear_promotion(thread
, unslide_for_kdebug(event
));
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
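
/*
 * Sketch of the classic wait/wakeup pattern built on lck_mtx_sleep() above:
 * the waiter re-checks its condition in a loop because, for LCK_SLEEP_DEFAULT,
 * lck_mtx_sleep() re-acquires the mutex before returning, and the producer
 * wakes the event with thread_wakeup(). Illustrative only; the "mydrv_q"
 * structure is hypothetical.
 */
#if 0 /* illustrative example only */
struct mydrv_q {
	lck_mtx_t       mtx;
	bool            ready;
};

static void
mydrv_wait_ready(struct mydrv_q *q)
{
	lck_mtx_lock(&q->mtx);
	while (!q->ready) {
		/* drops the mutex, blocks on the event, re-takes the mutex */
		(void) lck_mtx_sleep(&q->mtx, LCK_SLEEP_DEFAULT, (event_t)q, THREAD_UNINT);
	}
	lck_mtx_unlock(&q->mtx);
}

static void
mydrv_mark_ready(struct mydrv_q *q)
{
	lck_mtx_lock(&q->mtx);
	q->ready = true;
	lck_mtx_unlock(&q->mtx);
	thread_wakeup((event_t)q);
}
#endif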
1090 * Routine: lck_mtx_sleep_deadline
1093 lck_mtx_sleep_deadline(
1095 lck_sleep_action_t lck_sleep_action
,
1097 wait_interrupt_t interruptible
,
1101 thread_t thread
= current_thread();
1103 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_SLEEP_DEADLINE_CODE
) | DBG_FUNC_START
,
1104 VM_KERNEL_UNSLIDE_OR_PERM(lck
), (int)lck_sleep_action
, VM_KERNEL_UNSLIDE_OR_PERM(event
), (int)interruptible
, 0);
1106 if ((lck_sleep_action
& ~LCK_SLEEP_MASK
) != 0) {
1107 panic("Invalid lock sleep action %x\n", lck_sleep_action
);
1110 if (lck_sleep_action
& LCK_SLEEP_PROMOTED_PRI
) {
1112 * See lck_mtx_sleep().
1114 thread
->rwlock_count
++;
1117 res
= assert_wait_deadline(event
, interruptible
, deadline
);
1118 if (res
== THREAD_WAITING
) {
1119 lck_mtx_unlock(lck
);
1120 res
= thread_block(THREAD_CONTINUE_NULL
);
1121 if (!(lck_sleep_action
& LCK_SLEEP_UNLOCK
)) {
1122 if ((lck_sleep_action
& LCK_SLEEP_SPIN
)) {
1123 lck_mtx_lock_spin(lck
);
1128 } else if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
1129 lck_mtx_unlock(lck
);
1132 if (lck_sleep_action
& LCK_SLEEP_PROMOTED_PRI
) {
1133 if ((thread
->rwlock_count
-- == 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1134 /* sched_flags checked without lock, but will be rechecked while clearing */
1135 lck_rw_clear_promotion(thread
, unslide_for_kdebug(event
));
1139 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_SLEEP_DEADLINE_CODE
) | DBG_FUNC_END
, (int)res
, 0, 0, 0, 0);
1145 * Lock Boosting Invariants:
1147 * The lock owner is always promoted to the max priority of all its waiters.
1148 * Max priority is capped at MAXPRI_PROMOTE.
1150 * The last waiter is not given a promotion when it wakes up or acquires the lock.
1151 * When the last waiter is waking up, a new contender can always come in and
1152 * steal the lock without having to wait for the last waiter to make forward progress.
1156 * Routine: lck_mtx_lock_wait
1158 * Invoked in order to wait on contention.
1160 * Called with the interlock locked and
1161 * returns it unlocked.
1163 * Always aggressively sets the owning thread to promoted,
1164 * even if it's the same or higher priority
1165 * This prevents it from lowering its own priority while holding a lock
1167 * TODO: Come up with a more efficient way to handle same-priority promotions
1168 * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
1174 struct turnstile
**ts
)
1176 thread_t thread
= current_thread();
1178 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(lck
);
1181 uint64_t sleep_start
= 0;
1183 if (lockstat_probemap
[LS_LCK_MTX_LOCK_BLOCK
] || lockstat_probemap
[LS_LCK_MTX_EXT_LOCK_BLOCK
]) {
1184 sleep_start
= mach_absolute_time();
1188 if (lck
->lck_mtx_tag
!= LCK_MTX_TAG_INDIRECT
) {
1191 mutex
= &lck
->lck_mtx_ptr
->lck_mtx
;
1194 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAIT_CODE
) | DBG_FUNC_START
,
1195 trace_lck
, (uintptr_t)thread_tid(thread
), 0, 0, 0);
1197 assert(thread
->waiting_for_mutex
== NULL
);
1198 thread
->waiting_for_mutex
= mutex
;
1199 mutex
->lck_mtx_waiters
++;
1202 *ts
= turnstile_prepare((uintptr_t)mutex
, NULL
, TURNSTILE_NULL
, TURNSTILE_KERNEL_MUTEX
);
1205 struct turnstile
*turnstile
= *ts
;
1206 thread_set_pending_block_hint(thread
, kThreadWaitKernelMutex
);
1207 turnstile_update_inheritor(turnstile
, holder
, (TURNSTILE_DELAYED_UPDATE
| TURNSTILE_INHERITOR_THREAD
));
1209 waitq_assert_wait64(&turnstile
->ts_waitq
, CAST_EVENT64_T(LCK_MTX_EVENT(mutex
)), THREAD_UNINT
| THREAD_WAIT_NOREPORT_USER
, TIMEOUT_WAIT_FOREVER
);
1211 lck_mtx_ilk_unlock(mutex
);
1213 turnstile_update_inheritor_complete(turnstile
, TURNSTILE_INTERLOCK_NOT_HELD
);
1215 thread_block(THREAD_CONTINUE_NULL
);
1217 thread
->waiting_for_mutex
= NULL
;
1219 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_LCK_WAIT_CODE
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
1222 * Record the DTrace lockstat probe for blocking, block time
1223 * measured from when we were entered.
1226 if (lck
->lck_mtx_tag
!= LCK_MTX_TAG_INDIRECT
) {
1227 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK
, lck
,
1228 mach_absolute_time() - sleep_start
);
1230 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK
, lck
,
1231 mach_absolute_time() - sleep_start
);
1238 * Routine: lck_mtx_lock_acquire
1240 * Invoked on acquiring the mutex when there is
1243 * Returns the current number of waiters.
1245 * Called with the interlock locked.
1248 lck_mtx_lock_acquire(
1250 struct turnstile
*ts
)
1252 thread_t thread
= current_thread();
1255 if (lck
->lck_mtx_tag
!= LCK_MTX_TAG_INDIRECT
) {
1258 mutex
= &lck
->lck_mtx_ptr
->lck_mtx
;
1261 assert(thread
->waiting_for_mutex
== NULL
);
1263 if (mutex
->lck_mtx_waiters
> 0) {
1265 ts
= turnstile_prepare((uintptr_t)mutex
, NULL
, TURNSTILE_NULL
, TURNSTILE_KERNEL_MUTEX
);
1268 turnstile_update_inheritor(ts
, thread
, (TURNSTILE_IMMEDIATE_UPDATE
| TURNSTILE_INHERITOR_THREAD
));
1269 turnstile_update_inheritor_complete(ts
, TURNSTILE_INTERLOCK_HELD
);
1273 turnstile_complete((uintptr_t)mutex
, NULL
, NULL
, TURNSTILE_KERNEL_MUTEX
);
1276 return mutex
->lck_mtx_waiters
;
1280 * Routine: lck_mtx_unlock_wakeup
1282 * Invoked on unlock when there is contention.
1284 * Called with the interlock locked.
 * NOTE: callers should call turnstile_cleanup after
 * dropping the interlock.
1290 lck_mtx_unlock_wakeup(
1294 thread_t thread
= current_thread();
1296 __kdebug_only
uintptr_t trace_lck
= unslide_for_kdebug(lck
);
1297 struct turnstile
*ts
;
1298 kern_return_t did_wake
;
1300 if (lck
->lck_mtx_tag
!= LCK_MTX_TAG_INDIRECT
) {
1303 mutex
= &lck
->lck_mtx_ptr
->lck_mtx
;
1306 if (thread
!= holder
) {
1307 panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex
, holder
);
1310 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_UNLCK_WAKEUP_CODE
) | DBG_FUNC_START
,
1311 trace_lck
, (uintptr_t)thread_tid(thread
), 0, 0, 0);
1313 assert(mutex
->lck_mtx_waiters
> 0);
1314 assert(thread
->waiting_for_mutex
== NULL
);
1316 ts
= turnstile_prepare((uintptr_t)mutex
, NULL
, TURNSTILE_NULL
, TURNSTILE_KERNEL_MUTEX
);
1318 if (mutex
->lck_mtx_waiters
> 1) {
1319 /* WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor on the wokenup thread */
1320 did_wake
= waitq_wakeup64_one(&ts
->ts_waitq
, CAST_EVENT64_T(LCK_MTX_EVENT(mutex
)), THREAD_AWAKENED
, WAITQ_PROMOTE_ON_WAKE
);
1322 did_wake
= waitq_wakeup64_one(&ts
->ts_waitq
, CAST_EVENT64_T(LCK_MTX_EVENT(mutex
)), THREAD_AWAKENED
, WAITQ_ALL_PRIORITIES
);
1323 turnstile_update_inheritor(ts
, NULL
, TURNSTILE_IMMEDIATE_UPDATE
);
1325 assert(did_wake
== KERN_SUCCESS
);
1327 turnstile_update_inheritor_complete(ts
, TURNSTILE_INTERLOCK_HELD
);
1328 turnstile_complete((uintptr_t)mutex
, NULL
, NULL
, TURNSTILE_KERNEL_MUTEX
);
1330 mutex
->lck_mtx_waiters
--;
1332 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS
, LCK_MTX_UNLCK_WAKEUP_CODE
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
1334 return mutex
->lck_mtx_waiters
> 0;
1338 * Routine: mutex_pause
1340 * Called by former callers of simple_lock_pause().
1342 #define MAX_COLLISION_COUNTS 32
1343 #define MAX_COLLISION 8
1345 unsigned int max_collision_count
[MAX_COLLISION_COUNTS
];
1347 uint32_t collision_backoffs
[MAX_COLLISION
] = {
1348 10, 50, 100, 200, 400, 600, 800, 1000
1353 mutex_pause(uint32_t collisions
)
1355 wait_result_t wait_result
;
1358 if (collisions
>= MAX_COLLISION_COUNTS
) {
1359 collisions
= MAX_COLLISION_COUNTS
- 1;
1361 max_collision_count
[collisions
]++;
1363 if (collisions
>= MAX_COLLISION
) {
1364 collisions
= MAX_COLLISION
- 1;
1366 back_off
= collision_backoffs
[collisions
];
	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
	assert(wait_result == THREAD_WAITING);

	wait_result = thread_block(THREAD_CONTINUE_NULL);
	assert(wait_result == THREAD_TIMED_OUT);
}
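
/*
 * Sketch of how mutex_pause() is typically used: back off with an increasing
 * delay between attempts on a lock (or other resource) that keeps colliding,
 * instead of spinning. Illustrative only; the try-lock target is hypothetical.
 */
#if 0 /* illustrative example only */
static void
mydrv_lock_with_backoff(lck_mtx_t *mtx)
{
	uint32_t collisions = 0;

	while (!lck_mtx_try_lock(mtx)) {
		/* sleeps between 10us and 1000us depending on how often we collided */
		mutex_pause(collisions++);
	}
	/* mutex held here */
}
#endif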
1376 unsigned int mutex_yield_wait
= 0;
1377 unsigned int mutex_yield_no_wait
= 0;
1386 lck_mtx_assert(lck
, LCK_MTX_ASSERT_OWNED
);
1389 if (lck
->lck_mtx_tag
== LCK_MTX_TAG_INDIRECT
) {
1390 waiters
= lck
->lck_mtx_ptr
->lck_mtx
.lck_mtx_waiters
;
1392 waiters
= lck
->lck_mtx_waiters
;
1396 mutex_yield_no_wait
++;
1399 lck_mtx_unlock(lck
);
1407 * Routine: lck_rw_sleep
1412 lck_sleep_action_t lck_sleep_action
,
1414 wait_interrupt_t interruptible
)
1417 lck_rw_type_t lck_rw_type
;
1418 thread_t thread
= current_thread();
1420 if ((lck_sleep_action
& ~LCK_SLEEP_MASK
) != 0) {
1421 panic("Invalid lock sleep action %x\n", lck_sleep_action
);
1424 if (lck_sleep_action
& LCK_SLEEP_PROMOTED_PRI
) {
1426 * Although we are dropping the RW lock, the intent in most cases
1427 * is that this thread remains as an observer, since it may hold
1428 * some secondary resource, but must yield to avoid deadlock. In
1429 * this situation, make sure that the thread is boosted to the
1430 * RW lock ceiling while blocked, so that it can re-acquire the
1431 * RW lock at that priority.
1433 thread
->rwlock_count
++;
1436 res
= assert_wait(event
, interruptible
);
1437 if (res
== THREAD_WAITING
) {
1438 lck_rw_type
= lck_rw_done(lck
);
1439 res
= thread_block(THREAD_CONTINUE_NULL
);
1440 if (!(lck_sleep_action
& LCK_SLEEP_UNLOCK
)) {
1441 if (!(lck_sleep_action
& (LCK_SLEEP_SHARED
| LCK_SLEEP_EXCLUSIVE
))) {
1442 lck_rw_lock(lck
, lck_rw_type
);
1443 } else if (lck_sleep_action
& LCK_SLEEP_EXCLUSIVE
) {
1444 lck_rw_lock_exclusive(lck
);
1446 lck_rw_lock_shared(lck
);
1449 } else if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
1450 (void)lck_rw_done(lck
);
1453 if (lck_sleep_action
& LCK_SLEEP_PROMOTED_PRI
) {
1454 if ((thread
->rwlock_count
-- == 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1455 /* sched_flags checked without lock, but will be rechecked while clearing */
1457 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1458 assert(lck_sleep_action
& LCK_SLEEP_UNLOCK
);
1460 lck_rw_clear_promotion(thread
, unslide_for_kdebug(event
));
1469 * Routine: lck_rw_sleep_deadline
1472 lck_rw_sleep_deadline(
1474 lck_sleep_action_t lck_sleep_action
,
1476 wait_interrupt_t interruptible
,
1480 lck_rw_type_t lck_rw_type
;
1481 thread_t thread
= current_thread();
1483 if ((lck_sleep_action
& ~LCK_SLEEP_MASK
) != 0) {
1484 panic("Invalid lock sleep action %x\n", lck_sleep_action
);
1487 if (lck_sleep_action
& LCK_SLEEP_PROMOTED_PRI
) {
1488 thread
->rwlock_count
++;
1491 res
= assert_wait_deadline(event
, interruptible
, deadline
);
1492 if (res
== THREAD_WAITING
) {
1493 lck_rw_type
= lck_rw_done(lck
);
1494 res
= thread_block(THREAD_CONTINUE_NULL
);
1495 if (!(lck_sleep_action
& LCK_SLEEP_UNLOCK
)) {
1496 if (!(lck_sleep_action
& (LCK_SLEEP_SHARED
| LCK_SLEEP_EXCLUSIVE
))) {
1497 lck_rw_lock(lck
, lck_rw_type
);
1498 } else if (lck_sleep_action
& LCK_SLEEP_EXCLUSIVE
) {
1499 lck_rw_lock_exclusive(lck
);
1501 lck_rw_lock_shared(lck
);
1504 } else if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
1505 (void)lck_rw_done(lck
);
1508 if (lck_sleep_action
& LCK_SLEEP_PROMOTED_PRI
) {
1509 if ((thread
->rwlock_count
-- == 1 /* field now 0 */) && (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1510 /* sched_flags checked without lock, but will be rechecked while clearing */
1512 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1513 assert(lck_sleep_action
& LCK_SLEEP_UNLOCK
);
1515 lck_rw_clear_promotion(thread
, unslide_for_kdebug(event
));
/*
 * Reader-writer lock promotion
 *
 * We support a limited form of reader-writer
 * lock promotion whose effects are:
 *
 *   * Qualifying threads have decay disabled
 *   * Scheduler priority is reset to a floor of
 *     their statically assigned priority
 *
 * The rationale is that lck_rw_ts do not have
 * a single owner, so we cannot apply a directed
 * priority boost from all waiting threads
 * to all holding threads without maintaining
 * lists of all shared owners and all waiting
 * threads for every lock.
 *
 * Instead (and to preserve the uncontended fast-
 * path), acquiring (or attempting to acquire)
 * a RW lock in shared or exclusive lock increments
 * a per-thread counter. Only if that thread stops
 * making forward progress (for instance blocking
 * on a mutex, or being preempted) do we consult
 * the counter and apply the priority floor.
 * When the thread becomes runnable again (or in
 * the case of preemption it never stopped being
 * runnable), it has the priority boost and should
 * be in a good position to run on the CPU and
 * release all RW locks (at which point the priority
 * boost is cleared).
 *
 * Care must be taken to ensure that priority
 * boosts are not retained indefinitely, since unlike
 * mutex priority boosts (where the boost is tied
 * to the mutex lifecycle), the boost is tied
 * to the thread and independent of any particular
 * lck_rw_t. Assertions are in place on return
 * to userspace so that the boost is not held
 * indefinitely.
 *
 * The routines that increment/decrement the
 * per-thread counter should err on the side of
 * incrementing any time a preemption is possible
 * and the lock would be visible to the rest of the
 * system as held (so it should be incremented before
 * interlocks are dropped/preemption is enabled, or
 * before a CAS is executed to acquire the lock).
 */
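
/*
 * The pattern described above is the one lck_rw_sleep() and
 * lck_rw_sleep_deadline() follow: bump the per-thread counter while the RW
 * lock is (or may appear) held, and on the final decrement clear any
 * promotion that was applied while the thread was blocked. A condensed
 * restatement of that protocol, for reference only:
 */
#if 0 /* condensed restatement of the pattern used above, not new code */
	thread->rwlock_count++;                 /* before the lock is visible as held */
	/* ... hold the RW lock; the thread may block while holding it ... */
	if ((thread->rwlock_count-- == 1 /* field now 0 */) &&
	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* last RW lock dropped: undo the priority floor, if one was applied */
		lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
	}
#endif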
1574 * lck_rw_clear_promotion: Undo priority promotions when the last RW
1575 * lock is released by a thread (if a promotion was active)
1578 lck_rw_clear_promotion(thread_t thread
, uintptr_t trace_obj
)
1580 assert(thread
->rwlock_count
== 0);
1582 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
1583 spl_t s
= splsched();
1584 thread_lock(thread
);
1586 if (thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
) {
1587 sched_thread_unpromote_reason(thread
, TH_SFLAG_RW_PROMOTED
, trace_obj
);
1590 thread_unlock(thread
);
1595 * Callout from context switch if the thread goes
1596 * off core with a positive rwlock_count
1598 * Called at splsched with the thread locked
1601 lck_rw_set_promotion_locked(thread_t thread
)
1603 if (LcksOpts
& disLkRWPrio
) {
1607 assert(thread
->rwlock_count
> 0);
1609 if (!(thread
->sched_flags
& TH_SFLAG_RW_PROMOTED
)) {
1610 sched_thread_promote_reason(thread
, TH_SFLAG_RW_PROMOTED
, 0);
1615 host_lockgroup_info(
1617 lockgroup_info_array_t
*lockgroup_infop
,
1618 mach_msg_type_number_t
*lockgroup_infoCntp
)
1620 lockgroup_info_t
*lockgroup_info_base
;
1621 lockgroup_info_t
*lockgroup_info
;
1622 vm_offset_t lockgroup_info_addr
;
1623 vm_size_t lockgroup_info_size
;
1624 vm_size_t lockgroup_info_vmsize
;
1630 if (host
== HOST_NULL
) {
1631 return KERN_INVALID_HOST
;
1634 lck_mtx_lock(&lck_grp_lock
);
1636 lockgroup_info_size
= lck_grp_cnt
* sizeof(*lockgroup_info
);
1637 lockgroup_info_vmsize
= round_page(lockgroup_info_size
);
1638 kr
= kmem_alloc_pageable(ipc_kernel_map
,
1639 &lockgroup_info_addr
, lockgroup_info_vmsize
, VM_KERN_MEMORY_IPC
);
1640 if (kr
!= KERN_SUCCESS
) {
1641 lck_mtx_unlock(&lck_grp_lock
);
1645 lockgroup_info_base
= (lockgroup_info_t
*) lockgroup_info_addr
;
1646 lck_grp
= (lck_grp_t
*)queue_first(&lck_grp_queue
);
1647 lockgroup_info
= lockgroup_info_base
;
1649 for (i
= 0; i
< lck_grp_cnt
; i
++) {
1650 lockgroup_info
->lock_spin_cnt
= lck_grp
->lck_grp_spincnt
;
1651 lockgroup_info
->lock_rw_cnt
= lck_grp
->lck_grp_rwcnt
;
1652 lockgroup_info
->lock_mtx_cnt
= lck_grp
->lck_grp_mtxcnt
;
1655 lockgroup_info
->lock_spin_held_cnt
= lck_grp
->lck_grp_stats
.lgss_spin_held
.lgs_count
;
1656 lockgroup_info
->lock_spin_miss_cnt
= lck_grp
->lck_grp_stats
.lgss_spin_miss
.lgs_count
;
1657 #endif /* LOCK_STATS */
1659 // Historically on x86, held was used for "direct wait" and util for "held"
1660 lockgroup_info
->lock_mtx_util_cnt
= lck_grp
->lck_grp_stats
.lgss_mtx_held
.lgs_count
;
1661 lockgroup_info
->lock_mtx_held_cnt
= lck_grp
->lck_grp_stats
.lgss_mtx_direct_wait
.lgs_count
;
1662 lockgroup_info
->lock_mtx_miss_cnt
= lck_grp
->lck_grp_stats
.lgss_mtx_miss
.lgs_count
;
1663 lockgroup_info
->lock_mtx_wait_cnt
= lck_grp
->lck_grp_stats
.lgss_mtx_wait
.lgs_count
;
1665 (void) strncpy(lockgroup_info
->lockgroup_name
, lck_grp
->lck_grp_name
, LOCKGROUP_MAX_NAME
);
1667 lck_grp
= (lck_grp_t
*)(queue_next((queue_entry_t
)(lck_grp
)));
1671 *lockgroup_infoCntp
= lck_grp_cnt
;
1672 lck_mtx_unlock(&lck_grp_lock
);
1674 if (lockgroup_info_size
!= lockgroup_info_vmsize
) {
1675 bzero((char *)lockgroup_info
, lockgroup_info_vmsize
- lockgroup_info_size
);
1678 kr
= vm_map_copyin(ipc_kernel_map
, (vm_map_address_t
)lockgroup_info_addr
,
1679 (vm_map_size_t
)lockgroup_info_size
, TRUE
, ©
);
1680 assert(kr
== KERN_SUCCESS
);
1682 *lockgroup_infop
= (lockgroup_info_t
*) copy
;
1684 return KERN_SUCCESS
;
/*
 * sleep_with_inheritor and wakeup_with_inheritor KPI
 *
 * Functions that allow threads to sleep on an event and use a turnstile to propagate the priority of the sleeping threads to
 * the latest thread specified as inheritor.
 *
 * The inheritor management is delegated to the caller: the caller needs to store a thread identifier to provide to these functions to specify upon whom
 * to direct the push. The inheritor cannot run in user space while holding a push from an event. It is therefore the caller's responsibility to call
 * wakeup_with_inheritor from the inheritor before running in userspace, or to specify another inheritor before letting the old inheritor run in userspace.
 *
 * sleep_with_inheritor requires a locking primitive to be held while invoked, but wakeup_with_inheritor and change_sleep_inheritor don't require it.
 *
 * Turnstiles require a non-blocking primitive as interlock to synchronize the turnstile data structure manipulation; therefore sleep_with_inheritor, change_sleep_inheritor and
 * wakeup_with_inheritor will require the same interlock to manipulate turnstiles.
 * If sleep_with_inheritor is associated with a locking primitive that can block (like lck_mtx_t or lck_rw_t), a handoff to a non-blocking primitive is required before
 * invoking any turnstile operation.
 *
 * All functions will save the turnstile associated with the event on the turnstile kernel hash table and will use the turnstile kernel hash table bucket
 * spinlock as the turnstile interlock. Because we do not want to hold interrupts disabled while holding the bucket interlock, a new turnstile kernel hash table
 * is instantiated for this KPI to manage the hash without interrupts disabled.
 * Also:
 * - all events on the system that hash on the same bucket will contend on the same spinlock.
 * - every event will have a dedicated wait_queue.
 *
 * Different locking primitives can be associated with sleep_with_inheritor as long as the primitive_lock() and primitive_unlock() functions are provided to
 * sleep_with_inheritor_turnstile to perform the handoff with the bucket spinlock.
 */
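
/*
 * Sketch of how this KPI is typically used to hand a priority push to the
 * thread that currently owns a busy resource: waiters sleep on the resource
 * address and name the current owner as inheritor; the owner wakes one waiter
 * and transfers the push when it is done. Illustrative only; "mydrv_res" and
 * its fields are hypothetical.
 */
#if 0 /* illustrative example only */
struct mydrv_res {
	lck_mtx_t       mtx;
	thread_t        owner;          /* THREAD_NULL when the resource is free */
};

static void
mydrv_res_acquire(struct mydrv_res *res)
{
	lck_mtx_lock(&res->mtx);
	while (res->owner != THREAD_NULL) {
		/* sleep on the resource, pushing our priority onto the current owner */
		lck_mtx_sleep_with_inheritor(&res->mtx, LCK_SLEEP_DEFAULT,
		    (event_t)res, res->owner, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
	}
	res->owner = current_thread();
	lck_mtx_unlock(&res->mtx);
}

static void
mydrv_res_release(struct mydrv_res *res)
{
	lck_mtx_lock(&res->mtx);
	res->owner = THREAD_NULL;
	lck_mtx_unlock(&res->mtx);
	/* wake the highest-priority waiter and hand the push over to it */
	wakeup_one_with_inheritor((event_t)res, THREAD_AWAKENED,
	    LCK_WAKE_DEFAULT, NULL);
}
#endif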
1716 wakeup_with_inheritor_and_turnstile_type(event_t event
, turnstile_type_t type
, wait_result_t result
, bool wake_one
, lck_wake_action_t action
, thread_t
*thread_wokenup
)
1719 struct turnstile
*ts
= NULL
;
1720 kern_return_t ret
= KERN_NOT_WAITING
;
1725 * the hash bucket spinlock is used as turnstile interlock
1727 turnstile_hash_bucket_lock((uintptr_t)event
, &index
, type
);
1729 ts
= turnstile_prepare((uintptr_t)event
, NULL
, TURNSTILE_NULL
, type
);
1732 if (action
== LCK_WAKE_DEFAULT
) {
1733 priority
= WAITQ_PROMOTE_ON_WAKE
;
1735 assert(action
== LCK_WAKE_DO_NOT_TRANSFER_PUSH
);
1736 priority
= WAITQ_ALL_PRIORITIES
;
1740 * WAITQ_PROMOTE_ON_WAKE will call turnstile_update_inheritor
1741 * if it finds a thread
1743 wokeup
= waitq_wakeup64_identify(&ts
->ts_waitq
, CAST_EVENT64_T(event
), result
, priority
);
1744 if (wokeup
!= NULL
) {
1745 if (thread_wokenup
!= NULL
) {
1746 *thread_wokenup
= wokeup
;
1748 thread_deallocate_safe(wokeup
);
1751 if (action
== LCK_WAKE_DO_NOT_TRANSFER_PUSH
) {
1755 if (thread_wokenup
!= NULL
) {
1756 *thread_wokenup
= NULL
;
1758 turnstile_update_inheritor(ts
, TURNSTILE_INHERITOR_NULL
, TURNSTILE_IMMEDIATE_UPDATE
);
1759 ret
= KERN_NOT_WAITING
;
1762 ret
= waitq_wakeup64_all(&ts
->ts_waitq
, CAST_EVENT64_T(event
), result
, WAITQ_ALL_PRIORITIES
);
1763 turnstile_update_inheritor(ts
, TURNSTILE_INHERITOR_NULL
, TURNSTILE_IMMEDIATE_UPDATE
);
	 * turnstile_update_inheritor_complete could be called while holding the interlock.
	 * In this case the new inheritor is either NULL, or a thread that has just been woken up
	 * and has not blocked because it is racing with the same interlock used here,
	 * so there is no chain to update for the new inheritor.
	 *
	 * However, unless the current thread is the old inheritor,
	 * the old inheritor can be blocked and requires a chain update.
	 *
	 * The chain should be short because kernel turnstiles cannot have user turnstiles
	 * chained after them.
	 *
	 * We can anyway optimize this by asking the turnstile to tell us
	 * if the old inheritor needs an update and drop the lock
	 * just in that case.
1783 turnstile_hash_bucket_unlock((uintptr_t)NULL
, &index
, type
, 0);
1785 turnstile_update_inheritor_complete(ts
, TURNSTILE_INTERLOCK_NOT_HELD
);
1787 turnstile_hash_bucket_lock((uintptr_t)NULL
, &index
, type
);
1790 turnstile_complete((uintptr_t)event
, NULL
, NULL
, type
);
1792 turnstile_hash_bucket_unlock((uintptr_t)NULL
, &index
, type
, 0);
1794 turnstile_cleanup();
1799 static wait_result_t
1800 sleep_with_inheritor_and_turnstile_type(event_t event
,
1802 wait_interrupt_t interruptible
,
1804 turnstile_type_t type
,
1805 void (^primitive_lock
)(void),
1806 void (^primitive_unlock
)(void))
1810 struct turnstile
*ts
= NULL
;
1813 * the hash bucket spinlock is used as turnstile interlock,
1814 * lock it before releasing the primitive lock
1816 turnstile_hash_bucket_lock((uintptr_t)event
, &index
, type
);
1820 ts
= turnstile_prepare((uintptr_t)event
, NULL
, TURNSTILE_NULL
, type
);
1822 thread_set_pending_block_hint(current_thread(), kThreadWaitSleepWithInheritor
);
1824 * We need TURNSTILE_DELAYED_UPDATE because we will call
1825 * waitq_assert_wait64 after.
1827 turnstile_update_inheritor(ts
, inheritor
, (TURNSTILE_DELAYED_UPDATE
| TURNSTILE_INHERITOR_THREAD
));
1829 ret
= waitq_assert_wait64(&ts
->ts_waitq
, CAST_EVENT64_T(event
), interruptible
, deadline
);
1831 turnstile_hash_bucket_unlock((uintptr_t)NULL
, &index
, type
, 0);
1834 * Update new and old inheritor chains outside the interlock;
1836 turnstile_update_inheritor_complete(ts
, TURNSTILE_INTERLOCK_NOT_HELD
);
1838 if (ret
== THREAD_WAITING
) {
1839 ret
= thread_block(THREAD_CONTINUE_NULL
);
1842 turnstile_hash_bucket_lock((uintptr_t)NULL
, &index
, type
);
1844 turnstile_complete((uintptr_t)event
, NULL
, NULL
, type
);
1846 turnstile_hash_bucket_unlock((uintptr_t)NULL
, &index
, type
, 0);
1848 turnstile_cleanup();
1856 change_sleep_inheritor_and_turnstile_type(event_t event
,
1858 turnstile_type_t type
)
1861 struct turnstile
*ts
= NULL
;
1862 kern_return_t ret
= KERN_SUCCESS
;
1864 * the hash bucket spinlock is used as turnstile interlock
1866 turnstile_hash_bucket_lock((uintptr_t)event
, &index
, type
);
1868 ts
= turnstile_prepare((uintptr_t)event
, NULL
, TURNSTILE_NULL
, type
);
1870 if (!turnstile_has_waiters(ts
)) {
1871 ret
= KERN_NOT_WAITING
;
1875 * We will not call an assert_wait later so use TURNSTILE_IMMEDIATE_UPDATE
1877 turnstile_update_inheritor(ts
, inheritor
, (TURNSTILE_IMMEDIATE_UPDATE
| TURNSTILE_INHERITOR_THREAD
));
1879 turnstile_hash_bucket_unlock((uintptr_t)NULL
, &index
, type
, 0);
1882 * update the chains outside the interlock
1884 turnstile_update_inheritor_complete(ts
, TURNSTILE_INTERLOCK_NOT_HELD
);
1886 turnstile_hash_bucket_lock((uintptr_t)NULL
, &index
, type
);
1888 turnstile_complete((uintptr_t)event
, NULL
, NULL
, type
);
1890 turnstile_hash_bucket_unlock((uintptr_t)NULL
, &index
, type
, 0);
1892 turnstile_cleanup();
1897 typedef void (^void_block_void
)(void);
1900 * sleep_with_inheritor functions with lck_mtx_t as locking primitive.
1904 lck_mtx_sleep_with_inheritor_and_turnstile_type(lck_mtx_t
*lock
, lck_sleep_action_t lck_sleep_action
, event_t event
, thread_t inheritor
, wait_interrupt_t interruptible
, uint64_t deadline
, turnstile_type_t type
)
1906 LCK_MTX_ASSERT(lock
, LCK_MTX_ASSERT_OWNED
);
1908 if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
1909 return sleep_with_inheritor_and_turnstile_type(event
,
1915 ^{lck_mtx_unlock(lock
);});
1916 } else if (lck_sleep_action
& LCK_SLEEP_SPIN
) {
1917 return sleep_with_inheritor_and_turnstile_type(event
,
1922 ^{lck_mtx_lock_spin(lock
);},
1923 ^{lck_mtx_unlock(lock
);});
1924 } else if (lck_sleep_action
& LCK_SLEEP_SPIN_ALWAYS
) {
1925 return sleep_with_inheritor_and_turnstile_type(event
,
1930 ^{lck_mtx_lock_spin_always(lock
);},
1931 ^{lck_mtx_unlock(lock
);});
1933 return sleep_with_inheritor_and_turnstile_type(event
,
1938 ^{lck_mtx_lock(lock
);},
1939 ^{lck_mtx_unlock(lock
);});
1944 * Name: lck_spin_sleep_with_inheritor
1946 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1947 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1948 * be directed to the inheritor specified.
1949 * An interruptible mode and deadline can be specified to return earlier from the wait.
 * Arg1: lck_spin_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1953 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK.
1954 * Arg3: event to wait on.
1955 * Arg4: thread to propagate the event push to.
1956 * Arg5: interruptible flag for wait.
1957 * Arg6: deadline for wait.
1959 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
1960 * Lock will be dropped while waiting.
1961 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
1962 * wakeup for the event is called.
1964 * Returns: result of the wait.
1967 lck_spin_sleep_with_inheritor(
1969 lck_sleep_action_t lck_sleep_action
,
1972 wait_interrupt_t interruptible
,
1975 if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
1976 return sleep_with_inheritor_and_turnstile_type(event
, inheritor
,
1977 interruptible
, deadline
, TURNSTILE_SLEEP_INHERITOR
,
1978 ^{}, ^{ lck_spin_unlock(lock
); });
1980 return sleep_with_inheritor_and_turnstile_type(event
, inheritor
,
1981 interruptible
, deadline
, TURNSTILE_SLEEP_INHERITOR
,
1982 ^{ lck_spin_lock(lock
); }, ^{ lck_spin_unlock(lock
); });
1987 * Name: lck_mtx_sleep_with_inheritor
1989 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
1990 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
1991 * be directed to the inheritor specified.
1992 * An interruptible mode and deadline can be specified to return earlier from the wait.
 * Arg1: lck_mtx_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
1996 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
1997 * Arg3: event to wait on.
1998 * Arg4: thread to propagate the event push to.
1999 * Arg5: interruptible flag for wait.
2000 * Arg6: deadline for wait.
2002 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2003 * Lock will be dropped while waiting.
2004 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2005 * wakeup for the event is called.
2007 * Returns: result of the wait.
2010 lck_mtx_sleep_with_inheritor(lck_mtx_t
*lock
, lck_sleep_action_t lck_sleep_action
, event_t event
, thread_t inheritor
, wait_interrupt_t interruptible
, uint64_t deadline
)
2012 return lck_mtx_sleep_with_inheritor_and_turnstile_type(lock
, lck_sleep_action
, event
, inheritor
, interruptible
, deadline
, TURNSTILE_SLEEP_INHERITOR
);
2016 * sleep_with_inheritor functions with lck_rw_t as locking primitive.
2020 lck_rw_sleep_with_inheritor_and_turnstile_type(lck_rw_t
*lock
, lck_sleep_action_t lck_sleep_action
, event_t event
, thread_t inheritor
, wait_interrupt_t interruptible
, uint64_t deadline
, turnstile_type_t type
)
2022 __block lck_rw_type_t lck_rw_type
= LCK_RW_TYPE_EXCLUSIVE
;
2024 LCK_RW_ASSERT(lock
, LCK_RW_ASSERT_HELD
);
2026 if (lck_sleep_action
& LCK_SLEEP_UNLOCK
) {
2027 return sleep_with_inheritor_and_turnstile_type(event
,
2033 ^{lck_rw_type
= lck_rw_done(lock
);});
2034 } else if (!(lck_sleep_action
& (LCK_SLEEP_SHARED
| LCK_SLEEP_EXCLUSIVE
))) {
2035 return sleep_with_inheritor_and_turnstile_type(event
,
2040 ^{lck_rw_lock(lock
, lck_rw_type
);},
2041 ^{lck_rw_type
= lck_rw_done(lock
);});
2042 } else if (lck_sleep_action
& LCK_SLEEP_EXCLUSIVE
) {
2043 return sleep_with_inheritor_and_turnstile_type(event
,
2048 ^{lck_rw_lock_exclusive(lock
);},
2049 ^{lck_rw_type
= lck_rw_done(lock
);});
2051 return sleep_with_inheritor_and_turnstile_type(event
,
2056 ^{lck_rw_lock_shared(lock
);},
2057 ^{lck_rw_type
= lck_rw_done(lock
);});
2062 * Name: lck_rw_sleep_with_inheritor
2064 * Description: deschedule the current thread and wait on the waitq associated with event to be woken up.
2065 * While waiting, the sched priority of the waiting thread will contribute to the push of the event that will
2066 * be directed to the inheritor specified.
2067 * An interruptible mode and deadline can be specified to return earlier from the wait.
 * Arg1: lck_rw_t lock used to protect the sleep. The lock will be dropped while sleeping and reacquired before returning according to the sleep action specified.
2071 * Arg2: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE.
2072 * Arg3: event to wait on.
2073 * Arg4: thread to propagate the event push to.
2074 * Arg5: interruptible flag for wait.
2075 * Arg6: deadline for wait.
2077 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
2078 * Lock will be dropped while waiting.
2079 * The inheritor specified cannot run in user space until another inheritor is specified for the event or a
2080 * wakeup for the event is called.
2082 * Returns: result of the wait.
2085 lck_rw_sleep_with_inheritor(lck_rw_t
*lock
, lck_sleep_action_t lck_sleep_action
, event_t event
, thread_t inheritor
, wait_interrupt_t interruptible
, uint64_t deadline
)
2087 return lck_rw_sleep_with_inheritor_and_turnstile_type(lock
, lck_sleep_action
, event
, inheritor
, interruptible
, deadline
, TURNSTILE_SLEEP_INHERITOR
);
2091 * wakeup_with_inheritor functions are independent from the locking primitive.
2095 * Name: wakeup_one_with_inheritor
2097 * Description: wake up one waiter for event if any. The thread woken up will be the one with the higher sched priority waiting on event.
2098 * The push for the event will be transferred from the last inheritor to the woken up thread if LCK_WAKE_DEFAULT is specified.
2099 * If LCK_WAKE_DO_NOT_TRANSFER_PUSH is specified the push will not be transferred.
2102 * Arg1: event to wake from.
2103 * Arg2: wait result to pass to the woken up thread.
2104 * Arg3: wake flag. LCK_WAKE_DEFAULT or LCK_WAKE_DO_NOT_TRANSFER_PUSH.
2105 * Arg4: pointer for storing the thread wokenup.
2107 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2109 * Conditions: The new inheritor wokenup cannot run in user space until another inheritor is specified for the event or a
2110 * wakeup for the event is called.
2111 * A reference for the wokenup thread is acquired.
2112 * NOTE: this cannot be called from interrupt context.
2115 wakeup_one_with_inheritor(event_t event
, wait_result_t result
, lck_wake_action_t action
, thread_t
*thread_wokenup
)
2117 return wakeup_with_inheritor_and_turnstile_type(event
,
2118 TURNSTILE_SLEEP_INHERITOR
,
2126 * Name: wakeup_all_with_inheritor
2128 * Description: wake up all waiters waiting for event. The old inheritor will lose the push.
2131 * Arg1: event to wake from.
2132 * Arg2: wait result to pass to the woken up threads.
2134 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2136 * Conditions: NOTE: this cannot be called from interrupt context.
2139 wakeup_all_with_inheritor(event_t event
, wait_result_t result
)
2141 return wakeup_with_inheritor_and_turnstile_type(event
,
2142 TURNSTILE_SLEEP_INHERITOR
,
2150 * change_sleep_inheritor is independent from the locking primitive.
2154 * Name: change_sleep_inheritor
2156 * Description: Redirect the push of the waiting threads of event to the new inheritor specified.
2159 * Arg1: event to redirect the push.
2160 * Arg2: new inheritor for event.
2162 * Returns: KERN_NOT_WAITING if no threads were waiting, KERN_SUCCESS otherwise.
2164 * Conditions: In case of success, the new inheritor cannot run in user space until another inheritor is specified for the event or a
2165 * wakeup for the event is called.
2166 * NOTE: this cannot be called from interrupt context.
2169 change_sleep_inheritor(event_t event
, thread_t inheritor
)
2171 return change_sleep_inheritor_and_turnstile_type(event
,
2173 TURNSTILE_SLEEP_INHERITOR
);
2177 kdp_sleep_with_inheritor_find_owner(struct waitq
* waitq
, __unused event64_t event
, thread_waitinfo_t
* waitinfo
)
2179 assert(waitinfo
->wait_type
== kThreadWaitSleepWithInheritor
);
2180 assert(waitq_is_turnstile_queue(waitq
));
2181 waitinfo
->owner
= 0;
2182 waitinfo
->context
= 0;
2184 if (waitq_held(waitq
)) {
2188 struct turnstile
*turnstile
= waitq_to_turnstile(waitq
);
2189 assert(turnstile
->ts_inheritor_flags
& TURNSTILE_INHERITOR_THREAD
);
2190 waitinfo
->owner
= thread_tid(turnstile
->ts_inheritor
);
2193 typedef void (*void_func_void
)(void);
static kern_return_t
gate_try_close(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	kern_return_t ret;
	__assert_only bool waiters;
	thread_t thread = current_thread();

	if (os_atomic_cmpxchg(&gate->gate_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return KERN_SUCCESS;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		waiters = gate_has_waiters(state);
		assert(waiters == FALSE);

		state = GATE_THREAD_TO_STATE(current_thread());
		state |= GATE_ILOCK;
		ordered_store_gate(gate, state);
		ret = KERN_SUCCESS;
	} else {
		if (holder == current_thread()) {
			panic("Trying to close a gate already owned by current thread %p", current_thread());
		}
		ret = KERN_FAILURE;
	}

	gate_iunlock(gate);
	return ret;
}

static void
gate_close(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	__assert_only bool waiters;
	thread_t thread = current_thread();

	if (os_atomic_cmpxchg(&gate->gate_data, 0, GATE_THREAD_TO_STATE(thread), acquire)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder != NULL) {
		panic("Closing a gate already owned by %p from current thread %p", holder, current_thread());
	}

	waiters = gate_has_waiters(state);
	assert(waiters == FALSE);

	state = GATE_THREAD_TO_STATE(thread);
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
static void
gate_open_turnstile(gate_t *gate)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
	turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * We can do the cleanup while holding the interlock because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();
}

static void
gate_open(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	if (os_atomic_cmpxchg(&gate->gate_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
		return;
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiters(state);

	if (holder != thread) {
		panic("Opening gate owned by %p from current thread %p", holder, thread);
	}

	if (waiters) {
		gate_open_turnstile(gate);
	}

	state = GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);
}
static kern_return_t
gate_handoff_turnstile(gate_t *gate,
    int flags,
    thread_t *thread_woken_up,
    bool *waiters)
{
	struct turnstile *ts = NULL;
	kern_return_t ret = KERN_FAILURE;
	thread_t hp_thread;

	ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);
	/*
	 * Wake up the highest priority thread waiting on the gate
	 */
	hp_thread = waitq_wakeup64_identify(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), THREAD_AWAKENED, WAITQ_PROMOTE_ON_WAKE);

	if (hp_thread != NULL) {
		/*
		 * In this case waitq_wakeup64_identify has called turnstile_update_inheritor for us
		 */
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		*thread_woken_up = hp_thread;
		*waiters = turnstile_has_waiters(ts);
		/*
		 * Note: hp_thread is the new holder and the new inheritor.
		 * In case there are no more waiters, it doesn't need to be the inheritor
		 * and it shouldn't be it by the time it finishes the wait, so that its next open or
		 * handoff can go through the fast path.
		 * We could set the inheritor to NULL here, or the new holder itself can set it
		 * on its way back from the sleep. In the latter case there is a better chance that
		 * new waiters will arrive in the meantime, so the operation can be avoided entirely.
		 */
		ret = KERN_SUCCESS;
	} else {
		/*
		 * waiters can have been woken up by an interrupt and still not
		 * have updated gate->waiters, so we couldn't find them on the waitq.
		 * Update the inheritor to NULL here, so that the current thread can return to userspace
		 * independently from when the interrupted waiters will finish the wait.
		 */
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		}
		// there are no waiters.
		ret = KERN_NOT_WAITING;
	}

	turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * We can do the cleanup while holding the interlock because:
	 * 1. current_thread is the previous inheritor and it is running
	 * 2. new inheritor is NULL or it is a just woken-up thread that will race acquiring the lock
	 *    of the gate before trying to sleep.
	 * => No chain of turnstiles needs to be updated.
	 */
	turnstile_cleanup();

	return ret;
}

static kern_return_t
gate_handoff(gate_t *gate,
    int flags)
{
	kern_return_t ret;
	thread_t new_holder = NULL;
	uintptr_t state;
	thread_t holder;
	bool waiters;
	thread_t thread = current_thread();

	assert(flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS || flags == GATE_HANDOFF_DEFAULT);

	if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
		if (os_atomic_cmpxchg(&gate->gate_data, GATE_THREAD_TO_STATE(thread), 0, release)) {
			//gate opened but there were no waiters, so return KERN_NOT_WAITING.
			return KERN_NOT_WAITING;
		}
	}

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiters(state);

	if (holder != current_thread()) {
		panic("Handing off gate owned by %p from current thread %p", holder, current_thread());
	}

	if (waiters) {
		ret = gate_handoff_turnstile(gate, flags, &new_holder, &waiters);
		if (ret == KERN_SUCCESS) {
			state = GATE_THREAD_TO_STATE(new_holder);
			if (waiters) {
				state |= GATE_WAITERS;
			}
		} else {
			if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
				state = 0;
			}
		}
	} else {
		if (flags == GATE_HANDOFF_OPEN_IF_NO_WAITERS) {
			state = 0;
		}
		ret = KERN_NOT_WAITING;
	}
	state |= GATE_ILOCK;
	ordered_store_gate(gate, state);

	gate_iunlock(gate);

	if (new_holder) {
		thread_deallocate(new_holder);
	}
	return ret;
}
static void_func_void
gate_steal_turnstile(gate_t *gate,
    thread_t new_inheritor)
{
	struct turnstile *ts = NULL;

	ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, new_inheritor, (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
	turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile interlock held.
	 */
	return turnstile_cleanup;
}

static void
gate_steal(gate_t *gate)
{
	uintptr_t state;
	thread_t holder;
	thread_t thread = current_thread();
	bool waiters;

	void_func_void func_after_interlock_unlock;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);
	waiters = gate_has_waiters(state);

	assert(holder != NULL);
	state = GATE_THREAD_TO_STATE(thread) | GATE_ILOCK;
	if (waiters) {
		state |= GATE_WAITERS;
		ordered_store_gate(gate, state);
		func_after_interlock_unlock = gate_steal_turnstile(gate, thread);
		gate_iunlock(gate);

		func_after_interlock_unlock();
	} else {
		ordered_store_gate(gate, state);
		gate_iunlock(gate);
	}
}
static void_func_void
gate_wait_turnstile(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    thread_t holder,
    wait_result_t *wait,
    bool *waiters)
{
	struct turnstile *ts;
	uintptr_t state;

	ts = turnstile_prepare((uintptr_t)gate, &gate->turnstile, TURNSTILE_NULL, TURNSTILE_KERNEL_MUTEX);

	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
	waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(GATE_EVENT(gate)), interruptible, deadline);

	gate_iunlock(gate);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	*wait = thread_block(THREAD_CONTINUE_NULL);

	gate_ilock(gate);

	*waiters = turnstile_has_waiters(ts);

	if (!*waiters) {
		/*
		 * We want to enable the fast path as soon as we see that there are no more waiters.
		 * On the fast path the holder will not do any turnstile operations.
		 * Set the inheritor as NULL here.
		 *
		 * NOTE: if it was an open operation that woke this thread up, the inheritor has
		 * already been set to NULL.
		 */
		state = ordered_load_gate(gate);
		holder = GATE_STATE_TO_THREAD(state);
		if (holder &&
		    ((*wait != THREAD_AWAKENED) ||      // thread interrupted or timed out
		    holder == current_thread())) {      // thread was woken up and it is the new holder
			turnstile_update_inheritor(ts, TURNSTILE_INHERITOR_NULL, TURNSTILE_IMMEDIATE_UPDATE);
			turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);
		}
	}

	turnstile_complete((uintptr_t)gate, &gate->turnstile, NULL, TURNSTILE_KERNEL_MUTEX);

	/*
	 * turnstile_cleanup might need to update the chain of the old holder.
	 * This operation should happen without the turnstile primitive interlock held.
	 */
	return turnstile_cleanup;
}
static gate_wait_result_t
gate_wait(gate_t *gate,
    wait_interrupt_t interruptible,
    uint64_t deadline,
    void (^primitive_unlock)(void),
    void (^primitive_lock)(void))
{
	gate_wait_result_t ret;
	void_func_void func_after_interlock_unlock;
	wait_result_t wait_result;
	uintptr_t state;
	thread_t holder;
	bool waiters;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	if (holder == NULL) {
		panic("Trying to wait on open gate thread %p gate %p", current_thread(), gate);
	}

	state |= GATE_WAITERS;
	ordered_store_gate(gate, state);

	/*
	 * Release the primitive lock before any
	 * turnstile operation. Turnstile
	 * does not support a blocking primitive as
	 * interlock.
	 *
	 * In this way, concurrent threads will be
	 * able to acquire the primitive lock
	 * but still will wait for me through the
	 * gate interlock.
	 */
	primitive_unlock();

	func_after_interlock_unlock = gate_wait_turnstile(gate,
	    interruptible,
	    deadline,
	    holder,
	    &wait_result,
	    &waiters);

	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (wait_result) {
	case THREAD_INTERRUPTED:
	case THREAD_TIMED_OUT:
		assert(holder != current_thread());

		if (waiters) {
			state |= GATE_WAITERS;
		} else {
			state &= ~GATE_WAITERS;
		}
		ordered_store_gate(gate, state);

		if (wait_result == THREAD_INTERRUPTED) {
			ret = GATE_INTERRUPTED;
		} else {
			ret = GATE_TIMED_OUT;
		}
		break;
	default:
		/*
		 * Note it is possible that even if the gate was handed off to
		 * me, someone called gate_steal() before I woke up.
		 *
		 * As well as it is possible that the gate was opened, but someone
		 * closed it while I was waking up.
		 *
		 * In both cases we return GATE_OPENED, as the gate was opened to me
		 * at one point, it is the caller's responsibility to check again if
		 * the gate is open.
		 */
		if (holder == current_thread()) {
			ret = GATE_HANDOFF;
		} else {
			ret = GATE_OPENED;
		}
		break;
	}

	gate_iunlock(gate);

	/*
	 * turnstile func that needs to be executed without
	 * holding the primitive interlock
	 */
	func_after_interlock_unlock();

	primitive_lock();

	return ret;
}
static void
gate_assert(gate_t *gate, int flags)
{
	uintptr_t state;
	thread_t holder;

	gate_ilock(gate);
	state = ordered_load_gate(gate);
	holder = GATE_STATE_TO_THREAD(state);

	switch (flags) {
	case GATE_ASSERT_CLOSED:
		assert(holder != NULL);
		break;
	case GATE_ASSERT_OPEN:
		assert(holder == NULL);
		break;
	case GATE_ASSERT_HELD:
		assert(holder == current_thread());
		break;
	default:
		panic("invalid %s flag %d", __func__, flags);
	}

	gate_iunlock(gate);
}

static void
gate_init(gate_t *gate)
{
	gate->gate_data = 0;
	gate->turnstile = NULL;
}

static void
gate_destroy(__assert_only gate_t *gate)
{
	assert(gate->gate_data == 0);
	assert(gate->turnstile == NULL);
}
/*
 * Name: lck_rw_gate_init
 *
 * Description: initializes a variable declared with decl_lck_rw_gate_data.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 */
void
lck_rw_gate_init(lck_rw_t *lock, gate_t *gate)
{
	(void) lock;
	gate_init(gate);
}

/*
 * Name: lck_rw_gate_destroy
 *
 * Description: destroys a variable previously initialized.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 */
void
lck_rw_gate_destroy(lck_rw_t *lock, gate_t *gate)
{
	(void) lock;
	gate_destroy(gate);
}

/*
 * Name: lck_rw_gate_try_close
 *
 * Description: Tries to close the gate.
 *              In case of success the current thread will be set as
 *              the holder of the gate.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *
 * Returns:
 *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
 *          of the gate.
 *          A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
 *          to wake up possible waiters on the gate before returning to userspace.
 *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
 *          between the calls to lck_rw_gate_try_close() and lck_rw_gate_wait().
 *
 *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
 *          lck_rw_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
 *          The calls to lck_rw_gate_try_close() and lck_rw_gate_wait() should
 *          be done without dropping the lock that is protecting the gate in between.
 */
kern_return_t
lck_rw_gate_try_close(__assert_only lck_rw_t *lock, gate_t *gate)
{
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	return gate_try_close(gate);
}
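/*
 * Example usage (illustrative sketch only; obj, its rw_lock and gate fields,
 * the work done while holding the gate, and the use of TIMEOUT_WAIT_FOREVER
 * as the no-deadline value are assumptions made for the example). It shows
 * the probe-then-wait pattern described above, without dropping the
 * protecting lock between the two calls:
 *
 *  lck_rw_lock_exclusive(&obj->rw_lock);
 *  if (lck_rw_gate_try_close(&obj->rw_lock, &obj->gate) == KERN_SUCCESS) {
 *      // We are the holder: do the work, then open the gate.
 *      lck_rw_gate_open(&obj->rw_lock, &obj->gate);
 *  } else {
 *      // Already closed: wait for the holder to open or hand off the gate.
 *      gate_wait_result_t res = lck_rw_gate_wait(&obj->rw_lock, &obj->gate,
 *          LCK_SLEEP_DEFAULT, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 *      if (res == GATE_HANDOFF) {
 *          // Ownership was handed to us; we must open (or hand off) later.
 *          lck_rw_gate_open(&obj->rw_lock, &obj->gate);
 *      }
 *  }
 *  lck_rw_unlock_exclusive(&obj->rw_lock);
 */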
/*
 * Name: lck_rw_gate_close
 *
 * Description: Closes the gate. The current thread will be set as
 *              the holder of the gate. Will panic if the gate is already closed.
 *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
 *              to wake up possible waiters on the gate before returning to userspace.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The gate must be open.
 */
void
lck_rw_gate_close(__assert_only lck_rw_t *lock, gate_t *gate)
{
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	return gate_close(gate);
}

/*
 * Name: lck_rw_gate_open
 *
 * Description: Opens the gate and wakes up possible waiters.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The current thread must be the holder of the gate.
 */
void
lck_rw_gate_open(__assert_only lck_rw_t *lock, gate_t *gate)
{
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	gate_open(gate);
}
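/*
 * Example usage (illustrative sketch only; obj, its fields and
 * do_long_setup() are hypothetical names used for the example). The typical
 * close/open pairing: close the gate under the lock, drop the lock for the
 * long operation, then re-take the lock and open the gate:
 *
 *  lck_rw_lock_exclusive(&obj->rw_lock);
 *  lck_rw_gate_close(&obj->rw_lock, &obj->gate);
 *  lck_rw_unlock_exclusive(&obj->rw_lock);
 *
 *  do_long_setup(obj);                  // runs without the rw lock held
 *
 *  lck_rw_lock_exclusive(&obj->rw_lock);
 *  lck_rw_gate_open(&obj->rw_lock, &obj->gate);   // wakes possible waiters
 *  lck_rw_unlock_exclusive(&obj->rw_lock);
 */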
/*
 * Name: lck_rw_gate_handoff
 *
 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
 *              priority will be selected as the new holder of the gate, and woken up,
 *              with the gate remaining in the closed state throughout.
 *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
 *              will be returned.
 *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
 *              case no waiters were found.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The current thread must be the holder of the gate.
 *
 * Returns:
 *          KERN_SUCCESS in case one of the waiters became the new holder.
 *          KERN_NOT_WAITING in case there were no waiters.
 */
kern_return_t
lck_rw_gate_handoff(__assert_only lck_rw_t *lock, gate_t *gate, int flags)
{
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	return gate_handoff(gate, flags);
}
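/*
 * Example usage (illustrative sketch only; variable names are assumptions
 * made for the example). Instead of opening the gate, the holder can hand it
 * directly to the highest priority waiter, opening it only if nobody waits:
 *
 *  lck_rw_lock_exclusive(&obj->rw_lock);
 *  if (lck_rw_gate_handoff(&obj->rw_lock, &obj->gate,
 *          GATE_HANDOFF_OPEN_IF_NO_WAITERS) == KERN_NOT_WAITING) {
 *      // No waiters: the gate has been opened, nothing left to do.
 *  }
 *  lck_rw_unlock_exclusive(&obj->rw_lock);
 */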
/*
 * Name: lck_rw_gate_steal
 *
 * Description: Steals the ownership of the gate. It sets the current thread as the
 *              new holder of the gate.
 *              A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
 *              to wake up possible waiters on the gate before returning to userspace.
 *              NOTE: the previous holder should not call lck_rw_gate_open() or lck_rw_gate_handoff()
 *              anymore.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The gate must be closed and the current thread must not already be the holder.
 */
void
lck_rw_gate_steal(__assert_only lck_rw_t *lock, gate_t *gate)
{
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	gate_steal(gate);
}
/*
 * Name: lck_rw_gate_wait
 *
 * Description: Waits for the current thread to become the holder of the gate or for the
 *              gate to become open. An interruptible mode and deadline can be specified
 *              to return earlier from the wait.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_SHARED, LCK_SLEEP_EXCLUSIVE, LCK_SLEEP_UNLOCK.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline for the wait.
 *
 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
 *             Lock will be dropped while waiting.
 *             The gate must be closed.
 *
 * Returns: Reason why the thread was woken up.
 *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
 *                         A matching lck_rw_gate_open() or lck_rw_gate_handoff() needs to be called later on
 *                         to wake up possible waiters on the gate before returning to userspace.
 *          GATE_OPENED - the gate was opened by the holder.
 *          GATE_TIMED_OUT - the thread was woken up by a timeout.
 *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
 */
gate_wait_result_t
lck_rw_gate_wait(lck_rw_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	__block lck_rw_type_t lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;

	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{;});
	} else if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock(lock, lck_rw_type);});
	} else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_exclusive(lock);});
	} else {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_rw_type = lck_rw_done(lock);},
		           ^{lck_rw_lock_shared(lock);});
	}
}
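/*
 * Example usage (illustrative sketch only; variable names are assumptions
 * made for the example). Waiting with LCK_SLEEP_UNLOCK returns with the lock
 * already dropped, which is convenient when the caller re-acquires it on a
 * different path:
 *
 *  uint64_t deadline;
 *  clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
 *
 *  lck_rw_lock_shared(&obj->rw_lock);
 *  gate_wait_result_t res = lck_rw_gate_wait(&obj->rw_lock, &obj->gate,
 *      LCK_SLEEP_UNLOCK, THREAD_INTERRUPTIBLE, deadline);
 *  // rw_lock is no longer held here; res says why the thread woke up
 *  // (GATE_HANDOFF, GATE_OPENED, GATE_TIMED_OUT or GATE_INTERRUPTED).
 */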
/*
 * Name: lck_rw_gate_assert
 *
 * Description: asserts that the gate is in the specified state.
 *
 * Args:
 *   Arg1: lck_rw_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_rw_gate_data.
 *   Arg3: flags to specify the assert type.
 *         GATE_ASSERT_CLOSED - the gate is currently closed
 *         GATE_ASSERT_OPEN - the gate is currently opened
 *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
 */
void
lck_rw_gate_assert(__assert_only lck_rw_t *lock, gate_t *gate, int flags)
{
	LCK_RW_ASSERT(lock, LCK_RW_ASSERT_HELD);

	gate_assert(gate, flags);
}
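/*
 * Example usage (illustrative sketch only; variable names are assumptions
 * made for the example): document the expected gate state on a path that may
 * only be reached by the gate holder.
 *
 *  lck_rw_gate_assert(&obj->rw_lock, &obj->gate, GATE_ASSERT_HELD);
 */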
/*
 * Name: lck_mtx_gate_init
 *
 * Description: initializes a variable declared with decl_lck_mtx_gate_data.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 */
void
lck_mtx_gate_init(lck_mtx_t *lock, gate_t *gate)
{
	(void) lock;
	gate_init(gate);
}

/*
 * Name: lck_mtx_gate_destroy
 *
 * Description: destroys a variable previously initialized.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 */
void
lck_mtx_gate_destroy(lck_mtx_t *lock, gate_t *gate)
{
	(void) lock;
	gate_destroy(gate);
}

/*
 * Name: lck_mtx_gate_try_close
 *
 * Description: Tries to close the gate.
 *              In case of success the current thread will be set as
 *              the holder of the gate.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *
 * Returns:
 *          KERN_SUCCESS in case the gate was successfully closed. The current thread is the new holder
 *          of the gate.
 *          A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
 *          to wake up possible waiters on the gate before returning to userspace.
 *          If the intent is to conditionally probe the gate before waiting, the lock must not be dropped
 *          between the calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait().
 *
 *          KERN_FAILURE in case the gate was already closed. Will panic if the current thread was already the holder of the gate.
 *          lck_mtx_gate_wait() should be called instead if the intent is to unconditionally wait on this gate.
 *          The calls to lck_mtx_gate_try_close() and lck_mtx_gate_wait() should
 *          be done without dropping the lock that is protecting the gate in between.
 */
kern_return_t
lck_mtx_gate_try_close(__assert_only lck_mtx_t *lock, gate_t *gate)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	return gate_try_close(gate);
}

/*
 * Name: lck_mtx_gate_close
 *
 * Description: Closes the gate. The current thread will be set as
 *              the holder of the gate. Will panic if the gate is already closed.
 *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
 *              to wake up possible waiters on the gate before returning to userspace.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The gate must be open.
 */
void
lck_mtx_gate_close(__assert_only lck_mtx_t *lock, gate_t *gate)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	return gate_close(gate);
}

/*
 * Name: lck_mtx_gate_open
 *
 * Description: Opens the gate and wakes up possible waiters.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The current thread must be the holder of the gate.
 */
void
lck_mtx_gate_open(__assert_only lck_mtx_t *lock, gate_t *gate)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	gate_open(gate);
}

/*
 * Name: lck_mtx_gate_handoff
 *
 * Description: Tries to transfer the ownership of the gate. The waiter with highest sched
 *              priority will be selected as the new holder of the gate, and woken up,
 *              with the gate remaining in the closed state throughout.
 *              If no waiters are present, the gate will be kept closed and KERN_NOT_WAITING
 *              will be returned.
 *              GATE_HANDOFF_OPEN_IF_NO_WAITERS flag can be used to specify if the gate should be opened in
 *              case no waiters were found.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: flags - GATE_HANDOFF_DEFAULT or GATE_HANDOFF_OPEN_IF_NO_WAITERS
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The current thread must be the holder of the gate.
 *
 * Returns:
 *          KERN_SUCCESS in case one of the waiters became the new holder.
 *          KERN_NOT_WAITING in case there were no waiters.
 */
kern_return_t
lck_mtx_gate_handoff(__assert_only lck_mtx_t *lock, gate_t *gate, int flags)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	return gate_handoff(gate, flags);
}

/*
 * Name: lck_mtx_gate_steal
 *
 * Description: Steals the ownership of the gate. It sets the current thread as the
 *              new holder of the gate.
 *              A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
 *              to wake up possible waiters on the gate before returning to userspace.
 *              NOTE: the previous holder should not call lck_mtx_gate_open() or lck_mtx_gate_handoff()
 *              anymore.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *
 * Conditions: Lock must be held. Returns with the lock held.
 *             The gate must be closed and the current thread must not already be the holder.
 */
void
lck_mtx_gate_steal(__assert_only lck_mtx_t *lock, gate_t *gate)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	gate_steal(gate);
}
/*
 * Name: lck_mtx_gate_wait
 *
 * Description: Waits for the current thread to become the holder of the gate or for the
 *              gate to become open. An interruptible mode and deadline can be specified
 *              to return earlier from the wait.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: sleep action. LCK_SLEEP_DEFAULT, LCK_SLEEP_UNLOCK, LCK_SLEEP_SPIN, LCK_SLEEP_SPIN_ALWAYS.
 *   Arg4: interruptible flag for wait.
 *   Arg5: deadline for the wait.
 *
 * Conditions: Lock must be held. Returns with the lock held according to the sleep action specified.
 *             Lock will be dropped while waiting.
 *             The gate must be closed.
 *
 * Returns: Reason why the thread was woken up.
 *          GATE_HANDOFF - the current thread was handed off the ownership of the gate.
 *                         A matching lck_mtx_gate_open() or lck_mtx_gate_handoff() needs to be called later on
 *                         to wake up possible waiters on the gate before returning to userspace.
 *          GATE_OPENED - the gate was opened by the holder.
 *          GATE_TIMED_OUT - the thread was woken up by a timeout.
 *          GATE_INTERRUPTED - the thread was interrupted while sleeping.
 */
gate_wait_result_t
lck_mtx_gate_wait(lck_mtx_t *lock, gate_t *gate, lck_sleep_action_t lck_sleep_action, wait_interrupt_t interruptible, uint64_t deadline)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{;});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin(lock);});
	} else if (lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS) {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock_spin_always(lock);});
	} else {
		return gate_wait(gate,
		           interruptible,
		           deadline,
		           ^{lck_mtx_unlock(lock);},
		           ^{lck_mtx_lock(lock);});
	}
}
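/*
 * Example usage (illustrative sketch only; variable names and the use of
 * TIMEOUT_WAIT_FOREVER as the no-deadline value are assumptions made for the
 * example). The mutex flavor mirrors the rw one; here the mutex is
 * re-acquired in spin mode after the wait:
 *
 *  lck_mtx_lock(&obj->mtx);
 *  if (lck_mtx_gate_try_close(&obj->mtx, &obj->gate) != KERN_SUCCESS) {
 *      gate_wait_result_t res = lck_mtx_gate_wait(&obj->mtx, &obj->gate,
 *          LCK_SLEEP_SPIN, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 *      // Returns with the mutex held as a spin lock; if res == GATE_HANDOFF
 *      // this thread is now the holder and must open or hand off the gate.
 *  }
 */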
/*
 * Name: lck_mtx_gate_assert
 *
 * Description: asserts that the gate is in the specified state.
 *
 * Args:
 *   Arg1: lck_mtx_t lock used to protect the gate.
 *   Arg2: pointer to the gate data declared with decl_lck_mtx_gate_data.
 *   Arg3: flags to specify the assert type.
 *         GATE_ASSERT_CLOSED - the gate is currently closed
 *         GATE_ASSERT_OPEN - the gate is currently opened
 *         GATE_ASSERT_HELD - the gate is currently closed and the current thread is the holder
 */
void
lck_mtx_gate_assert(__assert_only lck_mtx_t *lock, gate_t *gate, int flags)
{
	LCK_MTX_ASSERT(lock, LCK_MTX_ASSERT_OWNED);

	gate_assert(gate, flags);
}
#pragma mark - LCK_*_DECLARE support

void
lck_grp_attr_startup_init(struct lck_grp_attr_startup_spec *sp)
{
	lck_grp_attr_t *attr = sp->grp_attr;
	lck_grp_attr_setdefault(attr);
	attr->grp_attr_val |= sp->grp_attr_set_flags;
	attr->grp_attr_val &= ~sp->grp_attr_clear_flags;
}

void
lck_grp_startup_init(struct lck_grp_startup_spec *sp)
{
	lck_grp_init(sp->grp, sp->grp_name, sp->grp_attr);
}

void
lck_attr_startup_init(struct lck_attr_startup_spec *sp)
{
	lck_attr_t *attr = sp->lck_attr;
	lck_attr_setdefault(attr);
	attr->lck_attr_val |= sp->lck_attr_set_flags;
	attr->lck_attr_val &= ~sp->lck_attr_clear_flags;
}

void
lck_spin_startup_init(struct lck_spin_startup_spec *sp)
{
	lck_spin_init(sp->lck, sp->lck_grp, sp->lck_attr);
}

void
lck_mtx_startup_init(struct lck_mtx_startup_spec *sp)
{
	if (sp->lck_ext) {
		lck_mtx_init_ext(sp->lck, sp->lck_ext, sp->lck_grp, sp->lck_attr);
	} else {
		lck_mtx_init(sp->lck, sp->lck_grp, sp->lck_attr);
	}
}

void
lck_rw_startup_init(struct lck_rw_startup_spec *sp)
{
	lck_rw_init(sp->lck, sp->lck_grp, sp->lck_attr);
}

void
usimple_lock_startup_init(struct usimple_lock_startup_spec *sp)
{
	simple_lock_init(sp->lck, sp->lck_init_arg);
}
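/*
 * Example usage (illustrative sketch only; the group and lock names are
 * hypothetical). The startup_init helpers above back the LCK_*_DECLARE
 * macros, which let a subsystem declare locks that are initialized during
 * the lock startup phase, with no explicit *_init() call at the use site:
 *
 *  LCK_GRP_DECLARE(hypothetical_grp, "hypothetical");
 *  LCK_MTX_DECLARE(hypothetical_mtx, &hypothetical_grp);
 *  LCK_RW_DECLARE(hypothetical_rw, &hypothetical_grp);
 *
 *  // usable once startup has run:
 *  lck_mtx_lock(&hypothetical_mtx);
 *  lck_mtx_unlock(&hypothetical_mtx);
 */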