X-Git-Url: https://git.saurik.com/apple/libpthread.git/blobdiff_plain/964d3577b041867f776d8eb940bf4a1108ffb97c..refs/heads/master:/src/pthread_rwlock.c diff --git a/src/pthread_rwlock.c b/src/pthread_rwlock.c index 1b38dd0..55834c9 100644 --- a/src/pthread_rwlock.c +++ b/src/pthread_rwlock.c @@ -2,7 +2,7 @@ * Copyright (c) 2000-2003, 2007, 2008 Apple Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -17,7 +17,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_LICENSE_HEADER_END@ */ /*- @@ -48,17 +48,15 @@ * $FreeBSD: src/lib/libc_r/uthread/uthread_rwlock.c,v 1.6 2001/04/10 04:19:20 deischen Exp $ */ -/* - * POSIX Pthread Library +/* + * POSIX Pthread Library * -- Read Write Lock support * 4/24/02: A. Ramesh * Ported from FreeBSD */ +#include "resolver.h" #include "internal.h" -#include /* For printf(). */ - -extern int __unix_conforming; #ifdef PLOCKSTAT #include "plockstat.h" @@ -76,40 +74,285 @@ extern int __unix_conforming; #define BLOCK_FAIL_PLOCKSTAT 0 #define BLOCK_SUCCESS_PLOCKSTAT 1 -/* maximum number of times a read lock may be obtained */ -#define MAX_READ_LOCKS (INT_MAX - 1) +#define PTHREAD_RWLOCK_INIT_UNUSED 1 + +// maximum number of times a read lock may be obtained +#define MAX_READ_LOCKS (INT_MAX - 1) + + +#if defined(__LP64__) +#define RWLOCK_USE_INT128 1 +#endif + +typedef union rwlock_seq { + uint32_t seq[4]; + struct { uint32_t lcntval; uint32_t rw_seq; uint32_t ucntval; }; + struct { uint32_t lgen; uint32_t rw_wc; uint32_t ugen; }; +#if RWLOCK_USE_INT128 + unsigned __int128 seq_LSU; + unsigned __int128 _Atomic atomic_seq_LSU; +#endif + struct { + uint64_t seq_LS; + uint32_t seq_U; + uint32_t _pad; + }; + struct { + uint64_t _Atomic atomic_seq_LS; + uint32_t _Atomic atomic_seq_U; + uint32_t _Atomic _atomic_pad; + }; +} rwlock_seq; + +_Static_assert(sizeof(rwlock_seq) == 4 * sizeof(uint32_t), + "Incorrect rwlock_seq size"); + +typedef enum rwlock_seqfields { + RWLOCK_SEQ_NONE = 0, + RWLOCK_SEQ_LS = 1, + RWLOCK_SEQ_U = 2, + RWLOCK_SEQ_LSU = RWLOCK_SEQ_LS | RWLOCK_SEQ_U, +} rwlock_seqfields; + +#if PTHREAD_DEBUG_LOG +#define RWLOCK_DEBUG_SEQ(op, rwlock, oldseq, newseq, updateval, f) \ + if (_pthread_debuglog >= 0) { \ + _simple_dprintf(_pthread_debuglog, "rw_" #op " %p tck %7llu thr %llx " \ + "L %x -> %x S %x -> %x U %x -> %x updt %x\n", rwlock, \ + mach_absolute_time() - _pthread_debugstart, _pthread_threadid_self_np_direct(), \ + (f) & RWLOCK_SEQ_LS ? (oldseq).lcntval : 0, \ + (f) & RWLOCK_SEQ_LS ? (newseq).lcntval : 0, \ + (f) & RWLOCK_SEQ_LS ? (oldseq).rw_seq : 0, \ + (f) & RWLOCK_SEQ_LS ? (newseq).rw_seq : 0, \ + (f) & RWLOCK_SEQ_U ? (oldseq).ucntval : 0, \ + (f) & RWLOCK_SEQ_U ? 
(newseq).ucntval : 0, updateval); } +#else +#define RWLOCK_DEBUG_SEQ(m, rwlock, oldseq, newseq, updateval, f) +#endif + +#if !__LITTLE_ENDIAN__ +#error RWLOCK_GETSEQ_ADDR assumes little endian layout of sequence words +#endif + +OS_ALWAYS_INLINE +static inline void +RWLOCK_GETSEQ_ADDR(pthread_rwlock_t *rwlock, rwlock_seq **seqaddr) +{ + // 128-bit aligned address inside rw_seq & rw_mis arrays + *seqaddr = (void*)(((uintptr_t)rwlock->rw_seq + 0xful) & ~0xful); +} + +OS_ALWAYS_INLINE +static inline void +RWLOCK_GETTID_ADDR(pthread_rwlock_t *rwlock, uint64_t **tidaddr) +{ + // 64-bit aligned address inside rw_tid array (&rw_tid[0] for aligned lock) + *tidaddr = (void*)(((uintptr_t)rwlock->rw_tid + 0x7ul) & ~0x7ul); +} + +OS_ALWAYS_INLINE +static inline void +rwlock_seq_load(rwlock_seq *seqaddr, rwlock_seq *oldseqval, + const rwlock_seqfields seqfields) +{ + switch (seqfields) { + case RWLOCK_SEQ_LSU: +#if RWLOCK_USE_INT128 + oldseqval->seq_LSU = seqaddr->seq_LSU; +#else + oldseqval->seq_LS = seqaddr->seq_LS; + oldseqval->seq_U = seqaddr->seq_U; +#endif + break; + case RWLOCK_SEQ_LS: + oldseqval->seq_LS = seqaddr->seq_LS; + break; +#if DEBUG // unused + case RWLOCK_SEQ_U: + oldseqval->seq_U = seqaddr->seq_U; + break; +#endif // unused + default: + __builtin_trap(); + } +} + +OS_ALWAYS_INLINE +static inline void +rwlock_seq_atomic_load_relaxed(rwlock_seq *seqaddr, rwlock_seq *oldseqval, + const rwlock_seqfields seqfields) +{ + switch (seqfields) { + case RWLOCK_SEQ_LSU: +#if RWLOCK_USE_INT128 +#if defined(__arm64__) && defined(__ARM_ARCH_8_2__) + // Workaround clang armv81 codegen bug for 128bit os_atomic_load + // rdar://problem/31213932 + oldseqval->seq_LSU = seqaddr->seq_LSU; + while (!os_atomic_cmpxchgv(&seqaddr->atomic_seq_LSU, + oldseqval->seq_LSU, oldseqval->seq_LSU, &oldseqval->seq_LSU, + relaxed)); +#else + oldseqval->seq_LSU = os_atomic_load_wide(&seqaddr->atomic_seq_LSU, relaxed); +#endif +#else + oldseqval->seq_LS = os_atomic_load_wide(&seqaddr->atomic_seq_LS, relaxed); + oldseqval->seq_U = os_atomic_load(&seqaddr->atomic_seq_U, relaxed); +#endif + break; + case RWLOCK_SEQ_LS: + oldseqval->seq_LS = os_atomic_load_wide(&seqaddr->atomic_seq_LS, relaxed); + break; +#if DEBUG // unused + case RWLOCK_SEQ_U: + oldseqval->seq_U = os_atomic_load(&seqaddr->atomic_seq_U, relaxed); + break; +#endif // unused + default: + __builtin_trap(); + } +} + +#define rwlock_seq_atomic_load(seqaddr, oldseqval, seqfields, m) \ + rwlock_seq_atomic_load_##m(seqaddr, oldseqval, seqfields) -#include -#include +OS_ALWAYS_INLINE +static inline rwlock_seqfields +rwlock_seq_atomic_cmpxchgv_relaxed(rwlock_seq *seqaddr, rwlock_seq *oldseqval, + rwlock_seq *newseqval, const rwlock_seqfields seqfields) +{ + bool r; + rwlock_seqfields updated_seqfields = RWLOCK_SEQ_NONE; + switch (seqfields) { +#if DEBUG // unused + case RWLOCK_SEQ_LSU: +#if RWLOCK_USE_INT128 + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LSU, oldseqval->seq_LSU, + newseqval->seq_LSU, &oldseqval->seq_LSU, relaxed); + if (r) updated_seqfields = RWLOCK_SEQ_LSU; +#else + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LS, oldseqval->seq_LS, + newseqval->seq_LS, &oldseqval->seq_LS, relaxed); + if (r) { + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_U, oldseqval->seq_U, + newseqval->seq_U, &oldseqval->seq_U, relaxed); + if (!r) oldseqval->seq_LS = newseqval->seq_LS; + updated_seqfields = r ? 
RWLOCK_SEQ_LSU : RWLOCK_SEQ_LS; + } else { + oldseqval->seq_U = os_atomic_load(&seqaddr->atomic_seq_U, relaxed); + } +#endif + break; + case RWLOCK_SEQ_U: + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_U, oldseqval->seq_U, + newseqval->seq_U, &oldseqval->seq_U, relaxed); + if (r) updated_seqfields = RWLOCK_SEQ_U; + break; +#endif // unused + case RWLOCK_SEQ_LS: + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LS, oldseqval->seq_LS, + newseqval->seq_LS, &oldseqval->seq_LS, relaxed); + if (r) updated_seqfields = RWLOCK_SEQ_LS; + break; + default: + __builtin_trap(); + } + return updated_seqfields; +} -__private_extern__ int __pthread_rwlock_init(_pthread_rwlock *rwlock, const pthread_rwlockattr_t *attr); -__private_extern__ void _pthread_rwlock_updateval(_pthread_rwlock *rwlock, uint32_t updateval); +OS_ALWAYS_INLINE +static inline rwlock_seqfields +rwlock_seq_atomic_cmpxchgv_acquire(rwlock_seq *seqaddr, rwlock_seq *oldseqval, + rwlock_seq *newseqval, const rwlock_seqfields seqfields) +{ + bool r; + rwlock_seqfields updated_seqfields = RWLOCK_SEQ_NONE; + switch (seqfields) { +#if DEBUG // unused + case RWLOCK_SEQ_LSU: +#if RWLOCK_USE_INT128 + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LSU, oldseqval->seq_LSU, + newseqval->seq_LSU, &oldseqval->seq_LSU, acquire); + if (r) updated_seqfields = RWLOCK_SEQ_LSU; +#else + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LS, oldseqval->seq_LS, + newseqval->seq_LS, &oldseqval->seq_LS, acquire); + if (r) { + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_U, oldseqval->seq_U, + newseqval->seq_U, &oldseqval->seq_U, relaxed); + if (!r) oldseqval->seq_LS = newseqval->seq_LS; + updated_seqfields = r ? RWLOCK_SEQ_LSU : RWLOCK_SEQ_LS; + } else { + oldseqval->seq_U = os_atomic_load(&seqaddr->atomic_seq_U, relaxed); + } +#endif + break; + case RWLOCK_SEQ_U: + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_U, oldseqval->seq_U, + newseqval->seq_U, &oldseqval->seq_U, acquire); + if (r) updated_seqfields = RWLOCK_SEQ_U; + break; +#endif // unused + case RWLOCK_SEQ_LS: + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LS, oldseqval->seq_LS, + newseqval->seq_LS, &oldseqval->seq_LS, acquire); + if (r) updated_seqfields = RWLOCK_SEQ_LS; + break; + default: + __builtin_trap(); + } + return updated_seqfields; +} -static void -RWLOCK_GETSEQ_ADDR(_pthread_rwlock *rwlock, - volatile uint32_t **lcntaddr, - volatile uint32_t **ucntaddr, - volatile uint32_t **seqaddr) +OS_ALWAYS_INLINE +static inline rwlock_seqfields +rwlock_seq_atomic_cmpxchgv_release(rwlock_seq *seqaddr, rwlock_seq *oldseqval, + rwlock_seq *newseqval, const rwlock_seqfields seqfields) { - if (rwlock->pshared == PTHREAD_PROCESS_SHARED) { - if (rwlock->misalign) { - *lcntaddr = &rwlock->rw_seq[1]; - *seqaddr = &rwlock->rw_seq[2]; - *ucntaddr = &rwlock->rw_seq[3]; + bool r; + rwlock_seqfields updated_seqfields = RWLOCK_SEQ_NONE; + switch (seqfields) { + case RWLOCK_SEQ_LSU: +#if RWLOCK_USE_INT128 + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LSU, oldseqval->seq_LSU, + newseqval->seq_LSU, &oldseqval->seq_LSU, release); + if (r) updated_seqfields = RWLOCK_SEQ_LSU; +#else + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_U, oldseqval->seq_U, + newseqval->seq_U, &oldseqval->seq_U, release); + if (r) { + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LS, oldseqval->seq_LS, + newseqval->seq_LS, &oldseqval->seq_LS, relaxed); + if (!r) oldseqval->seq_U = newseqval->seq_U; + updated_seqfields = r ? 
RWLOCK_SEQ_LSU : RWLOCK_SEQ_U; } else { - *lcntaddr = &rwlock->rw_seq[0]; - *seqaddr = &rwlock->rw_seq[1]; - *ucntaddr = &rwlock->rw_seq[2]; + oldseqval->seq_LS = os_atomic_load_wide(&seqaddr->atomic_seq_LS, + relaxed); } - } else { - *lcntaddr = rwlock->rw_lcntaddr; - *seqaddr = rwlock->rw_seqaddr; - *ucntaddr = rwlock->rw_ucntaddr; +#endif + break; + case RWLOCK_SEQ_LS: + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_LS, oldseqval->seq_LS, + newseqval->seq_LS, &oldseqval->seq_LS, release); + if (r) updated_seqfields = RWLOCK_SEQ_LS; + break; +#if DEBUG // unused + case RWLOCK_SEQ_U: + r = os_atomic_cmpxchgv(&seqaddr->atomic_seq_U, oldseqval->seq_U, + newseqval->seq_U, &oldseqval->seq_U, release); + if (r) updated_seqfields = RWLOCK_SEQ_U; + break; +#endif // unused + default: + __builtin_trap(); } + return updated_seqfields; } +#define rwlock_seq_atomic_cmpxchgv(seqaddr, oldseqval, newseqval, seqfields, m)\ + rwlock_seq_atomic_cmpxchgv_##m(seqaddr, oldseqval, newseqval, seqfields) + #ifndef BUILDING_VARIANT /* [ */ -static uint32_t modbits(uint32_t lgenval, uint32_t updateval, uint32_t savebits); int pthread_rwlockattr_init(pthread_rwlockattr_t *attr) @@ -119,7 +362,7 @@ pthread_rwlockattr_init(pthread_rwlockattr_t *attr) return 0; } -int +int pthread_rwlockattr_destroy(pthread_rwlockattr_t *attr) { attr->sig = _PTHREAD_NO_SIG; @@ -143,11 +386,8 @@ pthread_rwlockattr_setpshared(pthread_rwlockattr_t * attr, int pshared) { int res = EINVAL; if (attr->sig == _PTHREAD_RWLOCK_ATTR_SIG) { -#if __DARWIN_UNIX03 - if (( pshared == PTHREAD_PROCESS_PRIVATE) || (pshared == PTHREAD_PROCESS_SHARED)) -#else /* __DARWIN_UNIX03 */ - if ( pshared == PTHREAD_PROCESS_PRIVATE) -#endif /* __DARWIN_UNIX03 */ + if (( pshared == PTHREAD_PROCESS_PRIVATE) || + (pshared == PTHREAD_PROCESS_SHARED)) { attr->pshared = pshared ; res = 0; @@ -156,16 +396,34 @@ pthread_rwlockattr_setpshared(pthread_rwlockattr_t * attr, int pshared) return res; } -__private_extern__ int -__pthread_rwlock_init(_pthread_rwlock *rwlock, const pthread_rwlockattr_t *attr) +#endif /* !BUILDING_VARIANT ] */ + +OS_ALWAYS_INLINE +static inline int +_pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr) { - // Force RWLOCK_GETSEQ_ADDR to calculate addresses by setting pshared. 
- rwlock->pshared = PTHREAD_PROCESS_SHARED; - rwlock->misalign = (((uintptr_t)&rwlock->rw_seq[0]) & 0x7) != 0; - RWLOCK_GETSEQ_ADDR(rwlock, &rwlock->rw_lcntaddr, &rwlock->rw_ucntaddr, &rwlock->rw_seqaddr); - *rwlock->rw_lcntaddr = PTHRW_RWLOCK_INIT; - *rwlock->rw_seqaddr = PTHRW_RWS_INIT; - *rwlock->rw_ucntaddr = 0; + uint64_t *tidaddr; + RWLOCK_GETTID_ADDR(rwlock, &tidaddr); + + rwlock_seq *seqaddr; + RWLOCK_GETSEQ_ADDR(rwlock, &seqaddr); + +#if PTHREAD_RWLOCK_INIT_UNUSED + if ((uint32_t*)tidaddr != rwlock->rw_tid) { + rwlock->misalign = 1; + __builtin_memset(rwlock->rw_tid, 0xff, sizeof(rwlock->rw_tid)); + } + if ((uint32_t*)seqaddr != rwlock->rw_seq) { + __builtin_memset(rwlock->rw_seq, 0xff, sizeof(rwlock->rw_seq)); + } + __builtin_memset(rwlock->rw_mis, 0xff, sizeof(rwlock->rw_mis)); +#endif // PTHREAD_MUTEX_INIT_UNUSED + *tidaddr = 0; + *seqaddr = (rwlock_seq){ + .lcntval = PTHRW_RWLOCK_INIT, + .rw_seq = PTHRW_RWS_INIT, + .ucntval = 0, + }; if (attr != NULL && attr->pshared == PTHREAD_PROCESS_SHARED) { rwlock->pshared = PTHREAD_PROCESS_SHARED; @@ -174,33 +432,56 @@ __pthread_rwlock_init(_pthread_rwlock *rwlock, const pthread_rwlockattr_t *attr) rwlock->pshared = _PTHREAD_DEFAULT_PSHARED; rwlock->rw_flags = PTHRW_KERN_PROCESS_PRIVATE; } - - rwlock->rw_owner = NULL; + + long sig = _PTHREAD_RWLOCK_SIG; + +#if DEBUG bzero(rwlock->_reserved, sizeof(rwlock->_reserved)); +#endif +#if PTHREAD_RWLOCK_INIT_UNUSED + // For detecting copied rwlocks and smashes during debugging + uint32_t sig32 = (uint32_t)sig; + uintptr_t guard = ~(uintptr_t)rwlock; // use ~ to hide from leaks + __builtin_memcpy(rwlock->_reserved, &guard, sizeof(guard)); +#define countof(x) (sizeof(x) / sizeof(x[0])) + rwlock->_reserved[countof(rwlock->_reserved) - 1] = sig32; +#if defined(__LP64__) + rwlock->_pad = sig32; +#endif +#endif // PTHREAD_RWLOCK_INIT_UNUSED // Ensure all contents are properly set before setting signature. - OSMemoryBarrier(); - rwlock->sig = _PTHREAD_RWLOCK_SIG; - +#if defined(__LP64__) + // For binary compatibility reasons we cannot require natural alignment of + // the 64bit 'sig' long value in the struct. 
rdar://problem/21610439 + uint32_t *sig32_ptr = (uint32_t*)&rwlock->sig; + uint32_t *sig32_val = (uint32_t*)&sig; + *(sig32_ptr + 1) = *(sig32_val + 1); + os_atomic_store(sig32_ptr, *sig32_val, release); +#else + os_atomic_store(&rwlock->sig, sig, release); +#endif + return 0; } static uint32_t -modbits(uint32_t lgenval, uint32_t updateval, uint32_t savebits) +_pthread_rwlock_modbits(uint32_t lgenval, uint32_t updateval, uint32_t savebits) { uint32_t lval = lgenval & PTHRW_BIT_MASK; uint32_t uval = updateval & PTHRW_BIT_MASK; uint32_t rval, nlval; nlval = (lval | uval) & ~(PTH_RWL_MBIT); - - /* reconcile bits on the lock with what kernel needs to set */ + + // reconcile bits on the lock with what kernel needs to set if ((uval & PTH_RWL_KBIT) == 0 && (lval & PTH_RWL_WBIT) == 0) { nlval &= ~PTH_RWL_KBIT; } if (savebits != 0) { - if ((savebits & PTH_RWS_WSVBIT) != 0 && (nlval & PTH_RWL_WBIT) == 0 && (nlval & PTH_RWL_EBIT) == 0) { + if ((savebits & PTH_RWS_WSVBIT) != 0 && (nlval & PTH_RWL_WBIT) == 0 && + (nlval & PTH_RWL_EBIT) == 0) { nlval |= (PTH_RWL_WBIT | PTH_RWL_KBIT); } } @@ -208,436 +489,568 @@ modbits(uint32_t lgenval, uint32_t updateval, uint32_t savebits) return(rval); } -__private_extern__ void -_pthread_rwlock_updateval(_pthread_rwlock *rwlock, uint32_t updateval) +OS_ALWAYS_INLINE +static inline void +_pthread_rwlock_updateval(pthread_rwlock_t *rwlock, uint32_t updateval) { bool isoverlap = (updateval & PTH_RWL_MBIT) != 0; - uint64_t oldval64, newval64; - volatile uint32_t *lcntaddr, *ucntaddr, *seqaddr; - - /* TBD: restore U bit */ - RWLOCK_GETSEQ_ADDR(rwlock, &lcntaddr, &ucntaddr, &seqaddr); + // TBD: restore U bit + rwlock_seq *seqaddr; + RWLOCK_GETSEQ_ADDR(rwlock, &seqaddr); + rwlock_seq oldseq, newseq; + rwlock_seq_load(seqaddr, &oldseq, RWLOCK_SEQ_LS); do { - uint32_t lcntval = *lcntaddr; - uint32_t rw_seq = *seqaddr; - - uint32_t newval, newsval; - if (isoverlap || is_rws_setunlockinit(rw_seq) != 0) { + newseq = oldseq; + if (isoverlap || is_rws_unlockinit_set(oldseq.rw_seq)) { // Set S word to the specified value - uint32_t savebits = (rw_seq & PTHRW_RWS_SAVEMASK); - newval = modbits(lcntval, updateval, savebits); - newsval = rw_seq + (updateval & PTHRW_COUNT_MASK); + uint32_t savebits = (oldseq.rw_seq & PTHRW_RWS_SAVEMASK); + newseq.lcntval = _pthread_rwlock_modbits(oldseq.lcntval, updateval, + savebits); + newseq.rw_seq += (updateval & PTHRW_COUNT_MASK); if (!isoverlap) { - newsval &= PTHRW_COUNT_MASK; + newseq.rw_seq &= PTHRW_COUNT_MASK; } - newsval &= ~PTHRW_RWS_SAVEMASK; - } else { - newval = lcntval; - newsval = rw_seq; + newseq.rw_seq &= ~PTHRW_RWS_SAVEMASK; } - - oldval64 = (((uint64_t)rw_seq) << 32); - oldval64 |= lcntval; - newval64 = (((uint64_t)newsval) << 32); - newval64 |= newval; - } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)lcntaddr) != TRUE); + } while (!rwlock_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, + RWLOCK_SEQ_LS, relaxed)); + RWLOCK_DEBUG_SEQ(update, rwlock, oldseq, newseq, updateval, RWLOCK_SEQ_LS); } -#endif /* !BUILDING_VARIANT ] */ - -static int -_pthread_rwlock_check_busy(_pthread_rwlock *rwlock) +OS_ALWAYS_INLINE +static inline int +_pthread_rwlock_check_busy(pthread_rwlock_t *rwlock) { int res = 0; - - volatile uint32_t *lcntaddr, *ucntaddr, *seqaddr; - - RWLOCK_GETSEQ_ADDR(rwlock, &lcntaddr, &ucntaddr, &seqaddr); - - uint32_t rw_lcnt = *lcntaddr; - uint32_t rw_ucnt = *ucntaddr; - - if ((rw_lcnt & PTHRW_COUNT_MASK) != rw_ucnt) { + + rwlock_seq *seqaddr; + RWLOCK_GETSEQ_ADDR(rwlock, &seqaddr); + + 
rwlock_seq seq; + rwlock_seq_atomic_load(seqaddr, &seq, RWLOCK_SEQ_LSU, relaxed); + if ((seq.lcntval & PTHRW_COUNT_MASK) != seq.ucntval) { res = EBUSY; } - + return res; } +PTHREAD_NOEXPORT_VARIANT int -pthread_rwlock_destroy(pthread_rwlock_t *orwlock) +pthread_rwlock_destroy(pthread_rwlock_t *rwlock) { int res = 0; - _pthread_rwlock *rwlock = (_pthread_rwlock *)orwlock; - if (rwlock->sig == _PTHREAD_RWLOCK_SIG) { -#if __DARWIN_UNIX03 + _pthread_lock_lock(&rwlock->lock); + if (_pthread_rwlock_check_signature(rwlock)) { res = _pthread_rwlock_check_busy(rwlock); -#endif /* __DARWIN_UNIX03 */ - } else if (rwlock->sig != _PTHREAD_RWLOCK_SIG_init) { + } else if (!_pthread_rwlock_check_signature_init(rwlock)) { res = EINVAL; } if (res == 0) { rwlock->sig = _PTHREAD_NO_SIG; } + _pthread_lock_unlock(&rwlock->lock); return res; } - +PTHREAD_NOEXPORT_VARIANT int -pthread_rwlock_init(pthread_rwlock_t *orwlock, const pthread_rwlockattr_t *attr) +pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr) { int res = 0; - _pthread_rwlock *rwlock = (_pthread_rwlock *)orwlock; - -#if __DARWIN_UNIX03 + if (attr && attr->sig != _PTHREAD_RWLOCK_ATTR_SIG) { res = EINVAL; } - if (res == 0 && rwlock->sig == _PTHREAD_RWLOCK_SIG) { + if (res == 0 && _pthread_rwlock_check_signature(rwlock)) { res = _pthread_rwlock_check_busy(rwlock); } -#endif if (res == 0) { - LOCK_INIT(rwlock->lock); - res = __pthread_rwlock_init(rwlock, attr); + _pthread_lock_init(&rwlock->lock); + res = _pthread_rwlock_init(rwlock, attr); } return res; } -PTHREAD_NOINLINE +OS_NOINLINE static int -_pthread_rwlock_check_init_slow(pthread_rwlock_t *orwlock) +_pthread_rwlock_check_init_slow(pthread_rwlock_t *rwlock) { int res = EINVAL; - _pthread_rwlock *rwlock = (_pthread_rwlock *)orwlock; - if (rwlock->sig == _PTHREAD_RWLOCK_SIG_init) { - LOCK(rwlock->lock); - if (rwlock->sig == _PTHREAD_RWLOCK_SIG_init) { - res = __pthread_rwlock_init(rwlock, NULL); - } else if (rwlock->sig == _PTHREAD_RWLOCK_SIG){ + if (_pthread_rwlock_check_signature_init(rwlock)) { + _pthread_lock_lock(&rwlock->lock); + if (_pthread_rwlock_check_signature_init(rwlock)) { + res = _pthread_rwlock_init(rwlock, NULL); + } else if (_pthread_rwlock_check_signature(rwlock)){ res = 0; } - UNLOCK(rwlock->lock); - } else if (rwlock->sig == _PTHREAD_RWLOCK_SIG){ + _pthread_lock_unlock(&rwlock->lock); + } else if (_pthread_rwlock_check_signature(rwlock)){ res = 0; } if (res != 0) { - PLOCKSTAT_RW_ERROR(orwlock, READ_LOCK_PLOCKSTAT, res); + PLOCKSTAT_RW_ERROR(rwlock, READ_LOCK_PLOCKSTAT, res); } return res; } -PTHREAD_ALWAYS_INLINE -static int -_pthread_rwlock_check_init(pthread_rwlock_t *orwlock) +OS_ALWAYS_INLINE +static inline int +_pthread_rwlock_check_init(pthread_rwlock_t *rwlock) { int res = 0; - _pthread_rwlock *rwlock = (_pthread_rwlock *)orwlock; - if (rwlock->sig != _PTHREAD_RWLOCK_SIG) { - return _pthread_rwlock_check_init_slow(orwlock); + if (!_pthread_rwlock_check_signature(rwlock)) { + return _pthread_rwlock_check_init_slow(rwlock); } return res; } +OS_NOINLINE static int -_pthread_rwlock_lock(pthread_rwlock_t *orwlock, bool readlock, bool trylock) +_pthread_rwlock_lock_wait(pthread_rwlock_t *rwlock, bool readlock, + rwlock_seq newseq) { int res; - _pthread_rwlock *rwlock = (_pthread_rwlock *)orwlock; - res = _pthread_rwlock_check_init(orwlock); - if (res != 0) { - return res; - } +#ifdef PLOCKSTAT + int plockstat = readlock ? 
READ_LOCK_PLOCKSTAT : WRITE_LOCK_PLOCKSTAT; +#endif - uint64_t oldval64, newval64; - volatile uint32_t *lcntaddr, *ucntaddr, *seqaddr; - RWLOCK_GETSEQ_ADDR(rwlock, &lcntaddr, &ucntaddr, &seqaddr); + if (readlock) { + RWLOCK_DEBUG_SEQ(rdlock, rwlock, oldseq, newseq, gotlock, + RWLOCK_SEQ_LSU); + } else { + RWLOCK_DEBUG_SEQ(wrlock, rwlock, oldseq, newseq, gotlock, + RWLOCK_SEQ_LSU); + } - uint32_t newval, newsval; - uint32_t lcntval, ucntval, rw_seq; + uint32_t updateval; - bool gotlock; - bool retry; - int retry_count = 0; + PLOCKSTAT_RW_BLOCK(rwlock, plockstat); do { - res = 0; - retry = false; - - lcntval = *lcntaddr; - ucntval = *ucntaddr; - rw_seq = *seqaddr; - -#if __DARWIN_UNIX03 - if (is_rwl_ebit_set(lcntval)) { - if (rwlock->rw_owner == pthread_self()) { - res = EDEADLK; - break; - } + if (readlock) { + updateval = __psynch_rw_rdlock(rwlock, newseq.lcntval, + newseq.ucntval, newseq.rw_seq, rwlock->rw_flags); + } else { + updateval = __psynch_rw_wrlock(rwlock, newseq.lcntval, + newseq.ucntval, newseq.rw_seq, rwlock->rw_flags); } -#endif /* __DARWIN_UNIX03 */ + if (updateval == (uint32_t)-1) { + res = errno; + } else { + res = 0; + } + } while (res == EINTR); + + if (res == 0) { + _pthread_rwlock_updateval(rwlock, updateval); + PLOCKSTAT_RW_BLOCKED(rwlock, plockstat, BLOCK_SUCCESS_PLOCKSTAT); + } else { + PLOCKSTAT_RW_BLOCKED(rwlock, plockstat, BLOCK_FAIL_PLOCKSTAT); + PTHREAD_INTERNAL_CRASH(res, "kernel rwlock returned unknown error"); + } + + return res; +} + +OS_NOINLINE +int +_pthread_rwlock_lock_slow(pthread_rwlock_t *rwlock, bool readlock, + bool trylock) +{ + int res; + +#ifdef PLOCKSTAT + int plockstat = readlock ? READ_LOCK_PLOCKSTAT : WRITE_LOCK_PLOCKSTAT; +#endif + + res = _pthread_rwlock_check_init(rwlock); + if (res != 0) return res; - oldval64 = (((uint64_t)rw_seq) << 32); - oldval64 |= lcntval; + rwlock_seq *seqaddr; + RWLOCK_GETSEQ_ADDR(rwlock, &seqaddr); - /* if l bit is on or u and k bit is clear, acquire lock in userland */ + rwlock_seq oldseq, newseq; + rwlock_seq_atomic_load(seqaddr, &oldseq, RWLOCK_SEQ_LSU, relaxed); + + uint64_t *tidaddr; + RWLOCK_GETTID_ADDR(rwlock, &tidaddr); + uint64_t selfid = _pthread_threadid_self_np_direct(); + if (is_rwl_ebit_set(oldseq.lcntval)) { + if (os_atomic_load_wide(tidaddr, relaxed) == selfid) return EDEADLK; + } + + int retry_count; + bool gotlock; + do { + retry_count = 0; +retry: + newseq = oldseq; + + // if W and K bit are clear or U bit is on, acquire lock in userland if (readlock) { - gotlock = can_rwl_readinuser(lcntval); + gotlock = (oldseq.lcntval & (PTH_RWL_WBIT | PTH_RWL_KBIT)) == 0; } else { - gotlock = (lcntval & PTH_RWL_RBIT) != 0; + gotlock = (oldseq.lcntval & PTH_RWL_UBIT) != 0; } - uint32_t bits = 0; - uint32_t mask = ~0ul; - - newval = lcntval + PTHRW_INC; - - if (gotlock) { + if (trylock && !gotlock) { + // A trylock on a held lock will fail immediately. But since + // we did not load the sequence words atomically, perform a + // no-op CAS to ensure that nobody has unlocked concurrently. 
+ } else if (gotlock) { if (readlock) { - if (diff_genseq(lcntval, ucntval) >= PTHRW_MAX_READERS) { - /* since ucntval may be newer, just redo */ + if (diff_genseq(oldseq.lcntval, oldseq.ucntval) >= + PTHRW_MAX_READERS) { + // since ucntval may be newer, just redo retry_count++; if (retry_count > 1024) { + gotlock = false; res = EAGAIN; - break; + goto out; } else { sched_yield(); - retry = true; - continue; + rwlock_seq_atomic_load(seqaddr, &oldseq, + RWLOCK_SEQ_LSU, relaxed); + goto retry; } } - - // Need to update L (remove R bit) and S word - mask = PTH_RWLOCK_RESET_RBIT; + // Need to update L (remove U bit) and S word + newseq.lcntval &= ~PTH_RWL_UBIT; } else { - mask = PTHRW_COUNT_MASK; - bits = PTH_RWL_IBIT | PTH_RWL_KBIT | PTH_RWL_EBIT; + newseq.lcntval &= PTHRW_COUNT_MASK; + newseq.lcntval |= PTH_RWL_IBIT | PTH_RWL_KBIT | PTH_RWL_EBIT; } - newsval = rw_seq + PTHRW_INC; - } else if (trylock) { - res = EBUSY; - break; + newseq.lcntval += PTHRW_INC; + newseq.rw_seq += PTHRW_INC; } else { if (readlock) { - // Need to block in kernel. Remove R bit. - mask = PTH_RWLOCK_RESET_RBIT; + // Need to block in kernel. Remove U bit. + newseq.lcntval &= ~PTH_RWL_UBIT; } else { - bits = PTH_RWL_KBIT | PTH_RWL_WBIT; + newseq.lcntval |= PTH_RWL_KBIT | PTH_RWL_WBIT; } - newsval = rw_seq; - if (is_rws_setseq(rw_seq)) { - newsval &= PTHRW_SW_Reset_BIT_MASK; - newsval |= (newval & PTHRW_COUNT_MASK); + newseq.lcntval += PTHRW_INC; + if (is_rws_sbit_set(oldseq.rw_seq)) { + // Clear the S bit and set S to L + newseq.rw_seq &= (PTHRW_BIT_MASK & ~PTH_RWS_SBIT); + newseq.rw_seq |= (oldseq.lcntval & PTHRW_COUNT_MASK); } } - newval = (newval & mask) | bits; - newval64 = (((uint64_t)newsval) << 32); - newval64 |= newval; + } while (!rwlock_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, + RWLOCK_SEQ_LS, acquire)); - } while (retry || OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)lcntaddr) != TRUE); + if (gotlock) { + if (!readlock) os_atomic_store_wide(tidaddr, selfid, relaxed); + res = 0; + } else if (trylock) { + res = EBUSY; + } else { + res = _pthread_rwlock_lock_wait(rwlock, readlock, newseq); + } +out: #ifdef PLOCKSTAT - int plockstat = readlock ? READ_LOCK_PLOCKSTAT : WRITE_LOCK_PLOCKSTAT; + if (res == 0) { + PLOCKSTAT_RW_ACQUIRE(rwlock, plockstat); + } else { + PLOCKSTAT_RW_ERROR(rwlock, plockstat, res); + } #endif - // Unable to acquire in userland, transition to kernel. 
- if (res == 0 && !gotlock) { - uint32_t updateval; + return res; +} + +OS_ALWAYS_INLINE +static inline int +_pthread_rwlock_lock(pthread_rwlock_t *rwlock, bool readlock, bool trylock) +{ +#if PLOCKSTAT + if (PLOCKSTAT_RW_ACQUIRE_ENABLED() || PLOCKSTAT_RW_ERROR_ENABLED()) { + return _pthread_rwlock_lock_slow(rwlock, readlock, trylock); + } +#endif + + if (os_unlikely(!_pthread_rwlock_check_signature(rwlock))) { + return _pthread_rwlock_lock_slow(rwlock, readlock, trylock); + } + + rwlock_seq *seqaddr; + RWLOCK_GETSEQ_ADDR(rwlock, &seqaddr); + + rwlock_seq oldseq, newseq; + // no need to perform a single-copy-atomic 128-bit load in the fastpath, + // if stores to L and U are seen out of order, we will fallback to the + // slowpath below (which has rwlock_seq_atomic_load) + rwlock_seq_load(seqaddr, &oldseq, RWLOCK_SEQ_LSU); + + if (os_unlikely(is_rwl_ebit_set(oldseq.lcntval))) { + return _pthread_rwlock_lock_slow(rwlock, readlock, trylock); + } + + bool gotlock; + do { + newseq = oldseq; - PLOCKSTAT_RW_BLOCK(orwlock, plockstat); - - do { + // if W and K bit are clear or U bit is on, acquire lock in userland + if (readlock) { + gotlock = (oldseq.lcntval & (PTH_RWL_WBIT | PTH_RWL_KBIT)) == 0; + } else { + gotlock = (oldseq.lcntval & PTH_RWL_UBIT) != 0; + } + + if (trylock && !gotlock) { + // A trylock on a held lock will fail immediately. But since + // we did not load the sequence words atomically, perform a + // no-op CAS to ensure that nobody has unlocked concurrently. + } else if (os_likely(gotlock)) { if (readlock) { - updateval = __psynch_rw_rdlock(orwlock, newval, ucntval, newsval, rwlock->rw_flags); - } else { - updateval = __psynch_rw_wrlock(orwlock, newval, ucntval, newsval, rwlock->rw_flags); - } - if (updateval == (uint32_t)-1) { - res = errno; + if (os_unlikely(diff_genseq(oldseq.lcntval, oldseq.ucntval) >= + PTHRW_MAX_READERS)) { + return _pthread_rwlock_lock_slow(rwlock, readlock, trylock); + } + // Need to update L (remove U bit) and S word + newseq.lcntval &= ~PTH_RWL_UBIT; } else { - res = 0; + newseq.lcntval &= PTHRW_COUNT_MASK; + newseq.lcntval |= PTH_RWL_IBIT | PTH_RWL_KBIT | PTH_RWL_EBIT; } - } while (res == EINTR); - - if (res == 0) { - _pthread_rwlock_updateval(rwlock, updateval); - PLOCKSTAT_RW_BLOCKED(orwlock, plockstat, BLOCK_SUCCESS_PLOCKSTAT); + newseq.lcntval += PTHRW_INC; + newseq.rw_seq += PTHRW_INC; } else { - PLOCKSTAT_RW_BLOCKED(orwlock, plockstat, BLOCK_FAIL_PLOCKSTAT); - uint64_t myid; - (void)pthread_threadid_np(pthread_self(), &myid); - PTHREAD_ABORT("kernel lock returned unknown error %x with tid %x\n", updateval, (uint32_t)myid); + return _pthread_rwlock_lock_slow(rwlock, readlock, trylock); } - } - - if (res == 0) { -#if __DARWIN_UNIX03 + } while (os_unlikely(!rwlock_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, + RWLOCK_SEQ_LS, acquire))); + + if (os_likely(gotlock)) { if (!readlock) { - rwlock->rw_owner = pthread_self(); + uint64_t *tidaddr; + RWLOCK_GETTID_ADDR(rwlock, &tidaddr); + uint64_t selfid = _pthread_threadid_self_np_direct(); + os_atomic_store_wide(tidaddr, selfid, relaxed); } -#endif /* __DARWIN_UNIX03 */ - PLOCKSTAT_RW_ACQUIRE(orwlock, plockstat); + return 0; + } else if (trylock) { + return EBUSY; } else { - PLOCKSTAT_RW_ERROR(orwlock, plockstat, res); + __builtin_trap(); } - - return res; } +PTHREAD_NOEXPORT_VARIANT int -pthread_rwlock_rdlock(pthread_rwlock_t *orwlock) +pthread_rwlock_rdlock(pthread_rwlock_t *rwlock) { // read lock, no try - return _pthread_rwlock_lock(orwlock, true, false); + return _pthread_rwlock_lock(rwlock, true, 
false); } +PTHREAD_NOEXPORT_VARIANT int -pthread_rwlock_tryrdlock(pthread_rwlock_t *orwlock) +pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) { // read lock, try lock - return _pthread_rwlock_lock(orwlock, true, true); + return _pthread_rwlock_lock(rwlock, true, true); } +PTHREAD_NOEXPORT_VARIANT int -pthread_rwlock_wrlock(pthread_rwlock_t *orwlock) +pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) { // write lock, no try - return _pthread_rwlock_lock(orwlock, false, false); + return _pthread_rwlock_lock(rwlock, false, false); } +PTHREAD_NOEXPORT_VARIANT int -pthread_rwlock_trywrlock(pthread_rwlock_t *orwlock) +pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) { // write lock, try lock - return _pthread_rwlock_lock(orwlock, false, true); + return _pthread_rwlock_lock(rwlock, false, true); } +OS_NOINLINE +static int +_pthread_rwlock_unlock_drop(pthread_rwlock_t *rwlock, rwlock_seq oldseq, + rwlock_seq newseq) +{ + int res; + + RWLOCK_DEBUG_SEQ(unlock, rwlock, oldseq, newseq, !droplock, RWLOCK_SEQ_LSU); + uint32_t updateval; + do { + updateval = __psynch_rw_unlock(rwlock, oldseq.lcntval, + newseq.ucntval, newseq.rw_seq, rwlock->rw_flags); + if (updateval == (uint32_t)-1) { + res = errno; + } else { + res = 0; + RWLOCK_DEBUG_SEQ(wakeup, rwlock, oldseq, newseq, updateval, + RWLOCK_SEQ_LSU); + } + } while (res == EINTR); + + if (res != 0) { + PTHREAD_INTERNAL_CRASH(res, "kernel rwunlock returned unknown error"); + } + + return res; +} + +OS_NOINLINE int -pthread_rwlock_unlock(pthread_rwlock_t *orwlock) +_pthread_rwlock_unlock_slow(pthread_rwlock_t *rwlock, + rwlock_seqfields updated_seqfields) { int res; - _pthread_rwlock *rwlock = (_pthread_rwlock *)orwlock; + rwlock_seqfields seqfields = RWLOCK_SEQ_LSU; #ifdef PLOCKSTAT int wrlock = 0; #endif - res = _pthread_rwlock_check_init(orwlock); - if (res != 0) { - return res; + res = _pthread_rwlock_check_init(rwlock); + if (res != 0) return res; + + rwlock_seq *seqaddr; + RWLOCK_GETSEQ_ADDR(rwlock, &seqaddr); + + rwlock_seq oldseq, newseq; + rwlock_seq_load(seqaddr, &oldseq, seqfields); + + if ((oldseq.lcntval & PTH_RWL_UBIT) != 0) { + // spurious unlock (unlock of unlocked lock) + return 0; } - uint64_t oldval64 = 0, newval64 = 0; - volatile uint32_t *lcntaddr, *ucntaddr, *seqaddr; - RWLOCK_GETSEQ_ADDR(rwlock, &lcntaddr, &ucntaddr, &seqaddr); + if (is_rwl_ebit_set(oldseq.lcntval)) { +#ifdef PLOCKSTAT + wrlock = 1; +#endif + uint64_t *tidaddr; + RWLOCK_GETTID_ADDR(rwlock, &tidaddr); + os_atomic_store_wide(tidaddr, 0, relaxed); + } bool droplock; - bool reload; - bool incr_ucnt = true; - bool check_spurious = true; - uint32_t lcntval, ucntval, rw_seq, ulval = 0, newval, newsval; - do { - reload = false; - droplock = true; - - lcntval = *lcntaddr; - ucntval = *ucntaddr; - rw_seq = *seqaddr; + // stop loading & updating fields that have successfully been stored + seqfields &= ~updated_seqfields; - oldval64 = (((uint64_t)rw_seq) << 32); - oldval64 |= lcntval; + newseq = oldseq; + if (seqfields & RWLOCK_SEQ_U) { + newseq.ucntval += PTHRW_INC; + } - // check for spurious unlocks - if (check_spurious) { - if ((lcntval & PTH_RWL_RBIT) != 0) { - droplock = false; + droplock = false; + uint32_t oldlcnt = (oldseq.lcntval & PTHRW_COUNT_MASK); + if (newseq.ucntval == oldlcnt) { + // last unlock, set L with U and init bits and set S to L with S bit + newseq.lcntval = oldlcnt | PTHRW_RWLOCK_INIT; + newseq.rw_seq = oldlcnt | PTHRW_RWS_INIT; + } else { + // no L/S update if lock is not exclusive or no writer pending + if ((oldseq.lcntval & + (PTH_RWL_EBIT | 
PTH_RWL_WBIT | PTH_RWL_KBIT)) == 0) { + continue; + } - newval64 = oldval64; + // kernel transition only needed if U == S + if (newseq.ucntval != (oldseq.rw_seq & PTHRW_COUNT_MASK)) { continue; } - check_spurious = false; + + droplock = true; + // reset all bits and set K + newseq.lcntval = oldlcnt | PTH_RWL_KBIT; + // set I bit on S word + newseq.rw_seq |= PTH_RWS_IBIT; + if ((oldseq.lcntval & PTH_RWL_WBIT) != 0) { + newseq.rw_seq |= PTH_RWS_WSVBIT; + } } + } while (seqfields != (updated_seqfields = rwlock_seq_atomic_cmpxchgv( + seqaddr, &oldseq, &newseq, seqfields, release))); - if (is_rwl_ebit_set(lcntval)) { -#ifdef PLOCKSTAT - wrlock = 1; + if (droplock) { + res = _pthread_rwlock_unlock_drop(rwlock, oldseq, newseq); + } + + PLOCKSTAT_RW_RELEASE(rwlock, wrlock); + + return res; +} + +PTHREAD_NOEXPORT_VARIANT +int +pthread_rwlock_unlock(pthread_rwlock_t *rwlock) +{ + rwlock_seqfields seqfields = RWLOCK_SEQ_LSU; + rwlock_seqfields updated_seqfields = RWLOCK_SEQ_NONE; + +#if PLOCKSTAT + if (PLOCKSTAT_RW_RELEASE_ENABLED() || PLOCKSTAT_RW_ERROR_ENABLED()) { + return _pthread_rwlock_unlock_slow(rwlock, updated_seqfields); + } #endif -#if __DARWIN_UNIX03 - rwlock->rw_owner = NULL; -#endif /* __DARWIN_UNIX03 */ - } - // update U - if (incr_ucnt) { - ulval = (ucntval + PTHRW_INC); - incr_ucnt = (OSAtomicCompareAndSwap32Barrier(ucntval, ulval, (volatile int32_t *)ucntaddr) != TRUE); - newval64 = oldval64; - reload = true; - continue; - } + if (os_unlikely(!_pthread_rwlock_check_signature(rwlock))) { + return _pthread_rwlock_unlock_slow(rwlock, updated_seqfields); + } - // last unlock, note U is already updated ? - if ((lcntval & PTHRW_COUNT_MASK) == (ulval & PTHRW_COUNT_MASK)) { - /* Set L with R and init bits and set S to L */ - newval = (lcntval & PTHRW_COUNT_MASK)| PTHRW_RWLOCK_INIT; - newsval = (lcntval & PTHRW_COUNT_MASK)| PTHRW_RWS_INIT; + rwlock_seq *seqaddr; + RWLOCK_GETSEQ_ADDR(rwlock, &seqaddr); - droplock = false; - } else { - /* if it is not exclusive or no Writer/yield pending, skip */ - if ((lcntval & (PTH_RWL_EBIT | PTH_RWL_WBIT | PTH_RWL_KBIT)) == 0) { - droplock = false; - break; - } + rwlock_seq oldseq, newseq; + rwlock_seq_load(seqaddr, &oldseq, seqfields); - /* kernel transition needed? */ - /* U+1 == S? 
*/ - if ((ulval + PTHRW_INC) != (rw_seq & PTHRW_COUNT_MASK)) { - droplock = false; - break; - } + if (os_unlikely(oldseq.lcntval & PTH_RWL_UBIT)) { + // spurious unlock (unlock of unlocked lock) + return 0; + } - /* reset all bits and set k */ - newval = (lcntval & PTHRW_COUNT_MASK) | PTH_RWL_KBIT; - /* set I bit on S word */ - newsval = rw_seq | PTH_RWS_IBIT; - if ((lcntval & PTH_RWL_WBIT) != 0) { - newsval |= PTH_RWS_WSVBIT; - } - } + if (is_rwl_ebit_set(oldseq.lcntval)) { + uint64_t *tidaddr; + RWLOCK_GETTID_ADDR(rwlock, &tidaddr); + os_atomic_store_wide(tidaddr, 0, relaxed); + } - newval64 = (((uint64_t)newsval) << 32); - newval64 |= newval; + do { + if (updated_seqfields) { + return _pthread_rwlock_unlock_slow(rwlock, updated_seqfields); + } - } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)lcntaddr) != TRUE || reload); + newseq = oldseq; + if (seqfields & RWLOCK_SEQ_U) { + newseq.ucntval += PTHRW_INC; + } - if (droplock) { - uint32_t updateval; - do { - updateval = __psynch_rw_unlock(orwlock, lcntval, ulval, newsval, rwlock->rw_flags); - if (updateval == (uint32_t)-1) { - res = errno; + uint32_t oldlcnt = (oldseq.lcntval & PTHRW_COUNT_MASK); + if (os_likely(newseq.ucntval == oldlcnt)) { + // last unlock, set L with U and init bits and set S to L with S bit + newseq.lcntval = oldlcnt | PTHRW_RWLOCK_INIT; + newseq.rw_seq = oldlcnt | PTHRW_RWS_INIT; + } else { + if (os_likely((oldseq.lcntval & + (PTH_RWL_EBIT | PTH_RWL_WBIT | PTH_RWL_KBIT)) == 0 || + newseq.ucntval != (oldseq.rw_seq & PTHRW_COUNT_MASK))) { + // no L/S update if lock is not exclusive or no writer pending + // kernel transition only needed if U == S } else { - res = 0; + return _pthread_rwlock_unlock_slow(rwlock, updated_seqfields); } - } while (res == EINTR); - - if (res != 0) { - uint64_t myid = 0; - (void)pthread_threadid_np(pthread_self(), &myid); - PTHREAD_ABORT("rwunlock from kernel with unknown error %x: tid %x\n", res, (uint32_t)myid); } - } + } while (os_unlikely(seqfields != (updated_seqfields = + rwlock_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, seqfields, + release)))); - PLOCKSTAT_RW_RELEASE(orwlock, wrlock); - - return res; + return 0; }
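
The rewrite above retires the old lcntaddr/ucntaddr/seqaddr pointer triple and the OSAtomicCompareAndSwap64Barrier loops in favor of the rwlock_seq union, wide single-copy-atomic loads of the packed L/S words, and explicit acquire/release cmpxchg loops. What follows is a minimal, self-contained sketch of the two supporting techniques — the 16-byte re-alignment inside the lock's reserved words (as in RWLOCK_GETSEQ_ADDR) and the CAS retry shape used by _pthread_rwlock_lock. It is an illustration, not the libpthread implementation: it uses portable C11 atomics instead of the private os_atomic_* wrappers, and every toy_* name is hypothetical.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Toy stand-in for the lock storage: like pthread_rwlock_t, it reserves
 * more 32-bit words than strictly needed (rw_seq plus the rw_mis spill
 * words) so that a 16-byte aligned, 16-byte wide window always fits
 * inside them, wherever the struct itself happens to land.
 */
typedef struct {
	uint32_t rw_seq[4];
	uint32_t rw_mis[4];
} toy_rwlock;

/*
 * Same rounding expression as RWLOCK_GETSEQ_ADDR in the diff: round up
 * to the first 128-bit aligned address inside the rw_seq & rw_mis arrays.
 */
static uint32_t *
toy_getseq_addr(toy_rwlock *rwlock)
{
	return (uint32_t *)(((uintptr_t)rwlock->rw_seq + 0xful) & ~0xful);
}

/*
 * The retry-loop shape used by _pthread_rwlock_lock: load the packed L/S
 * words, derive the new value, and CAS with acquire ordering; on failure
 * the compare-exchange refreshes oldval and the loop recomputes. Here a
 * plain increment stands in for the real L/S bit manipulation, and C11
 * atomics stand in for os_atomic_cmpxchgv.
 */
static void
toy_seq_bump(_Atomic uint64_t *seq_LS)
{
	uint64_t oldval = atomic_load_explicit(seq_LS, memory_order_relaxed);
	uint64_t newval;
	do {
		newval = oldval + 1;
	} while (!atomic_compare_exchange_weak_explicit(seq_LS, &oldval,
			newval, memory_order_acquire, memory_order_relaxed));
}

int
main(void)
{
	toy_rwlock lock = { { 0 }, { 0 } };
	uint32_t *seq = toy_getseq_addr(&lock);
	printf("seq words at %p (rw_seq starts at %p)\n",
			(void *)seq, (void *)lock.rw_seq);

	_Atomic uint64_t seq_LS = 0;
	toy_seq_bump(&seq_LS);
	printf("seq_LS after bump: %llu\n",
			(unsigned long long)atomic_load_explicit(&seq_LS,
					memory_order_relaxed));
	return 0;
}

The indirection matters because the pthread_rwlock_t layout is fixed by ABI and a process-shared lock can sit at an arbitrary 4-byte offset in shared memory — hence the misalign flag and the rw_mis spill words in the diff. The aligned window is what makes the os_atomic_load_wide calls and the 128-bit RWLOCK_USE_INT128 path legal on LP64 targets.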