diff --git a/include/libkern/OSAtomic.h b/include/libkern/OSAtomic.h
index 4bd3c15..9f2dba4 100644
--- a/include/libkern/OSAtomic.h
+++ b/include/libkern/OSAtomic.h
@@ -1,10 +1,8 @@
 /*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  *
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
- *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -38,52 +36,124 @@
  * WARNING: all addresses passed to these functions must be "naturally aligned", ie
  * int32_t's must be 32-bit aligned (low 2 bits of address zero), and int64_t's
  * must be 64-bit aligned (low 3 bits of address zero.)
+ *
+ * Note that some versions of the atomic functions incorporate memory barriers,
+ * and some do not. Barriers strictly order memory access on a weakly-ordered
+ * architecture such as PPC. All loads and stores executed in sequential program
+ * order before the barrier will complete before any load or store executed after
+ * the barrier. On a uniprocessor, the barrier operation is typically a nop.
+ * On a multiprocessor, the barrier can be quite expensive on some platforms,
+ * eg PPC.
+ *
+ * Most code will want to use the barrier functions to ensure that memory shared
+ * between threads is properly synchronized. For example, if you want to initialize
+ * a shared data structure and then atomically increment a variable to indicate
+ * that the initialization is complete, then you must use OSAtomicIncrement32Barrier()
+ * to ensure that the stores to your data structure complete before the atomic add.
+ * Likewise, the consumer of that data structure must use OSAtomicDecrement32Barrier(),
+ * in order to ensure that their loads of the structure are not executed before
+ * the atomic decrement. On the other hand, if you are simply incrementing a global
+ * counter, then it is safe and potentially faster to use OSAtomicIncrement32().
+ *
+ * If you are unsure which version to use, prefer the barrier variants as they are
+ * safer.
+ *
+ * The spinlock and queue operations always incorporate a barrier.
  */
 __BEGIN_DECLS
 
-/* Arithmetic functions. They do not incorporate memory barriers and thus cannot
- * be used by themselves to synchronize shared memory. They return the new value.
- * The "or", "and", and "xor" operations are layered on top of compare-and-swap.
+
+/* Arithmetic functions. They return the new value.
  */
-int32_t OSAtomicAdd32( int32_t theAmount, int32_t *theValue );
-inline static
-int32_t OSAtomicIncrement32( int32_t *theValue ) { return OSAtomicAdd32( 1, theValue); }
-inline static
-int32_t OSAtomicDecrement32( int32_t *theValue ) { return OSAtomicAdd32( -1, theValue); }
-int32_t OSAtomicOr32( uint32_t theMask, uint32_t *theValue );
-int32_t OSAtomicAnd32( uint32_t theMask, uint32_t *theValue );
-int32_t OSAtomicXor32( uint32_t theMask, uint32_t *theValue );
-#if defined(__ppc64__) || defined(__i386__)
-int64_t OSAtomicAdd64( int64_t theAmount, int64_t *theValue );
-inline static
-int64_t OSAtomicIncrement64( int64_t *theValue ) { return OSAtomicAdd64( 1, theValue); }
-inline static
-int64_t OSAtomicDecrement64( int64_t *theValue ) { return OSAtomicAdd64( -1, theValue); }
-#endif /* defined(__ppc64__) || defined(__i386__) */
-
-/* Compare and swap. They do not incorporate memory barriers and thus cannot be used
- * by themselved to synchronize shared memory. They return true if the swap occured.
+int32_t OSAtomicAdd32( int32_t __theAmount, volatile int32_t *__theValue );
+int32_t OSAtomicAdd32Barrier( int32_t __theAmount, volatile int32_t *__theValue );
+
+__inline static
+int32_t OSAtomicIncrement32( volatile int32_t *__theValue )
+		{ return OSAtomicAdd32( 1, __theValue); }
+__inline static
+int32_t OSAtomicIncrement32Barrier( volatile int32_t *__theValue )
+		{ return OSAtomicAdd32Barrier( 1, __theValue); }
+
+__inline static
+int32_t OSAtomicDecrement32( volatile int32_t *__theValue )
+		{ return OSAtomicAdd32( -1, __theValue); }
+__inline static
+int32_t OSAtomicDecrement32Barrier( volatile int32_t *__theValue )
+		{ return OSAtomicAdd32Barrier( -1, __theValue); }
+
+#if defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+
+int64_t OSAtomicAdd64( int64_t __theAmount, volatile int64_t *__theValue );
+int64_t OSAtomicAdd64Barrier( int64_t __theAmount, volatile int64_t *__theValue );
+
+__inline static
+int64_t OSAtomicIncrement64( volatile int64_t *__theValue )
+		{ return OSAtomicAdd64( 1, __theValue); }
+__inline static
+int64_t OSAtomicIncrement64Barrier( volatile int64_t *__theValue )
+		{ return OSAtomicAdd64Barrier( 1, __theValue); }
+
+__inline static
+int64_t OSAtomicDecrement64( volatile int64_t *__theValue )
+		{ return OSAtomicAdd64( -1, __theValue); }
+__inline static
+int64_t OSAtomicDecrement64Barrier( volatile int64_t *__theValue )
+		{ return OSAtomicAdd64Barrier( -1, __theValue); }
+
+#endif /* defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__) */
+
+
+/* Boolean functions (and, or, xor.) These come in four versions for each operation:
+ * with and without barriers, and returning the old or new value of the operation.
+ * The "Orig" versions return the original value, ie before the operation; the non-Orig
+ * versions return the value after the operation. All are layered on top of
+ * compare-and-swap.
  */
-bool OSAtomicCompareAndSwap32( int32_t oldValue, int32_t newValue, int32_t *theValue );
-#if defined(__ppc64__) || defined(__i386__)
-bool OSAtomicCompareAndSwap64( int64_t oldValue, int64_t newValue, int64_t *theValue );
-#endif /* defined(__ppc64__) || defined(__i386__) */
-
-/* Test and set. They do not incorporate memory barriers and thus cannot be used by
- * themselves to synchronize shared memory. They return the original value of the bit.
- * They operate on bit (0x80>>(n&7)) in byte ((char*)theAddress + (n>>3)). They are
- * layered on top of the compare-and-swap operation.
+int32_t OSAtomicOr32( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicOr32Barrier( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicOr32Orig( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicOr32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue );
+
+int32_t OSAtomicAnd32( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicAnd32Barrier( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicAnd32Orig( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicAnd32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue );
+
+int32_t OSAtomicXor32( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicXor32Barrier( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicXor32Orig( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicXor32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue );
+
+
+/* Compare and swap. They return true if the swap occurred. There are several versions,
+ * depending on data type and whether or not a barrier is used.
  */
-bool OSAtomicTestAndSet( uint32_t n, void *theAddress );
-bool OSAtomicTestAndClear( uint32_t n, void *theAddress );
+bool OSAtomicCompareAndSwap32( int32_t __oldValue, int32_t __newValue, volatile int32_t *__theValue );
+bool OSAtomicCompareAndSwap32Barrier( int32_t __oldValue, int32_t __newValue, volatile int32_t *__theValue );
+bool OSAtomicCompareAndSwapPtr( void *__oldValue, void *__newValue, void * volatile *__theValue );
+bool OSAtomicCompareAndSwapPtrBarrier( void *__oldValue, void *__newValue, void * volatile *__theValue );
+bool OSAtomicCompareAndSwapInt( int __oldValue, int __newValue, volatile int *__theValue );
+bool OSAtomicCompareAndSwapIntBarrier( int __oldValue, int __newValue, volatile int *__theValue );
+bool OSAtomicCompareAndSwapLong( long __oldValue, long __newValue, volatile long *__theValue );
+bool OSAtomicCompareAndSwapLongBarrier( long __oldValue, long __newValue, volatile long *__theValue );
+
+#if defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+
+bool OSAtomicCompareAndSwap64( int64_t __oldValue, int64_t __newValue, volatile int64_t *__theValue );
+bool OSAtomicCompareAndSwap64Barrier( int64_t __oldValue, int64_t __newValue, volatile int64_t *__theValue );
+
+#endif /* defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__) */
 
-/* FILO queue and dequeue. These use memory barriers as required to synchronize access to
- * the queued/dequeued structure. The "inOffset" field is the offset within the structure
- * of the link field. "inList" is the list head; it is not a struct. The queue is a singly
- * linked list with a zero terminator.
+
+/* Test and set. They return the original value of the bit, and operate on bit (0x80>>(n&7))
+ * in byte ((char*)theAddress + (n>>3)).
  */
-void * OSAtomicDequeue( void ** inList, size_t inOffset);
-void OSAtomicEnqueue( void ** inList, void * inNewLink, size_t inOffset);
+bool OSAtomicTestAndSet( uint32_t __n, volatile void *__theAddress );
+bool OSAtomicTestAndSetBarrier( uint32_t __n, volatile void *__theAddress );
+bool OSAtomicTestAndClear( uint32_t __n, volatile void *__theAddress );
+bool OSAtomicTestAndClearBarrier( uint32_t __n, volatile void *__theAddress );
+
 
 /* Spinlocks. These use memory barriers as required to synchronize access to shared
  * memory protected by the lock. The lock operation spins, but employs various strategies
@@ -93,22 +163,47 @@ void OSAtomicEnqueue( void ** inList, void * inNewLink, size_t inOffset);
  */
 #define OS_SPINLOCK_INIT    0
 
-typedef int32_t OSSpinLock;
+typedef int32_t	OSSpinLock;
+
+bool OSSpinLockTry( volatile OSSpinLock *__lock );
+void OSSpinLockLock( volatile OSSpinLock *__lock );
+void OSSpinLockUnlock( volatile OSSpinLock *__lock );
+
+
+/* Lockless atomic enqueue and dequeue. These routines manipulate singly
+ * linked LIFO lists. Ie, a dequeue will return the most recently enqueued
+ * element, or NULL if the list is empty. The "offset" parameter is the offset
+ * in bytes of the link field within the data structure being queued. The
+ * link field should be a pointer type. Memory barriers are incorporated as
+ * needed to permit thread-safe access to the queue element.
+ */
+#if defined(__x86_64__)
+
+typedef volatile struct {
+	void	*opaque1;
+	long	opaque2;
+} OSQueueHead __attribute__ ((aligned (16)));
+
+#else
+
+typedef volatile struct {
+	void	*opaque1;
+	long	opaque2;
+} OSQueueHead;
 
-bool OSSpinLockTry( OSSpinLock *lock );
-void OSSpinLockLock( OSSpinLock *lock );
-void OSSpinLockUnlock( OSSpinLock *lock );
+#endif
 
-/* Memory barrier. This strictly orders memory accesses in a weakly ordered model such
- * as PPC. All loads and stores executed in sequential program order before the barrier
- * will complete with respect to the coherence mechanism, before any load or store
- * executed after the barrier. Used with an atomic operation, the barrier can be used to
- * create custom synchronization protocols, as an alternative to the spinlock or queue/
- * dequeue operations. Note that this barrier does not order uncached loads and stores.
- * On a uniprocessor, the barrier is typically a nop.
+#define OS_ATOMIC_QUEUE_INIT	{ NULL, 0 }
+
+void  OSAtomicEnqueue( OSQueueHead *__list, void *__new, size_t __offset);
+void* OSAtomicDequeue( OSQueueHead *__list, size_t __offset);
+
+
+/* Memory barrier. It is both a read and write barrier.
  */
 void OSMemoryBarrier( void );
+
 __END_DECLS
 
 #endif /* _OSATOMIC_H_ */
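
The comment block added at the top of the header describes a barrier-based publish/consume pattern. Below is a minimal sketch of that pattern using only functions declared in this header; the SharedThing structure and the gThing/gReady names are hypothetical, introduced purely for illustration.

/* Producer/consumer publication sketch. SharedThing, gThing, and gReady
 * are illustrative names, not part of <libkern/OSAtomic.h>. */
#include <libkern/OSAtomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
	int	config;
	int	state;
} SharedThing;

static SharedThing	gThing;
static volatile int32_t	gReady = 0;	/* count of published structures */

/* Producer: initialize the structure, then publish it. The Barrier variant
 * guarantees the stores to gThing complete before the new value of gReady
 * becomes visible to other CPUs. */
void
publish(void)
{
	gThing.config = 42;
	gThing.state  = 1;
	OSAtomicIncrement32Barrier(&gReady);
}

/* Consumer: claim a published structure. The Barrier variant keeps the
 * subsequent loads of gThing from being executed before the decrement. */
bool
consume(int *out)
{
	if (OSAtomicDecrement32Barrier(&gReady) < 0) {
		OSAtomicIncrement32(&gReady);	/* nothing was published; undo */
		return false;
	}
	*out = gThing.config;
	return true;
}

On a uniprocessor the barriers cost little; on a weakly-ordered multiprocessor, eg PPC, they are what makes the handoff correct, which is why the header recommends the barrier variants when in doubt.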
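
The compare-and-swap functions are typically wrapped in a retry loop: read the current value, compute the desired value, and attempt the swap, retrying if another thread modified the variable in between. A sketch follows; AtomicStoreMax32 is an illustrative helper, not a function from this header.

#include <libkern/OSAtomic.h>
#include <stdint.h>

/* Lock-free "store maximum": raise *where to candidate if candidate is
 * larger, retrying whenever another thread wins the race. */
void
AtomicStoreMax32(int32_t candidate, volatile int32_t *where)
{
	int32_t old;

	do {
		old = *where;
		if (candidate <= old)
			return;		/* stored value is already >= candidate */
	} while (!OSAtomicCompareAndSwap32Barrier(old, candidate, where));
}

The swap fails exactly when *where no longer equals old, so the loop re-reads and re-tests until either the store succeeds or a larger value appears. The Barrier variant is chosen on the assumption that readers correlate the maximum with other shared data; a standalone statistic could use OSAtomicCompareAndSwap32() instead.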
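
The spinlock and queue declarations combine naturally: the lockless LIFO hands structures between threads, and a spinlock protects short critical sections around other shared state. In this sketch, WorkItem and the g-prefixed globals are hypothetical; offsetof() supplies the byte offset of the link field that OSAtomicEnqueue/OSAtomicDequeue expect.

#include <libkern/OSAtomic.h>
#include <stddef.h>		/* offsetof */

/* A node for the lockless LIFO; the link field must be a pointer type. */
typedef struct WorkItem {
	struct WorkItem	*link;		/* reserved for the queue's use */
	int		payload;
} WorkItem;

static OSQueueHead	gQueue = OS_ATOMIC_QUEUE_INIT;
static OSSpinLock	gLock  = OS_SPINLOCK_INIT;
static int		gDequeued;	/* protected by gLock */

void
produce(WorkItem *item)
{
	OSAtomicEnqueue(&gQueue, item, offsetof(WorkItem, link));
}

WorkItem *
consume_one(void)
{
	/* LIFO: returns the most recently enqueued item, or NULL if empty. */
	WorkItem *item = OSAtomicDequeue(&gQueue, offsetof(WorkItem, link));

	if (item != NULL) {
		OSSpinLockLock(&gLock);		/* keep spinlocked sections short */
		gDequeued++;
		OSSpinLockUnlock(&gLock);
	}
	return item;
}

Because a contending thread busy-waits in OSSpinLockLock(), albeit with the backoff strategies the header comment describes, spinlocks suit only brief critical sections; work that can block should use a heavier primitive.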