/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
- *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* WARNING: all addresses passed to these functions must be "naturally aligned", ie
* int32_t's must be 32-bit aligned (low 2 bits of address zero), and int64_t's
* must be 64-bit aligned (low 3 bits of address zero.)
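+ *
+ * For example, a sketch with hypothetical names:
+ *
+ *	int64_t gCounter;			// naturally aligned; safe to pass
+ *	#pragma pack(1)
+ *	struct { char c; int64_t x; } gPacked;	// gPacked.x may be misaligned; not safe
+ *	#pragma pack()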
+ *
+ * Note that some versions of the atomic functions incorporate memory barriers,
+ * and some do not. Barriers strictly order memory access on a weakly-ordered
+ * architecture such as PPC. All loads and stores executed in sequential program
+ * order before the barrier will complete before any load or store executed after
+ * the barrier. On a uniprocessor, the barrier operation is typically a nop.
+ * On a multiprocessor, the barrier can be quite expensive on some platforms,
+ * eg PPC.
+ *
+ * Most code will want to use the barrier functions to ensure that memory shared
+ * between threads is properly synchronized. For example, if you want to initialize
+ * a shared data structure and then atomically increment a variable to indicate
+ * that the initialization is complete, then you must use OSAtomicIncrement32Barrier()
+ * to ensure that the stores to your data structure complete before the atomic add.
+ * Likewise, the consumer of that data structure must use OSAtomicDecrement32Barrier(),
+ * in order to ensure that their loads of the structure are not executed before
+ * the atomic decrement. On the other hand, if you are simply incrementing a global
+ * counter, then it is safe and potentially faster to use OSAtomicIncrement32().
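+ *
+ * As a minimal sketch of that pattern (the structure, variable, and function
+ * names here are hypothetical, not part of this API):
+ *
+ *	static struct { int payload; } sShared;	// hypothetical shared structure
+ *	static volatile int32_t sReady = 0;
+ *
+ *	// producer: initialize the structure, then publish it
+ *	sShared.payload = compute_payload();
+ *	OSAtomicIncrement32Barrier( &sReady );	// prior stores complete before the add
+ *
+ *	// consumer: wait for publication, then read
+ *	while ( sReady == 0 )
+ *		;				// spin (illustration only)
+ *	OSAtomicDecrement32Barrier( &sReady );	// later loads cannot move before this
+ *	use_payload( sShared.payload );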
+ *
+ * If you are unsure which version to use, prefer the barrier variants as they are
+ * safer.
+ *
+ * The spinlock and queue operations always incorporate a barrier.
*/
__BEGIN_DECLS
-/* Arithmetic functions. They do not incorporate memory barriers and thus cannot
- * be used by themselves to synchronize shared memory. They return the new value.
- * The "or", "and", and "xor" operations are layered on top of compare-and-swap.
+
+/* Arithmetic functions. They return the new value.
*/
-int32_t OSAtomicAdd32( int32_t theAmount, int32_t *theValue );
-inline static
-int32_t OSAtomicIncrement32( int32_t *theValue ) { return OSAtomicAdd32( 1, theValue); }
-inline static
-int32_t OSAtomicDecrement32( int32_t *theValue ) { return OSAtomicAdd32( -1, theValue); }
-int32_t OSAtomicOr32( uint32_t theMask, uint32_t *theValue );
-int32_t OSAtomicAnd32( uint32_t theMask, uint32_t *theValue );
-int32_t OSAtomicXor32( uint32_t theMask, uint32_t *theValue );
-#if defined(__ppc64__) || defined(__i386__)
-int64_t OSAtomicAdd64( int64_t theAmount, int64_t *theValue );
-inline static
-int64_t OSAtomicIncrement64( int64_t *theValue ) { return OSAtomicAdd64( 1, theValue); }
-inline static
-int64_t OSAtomicDecrement64( int64_t *theValue ) { return OSAtomicAdd64( -1, theValue); }
-#endif /* defined(__ppc64__) || defined(__i386__) */
-
-/* Compare and swap. They do not incorporate memory barriers and thus cannot be used
- * by themselved to synchronize shared memory. They return true if the swap occured.
+int32_t OSAtomicAdd32( int32_t __theAmount, volatile int32_t *__theValue );
+int32_t OSAtomicAdd32Barrier( int32_t __theAmount, volatile int32_t *__theValue );
+
+__inline static
+int32_t OSAtomicIncrement32( volatile int32_t *__theValue )
+ { return OSAtomicAdd32( 1, __theValue); }
+__inline static
+int32_t OSAtomicIncrement32Barrier( volatile int32_t *__theValue )
+ { return OSAtomicAdd32Barrier( 1, __theValue); }
+
+__inline static
+int32_t OSAtomicDecrement32( volatile int32_t *__theValue )
+ { return OSAtomicAdd32( -1, __theValue); }
+__inline static
+int32_t OSAtomicDecrement32Barrier( volatile int32_t *__theValue )
+ { return OSAtomicAdd32Barrier( -1, __theValue); }
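+
+/* For example, a sketch of the simple global-counter case described in the
+ * header comment above (gEventCount is a hypothetical global; the non-barrier
+ * version suffices because no other memory depends on the ordering):
+ *
+ *	static volatile int32_t gEventCount = 0;
+ *	...
+ *	int32_t newCount = OSAtomicIncrement32( &gEventCount );
+ */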
+
+#if defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+
+int64_t OSAtomicAdd64( int64_t __theAmount, volatile int64_t *__theValue );
+int64_t OSAtomicAdd64Barrier( int64_t __theAmount, volatile int64_t *__theValue );
+
+__inline static
+int64_t OSAtomicIncrement64( volatile int64_t *__theValue )
+ { return OSAtomicAdd64( 1, __theValue); }
+__inline static
+int64_t OSAtomicIncrement64Barrier( volatile int64_t *__theValue )
+ { return OSAtomicAdd64Barrier( 1, __theValue); }
+
+__inline static
+int64_t OSAtomicDecrement64( volatile int64_t *__theValue )
+ { return OSAtomicAdd64( -1, __theValue); }
+__inline static
+int64_t OSAtomicDecrement64Barrier( volatile int64_t *__theValue )
+ { return OSAtomicAdd64Barrier( -1, __theValue); }
+
+#endif /* defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__) */
+
+
+/* Boolean functions (and, or, xor.) These come in four versions for each operation:
+ * with and without barriers, and returning the old or new value of the operation.
+ * The "Orig" versions return the original value, ie before the operation, the non-Orig
+ * versions return the value after the operation. All are layered on top of
+ * compare-and-swap.
*/
-bool OSAtomicCompareAndSwap32( int32_t oldValue, int32_t newValue, int32_t *theValue );
-#if defined(__ppc64__) || defined(__i386__)
-bool OSAtomicCompareAndSwap64( int64_t oldValue, int64_t newValue, int64_t *theValue );
-#endif /* defined(__ppc64__) || defined(__i386__) */
-
-/* Test and set. They do not incorporate memory barriers and thus cannot be used by
- * themselves to synchronize shared memory. They return the original value of the bit.
- * They operate on bit (0x80>>(n&7)) in byte ((char*)theAddress + (n>>3)). They are
- * layered on top of the compare-and-swap operation.
+int32_t OSAtomicOr32( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicOr32Barrier( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicOr32Orig( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicOr32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue );
+
+int32_t OSAtomicAnd32( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicAnd32Barrier( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicAnd32Orig( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicAnd32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue );
+
+int32_t OSAtomicXor32( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicXor32Barrier( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicXor32Orig( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t OSAtomicXor32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue );
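+
+/* For example, a sketch of atomically setting a flag bit and learning whether it
+ * was already set (gFlags and kBusyFlag are hypothetical):
+ *
+ *	static volatile uint32_t gFlags = 0;
+ *	...
+ *	int32_t old = OSAtomicOr32Orig( kBusyFlag, &gFlags );
+ *	if ( old & kBusyFlag )
+ *		;	// the bit was already set before this call
+ */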
+
+
+/* Compare and swap. They return true if the swap occurred. There are several versions,
+ * depending on data type and whether or not a barrier is used.
*/
-bool OSAtomicTestAndSet( uint32_t n, void *theAddress );
-bool OSAtomicTestAndClear( uint32_t n, void *theAddress );
+bool OSAtomicCompareAndSwap32( int32_t __oldValue, int32_t __newValue, volatile int32_t *__theValue );
+bool OSAtomicCompareAndSwap32Barrier( int32_t __oldValue, int32_t __newValue, volatile int32_t *__theValue );
+bool OSAtomicCompareAndSwapPtr( void *__oldValue, void *__newValue, void * volatile *__theValue );
+bool OSAtomicCompareAndSwapPtrBarrier( void *__oldValue, void *__newValue, void * volatile *__theValue );
+bool OSAtomicCompareAndSwapInt( int __oldValue, int __newValue, volatile int *__theValue );
+bool OSAtomicCompareAndSwapIntBarrier( int __oldValue, int __newValue, volatile int *__theValue );
+bool OSAtomicCompareAndSwapLong( long __oldValue, long __newValue, volatile long *__theValue );
+bool OSAtomicCompareAndSwapLongBarrier( long __oldValue, long __newValue, volatile long *__theValue );
+
+#if defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+
+bool OSAtomicCompareAndSwap64( int64_t __oldValue, int64_t __newValue, volatile int64_t *__theValue );
+bool OSAtomicCompareAndSwap64Barrier( int64_t __oldValue, int64_t __newValue, volatile int64_t *__theValue );
+
+#endif /* defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__) */
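+
+/* For example, a sketch of the usual compare-and-swap retry loop, here computing
+ * an atomic maximum (gMax is a hypothetical global; use the Barrier variant if
+ * the value guards other shared memory):
+ *
+ *	static volatile int32_t gMax = 0;
+ *
+ *	void update_max( int32_t candidate ) {
+ *		int32_t old;
+ *		do {
+ *			old = gMax;
+ *			if ( candidate <= old )
+ *				return;		// current maximum is already larger
+ *		} while ( !OSAtomicCompareAndSwap32( old, candidate, &gMax ) );
+ *	}
+ */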
-/* FILO queue and dequeue. These use memory barriers as required to synchronize access to
- * the queued/dequeued structure. The "inOffset" field is the offset within the structure
- * of the link field. "inList" is the list head; it is not a struct. The queue is a singly
- * linked list with a zero terminator.
+
+/* Test and set. They return the original value of the bit, and operate on bit (0x80>>(n&7))
+ * in byte ((char*)theAddress + (n>>3)).
*/
-void * OSAtomicDequeue( void ** inList, size_t inOffset);
-void OSAtomicEnqueue( void ** inList, void * inNewLink, size_t inOffset);
+bool OSAtomicTestAndSet( uint32_t __n, volatile void *__theAddress );
+bool OSAtomicTestAndSetBarrier( uint32_t __n, volatile void *__theAddress );
+bool OSAtomicTestAndClear( uint32_t __n, volatile void *__theAddress );
+bool OSAtomicTestAndClearBarrier( uint32_t __n, volatile void *__theAddress );
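+
+/* For example, OSAtomicTestAndSet( 0, p ) operates on the 0x80 bit of the first
+ * byte, and OSAtomicTestAndSet( 9, p ) on the 0x40 bit of the second byte. A
+ * sketch (gByteArray is hypothetical):
+ *
+ *	static volatile uint8_t gByteArray[4];
+ *	...
+ *	bool wasSet = OSAtomicTestAndSetBarrier( 9, gByteArray );	// bit 0x40 of gByteArray[1]
+ */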
+
/* Spinlocks. These use memory barriers as required to synchronize access to shared
 * memory protected by the lock. The lock operation spins, but employs various strategies
 * to back off if the lock is held, making it immune to most priority-inversion livelocks.
 * The try operation immediately returns false if the lock was held, true if it took the
 * lock. The convention is that unlocked is zero, locked is nonzero.
*/
#define OS_SPINLOCK_INIT 0
-typedef int32_t OSSpinLock;
+typedef int32_t OSSpinLock;
+
+bool OSSpinLockTry( volatile OSSpinLock *__lock );
+void OSSpinLockLock( volatile OSSpinLock *__lock );
+void OSSpinLockUnlock( volatile OSSpinLock *__lock );
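+
+/* For example, a sketch of guarding a critical section (gLock, gList, and
+ * list_insert are hypothetical):
+ *
+ *	static OSSpinLock gLock = OS_SPINLOCK_INIT;
+ *
+ *	OSSpinLockLock( &gLock );
+ *	list_insert( &gList, node );	// shared state protected by the lock
+ *	OSSpinLockUnlock( &gLock );
+ */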
+
+
+/* Lockless atomic enqueue and dequeue. These routines manipulate singly
+ * linked LIFO lists. Ie, a dequeue will return the most recently enqueued
+ * element, or NULL if the list is empty. The "offset" parameter is the offset
+ * in bytes of the link field within the data structure being queued. The
+ * link field should be a pointer type. Memory barriers are incorporated as
+ * needed to permit thread-safe access to the queue element.
+ */
+#if defined(__x86_64__)
+
+typedef volatile struct {
+ void *opaque1;
+ long opaque2;
+} OSQueueHead __attribute__ ((aligned (16)));
+
+#else
+
+typedef volatile struct {
+ void *opaque1;
+ long opaque2;
+} OSQueueHead;
-bool OSSpinLockTry( OSSpinLock *lock );
-void OSSpinLockLock( OSSpinLock *lock );
-void OSSpinLockUnlock( OSSpinLock *lock );
+#endif
-/* Memory barrier. This strictly orders memory accesses in a weakly ordered model such
- * as PPC. All loads and stores executed in sequential program order before the barrier
- * will complete with respect to the coherence mechanism, before any load or store
- * executed after the barrier. Used with an atomic operation, the barrier can be used to
- * create custom synchronization protocols, as an alternative to the spinlock or queue/
- * dequeue operations. Note that this barrier does not order uncached loads and stores.
- * On a uniprocessor, the barrier is typically a nop.
+#define OS_ATOMIC_QUEUE_INIT { NULL, 0 }
+
+void OSAtomicEnqueue( OSQueueHead *__list, void *__new, size_t __offset);
+void* OSAtomicDequeue( OSQueueHead *__list, size_t __offset);
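+
+/* For example, a sketch of enqueueing and dequeueing elements, locating the link
+ * field with offsetof() from <stddef.h> (the Element type and variables are
+ * hypothetical):
+ *
+ *	typedef struct Element {
+ *		struct Element	*link;		// the link field; a pointer type
+ *		int		data;
+ *	} Element;
+ *
+ *	static OSQueueHead gQueue = OS_ATOMIC_QUEUE_INIT;
+ *
+ *	OSAtomicEnqueue( &gQueue, elem, offsetof( Element, link ) );
+ *	Element *mostRecent = OSAtomicDequeue( &gQueue, offsetof( Element, link ) );
+ */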
+
+
+/* Memory barrier. It is both a read and write barrier.
*/
void OSMemoryBarrier( void );
+
__END_DECLS
#endif /* _OSATOMIC_H_ */