Libc-583.tar.gz

[apple/libc.git] / include / libkern / OSAtomic.h
diff --git a/include/libkern/OSAtomic.h b/include/libkern/OSAtomic.h

index 4bd3c156122cae46fdf2e0e25f57b01f5432fb4f..9f2dba4390ebdd3d60cf57388a6aee84bcde28c5 100644 (file)
--- a/include/libkern/OSAtomic.h
+++ b/include/libkern/OSAtomic.h
@@ -1,10 +1,8 @@
  /*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
   *
   * @APPLE_LICENSE_HEADER_START@
   * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
- * 
   * This file contains Original Code and/or Modifications of Original Code
   * as defined in and that are subject to the Apple Public Source License
   * Version 2.0 (the 'License'). You may not use this file except in
@@ -38,52 +36,124 @@
   * WARNING: all addresses passed to these functions must be "naturally aligned", ie
   * int32_t's must be 32-bit aligned (low 2 bits of address zero), and int64_t's
   * must be 64-bit aligned (low 3 bits of address zero.)
+ *
+ * Note that some versions of the atomic functions incorporate memory barriers,
+ * and some do not.  Barriers strictly order memory access on a weakly-ordered
+ * architecture such as PPC.  All loads and stores executed in sequential program
+ * order before the barrier will complete before any load or store executed after
+ * the barrier.  On a uniprocessor, the barrier operation is typically a nop.
+ * On a multiprocessor, the barrier can be quite expensive on some platforms,
+ * e.g. PPC.
+ *
+ * Most code will want to use the barrier functions to ensure that memory shared
+ * between threads is properly synchronized.  For example, if you want to initialize
+ * a shared data structure and then atomically increment a variable to indicate
+ * that the initialization is complete, then you must use OSAtomicIncrement32Barrier()
+ * to ensure that the stores to your data structure complete before the atomic add.
+ * Likewise, the consumer of that data structure must use OSAtomicDecrement32Barrier(),
+ * in order to ensure that their loads of the structure are not executed before
+ * the atomic decrement.  On the other hand, if you are simply incrementing a global
+ * counter, then it is safe and potentially faster to use OSAtomicIncrement32().
+ *
+ * If you are unsure which version to use, prefer the barrier variants as they are
+ * safer.
+ *
+ * The spinlock and queue operations always incorporate a barrier.
   */ 
  __BEGIN_DECLS
  
-/* Arithmetic functions.  They do not incorporate memory barriers and thus cannot
- * be used by themselves to synchronize shared memory.  They return the new value.
- * The "or", "and", and "xor" operations are layered on top of compare-and-swap.
+
+/* Arithmetic functions.  They return the new value.
   */
-int32_t        OSAtomicAdd32( int32_t theAmount, int32_t *theValue );
-inline static
-int32_t        OSAtomicIncrement32( int32_t *theValue ) { return OSAtomicAdd32(  1, theValue); }
-inline static
-int32_t        OSAtomicDecrement32( int32_t *theValue ) { return OSAtomicAdd32( -1, theValue); }
-int32_t        OSAtomicOr32( uint32_t theMask, uint32_t *theValue );
-int32_t        OSAtomicAnd32( uint32_t theMask, uint32_t *theValue ); 
-int32_t        OSAtomicXor32( uint32_t theMask, uint32_t *theValue );
-#if defined(__ppc64__) || defined(__i386__)
-int64_t        OSAtomicAdd64( int64_t theAmount, int64_t *theValue );
-inline static
-int64_t        OSAtomicIncrement64( int64_t *theValue ) { return OSAtomicAdd64(  1, theValue); }
-inline static
-int64_t        OSAtomicDecrement64( int64_t *theValue ) { return OSAtomicAdd64( -1, theValue); }
-#endif  /* defined(__ppc64__) || defined(__i386__) */
-
-/* Compare and swap.  They do not incorporate memory barriers and thus cannot be used
- * by themselved to synchronize shared memory.  They return true if the swap occured.
+int32_t        OSAtomicAdd32( int32_t __theAmount, volatile int32_t *__theValue );
+int32_t        OSAtomicAdd32Barrier( int32_t __theAmount, volatile int32_t *__theValue );
+
+__inline static
+int32_t        OSAtomicIncrement32( volatile int32_t *__theValue )
+            { return OSAtomicAdd32(  1, __theValue); }
+__inline static
+int32_t        OSAtomicIncrement32Barrier( volatile int32_t *__theValue )
+            { return OSAtomicAdd32Barrier(  1, __theValue); }
+
+__inline static
+int32_t        OSAtomicDecrement32( volatile int32_t *__theValue )
+            { return OSAtomicAdd32( -1, __theValue); }
+__inline static
+int32_t        OSAtomicDecrement32Barrier( volatile int32_t *__theValue )
+            { return OSAtomicAdd32Barrier( -1, __theValue); }
+
+#if defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+
+int64_t        OSAtomicAdd64( int64_t __theAmount, volatile int64_t *__theValue );
+int64_t        OSAtomicAdd64Barrier( int64_t __theAmount, volatile int64_t *__theValue );
+
+__inline static
+int64_t        OSAtomicIncrement64( volatile int64_t *__theValue )
+            { return OSAtomicAdd64(  1, __theValue); }
+__inline static
+int64_t        OSAtomicIncrement64Barrier( volatile int64_t *__theValue )
+            { return OSAtomicAdd64Barrier(  1, __theValue); }
+
+__inline static
+int64_t        OSAtomicDecrement64( volatile int64_t *__theValue )
+            { return OSAtomicAdd64( -1, __theValue); }
+__inline static
+int64_t        OSAtomicDecrement64Barrier( volatile int64_t *__theValue )
+            { return OSAtomicAdd64Barrier( -1, __theValue); }
+
+#endif  /* defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__) */
+
+
+/* Boolean functions (and, or, xor.)  These come in four versions for each operation:
+ * with and without barriers, and returning the old or new value of the operation.
+ * The "Orig" versions return the original value, i.e. before the operation; the non-Orig
+ * versions return the value after the operation.  All are layered on top of
+ * compare-and-swap.
   */
-bool    OSAtomicCompareAndSwap32( int32_t oldValue, int32_t newValue, int32_t *theValue );
-#if defined(__ppc64__) || defined(__i386__)
-bool    OSAtomicCompareAndSwap64( int64_t oldValue, int64_t newValue, int64_t *theValue );
-#endif  /* defined(__ppc64__) || defined(__i386__) */
-
-/* Test and set.  They do not incorporate memory barriers and thus cannot be used by
- * themselves to synchronize shared memory.  They return the original value of the bit.
- * They operate on bit (0x80>>(n&7)) in byte ((char*)theAddress + (n>>3)).  They are 
- * layered on top of the compare-and-swap operation.
+int32_t        OSAtomicOr32( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t        OSAtomicOr32Barrier( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t        OSAtomicOr32Orig( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t        OSAtomicOr32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue );
+
+int32_t        OSAtomicAnd32( uint32_t __theMask, volatile uint32_t *__theValue ); 
+int32_t        OSAtomicAnd32Barrier( uint32_t __theMask, volatile uint32_t *__theValue ); 
+int32_t        OSAtomicAnd32Orig( uint32_t __theMask, volatile uint32_t *__theValue ); 
+int32_t        OSAtomicAnd32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue ); 
+
+int32_t        OSAtomicXor32( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t        OSAtomicXor32Barrier( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t        OSAtomicXor32Orig( uint32_t __theMask, volatile uint32_t *__theValue );
+int32_t        OSAtomicXor32OrigBarrier( uint32_t __theMask, volatile uint32_t *__theValue );
+ 
+
+/* Compare and swap.  They return true if the swap occurred.  There are several versions,
+ * depending on data type and whether or not a barrier is used.
   */
-bool    OSAtomicTestAndSet( uint32_t n, void *theAddress );
-bool    OSAtomicTestAndClear( uint32_t n, void *theAddress );
+bool    OSAtomicCompareAndSwap32( int32_t __oldValue, int32_t __newValue, volatile int32_t *__theValue );
+bool    OSAtomicCompareAndSwap32Barrier( int32_t __oldValue, int32_t __newValue, volatile int32_t *__theValue );
+bool   OSAtomicCompareAndSwapPtr( void *__oldValue, void *__newValue, void * volatile *__theValue );
+bool   OSAtomicCompareAndSwapPtrBarrier( void *__oldValue, void *__newValue, void * volatile *__theValue );
+bool   OSAtomicCompareAndSwapInt( int __oldValue, int __newValue, volatile int *__theValue );
+bool   OSAtomicCompareAndSwapIntBarrier( int __oldValue, int __newValue, volatile int *__theValue );
+bool   OSAtomicCompareAndSwapLong( long __oldValue, long __newValue, volatile long *__theValue );
+bool   OSAtomicCompareAndSwapLongBarrier( long __oldValue, long __newValue, volatile long *__theValue );
+
+#if defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__)
+
+bool    OSAtomicCompareAndSwap64( int64_t __oldValue, int64_t __newValue, volatile int64_t *__theValue );
+bool    OSAtomicCompareAndSwap64Barrier( int64_t __oldValue, int64_t __newValue, volatile int64_t *__theValue );
+
+#endif  /* defined(__ppc64__) || defined(__i386__) || defined(__x86_64__) || defined(__arm__) */
  
-/* FILO queue and dequeue.  These use memory barriers as required to synchronize access to
- * the queued/dequeued structure.  The "inOffset" field is the offset within the structure
- * of the link field. "inList" is the list head; it is not a struct.  The queue is a singly
- * linked list with a zero terminator.
+
+/* Test and set.  They return the original value of the bit, and operate on bit (0x80>>(n&7))
+ * in byte ((char*)theAddress + (n>>3)).
   */
-void * OSAtomicDequeue( void ** inList, size_t inOffset);
-void    OSAtomicEnqueue( void ** inList, void * inNewLink, size_t inOffset);
+bool    OSAtomicTestAndSet( uint32_t __n, volatile void *__theAddress );
+bool    OSAtomicTestAndSetBarrier( uint32_t __n, volatile void *__theAddress );
+bool    OSAtomicTestAndClear( uint32_t __n, volatile void *__theAddress );
+bool    OSAtomicTestAndClearBarrier( uint32_t __n, volatile void *__theAddress );
+ 
  
  /* Spinlocks.  These use memory barriers as required to synchronize access to shared
   * memory protected by the lock.  The lock operation spins, but employs various strategies
@@ -93,22 +163,47 @@ void    OSAtomicEnqueue( void ** inList, void * inNewLink, size_t inOffset);
   */
  #define        OS_SPINLOCK_INIT    0
  
-typedef        int32_t OSSpinLock;
+typedef int32_t OSSpinLock;
+
+bool    OSSpinLockTry( volatile OSSpinLock *__lock );
+void    OSSpinLockLock( volatile OSSpinLock *__lock );
+void    OSSpinLockUnlock( volatile OSSpinLock *__lock );
+
+
+/* Lockless atomic enqueue and dequeue.  These routines manipulate singly
+ * linked LIFO lists.  That is, a dequeue will return the most recently enqueued
+ * element, or NULL if the list is empty.  The "offset" parameter is the offset
+ * in bytes of the link field within the data structure being queued.  The
+ * link field should be a pointer type.  Memory barriers are incorporated as 
+ * needed to permit thread-safe access to the queue element.
+ */
+#if defined(__x86_64__)
+
+typedef volatile struct {
+       void    *opaque1;
+       long     opaque2;
+} OSQueueHead __attribute__ ((aligned (16)));
+
+#else
+
+typedef volatile struct {
+       void    *opaque1;
+       long     opaque2;
+} OSQueueHead;
  
-bool    OSSpinLockTry( OSSpinLock *lock );
-void    OSSpinLockLock( OSSpinLock *lock );
-void    OSSpinLockUnlock( OSSpinLock *lock );
+#endif
  
-/* Memory barrier.  This strictly orders memory accesses in a weakly ordered model such
- * as PPC.  All loads and stores executed in sequential program order before the barrier
- * will complete with respect to the coherence mechanism, before any load or store
- * executed after the barrier.  Used with an atomic operation, the barrier can be used to
- * create custom synchronization protocols, as an alternative to the spinlock or queue/
- * dequeue operations.  Note that this barrier does not order uncached loads and stores.
- * On a uniprocessor, the barrier is typically a nop.
+#define        OS_ATOMIC_QUEUE_INIT    { NULL, 0 }
+
+void  OSAtomicEnqueue( OSQueueHead *__list, void *__new, size_t __offset);
+void* OSAtomicDequeue( OSQueueHead *__list, size_t __offset);
+
+
+/* Memory barrier.  It is both a read and write barrier.
   */
  void    OSMemoryBarrier( void );
  
+
  __END_DECLS
  
  #endif /* _OSATOMIC_H_ */