+#define ATOMIC_UP 0 /* uniprocessor: no lock prefix needed */
+#define ATOMIC_MP 1 /* multiprocessor: emit a lock prefix */
+#define ATOMIC_RET_ORIG 0 /* return the original value in %eax */
+#define ATOMIC_RET_NEW 1 /* return the new value in %eax */
+
+// compare and exchange 32-bit
+// xchg32 <new> <dst> <mp>
+.macro xchg32
+ .if $2 == ATOMIC_MP
+ lock
+ .endif
+ cmpxchgl $0, ($1)
+.endm
+
+// compare and exchange 64-bit
+// xchg64 <new> <dst> <mp>
+.macro xchg64
+ .if $2 == ATOMIC_MP
+ lock
+ .endif
+ cmpxchgq $0, ($1)
+.endm
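+
+// Expansion sketch (illustrative operands, not part of this change):
+//	xchg32 %edx, %rsi, ATOMIC_MP
+// emits
+//	lock
+//	cmpxchgl %edx, (%rsi)
+// which compares %eax with (%rsi); if equal it stores %edx and sets ZF,
+// otherwise it clears ZF and loads the current memory value into %eax.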
+
+#define ATOMIC_ARITHMETIC(instr, orig, mp) \
+ movl (%rsi), %eax /* load current value (*2nd arg) into %eax */ ;\
+1: movl %eax, %edx /* copy value to new reg */ ;\
+ instr %edi, %edx /* apply instr to %edx with the 1st arg (%edi) */ ;\
+ xchg32 %edx, %rsi, mp /* do the compare swap (see macro above) */ ;\
+ jnz 1b /* retry if it failed; %eax now holds the current value */ ;\
+ .if orig == ATOMIC_RET_NEW /* to return the new value, overwrite eax */ ;\
+ movl %edx, %eax /* return the new value */ ;\
+ .endif
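+
+// Usage sketch: a hypothetical OSAtomicAdd32 built from the macro above
+// (the entry-point name and the addl/ATOMIC_RET_NEW choice are assumptions,
+// not part of this change):
+//	.globl _OSAtomicAdd32
+//	_OSAtomicAdd32:
+//	ATOMIC_ARITHMETIC(addl, ATOMIC_RET_NEW, ATOMIC_MP)
+//	ret /* new value returned in %eax */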
+
+// Used in OSAtomicTestAndSet( uint32_t n, void *value ), assumes ABI parameter locations (n in %edi, value in %rsi)
+// Manpage says bit to test/set is (0x80 >> (n & 7)) of byte (addr + (n >> 3))
+#define ATOMIC_BIT_OP(instr, mp) \
+ xorl $7, %edi /* bits are numbered big-endian within each byte (0x80 >> (n & 7)), so flip the low 3 bits */ ;\
+ shlq $3, %rsi /* convert byte address to bit address */ ;\
+ addq %rdi, %rsi /* add bit offset to form absolute bit number */ ;\
+ movq %rsi, %rdi /* copy absolute bit number */ ;\
+ andq $31, %rdi /* keep bit offset in range 0..31 */ ;\
+ xorq %rdi, %rsi /* clear low 5 bits to 32-bit align the bit number */ ;\
+ shrq $3, %rsi /* convert back to a 4-byte aligned byte address */ ;\
+ .if mp == ATOMIC_MP /* don't plant the lock in UP code */ ;\
+ lock /* lock the bit test */ ;\
+ .endif ;\
+ instr %edi, (%rsi) /* apply the bit-test instruction supplied to the macro */ ;\
+ setc %al /* CF holds the original value of the bit */ ;\
+ movzbl %al,%eax /* widen in case caller assumes we return an int */
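+
+// Usage sketch: a hypothetical OSAtomicTestAndSet built from the macro above
+// (the entry-point name is an assumption, not part of this change):
+//	.globl _OSAtomicTestAndSet
+//	_OSAtomicTestAndSet:
+//	ATOMIC_BIT_OP(btsl, ATOMIC_MP)
+//	ret /* returns the original value of the bit (0 or 1) in %eax */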