]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/commpage/atomic.s
xnu-1456.1.26.tar.gz
[apple/xnu.git] / osfmk / i386 / commpage / atomic.s
index 25f9ca6ca22d77363d8320ab771d4e5cc54ef3e2..769698b0fe05cb40e5db68ae73e15dc071d13644 100644 (file)
@@ -1,14 +1,19 @@
 /*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -18,7 +23,7 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
 #include <sys/appleapiopts.h>
@@ -27,9 +32,6 @@
 
 /* OSAtomic.h library native implementations. */
 
-       .text
-       .align  2, 0x90
-
 // This is a regparm(3) subroutine used by:
 
 // bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value);
 
 //     TODO: move the .long onto a separate page to reduce icache pollution (?)
 
-Lcompare_and_swap32_mp:
+COMMPAGE_FUNCTION_START(compare_and_swap32_mp, 32, 4)
 .long  _COMM_PAGE_COMPARE_AND_SWAP32+4
        lock
        cmpxchgl  %edx, (%ecx)
        ret
+COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP)
 
-    COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP)
-
-Lcompare_and_swap32_up:
+COMMPAGE_FUNCTION_START(compare_and_swap32_up, 32, 4)
 .long  _COMM_PAGE_COMPARE_AND_SWAP32+4
        cmpxchgl %edx, (%ecx)
        ret
-
-    COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0)
+COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0)
 
 // This is a subroutine used by:
 // bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value);
@@ -70,20 +70,18 @@ Lcompare_and_swap32_up:
 // on success: returns with ZF set
 // on failure: returns with *value in %eax/%edx, ZF clear
 
-Lcompare_and_swap64_mp:
+COMMPAGE_FUNCTION_START(compare_and_swap64_mp, 32, 4)
 .long  _COMM_PAGE_COMPARE_AND_SWAP64+4
        lock
        cmpxchg8b (%esi)
        ret
+COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP)
 
-    COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP)
-
-Lcompare_and_swap64_up:
+COMMPAGE_FUNCTION_START(compare_and_swap64_up, 32, 4)
 .long  _COMM_PAGE_COMPARE_AND_SWAP64+4
        cmpxchg8b (%esi)
        ret
-
-    COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0)
+COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0)
 
 // This is a subroutine used by:
 // bool OSAtomicTestAndSet( uint32_t n, void *value );
@@ -91,20 +89,18 @@ Lcompare_and_swap64_up:
 
 // Returns: old value of bit in CF
 
-Lbit_test_and_set_mp:
+COMMPAGE_FUNCTION_START(bit_test_and_set_mp, 32, 4)
 .long  _COMM_PAGE_BTS+4
        lock
-       bts %eax, (%edx)
+       btsl %eax, (%edx)
        ret
+COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP)
 
-    COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP)
-
-Lbit_test_and_set_up:
+COMMPAGE_FUNCTION_START(bit_test_and_set_up, 32, 4)
 .long  _COMM_PAGE_BTS+4
-       bts %eax, (%edx)
+       btsl %eax, (%edx)
        ret
-
-    COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0)
+COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0)
 
 // This is a subroutine used by:
 // bool OSAtomicTestAndClear( uint32_t n, void *value );
@@ -112,20 +108,18 @@ Lbit_test_and_set_up:
 
 // Returns: old value of bit in CF
 
-Lbit_test_and_clear_mp:
+COMMPAGE_FUNCTION_START(bit_test_and_clear_mp, 32, 4)
 .long  _COMM_PAGE_BTC+4
        lock
-       btc %eax, (%edx)
+       btrl %eax, (%edx)
        ret
+COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP)
 
-    COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP)
-
-Lbit_test_and_clear_up:
+COMMPAGE_FUNCTION_START(bit_test_and_clear_up, 32, 4)
 .long  _COMM_PAGE_BTC+4
-       btc %eax, (%edx)
+       btrl %eax, (%edx)
        ret
-
-    COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0)
+COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0)
 
 // This is a subroutine used by:
 // int32_t OSAtomicAdd32( int32_t amt, int32_t *value );
@@ -134,17 +128,269 @@ Lbit_test_and_clear_up:
 // Returns: old value in %eax
 // NB: OSAtomicAdd32 returns the new value,  so clients will add amt to %eax 
 
-Latomic_add32_mp:
+COMMPAGE_FUNCTION_START(atomic_add32_mp, 32, 4)
 .long  _COMM_PAGE_ATOMIC_ADD32+4
        lock
        xaddl   %eax, (%edx)
        ret
-               
-    COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP)
+COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP)
 
-Latomic_add32_up:
+COMMPAGE_FUNCTION_START(atomic_add32_up, 32, 4)
 .long  _COMM_PAGE_ATOMIC_ADD32+4
        xaddl   %eax, (%edx)
        ret
+COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0)
+    
+    
+// OSMemoryBarrier()
+// These are used both in 32 and 64-bit mode.  We use a fence even on UP
+// machines, so this function can be used with nontemporal stores.
+
+COMMPAGE_FUNCTION_START(memory_barrier, 32, 4)
+       lock
+       addl    $0,(%esp)
+       ret
+COMMPAGE_DESCRIPTOR(memory_barrier,_COMM_PAGE_MEMORY_BARRIER,0,kHasSSE2);
+
+COMMPAGE_FUNCTION_START(memory_barrier_sse2, 32, 4)
+       mfence
+       ret
+COMMPAGE_DESCRIPTOR(memory_barrier_sse2,_COMM_PAGE_MEMORY_BARRIER,kHasSSE2,0);
+    
+
+/*
+ *     typedef volatile struct {
+ *             void    *opaque1;  <-- ptr to 1st queue element or null
+ *             long     opaque2;  <-- generation count
+ *     } OSQueueHead;
+ *
+ * void  OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset);
+ */
+
+COMMPAGE_FUNCTION_START(AtomicEnqueue, 32, 4)
+       pushl   %edi
+       pushl   %esi
+       pushl   %ebx
+       movl    16(%esp),%edi   // %edi == ptr to list head
+       movl    20(%esp),%ebx   // %ebx == new
+       movl    24(%esp),%esi   // %esi == offset
+       movl    (%edi),%eax     // %eax == ptr to 1st element in Q
+       movl    4(%edi),%edx    // %edx == current generation count
+1:
+       movl    %eax,(%ebx,%esi)// link to old list head from new element
+       movl    %edx,%ecx
+       incl    %ecx            // increment generation count
+       lock                    // always lock for now...
+       cmpxchg8b (%edi)        // ...push on new element
+       jnz     1b
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret
+COMMPAGE_DESCRIPTOR(AtomicEnqueue,_COMM_PAGE_ENQUEUE,0,0)
+       
+       
+/* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */
+
+COMMPAGE_FUNCTION_START(AtomicDequeue, 32, 4)
+       pushl   %edi
+       pushl   %esi
+       pushl   %ebx
+       movl    16(%esp),%edi   // %edi == ptr to list head
+       movl    20(%esp),%esi   // %esi == offset
+       movl    (%edi),%eax     // %eax == ptr to 1st element in Q
+       movl    4(%edi),%edx    // %edx == current generation count
+1:
+       testl   %eax,%eax       // list empty?
+       jz      2f              // yes
+       movl    (%eax,%esi),%ebx // point to 2nd in Q
+       movl    %edx,%ecx
+       incl    %ecx            // increment generation count
+       lock                    // always lock for now...
+       cmpxchg8b (%edi)        // ...pop off 1st element
+       jnz     1b
+2:
+       popl    %ebx
+       popl    %esi
+       popl    %edi
+       ret                     // ptr to 1st element in Q still in %eax
+COMMPAGE_DESCRIPTOR(AtomicDequeue,_COMM_PAGE_DEQUEUE,0,0)
+
+
+
+/************************* x86_64 versions follow **************************/
+
+
+// This is a subroutine used by:
+
+// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value);
+// int32_t OSAtomicAnd32( int32_t mask, int32_t *value);
+// int32_t OSAtomicOr32( int32_t mask, int32_t *value);
+// int32_t OSAtomicXor32( int32_t mask, int32_t *value);
+
+// It assumes: old -> %rdi  (ie, it follows the ABI parameter conventions)
+//             new -> %rsi
+//             value -> %rdx
+// on success: returns with ZF set
+// on failure: returns with *value in %eax, ZF clear
+
+COMMPAGE_FUNCTION_START(compare_and_swap32_mp_64, 64, 4)
+       movl    %edi,%eax                       // put old value where "cmpxchg" wants it
+       lock
+       cmpxchgl  %esi, (%rdx)
+       ret
+COMMPAGE_DESCRIPTOR(compare_and_swap32_mp_64,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP)
+
+COMMPAGE_FUNCTION_START(compare_and_swap32_up_64, 64, 4)
+       movl    %edi,%eax                       // put old value where "cmpxchg" wants it
+       cmpxchgl  %esi, (%rdx)
+       ret
+COMMPAGE_DESCRIPTOR(compare_and_swap32_up_64,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0)
+
+// This is a subroutine used by:
+// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value);
+
+// It assumes: old -> %rdi  (ie, it follows the ABI parameter conventions)
+//             new -> %rsi
+//             value -> %rdx
+// on success: returns with ZF set
+// on failure: returns with *value in %rax, ZF clear
+
+COMMPAGE_FUNCTION_START(compare_and_swap64_mp_64, 64, 4)
+       movq    %rdi,%rax                       // put old value where "cmpxchg" wants it
+       lock
+       cmpxchgq  %rsi, (%rdx)
+       ret
+COMMPAGE_DESCRIPTOR(compare_and_swap64_mp_64,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP)
+
+COMMPAGE_FUNCTION_START(compare_and_swap64_up_64, 64, 4)
+       movq    %rdi,%rax                       // put old value where "cmpxchg" wants it
+       cmpxchgq  %rsi, (%rdx)
+       ret
+COMMPAGE_DESCRIPTOR(compare_and_swap64_up_64,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0)
+
+// This is a subroutine used by:
+// bool OSAtomicTestAndSet( uint32_t n, void *value );
+// It is called with standard register conventions:
+//                     n = %rdi
+//                     value = %rsi
+// Returns: old value of bit in CF
+
+COMMPAGE_FUNCTION_START(bit_test_and_set_mp_64, 64, 4)
+       lock
+       btsl %edi, (%rsi)
+       ret
+COMMPAGE_DESCRIPTOR(bit_test_and_set_mp_64,_COMM_PAGE_BTS,0,kUP)
+
+COMMPAGE_FUNCTION_START(bit_test_and_set_up_64, 64, 4)
+       btsl %edi, (%rsi)
+       ret
+COMMPAGE_DESCRIPTOR(bit_test_and_set_up_64,_COMM_PAGE_BTS,kUP,0)
+
+// This is a subroutine used by:
+// bool OSAtomicTestAndClear( uint32_t n, void *value );
+// It is called with standard register conventions:
+//                     n = %rdi
+//                     value = %rsi
+// Returns: old value of bit in CF
+
+COMMPAGE_FUNCTION_START(bit_test_and_clear_mp_64, 64, 4)
+       lock
+       btrl %edi, (%rsi)
+       ret
+COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp_64,_COMM_PAGE_BTC,0,kUP)
+
+COMMPAGE_FUNCTION_START(bit_test_and_clear_up_64, 64, 4)
+       btrl %edi, (%rsi)
+       ret
+COMMPAGE_DESCRIPTOR(bit_test_and_clear_up_64,_COMM_PAGE_BTC,kUP,0)
+
+// This is a subroutine used by:
+// int32_t OSAtomicAdd32( int32_t amt, int32_t *value );
+// It is called with standard register conventions:
+//                     amt = %rdi
+//                     value = %rsi
+// Returns: old value in %edi
+// NB: OSAtomicAdd32 returns the new value,  so clients will add amt to %edi 
 
-    COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0)
+COMMPAGE_FUNCTION_START(atomic_add32_mp_64, 64, 4)
+       lock
+       xaddl   %edi, (%rsi)
+       ret
+COMMPAGE_DESCRIPTOR(atomic_add32_mp_64,_COMM_PAGE_ATOMIC_ADD32,0,kUP)
+
+COMMPAGE_FUNCTION_START(atomic_add32_up_64, 64, 4)
+       xaddl   %edi, (%rsi)
+       ret
+COMMPAGE_DESCRIPTOR(atomic_add32_up_64,_COMM_PAGE_ATOMIC_ADD32,kUP,0)
+
+// This is a subroutine used by:
+// int64_t OSAtomicAdd64( int64_t amt, int64_t *value );
+// It is called with standard register conventions:
+//                     amt = %rdi
+//                     value = %rsi
+// Returns: old value in %rdi
+// NB: OSAtomicAdd64 returns the new value,  so clients will add amt to %rdi 
+
+COMMPAGE_FUNCTION_START(atomic_add64_mp_64, 64, 4)
+       lock
+       xaddq   %rdi, (%rsi)
+       ret
+COMMPAGE_DESCRIPTOR(atomic_add64_mp_64,_COMM_PAGE_ATOMIC_ADD64,0,kUP)
+
+COMMPAGE_FUNCTION_START(atomic_add64_up_64, 64, 4)
+       xaddq   %rdi, (%rsi)
+       ret
+COMMPAGE_DESCRIPTOR(atomic_add64_up_64,_COMM_PAGE_ATOMIC_ADD64,kUP,0)
+
+
+/*
+ *     typedef volatile struct {
+ *             void    *opaque1;  <-- ptr to 1st queue element or null
+ *             long     opaque2;  <-- generation count
+ *     } OSQueueHead;
+ *
+ * void  OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset);
+ */
+
+// %rdi == list head, %rsi == new, %rdx == offset
+
+COMMPAGE_FUNCTION_START(AtomicEnqueue_64, 64, 4)
+       pushq   %rbx
+       movq    %rsi,%rbx       // %rbx == new
+       movq    %rdx,%rsi       // %rsi == offset
+       movq    (%rdi),%rax     // %rax == ptr to 1st element in Q
+       movq    8(%rdi),%rdx    // %rdx == current generation count
+1:
+       movq    %rax,(%rbx,%rsi)// link to old list head from new element
+       movq    %rdx,%rcx
+       incq    %rcx            // increment generation count
+       lock                    // always lock for now...
+       cmpxchg16b (%rdi)       // ...push on new element
+       jnz     1b
+       popq    %rbx
+       ret
+COMMPAGE_DESCRIPTOR(AtomicEnqueue_64,_COMM_PAGE_ENQUEUE,0,0)
+       
+       
+/* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */
+
+// %rdi == list head, %rsi == offset
+
+COMMPAGE_FUNCTION_START(AtomicDequeue_64, 64, 4)
+       pushq   %rbx
+       movq    (%rdi),%rax     // %rax == ptr to 1st element in Q
+       movq    8(%rdi),%rdx    // %rdx == current generation count
+1:
+       testq   %rax,%rax       // list empty?
+       jz      2f              // yes
+       movq    (%rax,%rsi),%rbx // point to 2nd in Q
+       movq    %rdx,%rcx
+       incq    %rcx            // increment generation count
+       lock                    // always lock for now...
+       cmpxchg16b (%rdi)       // ...pop off 1st element
+       jnz     1b
+2:
+       popq    %rbx
+       ret                     // ptr to 1st element in Q still in %rax
+COMMPAGE_DESCRIPTOR(AtomicDequeue_64,_COMM_PAGE_DEQUEUE,0,0)