git.saurik.com Git - apple/libc.git/commitdiff
Libc-594.1.4.tar.gz (tags: mac-os-x-1063, v594.1.4)
author    Apple <opensource@apple.com>
Fri, 22 Jan 2010 22:55:51 +0000 (22:55 +0000)
committer Apple <opensource@apple.com>
Fri, 22 Jan 2010 22:55:51 +0000 (22:55 +0000)
18 files changed:
arm/pthreads/Makefile.inc
arm/pthreads/start_wqthread.s [new file with mode: 0644]
arm/pthreads/thread_start.s [new file with mode: 0644]
arm/string/Makefile.inc
arm/string/NEON/bcopy.s [new file with mode: 0644]
arm/string/NEON/bzero.s [new file with mode: 0644]
arm/string/bcopy.s
arm/string/bzero.s
arm/string/memcmp.s [new file with mode: 0644]
arm/sys/OSAtomic-v4.c
gdtoa/FreeBSD/gdtoa-misc.c.patch
gdtoa/gdtoa-misc-fbsd.c
gen/asl.c
gen/magazine_malloc.c
gen/stack_logging_disk.c
locale/xlocale.c
pthreads/pthread.c
pthreads/pthread_machdep.h

index cc925649a1745c40597db83e54e73e6d6fdac067..4addcfe7525832af9b68fc8da90b6f46cb7b6cc6 100644 (file)
@@ -4,4 +4,7 @@ MDSRCS += \
        pthread_set_self.s \
        pthread_self.s \
        pthread_getspecific.s \
-       init_cpu_capabilities.c
+       init_cpu_capabilities.c \
+       start_wqthread.s \
+       thread_start.s
+
diff --git a/arm/pthreads/start_wqthread.s b/arm/pthreads/start_wqthread.s
new file mode 100644 (file)
index 0000000..3cf471e
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2009 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <arm/arch.h>
+
+#define        __APPLE_API_PRIVATE
+#include <machine/cpu_capabilities.h>
+#undef __APPLE_API_PRIVATE
+
+// This routine is never called directly by user code; the kernel jumps to it.
+// Arguments 0 through 3 are already in registers r0 through r3.
+// The two extra arguments must be pushed onto the stack before calling
+// _pthread_wqthread():
+// arg4 is in r4, arg5 is in r5.
+
+        .text
+        .align 2
+        .globl _start_wqthread
+_start_wqthread:
+    stmfd sp!, {r4, r5}
+       bl __pthread_wqthread
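For context, a sketch of the callee this trampoline sets up (the prototype appears verbatim in the pthreads/pthread_machdep.h hunk at the end of this commit). Under the ARM AAPCS the first four arguments travel in r0-r3 and the fifth is read from the stack, so the trampoline must push r4 before branching; pushing r5 as well keeps sp 8-byte aligned and carries the comment's arg5.

    /* Sketch only -- not part of the diff. */
    void _pthread_wqthread(pthread_t self,          /* r0 */
                           mach_port_t kport,       /* r1 */
                           void *stackaddr,         /* r2 */
                           pthread_workitem_t item, /* r3 */
                           int reuse);              /* stack, pushed from r4 */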
diff --git a/arm/pthreads/thread_start.s b/arm/pthreads/thread_start.s
new file mode 100644 (file)
index 0000000..e7574d6
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2009 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <arm/arch.h>
+
+#define        __APPLE_API_PRIVATE
+#include <machine/cpu_capabilities.h>
+#undef __APPLE_API_PRIVATE
+
+// This routine is never called directly by user code; the kernel jumps to it.
+// Arguments 0 through 3 are already in registers r0 through r3.
+// The two extra arguments must be pushed onto the stack before calling
+// _pthread_start():
+// arg4 is in r4, arg5 is in r5.
+  
+        .text
+        .align 2
+        .globl _thread_start
+_thread_start:
+    stmfd sp!, {r4, r5}
+       bl __pthread_start
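Here the pushed pair really is two arguments: _pthread_start (prototype also visible in the pthread_machdep.h hunk below) takes six parameters, and under the AAPCS the fifth and sixth must live on the stack.

    /* Sketch only -- not part of the diff. */
    void _pthread_start(pthread_t self,           /* r0 */
                        mach_port_t kport,        /* r1 */
                        void *(*fun)(void *),     /* r2 */
                        void *funarg,             /* r3 */
                        size_t stacksize,         /* stack, pushed from r4 */
                        unsigned int flags);      /* stack, pushed from r5 */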
index c89ffa2d5cf1f408276a630931aa28040cc89b3c..73dcb7fe9f34f5815ef631e159c6fce81f2acabf 100644 (file)
@@ -4,10 +4,11 @@
 #
 .PATH: ${.CURDIR}/arm/string
 
-MDSRCS +=      \
-       bcopy.s \
-       bzero.s \
-       ffs.s \
+MDSRCS +=       \
+       bcopy.s  \
+       bzero.s  \
+       ffs.s    \
+       memcmp.s \
        strcmp.s \
        strlen.s
 
@@ -15,4 +16,4 @@ MDSRCS +=     \
 MDSRCS += memset_pattern.s
 .endif
 
-SUPPRESSSRCS += memcpy.c memmove.c memset.c strlen.c
+SUPPRESSSRCS += bcmp.c memcpy.c memmove.c memset.c strlen.c
diff --git a/arm/string/NEON/bcopy.s b/arm/string/NEON/bcopy.s
new file mode 100644 (file)
index 0000000..30abab1
--- /dev/null
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*****************************************************************************
+ * Cortex-A8 implementation                                                  *
+ *****************************************************************************/
+// Cortex-A8 implementations of memcpy( ), memmove( ) and bcopy( ).
+//
+// Our tests have shown that NEON is always a performance win for memcpy( ).
+// However, for the specific case of copies from a warm source to a cold
+// destination when the buffer size is between 1k and 32k, it is not enough
+// of a performance win to offset the increased power footprint, resulting
+// in an energy usage regression.  Thus, we detect that particular case, and
+// pass those copies through the ARM core registers.  All other copies larger
+// than 8 bytes are handled on NEON.
+//
+// Stephen Canon, August 2009
+
+.text
+.code 16
+.syntax unified
+
+// void bcopy(const void * source,
+//            void * destination,
+//            size_t length);
+//
+// void *memmove(void * destination,
+//               const void * source,
+//               size_t n);
+//
+// void *memcpy(void * restrict destination,
+//              const void * restrict source,
+//              size_t n);
+//
+// All three copy n successive bytes from source to destination.  memmove and
+// memcpy return destination, whereas bcopy has no return value.  Copying takes
+// place as if through a temporary buffer -- after return, destination contains
+// exactly the bytes from source, even if the buffers overlap.
+
+.thumb_func _bcopy
+.globl _bcopy    
+.thumb_func _memmove
+.globl _memmove
+.thumb_func _memcpy
+.globl _memcpy
+
+.align 2
+_bcopy:
+       mov       r3,      r0           // swap the first and second arguments
+       mov       r0,      r1           // and fall through into memmove
+       mov       r1,      r3           //
+
+.align 2
+_memmove:
+_memcpy:
+    subs      r3,      r0,  r1      // offset = destination addr - source addr
+    it        eq
+    bxeq      lr                    // if source == destination, early out
+
+//  Our preference is for using a (faster) front-to-back copy.  However, if
+//  0 < offset < length, it is necessary to copy back-to-front for correctness.
+//  We have already ruled out offset == 0, so we can use an unsigned compare
+//  with length -- if offset is higher, offset is either greater than length
+//  or negative.
+
+    cmp       r3,      r2
+    bhs       L_copyFrontToBack
+                             
+/*****************************************************************************
+ *  back to front copy                                                       *
+ *****************************************************************************/
+
+    mov       ip,      r0           // copy destination pointer.
+    add       r1,           r2      // move source pointer to end of source array
+    add       ip,           r2      // move destination pointer to end of dest array
+    
+    subs      r2,           $8      // if length - 8 is negative (i.e. length
+    blt       L_scalarReverseCopy   // is less than 8), jump to cleanup path.
+    tst       ip,           $7      // if (destination + length) is doubleword
+    beq       L_vectorReverseCopy   // aligned, jump to fast path.
+    
+0:  ldrb      r3,     [r1, $-1]!    // load byte
+    sub       r2,           $1      // decrement length
+    strb      r3,     [ip, $-1]!    // store byte
+    tst       ip,           $7      // test alignment
+    bne       0b
+    
+    cmp       r2,           $0      // if length - 8 is negative,
+    blt       L_scalarReverseCopy   // jump to the cleanup code
+                                    
+/*****************************************************************************
+ *  destination is doubleword aligned                                        *
+ *****************************************************************************/
+
+L_vectorReverseCopy:
+    ands      r3,      r1,  $3      // Extract the alignment of the source
+    bic       r1,           $3
+    tbh      [pc, r3, lsl $1]       // Dispatch table on source alignment
+0:  
+.short (L_reverseAligned0-0b)/2     // The NEON alignment hardware does not work
+.short (L_reverseAligned1-0b)/2     // properly with sub 4-byte alignment and
+.short (L_reverseAligned2-0b)/2     // buffers that are uncacheable, so we need
+.short (L_reverseAligned3-0b)/2     // to have a software workaround.
+
+/*****************************************************************************
+ *  source is also at least word aligned                                     *
+ *****************************************************************************/
+    
+L_reverseAligned0:
+    subs      r2,           $0x38   // if length - 64 is negative, jump to
+    blt       L_reverseVectorCleanup// the cleanup path.
+    tst       ip,           $0x38   // if (destination + length) is cacheline
+    beq       L_reverseCachelineAligned // aligned, jump to the fast path.
+    
+0:  sub       r1,           $8      // copy eight bytes at a time until the
+    vld1.32  {d0},    [r1]          // destination is 8 byte aligned.
+    sub       ip,           $8      //
+    sub       r2,           $8      //
+    tst       ip,           $0x38   //
+    vst1.64  {d0},    [ip, :64]     //
+    bne       0b                    //
+    
+    cmp       r2,           $0      // if length - 64 is negative,
+    blt       L_reverseVectorCleanup// jump to the cleanup code
+    
+L_reverseCachelineAligned:
+    sub       r3,      r2,  $0x3c0  // If 1024 < length < 32768, use core
+    cmp       r3,          $0x7c00  // register copies instead of NEON to
+    blo       L_useSTMDB            // control energy usage.
+    
+    sub       r1,           $32     // decrement source
+    sub       ip,           $32     // decrement destination
+    mov       r3,           $-32    // load address increment
+    tst       r1,           $0x1f   // if source shares 32 byte alignment
+    beq       L_reverseSourceAligned// jump to loop with more alignment hints
+    
+    vld1.32  {q2,q3}, [r1], r3      // This loop handles 4-byte aligned copies
+    vld1.32  {q0,q1}, [r1], r3      // as generally as possible.
+    subs      r2,           $64     // 
+    vst1.64  {q2,q3}, [ip,:256], r3 // The Cortex-A8 NEON unit does not always
+    blt       1f                    // properly handle misalignment in vld1
+.align 3                            // with an element size of 8 or 16, so
+0:  vld1.32  {q2,q3}, [r1], r3      // this is the best we can do without
+    vst1.64  {q0,q1}, [ip,:256], r3 // handling alignment in software.
+    vld1.32   {q0,q1}, [r1], r3     // 
+    subs      r2,           $64     // 
+    vst1.64  {q2,q3}, [ip,:256], r3 // 
+    bge       0b                    // 
+    b         1f                    // 
+    
+L_reverseSourceAligned:
+    vld1.64  {q2,q3}, [r1,:256], r3 // Identical to loop above except for
+    vld1.64  {q0,q1}, [r1,:256], r3 // additional alignment information; this
+    subs      r2,           $64     // gets an additional .5 bytes per cycle
+    vst1.64  {q2,q3}, [ip,:256], r3 // on Cortex-A8.
+    blt       1f                    // 
+.align 3                            // 
+0:  vld1.64  {q2,q3}, [r1,:256], r3 //
+    vst1.64  {q0,q1}, [ip,:256], r3 //
+    vld1.64  {q0,q1}, [r1,:256], r3 //
+    subs      r2,           $64     //
+    vst1.64  {q2,q3}, [ip,:256], r3 //
+    bge       0b                    //
+1:  vst1.64  {q0,q1}, [ip,:256], r3 // loop cleanup: final 32 byte store
+    add       r1,           $32     // point source at last element stored
+    add       ip,           $32     // point destination at last element stored
+    
+L_reverseVectorCleanup:
+    adds      r2,           $0x38   // If (length - 8) < 0, goto scalar cleanup
+    blt       L_scalarReverseCopy   //
+
+0:  sub       r1,           $8      // copy eight bytes at a time until
+    vld1.32  {d0},    [r1]          // (length - 8) < 0.
+    sub       ip,           $8      //
+    subs      r2,           $8      //
+    vst1.64  {d0},    [ip, :64]     //
+    bge       0b                    //
+
+/*****************************************************************************
+ *  sub-doubleword cleanup copies                                            *
+ *****************************************************************************/
+
+L_scalarReverseCopy:
+    adds      r2,           #0x8    // restore length
+    it        eq                    // if this is zero
+    bxeq      lr                    // early out
+         
+0:  ldrb      r3,     [r1, #-1]!    // load a byte from source
+    strb      r3,     [ip, #-1]!    // store to destination
+    subs      r2,           #0x1    // subtract one from length
+    bne       0b                    // if non-zero, repeat
+    bx        lr                    // return
+         
+/*****************************************************************************
+ *  STMDB loop for 1k-32k buffers                                            *
+ *****************************************************************************/
+
+L_useSTMDB:
+    push     {r4-r8,r10,r11}
+.align 3
+0:  ldmdb        r1!,  {r3-r8,r10,r11}
+    subs      r2,           #0x40
+    stmdb     ip!,  {r3-r8,r10,r11}
+    ldmdb        r1!,  {r3-r8,r10,r11}
+       pld              [r1, #-0x40]
+    stmdb     ip!,  {r3-r8,r10,r11}
+    bge       0b
+    pop      {r4-r8,r10,r11}
+    b         L_reverseVectorCleanup
+    
+/*****************************************************************************
+ *  Misaligned reverse vld1 loop                                             *
+ *****************************************************************************/
+
+// Software alignment fixup to handle source and dest that are relatively
+// misaligned mod 4 bytes.  Load two 4-byte aligned double words from source, 
+// use vext.8 to extract a double word to store, and perform an 8-byte aligned
+// store to destination.
+
+#define RCOPY_UNALIGNED(offset)      \
+    subs      r2,          $8       ;\
+    blt       2f                    ;\
+    sub       r1,          $8       ;\
+    sub       ip,          $8       ;\
+    mov       r3,          $-8      ;\
+    vld1.32  {d2,d3}, [r1], r3      ;\
+    subs      r2,          $8       ;\
+    blt       1f                    ;\
+0:  vext.8    d0,  d2, d3, $(offset);\
+    vmov      d3,      d2           ;\
+    vld1.32  {d2},    [r1], r3      ;\
+    subs      r2,          $8       ;\
+    vst1.64  {d0},    [ip, :64], r3 ;\
+    bge       0b                    ;\
+1:  vext.8    d0,  d2, d3, $(offset);\
+    add       r1,          $8       ;\
+    vst1.64  {d0},    [ip, :64]     ;\
+2:  add       r2,          $8       ;\
+    add       r1,          $(offset);\
+    b         L_scalarReverseCopy
+
+L_reverseAligned1:
+    RCOPY_UNALIGNED(1)
+L_reverseAligned2:
+    RCOPY_UNALIGNED(2)
+L_reverseAligned3:
+    RCOPY_UNALIGNED(3)
+
+/*****************************************************************************
+ *  front to back copy                                                       *
+ *****************************************************************************/
+
+L_copyFrontToBack:
+    mov       ip,      r0           // copy destination pointer.
+    subs      r2,           $8      // if length - 8 is negative (i.e. length
+    blt       L_scalarCopy          // is less than 8), jump to cleanup path.
+    tst       ip,           $7      // if the destination is doubleword
+    beq       L_vectorCopy          // aligned, jump to fast path.
+    
+0:  ldrb      r3,     [r1], $1      // load byte
+    sub       r2,           $1      // decrement length
+    strb      r3,     [ip], $1      // store byte
+    tst       ip,           $7      // test alignment
+    bne       0b
+    
+    cmp       r2,           $0      // if length - 8 is negative,
+    blt       L_scalarCopy          // jump to the cleanup code
+    
+/*****************************************************************************
+ *  destination is doubleword aligned                                        *
+ *****************************************************************************/
+
+L_vectorCopy:
+    ands      r3,      r1,  $3      // Extract the alignment of the source
+    bic       r1,           $3
+    tbh      [pc, r3, lsl $1]       // Dispatch table on source alignment
+0:  
+.short (L_sourceAligned0-0b)/2      // The NEON alignment hardware does not work
+.short (L_sourceAligned1-0b)/2      // properly with sub 4-byte alignment and
+.short (L_sourceAligned2-0b)/2      // buffers that are uncacheable, so we need
+.short (L_sourceAligned3-0b)/2      // to have a software workaround.
+
+/*****************************************************************************
+ *  source is also at least word aligned                                     *
+ *****************************************************************************/
+    
+L_sourceAligned0:
+    subs      r2,           $0x38   // If (length - 64) < 0
+    blt       L_vectorCleanup       //   jump to cleanup code
+    tst       ip,           $0x38   // If destination is 64 byte aligned
+    beq       L_cachelineAligned    //   jump to main loop
+    
+0:  vld1.32  {d0},    [r1]!         // Copy one double word at a time until
+    sub       r2,           $8      // the destination is 64-byte aligned.
+    vst1.64  {d0},    [ip, :64]!    //
+    tst       ip,           $0x38   //
+    bne       0b                    //
+    
+    cmp       r2,           $0      // If (length - 64) < 0, goto cleanup
+    blt       L_vectorCleanup       //
+    
+L_cachelineAligned:
+    sub       r3,      r2,  $0x3c0  // If 1024 < length < 32768, use core
+    cmp       r3,          $0x7c00  // register copies instead of NEON to
+    blo       L_useSTMIA            // control energy usage.
+    tst       r1,           $0x1f   // If source has 32-byte alignment, use
+    beq       L_sourceAligned32     // an optimized loop.
+    
+    vld1.32  {q2,q3}, [r1]!         // This is the most common path for small
+    vld1.32  {q0,q1}, [r1]!         // copies, which are alarmingly frequent.
+    subs      r2,           #0x40   // It requires 4-byte alignment on the
+    vst1.64  {q2,q3}, [ip, :256]!   // source.  For ordinary malloc'd buffers,
+    blt       1f                    // this path could handle only single-byte
+.align 3                            // alignment at speed by using vld1.8
+0:  vld1.32  {q2,q3}, [r1]!         // instead of vld1.32; however, the NEON
+    vst1.64  {q0,q1}, [ip, :256]!   // alignment handler misbehaves for some
+    vld1.32  {q0,q1}, [r1]!         // special copies if the element size is
+    subs      r2,           #0x40   // 8 or 16, so we need to work around
+    vst1.64  {q2,q3}, [ip, :256]!   // sub 4-byte alignment in software, in
+    bge       0b                    // another code path.
+    b         1f
+    
+L_sourceAligned32:
+    vld1.64  {q2,q3}, [r1, :256]!   // When the source shares 32-byte alignment
+    vld1.64  {q0,q1}, [r1, :256]!   // with the destination, we use this loop
+    subs      r2,           #0x40   // instead, which specifies the maximum
+    vst1.64  {q2,q3}, [ip, :256]!   // :256 alignment on all loads and stores.
+    blt       1f                    // 
+.align 3                            // This gets an additional .5 bytes per
+0:  vld1.64  {q2,q3}, [r1, :256]!   // cycle for in-cache copies, which is not
+    vst1.64  {q0,q1}, [ip, :256]!   // insignificant for this (rather common)
+    vld1.64  {q0,q1}, [r1, :256]!   // case.
+    subs      r2,           #0x40   // 
+    vst1.64  {q2,q3}, [ip, :256]!   // This is identical to the above loop,
+    bge       0b                    // except for the additional alignment.
+1:  vst1.64  {q0,q1}, [ip, :256]!   // 
+
+L_vectorCleanup:
+    adds      r2,           $0x38   // If (length - 8) < 0, goto scalar cleanup
+    blt       L_scalarCopy          //
+    
+0:  vld1.32  {d0},    [r1]!         // Copy one doubleword at a time until
+    subs      r2,           $8      // (length - 8) < 0.
+    vst1.64  {d0},    [ip, :64]!    //
+    bge       0b                    //
+
+/*****************************************************************************
+ *  sub-doubleword cleanup copies                                            *
+ *****************************************************************************/
+
+L_scalarCopy:
+    adds      r2,           #0x8    // restore length
+    it        eq                    // if this is zero
+    bxeq      lr                    // early out
+         
+0:  ldrb      r3,     [r1], #1      // load a byte from source
+    strb      r3,     [ip], #1      // store to destination
+    subs      r2,           #1      // subtract one from length
+    bne       0b                    // if non-zero, repeat
+    bx        lr                    // return
+    
+/*****************************************************************************
+ *  STMIA loop for 1k-32k buffers                                            *
+ *****************************************************************************/
+
+L_useSTMIA:
+    push     {r4-r8,r10,r11}
+.align 3
+0:  ldmia     r1!,  {r3-r8,r10,r11}
+    subs      r2,      r2,  #64
+    stmia     ip!,  {r3-r8,r10,r11}
+    ldmia     r1!,  {r3-r8,r10,r11}
+    pld      [r1, #64]
+    stmia     ip!,  {r3-r8,r10,r11}
+    bge       0b
+    pop      {r4-r8,r10,r11}
+    b         L_vectorCleanup
+    
+/*****************************************************************************
+ *  Misaligned forward vld1 loop                                             *
+ *****************************************************************************/
+
+// Software alignment fixup to handle source and dest that are relatively
+// misaligned mod 4 bytes.  Load two 4-byte aligned double words from source, 
+// use vext.8 to extract a double word to store, and perform an 8-byte aligned
+// store to destination.
+
+#define COPY_UNALIGNED(offset)       \
+    subs      r2,          $8       ;\
+    blt       2f                    ;\
+    vld1.32  {d2,d3}, [r1]!         ;\
+    subs      r2,          $8       ;\
+    blt       1f                    ;\
+0:  vext.8    d0,  d2, d3, $(offset);\
+    vmov      d2,      d3           ;\
+    vld1.32  {d3},    [r1]!         ;\
+    subs      r2,          $8       ;\
+    vst1.64  {d0},    [ip, :64]!    ;\
+    bge       0b                    ;\
+1:  vext.8    d0,  d2, d3, $(offset);\
+    sub       r1,          $8       ;\
+    vst1.64  {d0},    [ip, :64]!    ;\
+2:  add       r1,          $(offset);\
+    add       r2,          $8       ;\
+    b         L_scalarCopy
+
+L_sourceAligned1:
+    COPY_UNALIGNED(1)
+L_sourceAligned2:
+    COPY_UNALIGNED(2)
+L_sourceAligned3:
+    COPY_UNALIGNED(3)
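The dispatch at the top of _memmove is worth restating in C: with offset = destination - source taken as an unsigned value, a single compare against the length folds together the "no overlap" and "destination below source" cases, both of which permit the faster front-to-back copy. A byte-at-a-time model of that logic (a sketch, not the committed code):

    #include <stddef.h>
    #include <stdint.h>

    static void *model_memmove(void *dst, const void *src, size_t n)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;
        uintptr_t offset = (uintptr_t)d - (uintptr_t)s;

        if (offset == 0)                  /* src == dst: early out */
            return dst;
        if (offset >= n) {                /* no overlap, or dst below src: */
            while (n--) *d++ = *s++;      /* front-to-back copy is safe */
        } else {                          /* 0 < offset < n: dst overlaps the */
            d += n; s += n;               /* tail of src, so copy */
            while (n--) *--d = *--s;      /* back-to-front */
        }
        return dst;
    }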
diff --git a/arm/string/NEON/bzero.s b/arm/string/NEON/bzero.s
new file mode 100644 (file)
index 0000000..50b1c8e
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/**********************************************************************
+ * Cortex-A8 implementation                                           *
+ **********************************************************************/
+
+// Cortex-A8 implementations of memset( ) and bzero( ).  Main loop is 64-byte
+// NEON stores, unless the buffer length is > 1k.  Beyond that point, there is
+// little to no speed advantage with NEON (and a slight regression in some
+// measured cases), so we switch to the GPRs.
+//
+// The crossover point should be reevaluated for future architectures.
+//
+// -- Stephen Canon, August 2009
+
+.text
+.syntax unified
+.code 16
+
+// void bzero(void * destination,
+//            size_t length);
+//
+// zeros out a buffer length bytes long, beginning at the address destination.
+.thumb_func ___bzero
+.globl ___bzero
+.thumb_func _bzero
+.globl _bzero
+.align 2
+___bzero:
+_bzero:
+    mov     r2,     r1              // match the API to memset(dest, 0, length)
+    eor     r1,     r1              // and fall through into memset
+
+// void *memset(void * destination,
+//              int value, size_t n);
+//
+// writes value converted to an unsigned char to n successive bytes, beginning
+// at destination.
+
+// Notes on register usage:
+// 
+// Throughout this function, registers have nearly constant usage; the pattern
+// is:
+//
+//     r0 holds the original destination pointer, unmodified.  This value
+//        must be returned by the routine, so it is easiest to just leave it
+//        in place.
+//     r1 holds the value that is being copied into the buffer, in some stage
+//        of splattedness.  The low byte is guaranteed to always have the value
+//        but the higher bytes may or may not contain copies of it.
+//     r2 holds the length minus some offset, where the offset is always the
+//        number of bytes that the current loop stores per iteration.
+//     r3-r6,r8,r10,r11 are used with stmia, and will only ever contain splatted
+//        copies of the value to be stored.
+//     ip holds a pointer to the lowest byte in the array that has not yet been
+//        set to hold value.
+//     q0 and q1 hold splatted copies of the value in the vector path, and are
+//        otherwise unused.
+
+.thumb_func _memset
+.globl _memset
+.align 2
+_memset:
+    mov       ip,      r0           // copy destination pointer.
+    subs      r2,           #0x8    // if length - 8 is negative (i.e. length
+    and       r1,           #0xff   // is less than 8), jump to cleanup path.
+    blt       L_scalarCleanup       // 
+    
+    tst       ip,           #0x7    // if the destination is doubleword
+    beq       L_vectorCopy          // aligned, jump to fast path.
+    
+0:  strb      r1,     [ip], #1      // store one byte at a time until
+    sub       r2,           #1      // destination pointer is 8 byte aligned.
+    tst       ip,           #7      //
+    bne       0b                    //
+    
+    cmp       r2,           #0x0    // if length - 8 is negative,
+    blt       L_scalarCleanup       // jump to the cleanup code
+
+L_vectorCopy:
+    vdup.8    q0,      r1           // splat the byte to be stored across
+    subs      r2,           #0x38   // q0 and q1, and check if length - 64
+    vmov      q1,      q0           // is negative; if so, jump to the
+    blt       L_vectorCleanup       // cleanup code.
+    
+    tst       ip,           #0x38   // if the destination is cacheline
+    beq       L_cachelineAligned    // aligned, jump to the fast path.
+
+0:  vst1.64  {d0},    [ip, :64]!    // store one double word at a time until
+    sub       r2,           #8      // the destination is 64-byte aligned
+    tst       ip,           #0x38   // 
+    bne       0b
+    
+    cmp       r2,           #0x0    // if length - 64 is negative,
+    blt       L_vectorCleanup       // jump to the cleanup code
+
+L_cachelineAligned:
+    cmp       r2,           #0x3c0  // if length > 1024
+    bge       L_useSTMIA            // we use stmia instead
+
+.align 4                            // main loop
+0:  vst1.64  {q0,q1}, [ip, :256]!   // store 32 bytes
+    subs      r2,           #0x40   // decrement length by 64
+    vst1.64  {q0,q1}, [ip, :256]!   // store 32 bytes
+    bge       0b                    // if length - 64 >= 0, continue
+    
+L_vectorCleanup:
+    adds      r2,           #0x38   // if (length - 8) < 0, goto scalar cleanup
+    blt       L_scalarCleanup       //
+    
+0:  subs      r2,           #8      // store one double word at a time until
+    vst1.64  {d0},    [ip, :64]!    // (length - 8) < 0.
+    bge       0b
+    
+L_scalarCleanup:
+    adds      r2,           #8      // restore length
+    beq       1f                    // early out if zero.
+    
+0:  strb      r1,     [ip], #1      // store one byte at a time until length
+    subs      r2,           #1      // is zero.
+    bne       0b                    //
+1:  bx        lr                    // return.
+
+//  STMIA loop for large buffers
+//
+//  For stores larger than 1024 bytes, we use STMIA because we can't get enough
+//  of a speedup from NEON to offset the higher power draw of the NEON unit.
+//
+//  This crossover should be reevaluated on future architectures.
+//
+//  We avoid using r7 and r9, even though doing so is not strictly necessary.
+
+L_useSTMIA:
+    push     {r4,r5,r6,r8,r10,r11}
+    orr       r1,      r1,  r1, lsl #8
+    orr       r1,      r1,  r1, lsl #16
+    mov       r3,      r1
+    mov       r4,      r1
+    mov       r5,      r1
+    mov       r6,      r1
+    mov       r8,      r1
+    mov       r10,     r1
+    mov       r11,     r1
+.align 4
+0:  stmia     ip!,  {r1,r3,r4,r5,r6,r8,r10,r11}
+    subs      r2,           #0x40
+    stmia     ip!,  {r1,r3,r4,r5,r6,r8,r10,r11}
+    bge       0b
+    pop      {r4,r5,r6,r8,r10,r11}
+    b         L_vectorCleanup
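The register setup in L_useSTMIA uses the classic byte-splat: two shift-and-OR steps replicate the low byte of the fill value across a full word before the stmia bursts. In C (a sketch):

    #include <stdint.h>

    static uint32_t splat_byte(uint32_t value)
    {
        value &= 0xff;            /* memset truncates value to unsigned char */
        value |= value << 8;      /* 0x000000AB -> 0x0000ABAB */
        value |= value << 16;     /* 0x0000ABAB -> 0xABABABAB */
        return value;
    }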
index da24152f7e9e073bf676dad4946aa6158440b52c..2e67e1cc4089ded65021534bb2dd743ea55b19c4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
 
+#if defined __thumb2__ && defined __ARM_NEON__
+    
+// Use our tuned NEON implementation when it is available.  Otherwise fall back
+// on more generic ARM code.
+
+#include "NEON/bcopy.s"
+    
+#else // defined __thumb2__ && defined __ARM_NEON__
+
+/*****************************************************************************
+ * ARMv5 and ARMv6 implementation                                            *
+ *****************************************************************************/
 #include <arm/arch.h>
 
 .text
@@ -398,4 +411,5 @@ Lalign3_forward_loop:
 Lexit:
        ldmfd   sp!, {r0, r4, r5, r7, pc}
 
+#endif // defined __thumb2__ && defined __ARM_NEON__
 
index ada372792ee67826824065416017c0d45bf1351c..e3a3a8de13ea234528ceab6e7bf10341af50abc5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
+#if defined __thumb2__ && defined __ARM_NEON__
+
+// Use our tuned NEON implementation when it is available.  Otherwise fall back
+// on more generic ARM code.
+
+#include "NEON/bzero.s"
+
+#else // defined __thumb2__ && defined __ARM_NEON__
 
 #include <mach/machine/asm.h>
 #include <architecture/arm/asm_help.h>
@@ -160,3 +169,5 @@ L_unaligned:
        b               L_lessthan64aligned
 
 X_LEAF(___bzero, _bzero)
+
+#endif // defined __thumb2__ && defined __ARM_NEON__
diff --git a/arm/string/memcmp.s b/arm/string/memcmp.s
new file mode 100644 (file)
index 0000000..83e0f87
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+// ARM Assembly implementation of memcmp( ) from <string.h>
+// Uses Thumb2 if it is available, otherwise generates ARM code.
+//
+// -- Stephen Canon, August 2009
+//
+// The basic idea is to use word compares instead of byte compares as long as
+// at least four bytes remain to be compared.  However, because memcmp( )
+// compares the buffers as though they were big-endian unsigned integers, we
+// need to byte-reverse each word before comparing them.
+//
+// If the buffers are not word aligned, or they are shorter than four bytes,
+// we just use a simple byte comparison loop instead.
+//
+// int   bcmp(const void *src1, const void *src2, size_t length);
+// int memcmp(const void *src1, const void *src2, size_t length);
+
+#include <arm/arch.h>
+
+    .text
+    .syntax unified
+#if defined __thumb2__
+    .code 16
+    .thumb_func _bcmp
+    .thumb_func _memcmp
+#else
+    .code 32
+#endif
+    .globl _bcmp
+    .globl _memcmp
+    .align 3
+_bcmp:
+_memcmp:
+
+#ifdef _ARM_ARCH_6
+    subs    ip,     r2,  #4     // if length < 4
+    bmi     L_useByteCompares   // jump to the byte comparison loop
+    
+    orr     r3,     r0,  r1     // if the buffers are
+    tst     r3,          #3     // not word aligned
+    bne     L_useByteCompares   // jump to the byte comparison loop
+
+.align 3
+L_wordCompare:                  // Here we know that both buffers are word
+    ldr     r2,    [r0], #4     // aligned, and (length - 4) > 0, so at least
+    ldr     r3,    [r1], #4     // four bytes remain to be compared.  We load
+    subs    ip,          #4     // a word from each buffer, and byte reverse
+    bmi     L_lastWord          // the loaded words.  We also decrement the
+    rev     r2,     r2          // length by four and jump out of this loop if
+    rev     r3,     r3          // the result is negative.  Then we compare the
+    cmp     r2,     r3          // reversed words, and continue the loop only
+    beq     L_wordCompare       // if they are equal.
+L_wordsUnequal:
+    ite     hi                  // If the words compared unequal, return +/- 1
+    movhi   r0,     #1          // according to the result of the comparison.
+    movls   r0,     #-1         //
+    bx      lr                  //
+L_lastWord:
+    rev     r2,     r2          // If we just loaded the last complete words
+    rev     r3,     r3          // from the buffers, byte-reverse them and
+    cmp     r2,     r3          // compare.  If they are unequal, jump to the
+    bne     L_wordsUnequal      // return path.
+    add     r2,     ip,  #4     // Otherwise, fall into the cleanup code.
+#endif // _ARM_ARCH_6
+
+L_useByteCompares:
+    tst     r2,     r2          // If the length is exactly zero
+    beq     L_returnZero        // avoid doing any loads and return zero.
+    mov     r3,     r0
+.align 3
+L_byteCompareLoop:
+    ldrb    r0,    [r3], #1     // Load a byte from each buffer, and decrement
+    ldrb    ip,    [r1], #1     // the length by one.  If the decremented
+    subs    r2,     #1          // length is zero, exit the loop.  Otherwise
+    beq     L_lastByte          // subtract the loaded bytes; if their
+    subs    r0,     ip          // difference is zero, continue the comparison
+    beq     L_byteCompareLoop   // loop.  Otherwise, return their difference.
+    bx      lr
+L_returnZero:
+    mov     r0,     ip
+L_lastByte:
+    sub     r0,     ip          // Return the difference of the final bytes
+    bx      lr
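The rev instructions are the heart of the word loop: memcmp() must order the buffers as big-endian unsigned integers, but the ldr loads are little-endian, so each word is byte-reversed before the unsigned compare. A C model of the aligned word loop (a sketch; the real routine also handles alignment and the tail bytes):

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t rev32(uint32_t x)          /* what the rev instruction does */
    {
        return (x >> 24) | ((x >> 8) & 0x0000ff00)
             | ((x << 8) & 0x00ff0000) | (x << 24);
    }

    static int compare_words(const uint32_t *a, const uint32_t *b, size_t words)
    {
        while (words--) {
            uint32_t x = rev32(*a++);
            uint32_t y = rev32(*b++);
            if (x != y)
                return x > y ? 1 : -1;     /* matches L_wordsUnequal above */
        }
        return 0;
    }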
index c725cb49f6c04c65b083f63257dc23d6da1c0ab1..723d84f2d7844ac33b2eaaeaaaa87e5a13af4b80 100644 (file)
@@ -187,6 +187,30 @@ bool    OSAtomicCompareAndSwap32Barrier( int32_t oldValue, int32_t newValue, vol
     return OSAtomicCompareAndSwap32(oldValue, newValue, theValue);
 }
 
+bool
+OSAtomicCompareAndSwapInt(int oldValue, int newValue, volatile int *theValue)
+{
+       return OSAtomicCompareAndSwap32(oldValue, newValue, theValue);
+}
+
+bool
+OSAtomicCompareAndSwapIntBarrier(int oldValue, int newValue, volatile int *theValue)
+{
+       return OSAtomicCompareAndSwap32(oldValue, newValue, theValue);
+}
+
+bool
+OSAtomicCompareAndSwapLong(long oldValue, long newValue, volatile long *theValue)
+{
+       return OSAtomicCompareAndSwap32(oldValue, newValue, (volatile int32_t *)theValue);
+}
+
+bool
+OSAtomicCompareAndSwapLongBarrier(long oldValue, long newValue, volatile long *theValue)
+{
+       return OSAtomicCompareAndSwap32(oldValue, newValue, (volatile int32_t *)theValue);
+}
+
 bool   OSAtomicCompareAndSwap64( int64_t oldValue, int64_t newValue, volatile int64_t *theValue )
 {
     bool result;
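A note on the four new wrappers: on 32-bit ARM both int and long are 32 bits wide, so each forwards directly to OSAtomicCompareAndSwap32 (with or without barrier). Typical use (a sketch):

    #include <stdbool.h>
    #include <libkern/OSAtomic.h>

    static volatile int owner = 0;

    /* Returns true for exactly one caller: the one that swaps 0 -> 1. */
    static bool try_claim(void)
    {
        return OSAtomicCompareAndSwapInt(0, 1, &owner);
    }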
index 89746545f8274a9f639fadd660fa5dbd3f744696..261ec59d8b854c37e4795dbbdc0baa741dec74bb 100644 (file)
@@ -1,5 +1,5 @@
---- gdtoa-misc.c.orig  2008-11-05 15:59:34.000000000 -0800
-+++ gdtoa-misc.c       2008-11-05 16:05:28.000000000 -0800
+--- gdtoa-misc.c.orig  2010-01-07 22:03:21.000000000 -0800
++++ gdtoa-misc.c       2010-01-07 22:25:33.000000000 -0800
 @@ -29,9 +29,20 @@ THIS SOFTWARE.
  /* Please send bug reports to David M. Gay (dmg at acm dot org,
   * with " at " changed at "@" and " dot " changed to ".").    */
@@ -48,7 +48,7 @@
   Bigint *
  Balloc
  #ifdef KR_headers
-@@ -53,8 +84,25 @@ Balloc
+@@ -53,9 +84,26 @@ Balloc
  #ifndef Omit_Private_Memory
        unsigned int len;
  #endif
 +              }
 +#else /* !GDTOA_TSD */
        ACQUIRE_DTOA_LOCK(0);
+-      if ( (rv = freelist[k]) !=0) {
 +#endif /* GDTOA_TSD */
-       if ( (rv = freelist[k]) !=0) {
++      if (k <= Kmax && (rv = freelist[k]) !=0) {
                freelist[k] = rv->next;
                }
+       else {
+@@ -65,7 +113,7 @@ Balloc
+ #else
+               len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1)
+                       /sizeof(double);
+-              if (pmem_next - private_mem + len <= PRIVATE_mem) {
++              if (k <= Kmax && pmem_next - private_mem + len <= PRIVATE_mem) {
+                       rv = (Bigint*)pmem_next;
+                       pmem_next += len;
+                       }
 @@ -75,7 +123,9 @@ Balloc
                rv->k = k;
                rv->maxwds = x;
        rv->sign = rv->wds = 0;
        return rv;
        }
-@@ -89,10 +139,16 @@ Bfree
+@@ -89,10 +139,20 @@ Bfree
  #endif
  {
        if (v) {
+-              ACQUIRE_DTOA_LOCK(0);
+-              v->next = freelist[v->k];
+-              freelist[v->k] = v;
+-              FREE_DTOA_LOCK(0);
++              if (v->k > Kmax)
++                      free((void*)v);
++              else {
 +#ifdef GDTOA_TSD
-+              Bigint **freelist = (Bigint **)pthread_getspecific(gdtoa_tsd_key);
++                      Bigint **freelist = (Bigint **)pthread_getspecific(gdtoa_tsd_key);
 +#else /* !GDTOA_TSD */
-               ACQUIRE_DTOA_LOCK(0);
++                      ACQUIRE_DTOA_LOCK(0);
 +#endif /* GDTOA_TSD */
-               v->next = freelist[v->k];
-               freelist[v->k] = v;
++                      v->next = freelist[v->k];
++                      freelist[v->k] = v;
 +#ifndef GDTOA_TSD
-               FREE_DTOA_LOCK(0);
++                      FREE_DTOA_LOCK(0);
 +#endif /* GDTOA_TSD */
++                      }
                }
        }
  
index 8540a0c73f993a8a3b1ef9bbee0791ecfa09c6a3..659f69c34ca21196253ba21c3eecfaca9a5dd088 100644 (file)
@@ -103,7 +103,7 @@ Balloc
 #else /* !GDTOA_TSD */
        ACQUIRE_DTOA_LOCK(0);
 #endif /* GDTOA_TSD */
-       if ( (rv = freelist[k]) !=0) {
+       if (k <= Kmax && (rv = freelist[k]) !=0) {
                freelist[k] = rv->next;
                }
        else {
@@ -113,7 +113,7 @@ Balloc
 #else
                len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1)
                        /sizeof(double);
-               if (pmem_next - private_mem + len <= PRIVATE_mem) {
+               if (k <= Kmax && pmem_next - private_mem + len <= PRIVATE_mem) {
                        rv = (Bigint*)pmem_next;
                        pmem_next += len;
                        }
@@ -139,16 +139,20 @@ Bfree
 #endif
 {
        if (v) {
+               if (v->k > Kmax)
+                       free((void*)v);
+               else {
 #ifdef GDTOA_TSD
-               Bigint **freelist = (Bigint **)pthread_getspecific(gdtoa_tsd_key);
+                       Bigint **freelist = (Bigint **)pthread_getspecific(gdtoa_tsd_key);
 #else /* !GDTOA_TSD */
-               ACQUIRE_DTOA_LOCK(0);
+                       ACQUIRE_DTOA_LOCK(0);
 #endif /* GDTOA_TSD */
-               v->next = freelist[v->k];
-               freelist[v->k] = v;
+                       v->next = freelist[v->k];
+                       freelist[v->k] = v;
 #ifndef GDTOA_TSD
-               FREE_DTOA_LOCK(0);
+                       FREE_DTOA_LOCK(0);
 #endif /* GDTOA_TSD */
+                       }
                }
        }
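The shape of the Balloc/Bfree change in both gdtoa files: blocks whose size index k is at most Kmax are recycled through the freelist (per-thread when GDTOA_TSD is defined), while anything larger now goes straight to malloc/free, so a single huge conversion can no longer pin an oversized block on the freelist. A minimal model of the Bfree side (KMAX and the block layout are illustrative, not the gdtoa definitions):

    #include <stdlib.h>

    #define KMAX 15                       /* illustrative cap, stands in for Kmax */

    struct block { struct block *next; int k; /* payload follows */ };
    static struct block *freelist[KMAX + 1];

    static void bfree_model(struct block *v)
    {
        if (v == NULL)
            return;
        if (v->k > KMAX)
            free(v);                      /* oversized: hand back to malloc */
        else {
            v->next = freelist[v->k];     /* small: cache on the free list */
            freelist[v->k] = v;
        }
    }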
 
index 5a18acd2887284281de9a5b620327afcc2fd8362..8f1f34df0afadec70a7a15643073a3ca273567b9 100644 (file)
--- a/gen/asl.c
+++ b/gen/asl.c
@@ -84,6 +84,7 @@
 time_t asl_parse_time(const char *);
 const char *asl_syslog_faciliy_num_to_name(int n);
 __private_extern__ asl_client_t *_asl_open_default();
+__private_extern__ int _asl_send_level_message(aslclient ac, aslmsg msg, int level, const char *message);
 
 /* notify SPI */
 uint32_t notify_register_plain(const char *name, int *out_token);
@@ -2312,17 +2313,6 @@ asl_vlog(aslclient ac, aslmsg a, int level, const char *format, va_list ap)
        if (level < ASL_LEVEL_EMERG) level = ASL_LEVEL_EMERG;
        if (level > ASL_LEVEL_DEBUG) level = ASL_LEVEL_DEBUG;
 
-       str = NULL;
-       asprintf(&str, "%d", level);
-       if (str == NULL)
-       {
-               if ((msg != NULL) && (my_msg != 0)) asl_free(msg);
-               return -1;
-       }
-
-       asl_set(msg, ASL_KEY_LEVEL, str);
-       free(str);
-
        /* insert strerror for %m */
        len = 0;
        elen = 0;
@@ -2409,11 +2399,9 @@ asl_vlog(aslclient ac, aslmsg a, int level, const char *format, va_list ap)
                return -1;
        }
 
-       asl_set(msg, ASL_KEY_MSG, str);
+       status = _asl_send_level_message(ac, (aslmsg)msg, level, str);
        free(str);
 
-       status = asl_send(ac, (aslmsg)msg);
-
        if ((msg != NULL) && (my_msg != 0)) asl_free(msg);
        return status;
 }
@@ -2725,18 +2713,17 @@ asl_format_message(aslmsg msg, const char *mfmt, const char *tfmt, uint32_t text
 }
 
 /*
- * asl_send: send a message 
+ * asl_send (internal version): send a message 
  * This routine may be used instead of asl_log() or asl_vlog() if asl_set() 
  * has been used to set all of a message's attributes.
- * msg:  an aslmsg
  * returns 0 for success, non-zero for failure
  */
-int
-asl_send(aslclient ac, aslmsg msg)
+__private_extern__ int
+_asl_send_level_message(aslclient ac, aslmsg msg, int level, const char *message)
 {
        char *str, *out_raw;
        caddr_t out;
-       uint32_t i, len, outlen, level, lmask, outstatus, filter, check, senderx, facilityx;
+       uint32_t i, len, outlen, lmask, outstatus, filter, check, senderx, facilityx;
        uint64_t v64;
        const char *val;
        char *name, *x;
@@ -2745,7 +2732,7 @@ asl_send(aslclient ac, aslmsg msg)
        int status, rc_filter;
        asl_client_t *asl;
        int use_global_lock;
-       asl_msg_t *mt;
+       asl_msg_t *mt, *tmp_msg;
        char hname[_POSIX_HOST_NAME_MAX];
        kern_return_t kstatus;
 
@@ -2760,8 +2747,6 @@ asl_send(aslclient ac, aslmsg msg)
 
        if (msg == NULL) return 0;
 
-       level = ASL_LEVEL_DEBUG;
-
        val = asl_get(msg, ASL_KEY_LEVEL);
        if (val != NULL) level = atoi(val);
 
@@ -2814,6 +2799,26 @@ asl_send(aslclient ac, aslmsg msg)
                rc_filter = 1;
        }
 
+       /*
+        * Copy the message to tmp_msg to make setting values thread-safe
+        */
+       tmp_msg = calloc(1, sizeof(asl_msg_t));
+       if (tmp_msg == NULL) return -1;
+
+       tmp_msg->type = ASL_TYPE_MSG;
+
+       mt = (asl_msg_t *)msg;
+       for (i = 0; i < mt->count; i++)
+       {
+               asl_set(tmp_msg, mt->key[i], mt->val[i]);
+       }
+
+       /*
+        * Set Level and Message from parameters.
+        */
+       if (message != NULL) asl_set(tmp_msg, ASL_KEY_MSG, message);
+       asl_set(tmp_msg, ASL_KEY_LEVEL, _asl_level_string(level));
+
        /* 
         * Time, TimeNanoSec, Host, PID, UID, and GID values get set here
         */
@@ -2826,7 +2831,7 @@ asl_send(aslclient ac, aslmsg msg)
                asprintf(&str, "%lu", tval.tv_sec);
                if (str != NULL)
                {
-                       asl_set(msg, ASL_KEY_TIME, str);
+                       asl_set(tmp_msg, ASL_KEY_TIME, str);
                        free(str);
                        str = NULL;
                }
@@ -2834,7 +2839,7 @@ asl_send(aslclient ac, aslmsg msg)
                asprintf(&str, "%lu", tval.tv_usec * 1000);
                if (str != NULL)
                {
-                       asl_set(msg, ASL_KEY_TIME_NSEC, str);
+                       asl_set(tmp_msg, ASL_KEY_TIME_NSEC, str);
                        free(str);
                        str = NULL;
                }
@@ -2845,7 +2850,7 @@ asl_send(aslclient ac, aslmsg msg)
                asprintf(&str, "%lu", tick);
                if (str != NULL)
                {
-                       asl_set(msg, ASL_KEY_TIME, str);
+                       asl_set(tmp_msg, ASL_KEY_TIME, str);
                        free(str);
                        str = NULL;
                }
@@ -2854,14 +2859,14 @@ asl_send(aslclient ac, aslmsg msg)
        memset(&hname, 0, _POSIX_HOST_NAME_MAX);
        if (gethostname(hname, _POSIX_HOST_NAME_MAX) == 0)
        {
-               asl_set(msg, ASL_KEY_HOST, hname);
+               asl_set(tmp_msg, ASL_KEY_HOST, hname);
        }
 
        str = NULL;
        asprintf(&str, "%u", getpid());
        if (str != NULL)
        {
-               asl_set(msg, ASL_KEY_PID, str);
+               asl_set(tmp_msg, ASL_KEY_PID, str);
                free(str);
        }
 
@@ -2869,7 +2874,7 @@ asl_send(aslclient ac, aslmsg msg)
        asprintf(&str, "%d", getuid());
        if (str != NULL)
        {
-               asl_set(msg, ASL_KEY_UID, str);
+               asl_set(tmp_msg, ASL_KEY_UID, str);
                free(str);
        }
 
@@ -2877,30 +2882,29 @@ asl_send(aslclient ac, aslmsg msg)
        asprintf(&str, "%u", getgid());
        if (str != NULL)
        {
-               asl_set(msg, ASL_KEY_GID, str);
+               asl_set(tmp_msg, ASL_KEY_GID, str);
                free(str);
        }
 
        senderx = (uint32_t)-1;
        facilityx = (uint32_t)-1;
-       mt = (asl_msg_t *)msg;
 
-       for (i = 0; (i < mt->count) && ((senderx == (uint32_t)-1) || (facilityx == (uint32_t)-1)); i++)
+       for (i = 0; (i < tmp_msg->count) && ((senderx == (uint32_t)-1) || (facilityx == (uint32_t)-1)); i++)
        {
-               if (mt->key[i] == NULL) continue;
-               if (streq(mt->key[i], ASL_KEY_SENDER)) senderx = i;
-               else if (streq(mt->key[i], ASL_KEY_FACILITY)) facilityx = i;
+               if (tmp_msg->key[i] == NULL) continue;
+               if (streq(tmp_msg->key[i], ASL_KEY_SENDER)) senderx = i;
+               else if (streq(tmp_msg->key[i], ASL_KEY_FACILITY)) facilityx = i;
        }
 
        /*
         * Set Sender if needed
         */
-       if ((senderx == (uint32_t)-1) || (mt->val[senderx] == NULL))
+       if ((senderx == (uint32_t)-1) || (tmp_msg->val[senderx] == NULL))
        {
                if ((ac != NULL) && (ac->name != NULL))
                {
                        /* Use the Sender name from the client handle */
-                       asl_set(msg, ASL_KEY_SENDER, ac->name);
+                       asl_set(tmp_msg, ASL_KEY_SENDER, ac->name);
                }
                else
                {
@@ -2921,20 +2925,20 @@ asl_send(aslclient ac, aslmsg msg)
                                }
                        }
 
-                       if (_asl_global.sender != NULL) asl_set(msg, ASL_KEY_SENDER, _asl_global.sender);
-                       else asl_set(msg, ASL_KEY_SENDER, "Unknown");
+                       if (_asl_global.sender != NULL) asl_set(tmp_msg, ASL_KEY_SENDER, _asl_global.sender);
+                       else asl_set(tmp_msg, ASL_KEY_SENDER, "Unknown");
                }
        }
 
        /*
         * Set Facility
         */
-       if ((facilityx == (uint32_t)-1) || (mt->val[facilityx] == NULL))
+       if ((facilityx == (uint32_t)-1) || (tmp_msg->val[facilityx] == NULL))
        {
                if ((ac != NULL) && (ac->facility != NULL))
                {
                        /* Use the Facility name from the client handle */
-                       asl_set(msg, ASL_KEY_FACILITY, ac->facility);
+                       asl_set(tmp_msg, ASL_KEY_FACILITY, ac->facility);
                }
        }
 
@@ -2944,7 +2948,7 @@ asl_send(aslclient ac, aslmsg msg)
                val = asl_get(msg, ASL_KEY_OPTION);
                if (val == NULL)
                {
-                       asl_set(msg, ASL_KEY_OPTION, ASL_OPT_STORE);
+                       asl_set(tmp_msg, ASL_KEY_OPTION, ASL_OPT_STORE);
                }
                else
                {
@@ -2952,7 +2956,7 @@ asl_send(aslclient ac, aslmsg msg)
                        asprintf(&str, "%s %s", ASL_OPT_STORE, val);
                        if (str != NULL)
                        {
-                               asl_set(msg, ASL_KEY_OPTION, str);
+                               asl_set(tmp_msg, ASL_KEY_OPTION, str);
                                free(str);
                                str = NULL;
                        }
@@ -2966,7 +2970,7 @@ asl_send(aslclient ac, aslmsg msg)
        if ((filter != 0) && ((filter & lmask) != 0))
        {
                len = 0;
-               out_raw = asl_msg_to_string((asl_msg_t *)msg, &len);
+               out_raw = asl_msg_to_string(tmp_msg, &len);
 
                if ((out_raw != NULL) && (len != 0))
                {
@@ -3011,7 +3015,7 @@ asl_send(aslclient ac, aslmsg msg)
                if (asl->fd_list[i] < 0) continue;
 
                len = 0;
-               out = asl_format_message(msg, asl->fd_mfmt[i], asl->fd_tfmt[i], asl->fd_encoding[i], &len);
+               out = asl_format_message(tmp_msg, asl->fd_mfmt[i], asl->fd_tfmt[i], asl->fd_encoding[i], &len);
                if (out == NULL) continue;
 
                status = write(asl->fd_list[i], out, len - 1);
@@ -3024,11 +3028,23 @@ asl_send(aslclient ac, aslmsg msg)
                free(out);
        }
 
+       asl_free((aslmsg)tmp_msg);
+
        if (use_global_lock != 0) pthread_mutex_unlock(&_asl_global.lock);
 
        return outstatus;
 }
 
+/*
+ * asl_send: send a message 
+ * returns 0 for success, non-zero for failure
+ */
+int
+asl_send(aslclient ac, aslmsg msg)
+{
+       return _asl_send_level_message(ac, msg, ASL_LEVEL_DEBUG, NULL);
+}
+
 char *
 asl_msg_string(aslmsg a)
 {
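The thread-safety idea in _asl_send_level_message, in miniature: never stamp Time, PID, or Level keys into the caller's message, which may be shared across threads; stamp a private copy and free it once the send completes. The same pattern with only the public asl API (a sketch, not the internal routine):

    #include <asl.h>

    static int send_stamped(aslclient ac, const char *text)
    {
        aslmsg tmp = asl_new(ASL_TYPE_MSG);   /* private, per-call message */
        if (tmp == NULL)
            return -1;
        asl_set(tmp, ASL_KEY_MSG, text);
        asl_set(tmp, ASL_KEY_LEVEL, "5");     /* ASL_LEVEL_NOTICE, as a string */
        int status = asl_send(ac, tmp);       /* caller's message never touched */
        asl_free(tmp);
        return status;
    }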
index 402510c349d9858c3503ca80ea609eae9e9f6b0e..a1fb6f000f51b5c4ef651abd1504aa09658a445a 100644 (file)
@@ -1061,6 +1061,14 @@ mag_get_thread_index(szone_t *szone)
        return CPU_NUMBER() & (TINY_MAX_MAGAZINES - 1);
 }
 
+#elif defined(__arm__)
+
+static INLINE mag_index_t
+mag_get_thread_index(szone_t *szone)
+{
+    return 0;
+}
+
 #else
 #warning deriving magazine index from pthread_self() [want processor number]
 
index 83c882f81dbff98a6543cb2cf0653f092b878365..aa0bf2852b5bac6b4be674244f6871923e297b3e 100644 (file)
@@ -285,10 +285,13 @@ __expand_uniquing_table(backtrace_uniquing_table *uniquing_table)
 static int 
 __enter_frames_in_table(backtrace_uniquing_table *uniquing_table, uint64_t *foundIndex, mach_vm_address_t *frames, int32_t count)
 {      
+       // The hash values need to be the same size as the addresses (because we use the value -1); for clarity, define a new type.
+       typedef mach_vm_address_t hash_index_t;
+
        mach_vm_address_t thisPC;
-       uint64_t hash, uParent = (uint64_t)(-1ll), modulus = (uniquing_table->numNodes-uniquing_table->untouchableNodes-1);
+       hash_index_t hash, uParent = (hash_index_t)(-1ll), modulus = (uniquing_table->numNodes-uniquing_table->untouchableNodes-1);
        int32_t collisions, lcopy = count, returnVal = 1;
-       uint64_t hash_multiplier = ((uniquing_table->numNodes - uniquing_table->untouchableNodes)/(uniquing_table->max_collide*2+1));
+       hash_index_t hash_multiplier = ((uniquing_table->numNodes - uniquing_table->untouchableNodes)/(uniquing_table->max_collide*2+1));
        mach_vm_address_t *node;
        while (--lcopy >= 0) {
         thisPC = frames[lcopy];
index 17a12676a6485fa81809df4c292beef300b6086e..a43e1cd0ad50d2d27efc104f0d2a3c511a735534 100644 (file)
@@ -112,6 +112,9 @@ _duplocale(locale_t loc)
                loc = &__global_locale;
        else if (loc == &__c_locale) {
                *new = __c_locale;
+               new->__refcount = 1;
+               new->__free_extra = (__free_extra_t)_releaselocale;
+               new->__lock = LOCK_INITIALIZER;
                return new;
        }
        XL_LOCK(loc);
@@ -446,10 +449,13 @@ uselocale(locale_t loc)
                        errno = EINVAL;
                        return NULL;
                }
-               if (loc == &__global_locale)    /* should never happen */
-                       loc = LC_GLOBAL_LOCALE;
+               if (loc == LC_GLOBAL_LOCALE ||
+                   loc == &__global_locale)    /* should never happen */
+                       loc = NULL;
+               XL_RETAIN(loc);
                orig = pthread_getspecific(__locale_key);
-               pthread_setspecific(__locale_key, loc == LC_GLOBAL_LOCALE ? NULL : loc);
+               pthread_setspecific(__locale_key, loc);
+               XL_RELEASE(orig);
        }
        return (orig ? orig : LC_GLOBAL_LOCALE);
 }
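The uselocale() fix is the standard retain-new-then-release-old swap for a refcounted thread-specific slot; done in that order it is safe even when the new and old values are the same object. Modeled in C (the names are illustrative stand-ins, not the libc internals):

    #include <stdlib.h>

    typedef struct loc { int refcount; } *loc_t;

    static loc_t tsd_slot;                  /* stands in for the pthread TSD key */

    static void retain(loc_t l)  { if (l) l->refcount++; }
    static void release(loc_t l) { if (l && --l->refcount == 0) free(l); }

    static void use_locale_model(loc_t loc)
    {
        retain(loc);                        /* XL_RETAIN(loc)        */
        loc_t orig = tsd_slot;              /* pthread_getspecific() */
        tsd_slot = loc;                     /* pthread_setspecific() */
        release(orig);                      /* XL_RELEASE(orig)      */
    }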
index 09367523a9815df66879c4ca2e37d7f80857ac03..5e9aefbfc05458b3db2775cc267bb44f4578a744 100644 (file)
@@ -222,7 +222,7 @@ _________________________________________
 __private_extern__
 void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
 
-__private_extern__ 
+__private_extern__
 void _pthread_wqthread(pthread_t self, mach_port_t kport, void * stackaddr, pthread_workitem_t item, int reuse);
 
 #define PTHREAD_START_CUSTOM   0x01000000
@@ -836,9 +836,9 @@ _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * f
        if ((pflags & PTHREAD_START_CUSTOM) == 0) {
                stackaddr = (char *)self;
                _pthread_struct_init(self, attrs, stackaddr,  stacksize, 1, 1);
-               #if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
                _pthread_set_self(self);
-               #endif
+#endif
                LOCK(_pthread_list_lock);
                if (pflags & PTHREAD_START_SETSCHED) {
                        self->policy = ((pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK);
@@ -850,9 +850,9 @@ _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * f
                        self->detached |= PTHREAD_CREATE_DETACHED;
                }
        }  else { 
-               #if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
                _pthread_set_self(self);
-               #endif
+#endif
                LOCK(_pthread_list_lock);
        }
        self->kernel_thread = kport;
@@ -2090,9 +2090,6 @@ pthread_init(void)
                __oldstyle = 1;
        }
 #endif
-#if defined(__arm__)
-       __oldstyle = 1;
-#endif
 
 #if defined(_OBJC_PAGE_BASE_ADDRESS)
 {
@@ -2110,7 +2107,7 @@ pthread_init(void)
 
        mig_init(1);            /* enable multi-threaded mig interfaces */
        if (__oldstyle == 0) {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
                __bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)), _pthread_start, &workq_targetconc[0], (__uint64_t)(&thread->tsd[__PTK_LIBDISPATCH_KEY0]) - (__uint64_t)thread);
 #else
                __bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)), NULL, &workq_targetconc[0], (__uint64_t)&thread->tsd[__PTK_LIBDISPATCH_KEY0] - (__uint64_t)thread);
@@ -2493,7 +2490,7 @@ pthread_workqueue_atfork_parent(void)
 void
 pthread_workqueue_atfork_child(void)
 {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
        /* 
         * NOTE:  workq additions here  
         * are for i386,x86_64 only as
@@ -2517,7 +2514,7 @@ _pthread_work_internal_init(void)
        pthread_workqueue_t wq;
 
        if (kernel_workq_setup == 0) {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
                __bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)),NULL,NULL, NULL);
 #else
                __bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)),NULL,NULL, NULL);
@@ -2913,7 +2910,7 @@ _pthread_wqthread(pthread_t self, mach_port_t kport, void * stackaddr, pthread_w
                /* These are not joinable threads */
                self->detached &= ~PTHREAD_CREATE_JOINABLE;
                self->detached |= PTHREAD_CREATE_DETACHED;
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) 
                _pthread_set_self(self);
 #endif
 #if WQ_TRACE
@@ -3094,10 +3091,6 @@ pthread_workqueue_create_np(pthread_workqueue_t * workqp, const pthread_workqueu
        pthread_workqueue_t wq;
        pthread_workqueue_head_t headp;
 
-#if defined(__arm__)
-       /* not supported under arm */
-       return(ENOTSUP);
-#endif
 #if defined(__ppc__)
        IF_ROSETTA() {
                return(ENOTSUP);
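Net effect of the pthread.c changes: ARM now registers the new assembly trampolines via __bsdthread_register and no longer short-circuits workqueue creation with ENOTSUP. Using the SPI signature visible in the hunk above (a sketch; pthread_workqueue_create_np is private Apple SPI declared in Apple's workqueue headers, not a portable API):

    /* Hypothetical caller, assuming the private declarations are available. */
    pthread_workqueue_t wq;
    int err = pthread_workqueue_create_np(&wq, NULL);
    /* on arm before this commit: unconditionally ENOTSUP;
       after it: the kernel workqueue path, as on i386/x86_64 */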
index 6ec989971fafa370dde53946fd209c4fb4cf0e0b..819df97b2d8de4197cebd789fe8a1e5d8055d404 100644 (file)
@@ -227,10 +227,12 @@ _pthread_getspecific_direct(unsigned long slot)
 #elif defined(__ppc64__)
         register void **__pthread_tsd asm ("r13");
         ret = __pthread_tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];
+#elif defined(__arm__) && defined(_ARM_ARCH_6) && !defined(_ARM_ARCH_7) && defined(__thumb__) && !defined(__OPTIMIZE__)
+        ret = pthread_getspecific(slot);
 #elif defined(__arm__) && defined(_ARM_ARCH_6)
-       void **__pthread_tsd;
-       __asm__ ("mrc p15, 0, %0, c13, c0, 3" : "=r"(__pthread_tsd));
-       ret = __pthread_tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];
+        void **__pthread_tsd;
+        __asm__ ("mrc p15, 0, %0, c13, c0, 3" : "=r"(__pthread_tsd));
+        ret = __pthread_tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];
 #elif defined(__arm__) && !defined(_ARM_ARCH_6)
         register void **__pthread_tsd asm ("r9");
         ret = __pthread_tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];
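For reference, the fast path the new Thumb case works around: ARMv6 exposes a read-only thread-ID register (CP15 c13, c0, 3, i.e. TPIDRURO) that libc points at the thread's TSD block, so a direct slot read is one coprocessor move plus an indexed load. MRC is an ARM/Thumb-2-only encoding, which is presumably why the unoptimized ARMv6 Thumb build above falls back to pthread_getspecific(). A C sketch (assuming _PTHREAD_TSD_OFFSET from the surrounding header):

    /* Sketch of the ARMv6 fast path; not the header's exact text. */
    static inline void *tsd_direct(unsigned long slot)
    {
        void **tsd;
        __asm__ ("mrc p15, 0, %0, c13, c0, 3" : "=r"(tsd));
        return tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];
    }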