pthread_set_self.s \
pthread_self.s \
pthread_getspecific.s \
- init_cpu_capabilities.c
+ init_cpu_capabilities.c \
+ start_wqthread.s \
+ thread_start.s
+
--- /dev/null
+/*
+ * Copyright (c) 2009 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <arm/arch.h>
+
+#define __APPLE_API_PRIVATE
+#include <machine/cpu_capabilities.h>
+#undef __APPLE_API_PRIVATE
+
+// This routine is never called directly by user code; the kernel jumps to it
+// with args 0 through 3 already in registers r0 through r3. The two extra args
+// must be pushed onto the stack before calling _pthread_wqthread():
+// arg4 arrives in r4
+// arg5 arrives in r5
+
+ .text
+ .align 2
+ .globl _start_wqthread
+_start_wqthread:
+ stmfd sp!, {r4, r5}
+ bl __pthread_wqthread
--- /dev/null
+/*
+ * Copyright (c) 2009 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <arm/arch.h>
+
+#define __APPLE_API_PRIVATE
+#include <machine/cpu_capabilities.h>
+#undef __APPLE_API_PRIVATE
+
+// This routine is never called directly by user code; the kernel jumps to it
+// with args 0 through 3 already in registers r0 through r3. The two extra args
+// must be pushed onto the stack before calling _pthread_start():
+// arg4 arrives in r4
+// arg5 arrives in r5
+
+ .text
+ .align 2
+ .globl _thread_start
+_thread_start:
+ stmfd sp!, {r4, r5}
+ bl __pthread_start
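
The two trampolines above exist because the kernel enters them with up to six values already in r0-r5, while the AAPCS calling convention passes only the first four arguments of a C function in r0-r3; the stmfd sp!, {r4, r5} therefore moves the remaining values onto the stack where an ordinary C callee expects them. A minimal C-level sketch of that view, using the callee prototypes declared elsewhere in this patch (the two typedefs here are opaque stand-ins, not the Libc-internal definitions):

/* Sketch only: how the C callees see the kernel-provided values once the
 * trampolines have pushed r4/r5. Prototypes mirror the declarations added
 * later in this patch; the typedefs are illustrative placeholders. */
#include <mach/mach.h>
#include <stddef.h>

typedef struct _pthread *pthread_ref;            /* stand-in for internal pthread_t */
typedef struct _pthread_workitem *workitem_ref;  /* stand-in for pthread_workitem_t */

/* r0..r3 carry the first four arguments; the remaining parameters
 * (stacksize/flags, or reuse) arrive on the stack, which is exactly
 * what the trampolines set up before the bl. */
void _pthread_start(pthread_ref self, mach_port_t kport,
                    void *(*fun)(void *), void *funarg,
                    size_t stacksize, unsigned int flags);

void _pthread_wqthread(pthread_ref self, mach_port_t kport,
                       void *stackaddr, workitem_ref item, int reuse);
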
#
.PATH: ${.CURDIR}/arm/string
-MDSRCS += \
- bcopy.s \
- bzero.s \
- ffs.s \
+MDSRCS += \
+ bcopy.s \
+ bzero.s \
+ ffs.s \
+ memcmp.s \
strcmp.s \
strlen.s
MDSRCS += memset_pattern.s
.endif
-SUPPRESSSRCS += memcpy.c memmove.c memset.c strlen.c
+SUPPRESSSRCS += bcmp.c memcpy.c memmove.c memset.c strlen.c
--- /dev/null
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*****************************************************************************
+ * Cortex-A8 implementation *
+ *****************************************************************************/
+
+// Cortex-A8 implementations of memcpy( ), memmove( ) and bcopy( ).
+//
+// Our tests have shown that NEON is always a performance win for memcpy( ).
+// However, for the specific case of copies from a warm source to a cold
+// destination when the buffer size is between 1k and 32k, it is not enough
+// of a performance win to offset the increased power footprint, resulting
+// in an energy usage regression. Thus, we detect that particular case, and
+// pass those copies through the ARM core registers. All other copies larger
+// than 8 bytes are handled on NEON.
+//
+// Stephen Canon, August 2009
+
+.text
+.code 16
+.syntax unified
+
+// void bcopy(const void * source,
+// void * destination,
+// size_t length);
+//
+// void *memmove(void * destination,
+// const void * source,
+// size_t n);
+//
+// void *memcpy(void * restrict destination,
+// const void * restrict source,
+// size_t n);
+//
+// All three copy n successive bytes from source to destination. memmove and
+// memcpy return destination, whereas bcopy has no return value. Copying takes
+// place as if it were through a temporary buffer -- after return, destination
+// contains exactly the bytes from source, even if the buffers overlap.
+
+.thumb_func _bcopy
+.globl _bcopy
+.thumb_func _memmove
+.globl _memmove
+.thumb_func _memcpy
+.globl _memcpy
+
+.align 2
+_bcopy:
+ mov r3, r0 // swap the first and second arguments
+ mov r0, r1 // and fall through into memmove
+ mov r1, r3 //
+
+.align 2
+_memmove:
+_memcpy:
+ subs r3, r0, r1 // offset = destination addr - source addr
+ it eq
+ bxeq lr // if source == destination, early out
+
+// Our preference is for using a (faster) front-to-back copy. However, if
+// 0 < offset < length, it is necessary to copy back-to-front for correctness.
+// We have already ruled out offset == 0, so we can use an unsigned compare
+// with length -- if offset is higher, offset is either greater than length
+// or negative.
+
+ cmp r3, r2
+ bhs L_copyFrontToBack
+
+/*****************************************************************************
+ * back to front copy *
+ *****************************************************************************/
+
+ mov ip, r0 // copy destination pointer.
+ add r1, r2 // move source pointer to end of source array
+ add ip, r2 // move destination pointer to end of dest array
+
+ subs r2, $8 // if length - 8 is negative (i.e. length
+ blt L_scalarReverseCopy // is less than 8), jump to cleanup path.
+ tst ip, $7 // if (destination + length) is doubleword
+ beq L_vectorReverseCopy // aligned, jump to fast path.
+
+0: ldrb r3, [r1, $-1]! // load byte
+ sub r2, $1 // decrement length
+ strb r3, [ip, $-1]! // store byte
+ tst ip, $7 // test alignment
+ bne 0b
+
+ cmp r2, $0 // if length - 8 is negative,
+ blt L_scalarReverseCopy // jump to the cleanup code
+
+/*****************************************************************************
+ * destination is doubleword aligned *
+ *****************************************************************************/
+
+L_vectorReverseCopy:
+ ands r3, r1, $3 // Extract the alignment of the source
+ bic r1, $3
+ tbh [pc, r3, lsl $1] // Dispatch table on source alignment
+0:
+.short (L_reverseAligned0-0b)/2 // The NEON alignment hardware does not work
+.short (L_reverseAligned1-0b)/2 // properly with sub 4-byte alignment and
+.short (L_reverseAligned2-0b)/2 // buffers that are uncacheable, so we need
+.short (L_reverseAligned3-0b)/2 // to have a software workaround.
+
+/*****************************************************************************
+ * source is also at least word aligned *
+ *****************************************************************************/
+
+L_reverseAligned0:
+ subs r2, $0x38 // if length - 64 is negative, jump to
+ blt L_reverseVectorCleanup// the cleanup path.
+ tst ip, $0x38 // if (destination + length) is cacheline
+ beq L_reverseCachelineAligned // aligned, jump to the fast path.
+
+0: sub r1, $8 // copy eight bytes at a time until the
+ vld1.32 {d0}, [r1] // destination is 8 byte aligned.
+ sub ip, $8 //
+ sub r2, $8 //
+ tst ip, $0x38 //
+ vst1.64 {d0}, [ip, :64] //
+ bne 0b //
+
+ cmp r2, $0 // if length - 64 is negative,
+ blt L_reverseVectorCleanup// jump to the cleanup code
+
+L_reverseCachelineAligned:
+ sub r3, r2, $0x3c0 // If 1024 < length < 32768, use core
+ cmp r3, $0x7c00 // register copies instead of NEON to
+ blo L_useSTMDB // control energy usage.
+
+ sub r1, $32 // decrement source
+ sub ip, $32 // decrement destination
+ mov r3, $-32 // load address increment
+ tst r1, $0x1f // if source shares 32 byte alignment
+ beq L_reverseSourceAligned// jump to loop with more alignment hints
+
+ vld1.32 {q2,q3}, [r1], r3 // This loop handles 4-byte aligned copies
+ vld1.32 {q0,q1}, [r1], r3 // as generally as possible.
+ subs r2, $64 //
+ vst1.64 {q2,q3}, [ip,:256], r3 // The Cortex-A8 NEON unit does not always
+ blt 1f // properly handle misalignment in vld1
+.align 3 // with an element size of 8 or 16, so
+0: vld1.32 {q2,q3}, [r1], r3 // this is the best we can do without
+ vst1.64 {q0,q1}, [ip,:256], r3 // handling alignment in software.
+ vld1.32 {q0,q1}, [r1], r3 //
+ subs r2, $64 //
+ vst1.64 {q2,q3}, [ip,:256], r3 //
+ bge 0b //
+ b 1f //
+
+L_reverseSourceAligned:
+ vld1.64 {q2,q3}, [r1,:256], r3 // Identical to loop above except for
+ vld1.64 {q0,q1}, [r1,:256], r3 // additional alignment information; this
+ subs r2, $64 // gets an additional .5 bytes per cycle
+ vst1.64 {q2,q3}, [ip,:256], r3 // on Cortex-A8.
+ blt 1f //
+.align 3 //
+0: vld1.64 {q2,q3}, [r1,:256], r3 //
+ vst1.64 {q0,q1}, [ip,:256], r3 //
+ vld1.64 {q0,q1}, [r1,:256], r3 //
+ subs r2, $64 //
+ vst1.64 {q2,q3}, [ip,:256], r3 //
+ bge 0b //
+1: vst1.64 {q0,q1}, [ip,:256], r3 // loop cleanup: final 32 byte store
+ add r1, $32 // point source at last element stored
+ add ip, $32 // point destination at last element stored
+
+L_reverseVectorCleanup:
+ adds r2, $0x38 // If (length - 8) < 0, goto scalar cleanup
+ blt L_scalarReverseCopy //
+
+0: sub r1, $8 // copy eight bytes at a time until
+ vld1.32 {d0}, [r1] // (length - 8) < 0.
+ sub ip, $8 //
+ subs r2, $8 //
+ vst1.64 {d0}, [ip, :64] //
+ bge 0b //
+
+/*****************************************************************************
+ * sub-doubleword cleanup copies *
+ *****************************************************************************/
+
+L_scalarReverseCopy:
+ adds r2, #0x8 // restore length
+ it eq // if this is zero
+ bxeq lr // early out
+
+0: ldrb r3, [r1, #-1]! // load a byte from source
+ strb r3, [ip, #-1]! // store to destination
+ subs r2, #0x1 // subtract one from length
+ bne 0b // if non-zero, repeat
+ bx lr // return
+
+/*****************************************************************************
+ * STMDB loop for 1k-32k buffers *
+ *****************************************************************************/
+
+L_useSTMDB:
+ push {r4-r8,r10,r11}
+.align 3
+0: ldmdb r1!, {r3-r8,r10,r11}
+ subs r2, #0x40
+ stmdb ip!, {r3-r8,r10,r11}
+ ldmdb r1!, {r3-r8,r10,r11}
+ pld [r1, #-0x40]
+ stmdb ip!, {r3-r8,r10,r11}
+ bge 0b
+ pop {r4-r8,r10,r11}
+ b L_reverseVectorCleanup
+
+/*****************************************************************************
+ * Misaligned reverse vld1 loop                                             *
+ *****************************************************************************/
+
+// Software alignment fixup to handle source and dest that are relatively
+// misaligned mod 4 bytes. Load two 4-byte aligned double words from source,
+// use vext.8 to extract a double word to store, and perform an 8-byte aligned
+// store to destination.
+
+#define RCOPY_UNALIGNED(offset) \
+ subs r2, $8 ;\
+ blt 2f ;\
+ sub r1, $8 ;\
+ sub ip, $8 ;\
+ mov r3, $-8 ;\
+ vld1.32 {d2,d3}, [r1], r3 ;\
+ subs r2, $8 ;\
+ blt 1f ;\
+0: vext.8 d0, d2, d3, $(offset);\
+ vmov d3, d2 ;\
+ vld1.32 {d2}, [r1], r3 ;\
+ subs r2, $8 ;\
+ vst1.64 {d0}, [ip, :64], r3 ;\
+ bge 0b ;\
+1: vext.8 d0, d2, d3, $(offset);\
+ add r1, $8 ;\
+ vst1.64 {d0}, [ip, :64] ;\
+2: add r2, $8 ;\
+ add r1, $(offset);\
+ b L_scalarReverseCopy
+
+L_reverseAligned1:
+ RCOPY_UNALIGNED(1)
+L_reverseAligned2:
+ RCOPY_UNALIGNED(2)
+L_reverseAligned3:
+ RCOPY_UNALIGNED(3)
+
+/*****************************************************************************
+ * front to back copy *
+ *****************************************************************************/
+
+L_copyFrontToBack:
+ mov ip, r0 // copy destination pointer.
+ subs r2, $8 // if length - 8 is negative (i.e. length
+ blt L_scalarCopy // is less than 8), jump to cleanup path.
+ tst ip, $7 // if the destination is doubleword
+ beq L_vectorCopy // aligned, jump to fast path.
+
+0: ldrb r3, [r1], $1 // load byte
+ sub r2, $1 // decrement length
+ strb r3, [ip], $1 // store byte
+ tst ip, $7 // test alignment
+ bne 0b
+
+ cmp r2, $0 // if length - 8 is negative,
+ blt L_scalarCopy // jump to the cleanup code
+
+/*****************************************************************************
+ * destination is doubleword aligned *
+ *****************************************************************************/
+
+L_vectorCopy:
+ ands r3, r1, $3 // Extract the alignment of the source
+ bic r1, $3
+ tbh [pc, r3, lsl $1] // Dispatch table on source alignment
+0:
+.short (L_sourceAligned0-0b)/2 // The NEON alignment hardware does not work
+.short (L_sourceAligned1-0b)/2 // properly with sub 4-byte alignment and
+.short (L_sourceAligned2-0b)/2 // buffers that are uncacheable, so we need
+.short (L_sourceAligned3-0b)/2 // to have a software workaround.
+
+/*****************************************************************************
+ * source is also at least word aligned *
+ *****************************************************************************/
+
+L_sourceAligned0:
+ subs r2, $0x38 // If (length - 64) < 0
+ blt L_vectorCleanup // jump to cleanup code
+ tst ip, $0x38 // If destination is 64 byte aligned
+ beq L_cachelineAligned // jump to main loop
+
+0: vld1.32 {d0}, [r1]! // Copy one double word at a time until
+ sub r2, $8 // the destination is 64-byte aligned.
+ vst1.64 {d0}, [ip, :64]! //
+ tst ip, $0x38 //
+ bne 0b //
+
+ cmp r2, $0 // If (length - 64) < 0, goto cleanup
+ blt L_vectorCleanup //
+
+L_cachelineAligned:
+ sub r3, r2, $0x3c0 // If 1024 < length < 32768, use core
+ cmp r3, $0x7c00 // register copies instead of NEON to
+ blo L_useSTMIA // control energy usage.
+ tst r1, $0x1f // If source has 32-byte alignment, use
+ beq L_sourceAligned32 // an optimized loop.
+
+ vld1.32 {q2,q3}, [r1]! // This is the most common path for small
+ vld1.32 {q0,q1}, [r1]! // copies, which are alarmingly frequent.
+ subs r2, #0x40 // It requires 4-byte alignment on the
+ vst1.64 {q2,q3}, [ip, :256]! // source. For ordinary malloc'd buffers,
+ blt 1f // this path could handle only single-byte
+.align 3 // alignment at speed by using vld1.8
+0: vld1.32 {q2,q3}, [r1]! // instead of vld1.32; however, the NEON
+ vst1.64 {q0,q1}, [ip, :256]! // alignment handler misbehaves for some
+ vld1.32 {q0,q1}, [r1]! // special copies if the element size is
+ subs r2, #0x40 // 8 or 16, so we need to work around
+ vst1.64 {q2,q3}, [ip, :256]! // sub 4-byte alignment in software, in
+ bge 0b // another code path.
+ b 1f
+
+L_sourceAligned32:
+ vld1.64 {q2,q3}, [r1, :256]! // When the source shares 32-byte alignment
+ vld1.64 {q0,q1}, [r1, :256]! // with the destination, we use this loop
+ subs r2, #0x40 // instead, which specifies the maximum
+ vst1.64 {q2,q3}, [ip, :256]! // :256 alignment on all loads and stores.
+ blt 1f //
+.align 3 // This gets an additional .5 bytes per
+0: vld1.64 {q2,q3}, [r1, :256]! // cycle for in-cache copies, which is not
+ vst1.64 {q0,q1}, [ip, :256]! // insignificant for this (rather common)
+ vld1.64 {q0,q1}, [r1, :256]! // case.
+ subs r2, #0x40 //
+ vst1.64 {q2,q3}, [ip, :256]! // This is identical to the above loop,
+ bge 0b // except for the additional alignment.
+1: vst1.64 {q0,q1}, [ip, :256]! //
+
+L_vectorCleanup:
+ adds r2, $0x38 // If (length - 8) < 0, goto scalar cleanup
+ blt L_scalarCopy //
+
+0: vld1.32 {d0}, [r1]! // Copy one doubleword at a time until
+ subs r2, $8 // (length - 8) < 0.
+ vst1.64 {d0}, [ip, :64]! //
+ bge 0b //
+
+/*****************************************************************************
+ * sub-doubleword cleanup copies *
+ *****************************************************************************/
+
+L_scalarCopy:
+ adds r2, #0x8 // restore length
+ it eq // if this is zero
+ bxeq lr // early out
+
+0: ldrb r3, [r1], #1 // load a byte from source
+ strb r3, [ip], #1 // store to destination
+ subs r2, #1 // subtract one from length
+ bne 0b // if non-zero, repeat
+ bx lr // return
+
+/*****************************************************************************
+ * STMIA loop for 1k-32k buffers *
+ *****************************************************************************/
+
+L_useSTMIA:
+ push {r4-r8,r10,r11}
+.align 3
+0: ldmia r1!, {r3-r8,r10,r11}
+ subs r2, r2, #64
+ stmia ip!, {r3-r8,r10,r11}
+ ldmia r1!, {r3-r8,r10,r11}
+ pld [r1, #64]
+ stmia ip!, {r3-r8,r10,r11}
+ bge 0b
+ pop {r4-r8,r10,r11}
+ b L_vectorCleanup
+
+/*****************************************************************************
+ * Misaligned forward vld1 loop                                             *
+ *****************************************************************************/
+
+// Software alignment fixup to handle source and dest that are relatively
+// misaligned mod 4 bytes. Load two 4-byte aligned double words from source,
+// use vext.8 to extract a double word to store, and perform an 8-byte aligned
+// store to destination.
+
+#define COPY_UNALIGNED(offset) \
+ subs r2, $8 ;\
+ blt 2f ;\
+ vld1.32 {d2,d3}, [r1]! ;\
+ subs r2, $8 ;\
+ blt 1f ;\
+0: vext.8 d0, d2, d3, $(offset);\
+ vmov d2, d3 ;\
+ vld1.32 {d3}, [r1]! ;\
+ subs r2, $8 ;\
+ vst1.64 {d0}, [ip, :64]! ;\
+ bge 0b ;\
+1: vext.8 d0, d2, d3, $(offset);\
+ sub r1, $8 ;\
+ vst1.64 {d0}, [ip, :64]! ;\
+2: add r1, $(offset);\
+ add r2, $8 ;\
+ b L_scalarCopy
+
+L_sourceAligned1:
+ COPY_UNALIGNED(1)
+L_sourceAligned2:
+ COPY_UNALIGNED(2)
+L_sourceAligned3:
+ COPY_UNALIGNED(3)
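
The direction test at the top of _memmove above (subs r3, r0, r1 / cmp r3, r2 / bhs) collapses the overlap check into a single unsigned comparison. A minimal C sketch of that decision, assuming a flat address space and unsigned wrap-around on pointer subtraction; the plain byte loops stand in for the NEON/LDM-STM paths, and the 1k-32k core-register crossover is omitted:

/* Sketch of the copy-direction test used above. After ruling out dst == src,
 * one unsigned comparison of (dst - src) against length distinguishes the
 * cases: if the destination starts inside the source range
 * (0 < dst - src < length), the copy must run back-to-front; otherwise a
 * front-to-back copy is safe. */
#include <stddef.h>
#include <stdint.h>

static void *sketch_memmove(void *dst, const void *src, size_t n)
{
    unsigned char *d = dst;
    const unsigned char *s = src;
    uintptr_t offset = (uintptr_t)d - (uintptr_t)s;   /* wraps if d < s */

    if (offset == 0 || n == 0)
        return dst;                        /* early out, as in the assembly */

    if (offset >= (uintptr_t)n) {          /* bhs L_copyFrontToBack */
        for (size_t i = 0; i < n; ++i)
            d[i] = s[i];
    } else {                               /* overlapping: copy back-to-front */
        for (size_t i = n; i-- > 0; )
            d[i] = s[i];
    }
    return dst;
}
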
--- /dev/null
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/**********************************************************************
+ * Cortex-A8 implementation *
+ **********************************************************************/
+
+// Cortex-A8 implementations of memset( ) and bzero( ). The main loop uses
+// 64-byte NEON stores unless the buffer length exceeds 1k; beyond that point
+// there is little to no speed advantage from NEON (and a slight regression in
+// some measured cases), so we switch to the GPRs.
+//
+// The crossover point should be reevaluated for future architectures.
+//
+// -- Stephen Canon, August 2009
+
+.text
+.syntax unified
+.code 16
+
+// void bzero(void * destination,
+// size_t length);
+//
+// zeros out a buffer length bytes long, beginning at the address destination.
+.thumb_func ___bzero
+.globl ___bzero
+.thumb_func _bzero
+.globl _bzero
+.align 2
+___bzero:
+_bzero:
+ mov r2, r1 // match the API to memset(dest, 0, length)
+ eor r1, r1 // and fall through into memset
+
+// void *memset(void * destination,
+// int value, size_t n);
+//
+// writes value converted to an unsigned char to n successive bytes, beginning
+// at destination.
+
+// Notes on register usage:
+//
+// Throughout this function, registers have nearly constant usage; the pattern
+// is:
+//
+// r0 holds the original destination pointer, unmodified. This value
+// must be returned by the routine, so it is easiest to just leave it
+// in place.
+// r1 holds the value that is being copied into the buffer, in some stage
+// of splattedness. The low byte is guaranteed to always have the value
+// but the higher bytes may or may not contain copies of it.
+// r2 holds the length minus some offset, where the offset is always the
+// number of bytes that the current loop stores per iteration.
+// r3-r6,r8,r10,r11 are used with stmia, and will only ever contain splatted
+// copies of the value to be stored.
+// ip holds a pointer to the lowest byte in the array that has not yet been
+// set to hold value.
+// q0 and q1 hold splatted copies of the value in the vector path, and are
+// otherwise unused.
+
+.thumb_func _memset
+.globl _memset
+.align 2
+_memset:
+ mov ip, r0 // copy destination pointer.
+ subs r2, #0x8 // if length - 8 is negative (i.e. length
+ and r1, #0xff // is less than 8), jump to cleanup path.
+ blt L_scalarCleanup //
+
+ tst ip, #0x7 // if the destination is doubleword
+ beq L_vectorCopy // aligned, jump to fast path.
+
+0: strb r1, [ip], #1 // store one byte at a time until
+ sub r2, #1 // destination pointer is 8 byte aligned.
+ tst ip, #7 //
+ bne 0b //
+
+ cmp r2, #0x0 // if length - 8 is negative,
+ blt L_scalarCleanup // jump to the cleanup code
+
+L_vectorCopy:
+ vdup.8 q0, r1 // splat the byte to be stored across
+ subs r2, #0x38 // q0 and q1, and check if length - 64
+ vmov q1, q0 // is negative; if so, jump to the
+ blt L_vectorCleanup // cleanup code.
+
+ tst ip, #0x38 // if the destination is cacheline
+ beq L_cachelineAligned // aligned, jump to the fast path.
+
+0: vst1.64 {d0}, [ip, :64]! // store one double word at a time until
+ sub r2, #8 // the destination is 64-byte aligned
+ tst ip, #0x38 //
+ bne 0b
+
+ cmp r2, #0x0 // if length - 64 is negative,
+ blt L_vectorCleanup // jump to the cleanup code
+
+L_cachelineAligned:
+ cmp r2, #0x3c0 // if length > 1024
+ bge L_useSTMIA // we use stmia instead
+
+.align 4 // main loop
+0: vst1.64 {q0,q1}, [ip, :256]! // store 32 bytes
+ subs r2, #0x40 // decrement length by 64
+ vst1.64 {q0,q1}, [ip, :256]! // store 32 bytes
+ bge 0b // if length - 64 >= 0, continue
+
+L_vectorCleanup:
+ adds r2, #0x38 // if (length - 8) < 0, goto scalar cleanup
+ blt L_scalarCleanup //
+
+0: subs r2, #8 // store one double word at a time until
+ vst1.64 {d0}, [ip, :64]! // (length - 8) < 0.
+ bge 0b
+
+L_scalarCleanup:
+ adds r2, #8 // restore length
+ beq 1f // early out if zero.
+
+0: strb r1, [ip], #1 // store one byte at a time until length
+ subs r2, #1 // is zero.
+ bne 0b //
+1: bx lr // return.
+
+// STMIA loop for large buffers
+//
+// For stores larger than 1024 bytes, we use STMIA because we can't get enough
+// of a speedup from NEON to offset the higher power draw of the NEON unit.
+//
+// This crossover should be reevaluated on future architectures.
+//
+// We avoid using r7 and r9 even though it's not strictly necessary.
+
+L_useSTMIA:
+ push {r4,r5,r6,r8,r10,r11}
+ orr r1, r1, r1, lsl #8
+ orr r1, r1, r1, lsl #16
+ mov r3, r1
+ mov r4, r1
+ mov r5, r1
+ mov r6, r1
+ mov r8, r1
+ mov r10, r1
+ mov r11, r1
+.align 4
+0: stmia ip!, {r1,r3,r4,r5,r6,r8,r10,r11}
+ subs r2, #0x40
+ stmia ip!, {r1,r3,r4,r5,r6,r8,r10,r11}
+ bge 0b
+ pop {r4,r5,r6,r8,r10,r11}
+ b L_vectorCleanup
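
For the >= 1k STMIA path above, the fill byte is first replicated across a 32-bit word with two orr instructions so that every register written by stmia carries the same pattern memset() would produce. A C sketch of that splat, for illustration:

/* Sketch of the byte "splat" performed before the STMIA loop above
 * (orr r1, r1, r1, lsl #8 ; orr r1, r1, r1, lsl #16): replicate the low
 * byte of the fill value into all four bytes of a 32-bit word so that
 * word-sized stores write the same pattern byte stores would. */
#include <stdint.h>

static inline uint32_t splat_byte(int value)
{
    uint32_t v = (uint32_t)value & 0xffu;  /* and r1, #0xff            */
    v |= v << 8;                           /* orr r1, r1, r1, lsl #8   */
    v |= v << 16;                          /* orr r1, r1, r1, lsl #16  */
    return v;                              /* e.g. 0x2a -> 0x2a2a2a2a  */
}
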
/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* @APPLE_LICENSE_HEADER_END@
*/
+#if defined __thumb2__ && defined __ARM_NEON__
+
+// Use our tuned NEON implementation when it is available. Otherwise fall back
+// on more generic ARM code.
+
+#include "NEON/bcopy.s"
+
+#else // defined __thumb2__ && defined __ARM_NEON__
+
+/*****************************************************************************
+ * ARMv5 and ARMv6 implementation *
+ *****************************************************************************/
+
#include <arm/arch.h>
.text
Lexit:
ldmfd sp!, {r0, r4, r5, r7, pc}
+#endif // defined __thumb2__ && defined __ARM_NEON__
/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
*
* @APPLE_LICENSE_HEADER_END@
*/
+
+#if defined __thumb2__ && defined __ARM_NEON__
+
+// Use our tuned NEON implementation when it is available. Otherwise fall back
+// on more generic ARM code.
+
+#include "NEON/bzero.s"
+
+#else // defined __thumb2__ && defined __ARM_NEON__
#include <mach/machine/asm.h>
#include <architecture/arm/asm_help.h>
b L_lessthan64aligned
X_LEAF(___bzero, _bzero)
+
+#endif // defined __thumb2__ && defined __ARM_NEON__
--- /dev/null
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+// ARM Assembly implementation of memcmp( ) from <string.h>
+// Uses Thumb2 if it is available, otherwise generates ARM code.
+//
+// -- Stephen Canon, August 2009
+//
+// The basic idea is to use word compares instead of byte compares as long as
+// at least four bytes remain to be compared. However, memcmp( ) orders buffers
+// byte by byte -- equivalent to comparing them as big-endian unsigned integers
+// -- and the core is little-endian, so we byte-reverse each loaded word before
+// comparing.
+//
+// If the buffers are not word aligned, or they are shorter than four bytes,
+// we just use a simple byte comparison loop instead.
+//
+// int bcmp(const void *src1, const void *src2, size_t length);
+// int memcmp(const void *src1, const void *src2, size_t length);
+
+#include <arm/arch.h>
+
+ .text
+ .syntax unified
+#if defined __thumb2__
+ .code 16
+ .thumb_func _bcmp
+ .thumb_func _memcmp
+#else
+ .code 32
+#endif
+ .globl _bcmp
+ .globl _memcmp
+ .align 3
+_bcmp:
+_memcmp:
+
+#ifdef _ARM_ARCH_6
+ subs ip, r2, #4 // if length < 4
+ bmi L_useByteCompares // jump to the byte comparison loop
+
+ orr r3, r0, r1 // if the buffers are
+ tst r3, #3 // not word aligned
+ bne L_useByteCompares // jump to the byte comparison loop
+
+.align 3
+L_wordCompare: // Here we know that both buffers are word
+ ldr r2, [r0], #4 // aligned, and (length - 4) > 0, so at least
+ ldr r3, [r1], #4 // four bytes remain to be compared. We load
+ subs ip, #4 // a word from each buffer, and byte reverse
+ bmi L_lastWord // the loaded words. We also decrement the
+ rev r2, r2 // length by four and jump out of this loop if
+ rev r3, r3 // the result is negative. Then we compare the
+ cmp r2, r3 // reversed words, and continue the loop only
+ beq L_wordCompare // if they are equal.
+L_wordsUnequal:
+ ite hi // If the words compared unequal, return +/- 1
+ movhi r0, #1 // according to the result of the comparison.
+ movls r0, #-1 //
+ bx lr //
+L_lastWord:
+ rev r2, r2 // If we just loaded the last complete words
+ rev r3, r3 // from the buffers, byte-reverse them and
+ cmp r2, r3 // compare. If they are unequal, jump to the
+ bne L_wordsUnequal // return path.
+ add r2, ip, #4 // Otherwise, fall into the cleanup code.
+#endif // _ARM_ARCH_6
+
+L_useByteCompares:
+ tst r2, r2 // If the length is exactly zero
+ beq L_returnZero // avoid doing any loads and return zero.
+ mov r3, r0
+.align 3
+L_byteCompareLoop:
+ ldrb r0, [r3], #1 // Load a byte from each buffer, and decrement
+ ldrb ip, [r1], #1 // the length by one. If the decremented
+ subs r2, #1 // length is zero, exit the loop. Otherwise
+ beq L_lastByte // subtract the loaded bytes; if their
+ subs r0, ip // difference is zero, continue the comparison
+ beq L_byteCompareLoop // loop. Otherwise, return their difference.
+ bx lr
+L_returnZero:
+ mov r0, ip
+L_lastByte:
+ sub r0, ip // Return the difference of the final bytes
+ bx lr
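
A C sketch of the word-at-a-time scheme used above, assuming a little-endian core; __builtin_bswap32 plays the role of the rev instruction, and short inputs fall back to the byte loop just as the assembly does (alignment is handled here with memcpy loads, whereas the assembly requires both buffers to be word aligned for the fast path):

/* Byte-reversing both words makes an unsigned integer compare agree with
 * memcmp's byte-by-byte (lexicographic) ordering. */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static int sketch_memcmp_words(const void *a, const void *b, size_t n)
{
    const unsigned char *p = a, *q = b;

    while (n >= 4) {
        uint32_t x, y;
        memcpy(&x, p, 4);                 /* ldr r2, [r0], #4 */
        memcpy(&y, q, 4);                 /* ldr r3, [r1], #4 */
        x = __builtin_bswap32(x);         /* rev r2, r2 */
        y = __builtin_bswap32(y);         /* rev r3, r3 */
        if (x != y)
            return x > y ? 1 : -1;        /* movhi r0, #1 / movls r0, #-1 */
        p += 4; q += 4; n -= 4;
    }
    while (n--) {                         /* byte cleanup loop */
        int d = *p++ - *q++;
        if (d)
            return d;
    }
    return 0;
}
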
return OSAtomicCompareAndSwap32(oldValue, newValue, theValue);
}
+bool
+OSAtomicCompareAndSwapInt(int oldValue, int newValue, volatile int *theValue)
+{
+ return OSAtomicCompareAndSwap32(oldValue, newValue, theValue);
+}
+
+bool
+OSAtomicCompareAndSwapIntBarrier(int oldValue, int newValue, volatile int *theValue)
+{
+ return OSAtomicCompareAndSwap32(oldValue, newValue, theValue);
+}
+
+bool
+OSAtomicCompareAndSwapLong(long oldValue, long newValue, volatile long *theValue)
+{
+ return OSAtomicCompareAndSwap32(oldValue, newValue, (volatile int32_t *)theValue);
+}
+
+bool
+OSAtomicCompareAndSwapLongBarrier(long oldValue, long newValue, volatile long *theValue)
+{
+ return OSAtomicCompareAndSwap32(oldValue, newValue, (volatile int32_t *)theValue);
+}
+
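
The wrappers above simply forward to the 32-bit compare-and-swap, which is exact on this ILP32 target where int and long are both 32 bits. A hypothetical usage sketch of the Long variant in a standard CAS retry loop:

/* Usage sketch (not part of the patch): a CAS-based atomic add built on the
 * OSAtomicCompareAndSwapLong wrapper added above. */
#include <libkern/OSAtomic.h>

static long atomic_add_long(volatile long *value, long delta)
{
    long old, new;
    do {
        old = *value;                 /* snapshot the current value      */
        new = old + delta;            /* compute the desired new value   */
    } while (!OSAtomicCompareAndSwapLong(old, new, value));  /* retry on races */
    return new;
}
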
bool OSAtomicCompareAndSwap64( int64_t oldValue, int64_t newValue, volatile int64_t *theValue )
{
bool result;
---- gdtoa-misc.c.orig 2008-11-05 15:59:34.000000000 -0800
-+++ gdtoa-misc.c 2008-11-05 16:05:28.000000000 -0800
+--- gdtoa-misc.c.orig 2010-01-07 22:03:21.000000000 -0800
++++ gdtoa-misc.c 2010-01-07 22:25:33.000000000 -0800
@@ -29,9 +29,20 @@ THIS SOFTWARE.
/* Please send bug reports to David M. Gay (dmg at acm dot org,
* with " at " changed at "@" and " dot " changed to "."). */
Bigint *
Balloc
#ifdef KR_headers
-@@ -53,8 +84,25 @@ Balloc
+@@ -53,9 +84,26 @@ Balloc
#ifndef Omit_Private_Memory
unsigned int len;
#endif
+ }
+#else /* !GDTOA_TSD */
ACQUIRE_DTOA_LOCK(0);
+- if ( (rv = freelist[k]) !=0) {
+#endif /* GDTOA_TSD */
- if ( (rv = freelist[k]) !=0) {
++ if (k <= Kmax && (rv = freelist[k]) !=0) {
freelist[k] = rv->next;
}
+ else {
+@@ -65,7 +113,7 @@ Balloc
+ #else
+ len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1)
+ /sizeof(double);
+- if (pmem_next - private_mem + len <= PRIVATE_mem) {
++ if (k <= Kmax && pmem_next - private_mem + len <= PRIVATE_mem) {
+ rv = (Bigint*)pmem_next;
+ pmem_next += len;
+ }
@@ -75,7 +123,9 @@ Balloc
rv->k = k;
rv->maxwds = x;
rv->sign = rv->wds = 0;
return rv;
}
-@@ -89,10 +139,16 @@ Bfree
+@@ -89,10 +139,20 @@ Bfree
#endif
{
if (v) {
+- ACQUIRE_DTOA_LOCK(0);
+- v->next = freelist[v->k];
+- freelist[v->k] = v;
+- FREE_DTOA_LOCK(0);
++ if (v->k > Kmax)
++ free((void*)v);
++ else {
+#ifdef GDTOA_TSD
-+ Bigint **freelist = (Bigint **)pthread_getspecific(gdtoa_tsd_key);
++ Bigint **freelist = (Bigint **)pthread_getspecific(gdtoa_tsd_key);
+#else /* !GDTOA_TSD */
- ACQUIRE_DTOA_LOCK(0);
++ ACQUIRE_DTOA_LOCK(0);
+#endif /* GDTOA_TSD */
- v->next = freelist[v->k];
- freelist[v->k] = v;
++ v->next = freelist[v->k];
++ freelist[v->k] = v;
+#ifndef GDTOA_TSD
- FREE_DTOA_LOCK(0);
++ FREE_DTOA_LOCK(0);
+#endif /* GDTOA_TSD */
++ }
}
}
#else /* !GDTOA_TSD */
ACQUIRE_DTOA_LOCK(0);
#endif /* GDTOA_TSD */
- if ( (rv = freelist[k]) !=0) {
+ if (k <= Kmax && (rv = freelist[k]) !=0) {
freelist[k] = rv->next;
}
else {
#else
len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1)
/sizeof(double);
- if (pmem_next - private_mem + len <= PRIVATE_mem) {
+ if (k <= Kmax && pmem_next - private_mem + len <= PRIVATE_mem) {
rv = (Bigint*)pmem_next;
pmem_next += len;
}
#endif
{
if (v) {
+ if (v->k > Kmax)
+ free((void*)v);
+ else {
#ifdef GDTOA_TSD
- Bigint **freelist = (Bigint **)pthread_getspecific(gdtoa_tsd_key);
+ Bigint **freelist = (Bigint **)pthread_getspecific(gdtoa_tsd_key);
#else /* !GDTOA_TSD */
- ACQUIRE_DTOA_LOCK(0);
+ ACQUIRE_DTOA_LOCK(0);
#endif /* GDTOA_TSD */
- v->next = freelist[v->k];
- freelist[v->k] = v;
+ v->next = freelist[v->k];
+ freelist[v->k] = v;
#ifndef GDTOA_TSD
- FREE_DTOA_LOCK(0);
+ FREE_DTOA_LOCK(0);
#endif /* GDTOA_TSD */
+ }
}
}
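
Taken together, the gdtoa changes above do two things: Balloc/Bfree keep their small-block freelist in per-thread storage (GDTOA_TSD) so the hot path needs no lock, and size classes above Kmax bypass the freelist entirely, going straight to malloc/free. A minimal sketch of that allocation pattern, with illustrative names (node_t, KMAX, tsd_key) rather than the gdtoa identifiers, and assuming the per-thread freelist array was allocated and registered in a one-time init as the patch does:

/* All names illustrative; this is the shape of the pattern, not gdtoa code. */
#include <pthread.h>
#include <stdlib.h>

#define KMAX 15                          /* illustrative size-class cap */

typedef struct node { struct node *next; int k; } node_t;

static pthread_key_t tsd_key;            /* created once via pthread_key_create;
                                            the value points at node_t *freelist[KMAX+1] */

static node_t *class_alloc(int k, size_t bytes)
{
    node_t **freelist = pthread_getspecific(tsd_key);  /* per-thread: no lock */
    node_t *rv = NULL;

    if (k <= KMAX && freelist != NULL && (rv = freelist[k]) != NULL) {
        freelist[k] = rv->next;          /* reuse a cached block */
    } else {
        rv = malloc(bytes);              /* large class or empty list: plain heap */
    }
    if (rv != NULL)
        rv->k = k;
    return rv;
}

static void class_free(node_t *v)
{
    if (v == NULL)
        return;
    node_t **freelist = pthread_getspecific(tsd_key);
    if (v->k > KMAX || freelist == NULL) {
        free(v);                         /* too big to cache (or no cache yet) */
        return;
    }
    v->next = freelist[v->k];            /* push back onto this thread's list */
    freelist[v->k] = v;
}
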
time_t asl_parse_time(const char *);
const char *asl_syslog_faciliy_num_to_name(int n);
__private_extern__ asl_client_t *_asl_open_default();
+__private_extern__ int _asl_send_level_message(aslclient ac, aslmsg msg, int level, const char *message);
/* notify SPI */
uint32_t notify_register_plain(const char *name, int *out_token);
if (level < ASL_LEVEL_EMERG) level = ASL_LEVEL_EMERG;
if (level > ASL_LEVEL_DEBUG) level = ASL_LEVEL_DEBUG;
- str = NULL;
- asprintf(&str, "%d", level);
- if (str == NULL)
- {
- if ((msg != NULL) && (my_msg != 0)) asl_free(msg);
- return -1;
- }
-
- asl_set(msg, ASL_KEY_LEVEL, str);
- free(str);
-
/* insert strerror for %m */
len = 0;
elen = 0;
return -1;
}
- asl_set(msg, ASL_KEY_MSG, str);
+ status = _asl_send_level_message(ac, (aslmsg)msg, level, str);
free(str);
- status = asl_send(ac, (aslmsg)msg);
-
if ((msg != NULL) && (my_msg != 0)) asl_free(msg);
return status;
}
}
/*
- * asl_send: send a message
+ * asl_send (internal version): send a message
* This routine may be used instead of asl_log() or asl_vlog() if asl_set()
* has been used to set all of a message's attributes.
- * msg: an aslmsg
* returns 0 for success, non-zero for failure
*/
-int
-asl_send(aslclient ac, aslmsg msg)
+__private_extern__ int
+_asl_send_level_message(aslclient ac, aslmsg msg, int level, const char *message)
{
char *str, *out_raw;
caddr_t out;
- uint32_t i, len, outlen, level, lmask, outstatus, filter, check, senderx, facilityx;
+ uint32_t i, len, outlen, lmask, outstatus, filter, check, senderx, facilityx;
uint64_t v64;
const char *val;
char *name, *x;
int status, rc_filter;
asl_client_t *asl;
int use_global_lock;
- asl_msg_t *mt;
+ asl_msg_t *mt, *tmp_msg;
char hname[_POSIX_HOST_NAME_MAX];
kern_return_t kstatus;
if (msg == NULL) return 0;
- level = ASL_LEVEL_DEBUG;
-
val = asl_get(msg, ASL_KEY_LEVEL);
if (val != NULL) level = atoi(val);
rc_filter = 1;
}
+ /*
+ * Copy the message to tmp_msg to make setting values thread-safe
+ */
+ tmp_msg = calloc(1, sizeof(asl_msg_t));
+ if (tmp_msg == NULL) return -1;
+
+ tmp_msg->type = ASL_TYPE_MSG;
+
+ mt = (asl_msg_t *)msg;
+ for (i = 0; i < mt->count; i++)
+ {
+ asl_set(tmp_msg, mt->key[i], mt->val[i]);
+ }
+
+ /*
+ * Set Level and Message from parameters.
+ */
+ if (message != NULL) asl_set(tmp_msg, ASL_KEY_MSG, message);
+ asl_set(tmp_msg, ASL_KEY_LEVEL, _asl_level_string(level));
+
/*
* Time, TimeNanoSec, Host, PID, UID, and GID values get set here
*/
asprintf(&str, "%lu", tval.tv_sec);
if (str != NULL)
{
- asl_set(msg, ASL_KEY_TIME, str);
+ asl_set(tmp_msg, ASL_KEY_TIME, str);
free(str);
str = NULL;
}
asprintf(&str, "%lu", tval.tv_usec * 1000);
if (str != NULL)
{
- asl_set(msg, ASL_KEY_TIME_NSEC, str);
+ asl_set(tmp_msg, ASL_KEY_TIME_NSEC, str);
free(str);
str = NULL;
}
asprintf(&str, "%lu", tick);
if (str != NULL)
{
- asl_set(msg, ASL_KEY_TIME, str);
+ asl_set(tmp_msg, ASL_KEY_TIME, str);
free(str);
str = NULL;
}
memset(&hname, 0, _POSIX_HOST_NAME_MAX);
if (gethostname(hname, _POSIX_HOST_NAME_MAX) == 0)
{
- asl_set(msg, ASL_KEY_HOST, hname);
+ asl_set(tmp_msg, ASL_KEY_HOST, hname);
}
str = NULL;
asprintf(&str, "%u", getpid());
if (str != NULL)
{
- asl_set(msg, ASL_KEY_PID, str);
+ asl_set(tmp_msg, ASL_KEY_PID, str);
free(str);
}
asprintf(&str, "%d", getuid());
if (str != NULL)
{
- asl_set(msg, ASL_KEY_UID, str);
+ asl_set(tmp_msg, ASL_KEY_UID, str);
free(str);
}
asprintf(&str, "%u", getgid());
if (str != NULL)
{
- asl_set(msg, ASL_KEY_GID, str);
+ asl_set(tmp_msg, ASL_KEY_GID, str);
free(str);
}
senderx = (uint32_t)-1;
facilityx = (uint32_t)-1;
- mt = (asl_msg_t *)msg;
- for (i = 0; (i < mt->count) && ((senderx == (uint32_t)-1) || (facilityx == (uint32_t)-1)); i++)
+ for (i = 0; (i < tmp_msg->count) && ((senderx == (uint32_t)-1) || (facilityx == (uint32_t)-1)); i++)
{
- if (mt->key[i] == NULL) continue;
- if (streq(mt->key[i], ASL_KEY_SENDER)) senderx = i;
- else if (streq(mt->key[i], ASL_KEY_FACILITY)) facilityx = i;
+ if (tmp_msg->key[i] == NULL) continue;
+ if (streq(tmp_msg->key[i], ASL_KEY_SENDER)) senderx = i;
+ else if (streq(tmp_msg->key[i], ASL_KEY_FACILITY)) facilityx = i;
}
/*
* Set Sender if needed
*/
- if ((senderx == (uint32_t)-1) || (mt->val[senderx] == NULL))
+ if ((senderx == (uint32_t)-1) || (tmp_msg->val[senderx] == NULL))
{
if ((ac != NULL) && (ac->name != NULL))
{
/* Use the Sender name from the client handle */
- asl_set(msg, ASL_KEY_SENDER, ac->name);
+ asl_set(tmp_msg, ASL_KEY_SENDER, ac->name);
}
else
{
}
}
- if (_asl_global.sender != NULL) asl_set(msg, ASL_KEY_SENDER, _asl_global.sender);
- else asl_set(msg, ASL_KEY_SENDER, "Unknown");
+ if (_asl_global.sender != NULL) asl_set(tmp_msg, ASL_KEY_SENDER, _asl_global.sender);
+ else asl_set(tmp_msg, ASL_KEY_SENDER, "Unknown");
}
}
/*
* Set Facility
*/
- if ((facilityx == (uint32_t)-1) || (mt->val[facilityx] == NULL))
+ if ((facilityx == (uint32_t)-1) || (tmp_msg->val[facilityx] == NULL))
{
if ((ac != NULL) && (ac->facility != NULL))
{
/* Use the Facility name from the client handle */
- asl_set(msg, ASL_KEY_FACILITY, ac->facility);
+ asl_set(tmp_msg, ASL_KEY_FACILITY, ac->facility);
}
}
val = asl_get(msg, ASL_KEY_OPTION);
if (val == NULL)
{
- asl_set(msg, ASL_KEY_OPTION, ASL_OPT_STORE);
+ asl_set(tmp_msg, ASL_KEY_OPTION, ASL_OPT_STORE);
}
else
{
asprintf(&str, "%s %s", ASL_OPT_STORE, val);
if (str != NULL)
{
- asl_set(msg, ASL_KEY_OPTION, str);
+ asl_set(tmp_msg, ASL_KEY_OPTION, str);
free(str);
str = NULL;
}
if ((filter != 0) && ((filter & lmask) != 0))
{
len = 0;
- out_raw = asl_msg_to_string((asl_msg_t *)msg, &len);
+ out_raw = asl_msg_to_string(tmp_msg, &len);
if ((out_raw != NULL) && (len != 0))
{
if (asl->fd_list[i] < 0) continue;
len = 0;
- out = asl_format_message(msg, asl->fd_mfmt[i], asl->fd_tfmt[i], asl->fd_encoding[i], &len);
+ out = asl_format_message(tmp_msg, asl->fd_mfmt[i], asl->fd_tfmt[i], asl->fd_encoding[i], &len);
if (out == NULL) continue;
status = write(asl->fd_list[i], out, len - 1);
free(out);
}
+ asl_free((aslmsg)tmp_msg);
+
if (use_global_lock != 0) pthread_mutex_unlock(&_asl_global.lock);
return outstatus;
}
+/*
+ * asl_send: send a message
+ * returns 0 for success, non-zero for failure
+ */
+int
+asl_send(aslclient ac, aslmsg msg)
+{
+ return _asl_send_level_message(ac, msg, ASL_LEVEL_DEBUG, NULL);
+}
+
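
Because _asl_send_level_message now applies Level, Message, Time, and the other per-send keys to a private copy (tmp_msg) instead of mutating the caller's message, several threads can log through one shared template message without racing in asl_set. A usage sketch of the public API this protects (the calls are standard <asl.h>; the tool name and facility are made up):

/* Usage sketch: a shared template message plus per-call level and text. */
#include <asl.h>

void log_example(void)
{
    aslclient client = asl_open("MyTool", "com.example.mytool", 0);
    aslmsg msg = asl_new(ASL_TYPE_MSG);

    asl_set(msg, ASL_KEY_FACILITY, "com.example.mytool");
    asl_log(client, msg, ASL_LEVEL_NOTICE, "worker %d finished", 7);

    asl_free(msg);
    asl_close(client);
}
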
char *
asl_msg_string(aslmsg a)
{
return CPU_NUMBER() & (TINY_MAX_MAGAZINES - 1);
}
+#elif defined(__arm__)
+
+static INLINE mag_index_t
+mag_get_thread_index(szone_t *szone)
+{
+ return 0;
+}
+
#else
#warning deriving magazine index from pthread_self() [want processor number]
static int
__enter_frames_in_table(backtrace_uniquing_table *uniquing_table, uint64_t *foundIndex, mach_vm_address_t *frames, int32_t count)
{
+	// The hash values need to be the same size as the addresses (because we use the value -1); for clarity, define a new type
+ typedef mach_vm_address_t hash_index_t;
+
mach_vm_address_t thisPC;
- uint64_t hash, uParent = (uint64_t)(-1ll), modulus = (uniquing_table->numNodes-uniquing_table->untouchableNodes-1);
+ hash_index_t hash, uParent = (hash_index_t)(-1ll), modulus = (uniquing_table->numNodes-uniquing_table->untouchableNodes-1);
int32_t collisions, lcopy = count, returnVal = 1;
- uint64_t hash_multiplier = ((uniquing_table->numNodes - uniquing_table->untouchableNodes)/(uniquing_table->max_collide*2+1));
+ hash_index_t hash_multiplier = ((uniquing_table->numNodes - uniquing_table->untouchableNodes)/(uniquing_table->max_collide*2+1));
mach_vm_address_t *node;
while (--lcopy >= 0) {
thisPC = frames[lcopy];
loc = &__global_locale;
else if (loc == &__c_locale) {
*new = __c_locale;
+ new->__refcount = 1;
+ new->__free_extra = (__free_extra_t)_releaselocale;
+ new->__lock = LOCK_INITIALIZER;
return new;
}
XL_LOCK(loc);
errno = EINVAL;
return NULL;
}
- if (loc == &__global_locale) /* should never happen */
- loc = LC_GLOBAL_LOCALE;
+ if (loc == LC_GLOBAL_LOCALE ||
+ loc == &__global_locale) /* should never happen */
+ loc = NULL;
+ XL_RETAIN(loc);
orig = pthread_getspecific(__locale_key);
- pthread_setspecific(__locale_key, loc == LC_GLOBAL_LOCALE ? NULL : loc);
+ pthread_setspecific(__locale_key, loc);
+ XL_RELEASE(orig);
}
return (orig ? orig : LC_GLOBAL_LOCALE);
}
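
With the XL_RETAIN/XL_RELEASE pair added above, the thread-specific slot now holds its own reference to the installed locale, and the previously installed one is released when it is replaced. The usual POSIX usage pattern then looks like this (a sketch using the standard xlocale API, not code from this patch):

/* Usage sketch: install a locale for the current thread, do work, restore. */
#include <locale.h>
#include <xlocale.h>

void with_c_locale(void (*work)(void))
{
    locale_t c = newlocale(LC_ALL_MASK, "C", NULL);
    locale_t previous = uselocale(c);   /* install for this thread only */

    work();                             /* runs with the "C" locale */

    uselocale(previous);                /* restore the previous locale */
    freelocale(c);                      /* drop the creator's reference */
}
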
__private_extern__
void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
-__private_extern__
+__private_extern__
void _pthread_wqthread(pthread_t self, mach_port_t kport, void * stackaddr, pthread_workitem_t item, int reuse);
#define PTHREAD_START_CUSTOM 0x01000000
if ((pflags & PTHREAD_START_CUSTOM) == 0) {
stackaddr = (char *)self;
_pthread_struct_init(self, attrs, stackaddr, stacksize, 1, 1);
- #if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
_pthread_set_self(self);
- #endif
+#endif
LOCK(_pthread_list_lock);
if (pflags & PTHREAD_START_SETSCHED) {
self->policy = ((pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK);
self->detached |= PTHREAD_CREATE_DETACHED;
}
} else {
- #if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
_pthread_set_self(self);
- #endif
+#endif
LOCK(_pthread_list_lock);
}
self->kernel_thread = kport;
__oldstyle = 1;
}
#endif
-#if defined(__arm__)
- __oldstyle = 1;
-#endif
#if defined(_OBJC_PAGE_BASE_ADDRESS)
{
mig_init(1); /* enable multi-threaded mig interfaces */
if (__oldstyle == 0) {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
__bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)), _pthread_start, &workq_targetconc[0], (__uint64_t)(&thread->tsd[__PTK_LIBDISPATCH_KEY0]) - (__uint64_t)thread);
#else
__bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)), NULL, &workq_targetconc[0], (__uint64_t)&thread->tsd[__PTK_LIBDISPATCH_KEY0] - (__uint64_t)thread);
void
pthread_workqueue_atfork_child(void)
{
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
/*
* NOTE: workq additions here
* are for i386,x86_64 only as
pthread_workqueue_t wq;
if (kernel_workq_setup == 0) {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
__bsdthread_register(thread_start, start_wqthread, round_page(sizeof(struct _pthread)),NULL,NULL, NULL);
#else
__bsdthread_register(_pthread_start, _pthread_wqthread, round_page(sizeof(struct _pthread)),NULL,NULL, NULL);
/* These are not joinable threads */
self->detached &= ~PTHREAD_CREATE_JOINABLE;
self->detached |= PTHREAD_CREATE_DETACHED;
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
_pthread_set_self(self);
#endif
#if WQ_TRACE
pthread_workqueue_t wq;
pthread_workqueue_head_t headp;
-#if defined(__arm__)
- /* not supported under arm */
- return(ENOTSUP);
-#endif
#if defined(__ppc__)
IF_ROSETTA() {
return(ENOTSUP);
#elif defined(__ppc64__)
register void **__pthread_tsd asm ("r13");
ret = __pthread_tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];
+#elif defined(__arm__) && defined(_ARM_ARCH_6) && !defined(_ARM_ARCH_7) && defined(__thumb__) && !defined(__OPTIMIZE__)
+ ret = pthread_getspecific(slot);
#elif defined(__arm__) && defined(_ARM_ARCH_6)
- void **__pthread_tsd;
- __asm__ ("mrc p15, 0, %0, c13, c0, 3" : "=r"(__pthread_tsd));
- ret = __pthread_tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];
+ void **__pthread_tsd;
+ __asm__ ("mrc p15, 0, %0, c13, c0, 3" : "=r"(__pthread_tsd));
+ ret = __pthread_tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];
#elif defined(__arm__) && !defined(_ARM_ARCH_6)
register void **__pthread_tsd asm ("r9");
ret = __pthread_tsd[slot + (_PTHREAD_TSD_OFFSET / sizeof(void *))];