From e3cf15b684ccf1496b6a682c8d46192674711eb2 Mon Sep 17 00:00:00 2001 From: Apple Date: Thu, 18 Sep 2003 19:11:01 +0000 Subject: [PATCH] Libc-262.3.2.tar.gz --- Makefile | 2 - Makefile.xbs | 4 - gen/Makefile.inc | 6 +- gen/cache.c | 18 -- gen/sysctlnametomib.c | 52 ---- i386/gen/Makefile.inc | 25 +- i386/gen/icacheinval.s | 7 - i386/pthreads/Makefile.inc | 2 - i386/string/Makefile.inc | 11 - ppc/gen/Makefile.inc | 19 +- ppc/gen/{icacheinval.s => bcmp.c} | 30 +- ppc/gen/bcopy.s | 28 +- ppc/gen/bzero.s | 79 +---- .../gen/strcat.c | 39 +-- ppc/gen/strcmp.c | 56 ++++ gen/OSSystemInfo.c => ppc/gen/strcpy.c | 66 ++--- ppc/{string => gen}/strlen.s | 5 +- .../init_cpu_capabilities.c => gen/strncat.c} | 42 ++- ppc/gen/strncmp.c | 59 ++++ ppc/gen/strncpy.c | 64 ++++ ppc/mach/mach_absolute_time.s | 13 +- ppc/pthreads/Makefile.inc | 2 - ppc/string/Makefile.inc | 18 -- ppc/string/memcmp.s | 164 ----------- ppc/string/strcat.s | 168 ----------- ppc/string/strcmp.s | 166 ----------- ppc/string/strcpy.s | 118 -------- ppc/string/strlcat.s | 274 ------------------ ppc/string/strlcpy.s | 183 ------------ ppc/string/strncat.s | 217 -------------- ppc/string/strncmp.s | 188 ------------ ppc/string/strncpy.s | 221 -------------- ppc/sys/ur_cthread.s | 9 +- pthreads/Makefile.inc | 5 - pthreads/lock.s | 37 ++- pthreads/pthread.c | 45 ++- {i386/string => string}/strcmp.s | 15 + sys/gettimeofday.c | 56 ++-- sys/sigtramp.c | 159 +--------- 39 files changed, 390 insertions(+), 2282 deletions(-) delete mode 100644 gen/cache.c delete mode 100644 gen/sysctlnametomib.c delete mode 100644 i386/gen/icacheinval.s delete mode 100644 i386/pthreads/Makefile.inc delete mode 100644 i386/string/Makefile.inc rename ppc/gen/{icacheinval.s => bcmp.c} (70%) rename i386/pthreads/init_cpu_capabilities.c => ppc/gen/strcat.c (62%) create mode 100644 ppc/gen/strcmp.c rename gen/OSSystemInfo.c => ppc/gen/strcpy.c (53%) rename ppc/{string => gen}/strlen.s (99%) rename ppc/{pthreads/init_cpu_capabilities.c => gen/strncat.c} (57%) create mode 100644 ppc/gen/strncmp.c create mode 100644 ppc/gen/strncpy.c delete mode 100644 ppc/pthreads/Makefile.inc delete mode 100644 ppc/string/Makefile.inc delete mode 100644 ppc/string/memcmp.s delete mode 100644 ppc/string/strcat.s delete mode 100644 ppc/string/strcmp.s delete mode 100644 ppc/string/strcpy.s delete mode 100644 ppc/string/strlcat.s delete mode 100644 ppc/string/strlcpy.s delete mode 100644 ppc/string/strncat.s delete mode 100644 ppc/string/strncmp.s delete mode 100644 ppc/string/strncpy.s rename {i386/string => string}/strcmp.s (92%) diff --git a/Makefile b/Makefile index d640105..069ccc4 100644 --- a/Makefile +++ b/Makefile @@ -19,8 +19,6 @@ MACHINE_ARCH = ppc CFLAGS += -faltivec -DALTIVEC .endif CFLAGS += -DNOID -DALL_STATE -I${.CURDIR}/include -I${.CURDIR}/include/objc -PRIVINC = ${NEXT_ROOT}/System/Library/Frameworks/System.framework/PrivateHeaders -CFLAGS += -I${PRIVINC} CFLAGS += -DLIBC_MAJOR=${SHLIB_MAJOR} -no-cpp-precomp -force_cpusubtype_ALL CFLAGS += -arch ${MACHINE_ARCH} -fno-common -pipe -Wmost -g CFLAGS += -finline-limit=5000 diff --git a/Makefile.xbs b/Makefile.xbs index d9cdfa6..7409a24 100644 --- a/Makefile.xbs +++ b/Makefile.xbs @@ -84,10 +84,6 @@ lib${LIB}.a:: ${SOBJS} CLEANFILES += ${DOBJS} libc_static.a libc_profile.a libc_debug.a -# XXX not used here yet, but used in Makefile -SYSTEMFRAMEWORK = ${DESTDIR}/System/Library/Frameworks/System.framework -PRIVHDRS = ${SYSTEMFRAMEWORK}/Versions/B/PrivateHeaders - installhdrs: gen_mig_defs mkdir -p ${DESTDIR}/usr/include/arpa mkdir -p ${DESTDIR}/usr/include/protocols diff --git a/gen/Makefile.inc b/gen/Makefile.inc index f0842ef..ed08ce0 100644 --- a/gen/Makefile.inc +++ b/gen/Makefile.inc @@ -9,11 +9,11 @@ CFLAGS+= -I${.CURDIR}/pthreads SRCS += NSSystemDirectories.c getpagesize.c siginterrupt.c \ alarm.c getpass.c siglist.c \ assert.c getttyent.c signal.c \ - cache.c getusershell.c sigsetops.c \ + getusershell.c sigsetops.c \ clock.c getvfsbyname.c sleep.c \ closedir.c stack_logging.c \ confstr.c strchr.c \ - crypt.c OSSystemInfo.c sysconf.c \ + crypt.c sysconf.c \ ctermid.c isatty.c sysctl.c \ isnan.c sysctlbyname.c \ daemon.c malloc.c syslog.c \ @@ -34,7 +34,7 @@ SRCS += NSSystemDirectories.c getpagesize.c siginterrupt.c \ gethostname.c seekdir.c wait3.c \ getloadavg.c sethostname.c waitpid.c \ getlogin.c setlogin.c zone.c \ - getmntinfo.c setmode.c sysctlnametomib.c \ + getmntinfo.c setmode.c \ _rand48.c erand48.c lcong48.c mrand48.c seed48.c drand48.c jrand48.c \ nrand48.c srand48.c lrand48.c basename.c dirname.c arc4random.c \ strtofflags.c lockf.c readdir_r.c ulimit.c diff --git a/gen/cache.c b/gen/cache.c deleted file mode 100644 index 2c8d286..0000000 --- a/gen/cache.c +++ /dev/null @@ -1,18 +0,0 @@ -/* cache control */ - -#include -#include -#include -#include - -static const unsigned int kCacheOptionsSyncForExecution = 0x1; - -int -sys_cache_control(unsigned int options, caddr_t start, size_t len) -{ - if (options == kCacheOptionsSyncForExecution) { - sys_icache_invalidate(start, len); - return 0; - } - return ENOTSUP; -} diff --git a/gen/sysctlnametomib.c b/gen/sysctlnametomib.c deleted file mode 100644 index 54270d6..0000000 --- a/gen/sysctlnametomib.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2001 The FreeBSD Project. All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE FREEBSD PROJECT ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE FREEBSD PROJECT BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include - -/* - * This function uses a presently undocumented interface to the kernel - * to walk the tree and get the type so it can print the value. - * This interface is under work and consideration, and should probably - * be killed with a big axe by the first person who can find the time. - * (be aware though, that the proper interface isn't as obvious as it - * may seem, there are various conflicting requirements. - */ -int -sysctlnametomib(const char *name, int *mibp, size_t *sizep) -{ - int oid[2]; - int error; - - oid[0] = 0; - oid[1] = 3; - - *sizep *= sizeof (int); - error = sysctl(oid, 2, mibp, sizep, (void *)name, strlen(name)); - *sizep /= sizeof (int); - return (error); -} diff --git a/i386/gen/Makefile.inc b/i386/gen/Makefile.inc index 0df11f0..ed150c0 100644 --- a/i386/gen/Makefile.inc +++ b/i386/gen/Makefile.inc @@ -1,22 +1,5 @@ -SRCS+= abs.c \ - ecvt.c \ - memcpy.s \ - strcat.c \ - strncat.c \ - bcmp.c \ - ffs.c \ - memmove.s \ - strcmp.c \ - strncmp.c \ - bcopy.s \ - insque.c \ - remque.c \ - strcpy.c \ - strncpy.c \ - bzero.s \ - icacheinval.s \ - isinf.c \ - setjmperr.c \ - strlen.c \ - mcount.s \ +SRCS+= abs.c ecvt.c memcpy.s strcat.c strncat.c \ + bcmp.c ffs.c memmove.s strcmp.c strncmp.c \ + bcopy.s insque.c remque.c strcpy.c strncpy.c \ + bzero.s isinf.c setjmperr.c strlen.c mcount.s \ bcopy_init.c diff --git a/i386/gen/icacheinval.s b/i386/gen/icacheinval.s deleted file mode 100644 index 1c0e07c..0000000 --- a/i386/gen/icacheinval.s +++ /dev/null @@ -1,7 +0,0 @@ - .text - .align 4, 0x00 - -/* void sys_icache_invalidate(addr_t start, int length) */ -.globl _sys_icache_invalidate -_sys_icache_invalidate: - ret diff --git a/i386/pthreads/Makefile.inc b/i386/pthreads/Makefile.inc deleted file mode 100644 index 6663dd6..0000000 --- a/i386/pthreads/Makefile.inc +++ /dev/null @@ -1,2 +0,0 @@ -MDSRCS += \ - init_cpu_capabilities.c diff --git a/i386/string/Makefile.inc b/i386/string/Makefile.inc deleted file mode 100644 index e8b30d7..0000000 --- a/i386/string/Makefile.inc +++ /dev/null @@ -1,11 +0,0 @@ -# $Version$ -# -# i386-optimised string functions. -# -# -# -#MDSRCS += \ -# strcmp.s - - - diff --git a/ppc/gen/Makefile.inc b/ppc/gen/Makefile.inc index 0d80d77..6344540 100644 --- a/ppc/gen/Makefile.inc +++ b/ppc/gen/Makefile.inc @@ -1,13 +1,6 @@ -MDSRCS += \ - abs.s \ - bcopy.s \ - bzero.s \ - ecvt.c \ - ffs.s \ - fp.h \ - icacheinval.s \ - insque.c \ - isinf.c \ - mcount.s \ - remque.c \ - setjmperr.c +SRCS += abs.s bzero.s mcount.s strcmp.c strncmp.c \ + ecvt.c remque.c strcpy.c strncpy.c \ + bcmp.c ffs.s insque.c setjmperr.c strlen.s \ + bcopy.s fp.h isinf.c strcat.c strncat.c \ + mcount.s + diff --git a/ppc/gen/icacheinval.s b/ppc/gen/bcmp.c similarity index 70% rename from ppc/gen/icacheinval.s rename to ppc/gen/bcmp.c index 6fd589b..abdb714 100644 --- a/ppc/gen/icacheinval.s +++ b/ppc/gen/bcmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,15 +22,23 @@ * * @APPLE_LICENSE_HEADER_END@ */ +/* Copyright (c) 1992, 1997 NeXT Software, Inc. All rights reserved. + * + * File: libc/gen/ppc/bcmp.c + * + * Byte-compare routine. + * + * HISTORY + * 24-Jan-1997 Umesh Vaishampayan (umeshv@NeXT.com) + * Ported to PPC. + */ + +#import -#define __APPLE_API_PRIVATE -#include -#undef __APPLE_API_PRIVATE +#undef bcmp -/* sys_icache_invalidate(char *start, long len) */ - - .text - .globl _sys_icache_invalidate - .align 2 -_sys_icache_invalidate: - ba _COMM_PAGE_FLUSH_ICACHE +int +bcmp(const void *b1, const void *b2, size_t length) +{ + return memcmp(b1, b2, length); +} diff --git a/ppc/gen/bcopy.s b/ppc/gen/bcopy.s index e335357..e25eb72 100644 --- a/ppc/gen/bcopy.s +++ b/ppc/gen/bcopy.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,29 +22,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ - -#define __APPLE_API_PRIVATE -#include -#undef __APPLE_API_PRIVATE - - // These functions have migrated to the comm page. - -.text -.globl _bcopy -.globl _memcpy -.globl _memmove - - .align 5 -_bcopy: // void bcopy(const void *src, void *dst, size_t len) - ba _COMM_PAGE_BCOPY - - .align 5 -_memcpy: // void* memcpy(void *dst, void *src, size_t len) -_memmove: // void* memmove(void *dst, const void *src, size_t len) - ba _COMM_PAGE_MEMCPY - - -#if 0 /* ======================================= * BCOPY, MEMCPY, and MEMMOVE for Mac OS X * ======================================= @@ -1182,5 +1159,4 @@ LTest32: cmpwi w1,0 // more to go? bne 1b // loop if so blr - -#endif /* 0 */ \ No newline at end of file + \ No newline at end of file diff --git a/ppc/gen/bzero.s b/ppc/gen/bzero.s index 2fc7b25..1a04b69 100644 --- a/ppc/gen/bzero.s +++ b/ppc/gen/bzero.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,82 +22,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ - -#define __APPLE_API_PRIVATE -#include -#undef __APPLE_API_PRIVATE - -// Bzero has migrated to the comm page. - - .text - .globl _bzero - .globl _memset - - .align 5 -_bzero: // void bzero(void *b, size_t len); - ba _COMM_PAGE_BZERO - - .align 5 -_memset: // void * memset(void *b, int c, size_t len); - andi. r9,r4,0xFF // copy "c" and test for 0 - mr r4,r5 // move length down to where bzero() expects it - beqa++ _COMM_PAGE_BZERO // c==0, so treat like bzero() - -// The nonzero memset() case is uncommon. - - cmplwi r5,8 // too short to align? - rlwimi r9,r9,8,16,23 // replicate c to all 4 bytes - neg r7,r3 // start to compute #bytes to word align - mr r8,r3 // copy ptr so we can preserve r3 - rlwimi r9,r9,16,0,15 - blt 4f // fewer than 8 bytes - andi. r0,r7,3 // get #bytes to word align - mtcrf 0x01,r7 // set up #bytes to word align - sub r5,r5,r0 // adjust length for word alignment - srwi r6,r5,3 // get #8-byte chunks to memset() - cmplwi cr1,r6,0 // any chunks? - mtctr r6 - beq 3f // already word aligned (r6!=0) - - bf 31,1f // odd byte? - stb r9,0(r8) - addi r8,r8,1 -1: - bf 30,2f // halfword? - sth r9,0(r8) - addi r8,r8,2 -2: - bne cr1,3f // handle 8-byte chunks - b 4f // no chunks - - .align 5 -3: - stw r9,0(r8) - stw r9,4(r8) - addi r8,r8,8 - bdnz 3b - -// Store up to 8 leftover bytes. -// r9 = value in all 4 bytes -// r8 = ptr -// r5 = length - -4: - mtcrf 0x01,r5 // move remaining length to cr7 - bf 29,6f - stw r9,0(r8) - addi r8,r8,4 -6: - bf 30,7f - sth r9,0(r8) - addi r8,r8,2 -7: - bflr 31 - stb r9,0(r8) - blr - - -#if 0 // // ============================= // BZERO and MEMSET FOR Mac OS X @@ -279,4 +203,3 @@ Lmemset1: bflr 31 stb rv,0(rp) blr -#endif /* 0 */ \ No newline at end of file diff --git a/i386/pthreads/init_cpu_capabilities.c b/ppc/gen/strcat.c similarity index 62% rename from i386/pthreads/init_cpu_capabilities.c rename to ppc/gen/strcat.c index 04ecb52..be23a09 100644 --- a/i386/pthreads/init_cpu_capabilities.c +++ b/ppc/gen/strcat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,23 +22,26 @@ * * @APPLE_LICENSE_HEADER_END@ */ +/* Copyright (c) 1991, 1997 NeXT Software, Inc. All rights reserved. + * + * File: libc/gen/ppc/strcat.c + * Author: Mike DeMoney, NeXT Software, Inc. + * + * This file contains machine dependent code for string copy + * + * HISTORY + * 24-Jan-1997 Umesh Vaishampayan (umeshv@NeXT.com) + * Ported to PPC. + * 9-Nov-92 Derek B Clegg (dclegg@next.com) + * Ported to m98k. + * 4-Jun-91 Mike DeMoney (mike@next.com) + * Created. + */ +#import -/* Initialize the "_cpu_capabilities" vector on Intel processors. */ - -#include -#include -#include -#include - -#define _APPLE_API_PRIVATE -#include -#undef _APPLE_API_PRIVATE - -int _cpu_has_altivec = 0; // DEPRECATED - -__private_extern__ void -_init_cpu_capabilities( void ) +char * +strcat(char *s1, const char *s2) { - - _cpu_capabilities = 0; + strcpy(&s1[strlen(s1)], s2); + return s1; } diff --git a/ppc/gen/strcmp.c b/ppc/gen/strcmp.c new file mode 100644 index 0000000..8d8a78d --- /dev/null +++ b/ppc/gen/strcmp.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1992, 1997 NeXT Software, Inc. All rights reserved. + * + * File: libc/gen/ppc/strcmp.c + * + * This file contains machine dependent code for string comparison + * on NeXT 88K-based products. + * + * HISTORY + * 24-Jan-1997 Umesh Vaishampayan (umeshv@NeXT.com) + * Ported to PPC. + * 24-Nov-92 Derek B Clegg (dclegg@next.com) + * Created. + */ +#import + +/* This routine should be optimized. */ + +/* ANSI sez: + * The `strcmp' function compares the string pointed to by `s1' to the + * string pointed to by `s2'. + * The `strcmp' function returns an integer greater than, equal to, or less + * than zero, according as the string pointed to by `s1' is greater than, + * equal to, or less than the string pointed to by `s2'. [4.11.4.2] + */ +int +strcmp(const char *s1, const char *s2) +{ + for ( ; *s1 == *s2; s1++, s2++) + if (*s1 == '\0') + return 0; + return ((*(unsigned char *)s1 < *(unsigned char *)s2) ? -1 : +1); +} diff --git a/gen/OSSystemInfo.c b/ppc/gen/strcpy.c similarity index 53% rename from gen/OSSystemInfo.c rename to ppc/gen/strcpy.c index 42f1691..54c1dbc 100644 --- a/gen/OSSystemInfo.c +++ b/ppc/gen/strcpy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,44 +22,34 @@ * * @APPLE_LICENSE_HEADER_END@ */ +/* Copyright (c) 1992, 1997 NeXT Software, Inc. All rights reserved. + * + * File: libc/gen/ppc/strcpy.c + * + * This file contains machine dependent code for string copy + * + * HISTORY + * 24-Jan-1997 Umesh Vaishampayan (umeshv@NeXT.com) + * Ported to PPC. + * 24-Nov-92 Derek B Clegg (dclegg@next.com) + * Created. + */ +#import -#include -#include -#include - -static int osi_oid[2] = {-1, 0}; +/* XXX This routine should be optimized. */ -bool -OSSystemInfo(int selector, unsigned long long *resultp) +/* ANSI sez: + * The `strcpy' function copies the string pointed to by `s2' (including + * the terminating null character) into the array pointed to by `s1'. + * If copying takes place between objects that overlap, the behavior + * is undefined. + * The `strcpy' function returns the value of `s1'. [4.11.2.3] + */ +char * +strcpy(char *s1, const char *s2) { - int oid[3]; - size_t size; - - /* - * Check cached OID, look it up if we haven't already. - * - * NB. Whilst this isn't strictly thread safe, since the - * result as written by any thread will be the same - * there is no actual risk of corruption. - */ - if (osi_oid[0] == -1) { - size = 2; - if (sysctlnametomib("hw.systeminfo", &osi_oid, &size) || - (size != 2)) - return(false); - } - - /* build OID */ - oid[0] = osi_oid[0]; - oid[1] = osi_oid[1]; - oid[2] = selector; - - /* make the call */ - size = sizeof(*resultp); - if (sysctl(oid, 3, resultp, &size, NULL, 0) || - (size != sizeof(*resultp))) - return(false); - - return(true); + char *s = s1; + while ((*s++ = *s2++) != 0) + ; + return (s1); } - diff --git a/ppc/string/strlen.s b/ppc/gen/strlen.s similarity index 99% rename from ppc/string/strlen.s rename to ppc/gen/strlen.s index 806e00e..77a9769 100644 --- a/ppc/string/strlen.s +++ b/ppc/gen/strlen.s @@ -22,10 +22,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#define ASSEMBLER -#include -#undef ASSEMBLER - ; ; ; Strlen, optimized for PPC. The routine we use is 2-3x faster @@ -44,6 +40,7 @@ ; with one exception: 0x01 bytes preceeding the first zero are also ; mapped to 0x80. ; +#include ; ; int strlen(ptr) ; diff --git a/ppc/pthreads/init_cpu_capabilities.c b/ppc/gen/strncat.c similarity index 57% rename from ppc/pthreads/init_cpu_capabilities.c rename to ppc/gen/strncat.c index 8c9266a..15f25eb 100644 --- a/ppc/pthreads/init_cpu_capabilities.c +++ b/ppc/gen/strncat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,20 +22,34 @@ * * @APPLE_LICENSE_HEADER_END@ */ +/* Copyright (c) 1991, 1997 NeXT Software, Inc. All rights reserved. + * + * File: libc/gen/ppc/strncat.c + * Author: Mike DeMoney, NeXT Software, Inc. + * + * This file contains machine dependent code for string copy + * + * HISTORY + * 24-Jan-1997 Umesh Vaishampayan (umeshv@NeXT.com) + * Ported to PPC. + * 23-Nov-92 Derek B Clegg (dclegg@next.com) + * Ported to m98k. + * 4-Jun-91 Mike DeMoney (mike@next.com) + * Created. + */ +#import -/* Initialize the "_cpu_capabilities" vector on PowerPC processors. */ - -#define __APPLE_API_PRIVATE -#include -#undef __APPLE_API_PRIVATE - -int _cpu_has_altivec = 0; // DEPRECATED: use _cpu_capabilities instead -int _cpu_capabilities = 0; - -__private_extern__ void -_init_cpu_capabilities( void ) +char * +strncat(char *s1, const char *s2, size_t n) { - _cpu_capabilities = *(int*) _COMM_PAGE_CPU_CAPABILITIES; // pull out of commpage + unsigned len1 = strlen(s1); + unsigned len2 = strlen(s2); - _cpu_has_altivec = (_cpu_capabilities & kHasAltivec) ? 1 : 0; + if (len2 < n) { + strcpy(&s1[len1], s2); + } else { + strncpy(&s1[len1], s2, n); + s1[len1 + n] = '\0'; + } + return s1; } diff --git a/ppc/gen/strncmp.c b/ppc/gen/strncmp.c new file mode 100644 index 0000000..51b16f8 --- /dev/null +++ b/ppc/gen/strncmp.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1992, 1997 NeXT Software, Inc. All rights reserved. + * + * File: libc/gen/ppc/strncmp.c + * + * This file contains machine dependent code for string comparison + * + * HISTORY + * 24-Jan-1997 Umesh Vaishampayan (umeshv@NeXT.com) + * Ported to PPC. + * 24-Nov-92 Derek B Clegg (dclegg@next.com) + * Created. + */ +#import + +/* This routine should be optimized. */ + +/* ANSI sez: + * The `strncmp' function compares not more than `n' characters (characters + * that follow a null character are not compared) from the array pointed to + * by `s1' to the array pointed to by `s2'. + * The `strncmp' function returns an integer greater than, equal to, or less + * than zero, according as the possibly null-terminated array pointed to by + * `s1' is greater than, equal to, or less than the possibly null-terminated + * array pointed to by `s2'. [4.11.4.4] + */ +int +strncmp(const char *s1, const char *s2, size_t n) +{ + for ( ; n > 0; s1++, s2++, --n) + if (*s1 != *s2) + return ((*(unsigned char *)s1 < *(unsigned char *)s2) ? -1 : +1); + else if (*s1 == '\0') + return 0; + return 0; +} diff --git a/ppc/gen/strncpy.c b/ppc/gen/strncpy.c new file mode 100644 index 0000000..10ca272 --- /dev/null +++ b/ppc/gen/strncpy.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1992, 1997 NeXT Software, Inc. All rights reserved. + * + * File: libc/gen/ppc/strncpy.c + * + * This file contains machine dependent code for string copy + * + * HISTORY + * 24-Jan-1997 Umesh Vaishampayan (umeshv@NeXT.com) + * Ported to PPC. + * 24-Nov-92 Derek B Clegg (dclegg@next.com) + * Created. + */ +#import + +/* This routine should be optimized. */ + +/* ANSI sez: + * The `strncpy' function copies not more than `n' characters (characters + * that follow a null character are not copied) from the array pointed to + * by `s2' to the array pointed to by `s1'. If copying takes place between + * objects that overlap, the behavior is undefined. + * If the array pointed to by `s2' is a string that is shorter than `n' + * characters, null characters are appended to the copy in the array + * pointed to by `s1', until `n' characters in all have been written. + * The `strncpy' function returns the value of `s1'. [4.11.2.4] + */ +char * +strncpy(char *s1, const char *s2, size_t n) +{ + char *s = s1; + while (n > 0 && *s2 != '\0') { + *s++ = *s2++; + --n; + } + while (n > 0) { + *s++ = '\0'; + --n; + } + return s1; +} diff --git a/ppc/mach/mach_absolute_time.s b/ppc/mach/mach_absolute_time.s index 172a323..5f35df3 100644 --- a/ppc/mach/mach_absolute_time.s +++ b/ppc/mach/mach_absolute_time.s @@ -23,14 +23,15 @@ * @APPLE_LICENSE_HEADER_END@ */ -#define __APPLE_API_PRIVATE -#include -#undef __APPLE_API_PRIVATE - #if defined(__ppc__) .text -.align 5 +.align 2 .globl _mach_absolute_time _mach_absolute_time: - ba _COMM_PAGE_ABSOLUTE_TIME +1: mftbu r3 + mftb r4 + mftbu r0 + cmpw r0,r3 + bne- 1b + blr #endif diff --git a/ppc/pthreads/Makefile.inc b/ppc/pthreads/Makefile.inc deleted file mode 100644 index 6663dd6..0000000 --- a/ppc/pthreads/Makefile.inc +++ /dev/null @@ -1,2 +0,0 @@ -MDSRCS += \ - init_cpu_capabilities.c diff --git a/ppc/string/Makefile.inc b/ppc/string/Makefile.inc deleted file mode 100644 index 8c5c9eb..0000000 --- a/ppc/string/Makefile.inc +++ /dev/null @@ -1,18 +0,0 @@ -# $Version$ -# -# PPC-optimised string functions. -# -MDSRCS += \ - memcmp.s \ - strcat.s \ - strcmp.s \ - strcpy.s \ - strlcat.s \ - strlcpy.s \ - strlen.s \ - strncat.s \ - strncmp.s \ - strncpy.s - - - diff --git a/ppc/string/memcmp.s b/ppc/string/memcmp.s deleted file mode 100644 index 11e2254..0000000 --- a/ppc/string/memcmp.s +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER // we need the defs for cr7_eq etc -#include -#undef ASSEMBLER - -// *************** *********** -// * M E M C M P * and * B C M P * -// *************** *********** -// -// int memcmp(const char *s1, const char *s2, size_t len); -// int bcmp(const char *s1, const char *s2, size_t len); -// -// Bcmp returns (+,0,-), whereas memcmp returns the true difference -// between the first differing bytes, but we treat them identically. -// -// We optimize the compare by doing it word parallel. This introduces -// a complication: if we blindly did word loads from both sides until -// finding a difference, we might get a spurious page fault by -// reading bytes past the difference. To avoid this, we never do a "lwz" -// that crosses a page boundary. - - .text - .globl EXT(memcmp) - .globl EXT(bcmp) - - .align 5 -LEXT(memcmp) // int memcmp(const char *s1,const char *s2,size_t len); -LEXT(bcmp) // int bcmp(const char *s1,const char *s2,size_t len); - cmplwi cr1,r5,8 // is buffer too short to bother with word compares? - andi. r0,r3,3 // is LHS word aligned? - blt cr1,Lshort // short buffer, so just compare byte-by-byte - beq Laligned // skip if aligned - subfic r0,r0,4 // r0 <- #bytes to word align LHS - mtctr r0 // set up for byte loop - b Lbyteloop - -// Handle short buffer or end-of-buffer. -// r3 = LHS ptr (unaligned) -// r4 = RHS ptr (unaligned) -// r5 = length remaining in buffer (0..7) - -Lshort: - cmpwi r5,0 // null buffer? - mtctr r5 // assume not null, and set up for loop - bne Lshortloop // buffer not null - li r3,0 // say "equal" - blr - - .align 5 -Lshortloop: - lbz r7,0(r3) // next LHS byte - addi r3,r3,1 - lbz r8,0(r4) // next RHS byte - addi r4,r4,1 - cmpw r7,r8 // compare the bytes - bdnzt eq,Lshortloop // loop if more to go and bytes are equal - - sub r3,r7,r8 // generate return value - blr - -// We're at a RHS page boundary. Compare 4 bytes in order to cross the -// page but still keep the LHS ptr word-aligned. - -Lcrosspage: - cmplwi r5,8 // enough bytes left to use word compares? - li r0,4 // get #bytes to cross RHS page - blt Lshort // buffer is about to end - mtctr r0 // set up to compare 4 bytes - b Lbyteloop - -// Compare byte-by-byte. -// r3 = LHS ptr (unaligned) -// r4 = RHS ptr (unaligned) -// r5 = length remaining in buffer (must be >0) -// ctr = bytes to compare - - .align 5 -Lbyteloop: - lbz r7,0(r3) // next LHS byte - addi r3,r3,1 - lbz r8,0(r4) // next RHS byte - addi r4,r4,1 - subi r5,r5,1 // decrement bytes remaining in buffer - cmpw r7,r8 // compare the bytes - bdnzt eq,Lbyteloop // loop if more to go and bytes are equal - - bne Ldifferent // done if we found differing bytes - -// LHS is now word aligned. Loop over words until end of RHS page or buffer. -// When we get to the end of the page, we compare 4 bytes, so that we keep -// the LHS word aligned. -// r3 = LHS ptr (aligned) -// r4 = RHS ptr (unaligned) -// r5 = length remaining in buffer (>= 4 bytes) - -Laligned: - rlwinm r9,r4,0,0xFFF // get RHS offset in page - subfic r0,r9,4096 // get #bytes left in RHS page - subfc r7,r0,r5 // *** - subfe r8,r5,r5 // * r9 <- min(r0,r5), - and r7,r7,r8 // * using algorithm in Compiler Writer's Guide - add r9,r0,r7 // *** - srwi. r8,r9,2 // get #words we can compare - rlwinm r9,r9,0,0,29 // get #bytes we will compare word-parallel - beq-- Lcrosspage // we're at a RHS page boundary - mtctr r8 // set up loop count - sub r5,r5,r9 // decrement length remaining - b Lwordloop - -// Compare a word at a time, until one of two conditions: -// - a difference is found -// - end of count (ie, end of buffer or RHS page, whichever is first) -// At this point, registers are as follows: -// r3 = LHS ptr (aligned) -// r4 = RHS ptr (unaligned) -// r5 = length remaining in buffer (may be 0) -// ctr = count of words until end of buffer or RHS page - - .align 5 // align inner loop, which is 8 words long -Lwordloop: - lwz r7,0(r3) // r7 <- next 4 LHS bytes - addi r3,r3,4 - lwz r8,0(r4) // r8 <- next 4 RHS bytes - addi r4,r4,4 - xor. r11,r7,r8 // compare the words - bdnzt eq,Lwordloop // loop if ctr!=0 and cr0_eq - - beq-- Lcrosspage // skip if buffer or page end reached - -// Found differing bytes. - - cntlzw r0,r11 // find 1st difference (r0 = 0..31) - rlwinm r9,r0,0,0x18 // byte align bit offset (r9 = 0,8,16, or 24) - addi r0,r9,8 // now, r0 = 8, 16, 24, or 32 - rlwnm r7,r7,r0,24,31 // right justify differing bytes and mask off rest - rlwnm r8,r8,r0,24,31 - -Ldifferent: // bytes in r7 and r8 differ - sub r3,r7,r8 // compute return value - blr - diff --git a/ppc/string/strcat.s b/ppc/string/strcat.s deleted file mode 100644 index 469ae20..0000000 --- a/ppc/string/strcat.s +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER -#include -#undef ASSEMBLER - -// *************** -// * S T R C A T * -// *************** -// -// char* strcat(const char *dst, const char *src); -// -// We optimize the move by doing it word parallel. This introduces -// a complication: if we blindly did word load/stores until finding -// a 0, we might get a spurious page fault by touching bytes past it. -// To avoid this, we never do a "lwz" that crosses a page boundary, -// and never store a byte we don't have to. -// -// The test for 0s relies on the following inobvious but very efficient -// word-parallel test: -// x = dataWord + 0xFEFEFEFF -// y = ~dataWord & 0x80808080 -// if (x & y) == 0 then no zero found -// The test maps any non-zero byte to zero, and any zero byte to 0x80, -// with one exception: 0x01 bytes preceeding the first zero are also -// mapped to 0x80. - - .text - .globl EXT(strcat) - - .align 5 -LEXT(strcat) // char* strcat(const char *s, const char *append); - andi. r0,r3,3 // is dst aligned? - dcbtst 0,r3 // touch in dst - lis r6,hi16(0xFEFEFEFF) // start to load magic constants - lis r7,hi16(0x80808080) - dcbt 0,r4 // touch in source - ori r6,r6,lo16(0xFEFEFEFF) - ori r7,r7,lo16(0x80808080) - mr r9,r3 // use r9 for dest ptr (must return r3 intact) - beq Lword0loop // dest is aligned - subfic r0,r0,4 // r0 <- #bytes to word align dest - mtctr r0 - -// Loop over bytes looking for 0-byte marking end of dest. -// r4 = source ptr (unalaigned) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = byte count - -Lbyte0loop: - lbz r8,0(r9) // r8 <- next dest byte - addi r9,r9,1 - cmpwi r8,0 // test for 0 - bdnzf eq,Lbyte0loop // loop until (ctr==0) | (r8==0) - - bne Lword0loop // enter word loop if we haven't found the 0-byte - subi r9,r9,1 // point to 0-byte - b L0found // start to append the source - -// Loop over words looking for 0-byte marking end of dest. -// r4 = source ptr (unalaigned) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (word aligned) - - .align 5 // align inner loops for speed -Lword0loop: - lwz r8,0(r9) // r8 <- next dest word - addi r9,r9,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r11,r10,r12 // r11 <- nonzero iff word has a 0-byte - beq Lword0loop // loop until 0 found - - slwi r0,r8,7 // move 0x01 bits (false hits) into 0x80 position - subi r9,r9,4 // back r9 up to beginning of word - andc r11,r11,r0 // mask out false hits - cntlzw r0,r11 // find 0 byte (r0 = 0, 8, 16, or 24) - srwi r0,r0,3 // now r0 = 0, 1, 2, or 3 - add r9,r9,r0 // now r9 points to the 0-byte in dest - -// End of dest found, so we can start appending source. -// We align the _source_, which allows us to avoid all worries about -// spurious page faults. Doing so is faster than aligning the dest. -// r4 = source ptr (unaligned) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = ptr to 0-byte (unaligned) - -L0found: - andi. r0,r4,3 // is source aligned? - beq LwordloopEnter // skip if so - subfic r0,r0,4 // not aligned, get #bytes to align r4 - mtctr r0 // set up loop - -// Loop over bytes. -// r4 = source ptr (unaligned) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = byte count - -Lbyteloop: - lbz r8,0(r4) // r8 <- next source byte - addi r4,r4,1 - cmpwi r8,0 // 0 ? - stb r8,0(r9) // pack into dest - addi r9,r9,1 - bdnzf eq,Lbyteloop // loop until (ctr==0) | (r8==0) - - bne LwordloopEnter // 0-byte not found, so enter word loop - blr // 0-byte found, done - -// Word loop: move a word at a time until 0-byte found. -// r4 = source ptr (word aligned) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - - .align 5 // align inner loop, which is 8 words ling -Lwordloop: - stw r8,0(r9) // pack word into destination - addi r9,r9,4 -LwordloopEnter: - lwz r8,0(r4) // r8 <- next 4 source bytes - addi r4,r4,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r0,r10,r12 // r0 <- nonzero iff word has a 0-byte - beq Lwordloop // loop if ctr!=0 and cr0_eq - -// Found a 0-byte. Store last word up to and including the 0, a byte at a time. -// r8 = last word, known to have a 0-byte -// r9 = dest ptr - -Lstorelastbytes: - srwi. r0,r8,24 // right justify next byte and test for 0 - slwi r8,r8,8 // shift next byte into position - stb r0,0(r9) // pack into dest - addi r9,r9,1 - bne Lstorelastbytes // loop until 0 stored - - blr - diff --git a/ppc/string/strcmp.s b/ppc/string/strcmp.s deleted file mode 100644 index 7cac4da..0000000 --- a/ppc/string/strcmp.s +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER -#include -#undef ASSEMBLER - -// *************** -// * S T R C M P * -// *************** -// -// int strcmp(const char *s1, const char *s2); -// -// We optimize the compare by doing it word parallel. This introduces -// a complication: if we blindly did word loads from both sides until -// finding a difference (or 0), we might get a spurious page fault by -// reading bytes past the difference. To avoid this, we never do a "lwz" -// that crosses a page boundary. -// -// The test for 0s relies on the following inobvious but very efficient -// word-parallel test: -// x = dataWord + 0xFEFEFEFF -// y = ~dataWord & 0x80808080 -// if (x & y) == 0 then no zero found -// The test maps any non-zero byte to zero, and any zero byte to 0x80, -// with one exception: 0x01 bytes preceeding the first zero are also -// mapped to 0x80. - - .text - .globl EXT(strcmp) - - .align 5 -LEXT(strcmp) // int strcmp(const char *s1, const char *s2); - andi. r0,r3,3 // is LHS aligned? - dcbt 0,r3 // touch in LHS - lis r5,hi16(0xFEFEFEFF) // start to load magic constants - lis r6,hi16(0x80808080) - dcbt 0,r4 // touch in RHS - ori r5,r5,lo16(0xFEFEFEFF) - ori r6,r6,lo16(0x80808080) - subi r3,r3,4 // we use "lwzu" in the inner loops - subi r4,r4,4 - beq Laligned // LHS is aligned - subfic r0,r0,4 // r0 <- #bytes to word align LHS - mtctr r0 - -// Loop over bytes. - -Lbyteloop: - lbz r7,4(r3) // r7 <- next LHS byte - addi r3,r3,1 - lbz r8,4(r4) // r8 <- next RHS byte - addi r4,r4,1 - cntlzw r9,r7 // is r7 zero? - sub r0,r7,r8 // different? - srwi r9,r9,5 // r9 <- (r7==0) ? 1 : 0 - or. r9,r9,r0 // r9 is nonzero if either different or 0 - bdnzt eq,Lbyteloop // loop until different, 0, or buf end - - bne Ldone // done if different or 0 - -// LHS is word aligned. If RHS also is, we need not worry about page -// crossing. Otherwise, we must stop the word loop before page is crossed. - -Laligned: - andi. r0,r4,3 // is RHS now word aligned too? - addi r9,r4,4 // restore true address of next RHS byte - rlwinm r9,r9,0,0xFFF // get RHS offset in page - beq Lalignedloop // RHS word aligned, use simple loop - subfic r9,r9,4096 // get #bytes left in RHS page - srwi. r0,r9,2 // get #words left in RHS page - mtctr r0 // set up loop count - bne++ Lunalignedloop // at least one word left in RHS page - li r0,4 // must check 4 bytes, a byte at a time... - mtctr r0 // ...in order to keep LHS word aligned - b Lbyteloop // go cross RHS page - -// Unaligned inner loop: compare a word at a time, until one of three conditions: -// - a difference is found -// - a zero byte is found -// - end of RHS page (we dare not touch next page until we must) -// At this point, registers are as follows: -// r3 = LHS ptr - 4 (word aligned) -// r4 = RHS ptr - 4 (not aligned) -// r5 = 0xFEFEFEFF -// r6 = 0x80808080 -// ctr = whole words left in RHS page - - .align 5 // align inner loop, which is 8 words long -Lunalignedloop: - lwzu r7,4(r3) // r7 <- next 4 LHS bytes - lwzu r8,4(r4) // r8 <- next 4 RHS bytes - add r10,r7,r5 // r10 <- LHS + 0xFEFEFEFF - andc r12,r6,r7 // r12 <- ~LHS & 0x80808080 - xor r11,r7,r8 // r11 <- compare the words - and r0,r10,r12 // r0 <- nonzero iff LHS has a 0-byte - or. r12,r0,r11 // combine difference and 0-test vectors - bdnzt eq,Lunalignedloop // loop if ctr!=0 and cr0_eq - - bne++ Ldifferent // done if we found a 0 or difference - li r0,4 // must check 4 bytes, a byte at a time... - mtctr r0 // ...in order to keep LHS word aligned - b Lbyteloop // cross RHS page, then resume word loop - -// Aligned inner loop: compare a word at a time, until one of two conditions: -// - a difference is found -// - a zero byte is found -// At this point, registers are as follows: -// r3 = LHS ptr - 4 (word aligned) -// r4 = RHS ptr - 4 (word aligned) -// r5 = 0xFEFEFEFF -// r6 = 0x80808080 - - .align 5 // align inner loop, which is 8 words ling -Lalignedloop: - lwzu r7,4(r3) // r7 <- next 4 LHS bytes - lwzu r8,4(r4) // r8 <- next 4 RHS bytes - add r10,r7,r5 // r10 <- LHS + 0xFEFEFEFF - andc r12,r6,r7 // r12 <- ~LHS & 0x80808080 - xor r11,r7,r8 // r11 <- compare the words - and r0,r10,r12 // r0 <- nonzero iff LHS has a 0-byte - or. r12,r0,r11 // combine difference and 0-test vectors - beq Lalignedloop // loop if neither found - -// Found differing bytes and/or a 0-byte. Determine which comes first, and -// subtract the bytes to compute the return value. We also need to mask out the -// false hits in the 0-byte test, which consist of 0x01 bytes that preceed -// the 0-byte. - -Ldifferent: // r0 == 0-test vector (with 0x01 false hits) - slwi r9,r7,7 // move 0x01 bits in LHS into position 0x80 - andc r0,r0,r9 // mask out the false 0-hits from 0x01 bytes - or r11,r11,r0 // recompute difference vector - cntlzw r9,r11 // find 1st difference (r9 = 0..31) - rlwinm r9,r9,0,0x18 // byte align bit offset (now, r9 = 0,8,16, or 24) - addi r9,r9,8 // now, r9 = 8, 16, 24, or 32 - rlwnm r5,r7,r9,24,31 // right justify differing bytes and mask off rest - rlwnm r6,r8,r9,24,31 - sub r3,r5,r6 // compute difference (0, +, or -) - blr - -Ldone: // r0 = return value - mr r3,r0 // return in r3 - blr - diff --git a/ppc/string/strcpy.s b/ppc/string/strcpy.s deleted file mode 100644 index 9d74580..0000000 --- a/ppc/string/strcpy.s +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER -#include -#undef ASSEMBLER - -// *************** -// * S T R C P Y * -// *************** -// -// char* strcpy(const char *dst, const char *src); -// -// We optimize the move by doing it word parallel. This introduces -// a complication: if we blindly did word load/stores until finding -// a 0, we might get a spurious page fault by touching bytes past it. -// To avoid this, we never do a "lwz" that crosses a page boundary, -// and never store a byte we don't have to. -// -// The test for 0s relies on the following inobvious but very efficient -// word-parallel test: -// x = dataWord + 0xFEFEFEFF -// y = ~dataWord & 0x80808080 -// if (x & y) == 0 then no zero found -// The test maps any non-zero byte to zero, and any zero byte to 0x80, -// with one exception: 0x01 bytes preceeding the first zero are also -// mapped to 0x80. -// -// We align the _source_, which allows us to avoid all worries about -// spurious page faults. Doing so is faster than aligning the dest. - - .text - .globl EXT(strcpy) - - .align 5 -LEXT(strcpy) // char* strcpy(const char *dst, const char *src); - andi. r0,r4,3 // is source aligned? - dcbt 0,r4 // touch in source - lis r6,hi16(0xFEFEFEFF) // start to load magic constants - lis r7,hi16(0x80808080) - dcbtst 0,r3 // touch in dst - ori r6,r6,lo16(0xFEFEFEFF) - ori r7,r7,lo16(0x80808080) - mr r9,r3 // use r9 for dest ptr (must return r3 intact) - beq LwordloopEnter // source is aligned - subfic r0,r0,4 // r0 <- #bytes to word align source - mtctr r0 - -// Loop over bytes. -// r4 = source ptr (unaligned) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = byte count - -Lbyteloop: - lbz r8,0(r4) // r8 <- next source byte - addi r4,r4,1 - cmpwi r8,0 // 0 ? - stb r8,0(r9) // pack into dest - addi r9,r9,1 - bdnzf eq,Lbyteloop // loop until (ctr==0) | (r8==0) - - bne LwordloopEnter // 0-byte not found, so enter word loop - blr // 0-byte found, done - -// Word loop: move a word at a time until 0-byte found. -// r4 = source ptr (word aligned) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - - .align 5 // align inner loop, which is 8 words ling -Lwordloop: - stw r8,0(r9) // pack word into destination - addi r9,r9,4 -LwordloopEnter: - lwz r8,0(r4) // r8 <- next 4 source bytes - addi r4,r4,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r0,r10,r12 // r0 <- nonzero iff word has a 0-byte - beq Lwordloop // loop if ctr!=0 and cr0_eq - -// Found a 0-byte. Store last word up to and including the 0, a byte at a time. -// r8 = last word, known to have a 0-byte -// r9 = dest ptr - -Lstorelastbytes: - srwi. r0,r8,24 // right justify next byte and test for 0 - slwi r8,r8,8 // shift next byte into position - stb r0,0(r9) // pack into dest - addi r9,r9,1 - bne Lstorelastbytes // loop until 0 stored - - blr - diff --git a/ppc/string/strlcat.s b/ppc/string/strlcat.s deleted file mode 100644 index 4a6622f..0000000 --- a/ppc/string/strlcat.s +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER -#include -#undef ASSEMBLER - -// ***************** -// * S T R L C A T * -// ***************** -// -// size_t strlcat(char *dst, const char *src, size_t count); -// -// We optimize the move by doing it word parallel. This introduces -// a complication: if we blindly did word load/stores until finding -// a 0, we might get a spurious page fault by touching bytes past it. -// We are allowed to touch the "count" bytes starting at "dst", but -// when appending the "src", we must not do a "lwz" that crosses a page -// boundary, or store past "count". -// -// The test for 0s relies on the following inobvious but very efficient -// word-parallel test: -// x = dataWord + 0xFEFEFEFF -// y = ~dataWord & 0x80808080 -// if (x & y) == 0 then no zero found -// The test maps any non-zero byte to zero, and any zero byte to 0x80, -// with one exception: 0x01 bytes preceeding the first zero are also -// mapped to 0x80. -// -// Note that "count" is the total buffer length, including the length -// of the "dst" string. This is different than strncat(). - - .text - .globl EXT(strlcat) - - .align 5 -LEXT(strlcat) - srwi. r0,r5,2 // get #words to scan - dcbtst 0,r3 // touch in dst - lis r6,hi16(0xFEFEFEFF) // start to load magic constants - lis r7,hi16(0x80808080) - dcbt 0,r4 // touch in source - ori r6,r6,lo16(0xFEFEFEFF) - ori r7,r7,lo16(0x80808080) - mr r9,r3 // use r9 for dest ptr (r3 remembers dst start) - beq-- L0bytes // buffer length <4 - mtctr r0 // set up loop - b L0words // enter word loop - -// Loop over words looking for 0. -// r3 = original start of buffer -// r4 = source ptr (unaligned) -// r5 = original buffer size -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = #words remaining in buffer - - .align 5 // align inner loops for speed -L0words: - lwz r8,0(r9) // r8 <- next dest word - addi r9,r9,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r11,r10,r12 // r11 <- nonzero iff word has a 0-byte - bdnzt eq,L0words // loop until 0 found or buffer end - - beq-- L0bytes // skip if 0 not found - - slwi r0,r8,7 // move 0x01 bits (false hits) into 0x80 position - subi r9,r9,4 // back up r9 to the start of the word - andc r11,r11,r0 // mask out false hits - cntlzw r0,r11 // find 0 byte (r0 = 0, 8, 16, or 24) - srwi r0,r0,3 // now r0 = 0, 1, 2, or 3 - add r9,r9,r0 // now r9 points to the 0-byte in dest - b L0found // start to append source - -// Loop over bytes looking for 0. -// r3 = original start of buffer -// r4 = source ptr (unaligned) -// r5 = original buffer size -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -L0bytes: - andi. r0,r5,3 // get #bytes remaining in buffer - mtctr r0 // set up byte loop - beq-- L0notfound // skip if 0 not found in buffer (error) -L0byteloop: - lbz r8,0(r9) // r8 <- next dest byte - addi r9,r9,1 - cmpwi r8,0 // 0 ? - bdnzf eq,L0byteloop // loop until 0 found or buffer end - - bne-- L0notfound // skip if 0 not found (error) - subi r9,r9,1 // back up, so r9 points to the 0 - -// End of dest found, so we can start appending source. First, align the source, -// in order to avoid spurious page faults. -// r3 = original start of buffer -// r4 = original source ptr (unaligned) -// r5 = original buffer size -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = ptr to 0-byte in dest (unaligned) - -L0found: - andi. r0,r4,3 // is source aligned? - add r5,r5,r3 // get ptr to end of buffer - sub r5,r5,r9 // get #bytes remaining in buffer, counting the 0 (r5>0) - beq Laligned // skip if source already word aligned - subfic r0,r0,4 // not aligned, get #bytes to align r4 - b Lbyteloop1 // r5!=0, so skip check - -// Copy min(r0,r5) bytes, until 0-byte. -// r0 = #bytes we propose to copy (NOTE: must be >0) -// r4 = source ptr (unaligned) -// r5 = length remaining in buffer (may be 0) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -Lbyteloop: - cmpwi r5,0 // buffer empty? (note: unsigned) - beq-- Loverrun // buffer filled before end of source reached -Lbyteloop1: // entry when we know r5!=0 - lbz r8,0(r4) // r8 <- next source byte - subic. r0,r0,1 // decrement count of bytes to move - addi r4,r4,1 - subi r5,r5,1 // decrement buffer length remaining - stb r8,0(r9) // pack into dest - cmpwi cr1,r8,0 // 0-byte? - addi r9,r9,1 - beq cr1,L0stored // byte was 0, so done - bne Lbyteloop // r0!=0, source not yet aligned - -// Source is word aligned. Loop over words until 0-byte found or end -// of buffer. -// r3 = original start of buffer -// r4 = source ptr (word aligned) -// r5 = length remaining in buffer -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -Laligned: - srwi. r8,r5,2 // get #words in buffer - addi r0,r5,1 // if no words... - beq-- Lbyteloop // ...copy to end of buffer - mtctr r8 // set up word loop count - rlwinm r5,r5,0,0x3 // mask buffer length down to leftover bytes - b LwordloopEnter - -// Inner loop: move a word at a time, until one of two conditions: -// - a zero byte is found -// - end of buffer -// At this point, registers are as follows: -// r3 = original start of buffer -// r4 = source ptr (word aligned) -// r5 = bytes leftover in buffer (0..3) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = whole words left in buffer - - .align 5 // align inner loop, which is 8 words long -Lwordloop: - stw r8,0(r9) // pack word into destination - addi r9,r9,4 -LwordloopEnter: - lwz r8,0(r4) // r8 <- next 4 source bytes - addi r4,r4,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r11,r10,r12 // r11 <- nonzero iff word has a 0-byte - bdnzt eq,Lwordloop // loop if ctr!=0 and cr0_eq - - beq-- Lleftovers // skip if no 0-byte found, copy leftovers - -// Found a 0-byte. Store last word up to and including the 0, a byte at a time. -// r3 = original start of buffer -// r8 = last word, known to have a 0-byte -// r9 = dest ptr (one past 0) - -Lstorelastbytes: - srwi. r0,r8,24 // right justify next byte and test for 0 - slwi r8,r8,8 // shift next byte into position - stb r0,0(r9) // pack into dest - addi r9,r9,1 - bne Lstorelastbytes // loop until 0 stored - -// Append op successful, O stored into buffer. Return total length. -// r3 = original start of buffer -// r9 = dest ptr (one past 0) - -L0stored: - sub r3,r9,r3 // get (length+1) of string in buffer - subi r3,r3,1 // return length - blr - -// 0-byte not found in aligned source words. There are up to 3 leftover source -// bytes, hopefully the 0-byte is among them. -// r4 = source ptr (word aligned) -// r5 = leftover bytes in buffer (0..3) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r8 = last full word of source -// r9 = dest ptr (unaligned) - -Lleftovers: - stw r8,0(r9) // store last word - addi r9,r9,4 - addi r0,r5,1 // make sure r5 terminates byte loop (not r0) - b Lbyteloop - -// Buffer filled during append without finding the end of source. Overwrite the -// last byte in buffer with a 0, and compute how long the concatenated string would -// have been, if the buffer had been large enough. -// r3 = original start of buffer -// r4 = source ptr (1st byte not copied into buffer) -// r9 = dest ptr (one past end of buffer) - -Loverrun: - sub. r3,r9,r3 // compute #bytes stored in buffer - li r0,0 // get a 0 - beq-- Lskip // buffer was 0-length - stb r0,-1(r9) // jam in delimiting 0 - -// Buffer full, check to see how much longer source is. We don't optimize this, -// since overruns are an error. - -Lskip: - lbz r8,0(r4) // get next source byte - addi r4,r4,1 - addi r3,r3,1 // increment length of "ideal" string - cmpwi r8,0 // 0? - bne Lskip - - subi r3,r3,1 // don't count 0 in length - blr // return length of string we "wanted" to create - -// 0 not found in buffer (append not yet begun.) We don't store a delimiting 0, -// but do compute how long the concatenated string would have been, assuming the length -// of "dst" is the length of the buffer. -// r3 = original start of buffer -// r4 = original source ptr -// r9 = dest ptr (one past end of buffer) - -L0notfound: - sub r3,r9,r3 // compute #bytes in buffer - b Lskip // add strlen(source) to r3 - diff --git a/ppc/string/strlcpy.s b/ppc/string/strlcpy.s deleted file mode 100644 index 1707da6..0000000 --- a/ppc/string/strlcpy.s +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER -#include -#undef ASSEMBLER - -// ***************** -// * S T R L C P Y * -// ***************** -// -// size_t strlcpy(char *dst, const char *src, size_t size); -// -// We optimize the move by doing it word parallel. This introduces -// a complication: if we blindly did word load/stores until finding -// a 0, we might get a spurious page fault by touching bytes past it. -// To avoid this, we never do a "lwz" that crosses a page boundary, -// or store unnecessary bytes. -// -// The test for 0s relies on the following inobvious but very efficient -// word-parallel test: -// x = dataWord + 0xFEFEFEFF -// y = ~dataWord & 0x80808080 -// if (x & y) == 0 then no zero found -// The test maps any non-zero byte to zero, and any zero byte to 0x80, -// with one exception: 0x01 bytes preceeding the first zero are also -// mapped to 0x80. - - .text - .globl EXT(strlcpy) - - .align 5 -LEXT(strlcpy) - andi. r0,r4,3 // is source aligned? - dcbt 0,r4 // touch in source - lis r6,hi16(0xFEFEFEFF) // start to load magic constants - lis r7,hi16(0x80808080) - dcbtst 0,r3 // touch in dst - ori r6,r6,lo16(0xFEFEFEFF) - ori r7,r7,lo16(0x80808080) - mr r9,r3 // use r9 for dest ptr (r3 remembers dst start) - beq Laligned // source is aligned - subfic r0,r0,4 // r0 <- #bytes to word align source - -// Copy min(r0,r5) bytes, until 0-byte found. -// r0 = #bytes we propose to copy (NOTE: must be >0) -// r4 = source ptr (unaligned) -// r5 = length remaining in buffer (may be 0) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -Lbyteloop: - cmpwi r5,0 // buffer empty? - beq-- L0notfound // buffer full but 0 not found - lbz r8,0(r4) // r8 <- next source byte - subic. r0,r0,1 // decrement count of bytes to move - addi r4,r4,1 - subi r5,r5,1 // decrement buffer length remaining - stb r8,0(r9) // pack into dest - cmpwi cr1,r8,0 // 0-byte? - addi r9,r9,1 - beq cr1,L0found // byte was 0 - bne Lbyteloop // r0!=0, source not yet aligned - -// Source is word aligned. Loop over words until end of buffer. We align -// the source, rather than the dest, to avoid getting spurious page faults. -// r4 = source ptr (word aligned) -// r5 = length remaining in buffer -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -Laligned: - srwi. r8,r5,2 // get #words in buffer - addi r0,r5,1 // if no words, compare rest of buffer - beq Lbyteloop // r8==0, no words - mtctr r8 // set up word loop count - rlwinm r5,r5,0,0x3 // mask buffer length down to leftover bytes - b LwordloopEnter - -// Move a word at a time, until one of two conditions: -// - a zero byte is found -// - end of buffer -// At this point, registers are as follows: -// r4 = source ptr (word aligned) -// r5 = leftover bytes in buffer (0..3) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = whole words left in buffer - - .align 5 // align inner loop, which is 8 words long -Lwordloop: - stw r8,0(r9) // pack word into destination - addi r9,r9,4 -LwordloopEnter: - lwz r8,0(r4) // r8 <- next 4 source bytes - addi r4,r4,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r11,r10,r12 // r11 <- nonzero iff word has a 0-byte - bdnzt eq,Lwordloop // loop if ctr!=0 and cr0_eq - - beq Lleftovers // 0-byte not found in aligned words - -// Found a 0-byte. Store last word up to and including the 0, a byte at a time. -// r8 = last word, known to have a 0-byte -// r9 = dest ptr - -Lstorelastbytes: - srwi. r0,r8,24 // right justify next byte and test for 0 - slwi r8,r8,8 // shift next byte into position - stb r0,0(r9) // pack into dest - addi r9,r9,1 - bne Lstorelastbytes // loop until 0 stored - -L0found: - sub r3,r9,r3 // get #bytes stored, including 0 - subi r3,r3,1 // don't count the 0 - blr // return strlen(src) - -// 0-byte not found in aligned source words. There are up to 3 leftover source -// bytes, hopefully the 0-byte is among them. -// r4 = source ptr (word aligned) -// r5 = leftover bytes in buffer (0..3) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r8 = last full word of source -// r9 = dest ptr (unaligned) - -Lleftovers: - stw r8,0(r9) // store last word - addi r9,r9,4 - addi r0,r5,1 // make sure r5 terminate byte loop (not r0) - b Lbyteloop - -// Buffer full but 0-byte not found. Stuff a 0 into last byte of buffer. -// r3 = start of buffer -// r4 = ptr to next byte in source -// r9 = ptr to first byte past end of buffer - -L0notfound: - sub. r3,r9,r3 // get #bytes stored, ie original buffer length - beq Lfind0 // skip if buffer 0-length - li r0,0 // get a 0 - stb r0,-1(r9) // always store 0-byte unless buffer was 0-length - -// Keep searching for 0-byte ending source, so we can return strlen(source). -// Not optimized, since this is an error condition. -// r3 = number of bytes already copied -// r4 = ptr to next byte in source - -Lfind0: - lbz r0,0(r4) // get next byte - addi r4,r4,1 - addi r3,r3,1 // increment strlen - cmpwi r0,0 - bne Lfind0 // loop if not 0 - - subi r3,r3,1 // don't count the 0-byte - blr // return strlen(source) diff --git a/ppc/string/strncat.s b/ppc/string/strncat.s deleted file mode 100644 index 91b114e..0000000 --- a/ppc/string/strncat.s +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER -#include -#undef ASSEMBLER - -// ***************** -// * S T R N C A T * -// ***************** -// -// char* strncat(char *dst, const char *src, size_t count); -// -// We optimize the move by doing it word parallel. This introduces -// a complication: if we blindly did word load/stores until finding -// a 0, we might get a spurious page fault by touching bytes past it. -// To avoid this, we never do a "lwz" that crosses a page boundary, -// or store extra bytes. -// -// The test for 0s relies on the following inobvious but very efficient -// word-parallel test: -// x = dataWord + 0xFEFEFEFF -// y = ~dataWord & 0x80808080 -// if (x & y) == 0 then no zero found -// The test maps any non-zero byte to zero, and any zero byte to 0x80, -// with one exception: 0x01 bytes preceeding the first zero are also -// mapped to 0x80. -// -// Note that "count" refers to the max number of bytes to _append_. -// There is no limit to the number of bytes we will scan looking for -// the end of the "dst" string. - - .text - .globl EXT(strncat) - - .align 5 -LEXT(strncat) - andi. r0,r3,3 // is dst aligned? - dcbtst 0,r3 // touch in dst - lis r6,hi16(0xFEFEFEFF) // start to load magic constants - lis r7,hi16(0x80808080) - dcbt 0,r4 // touch in source - ori r6,r6,lo16(0xFEFEFEFF) - ori r7,r7,lo16(0x80808080) - mr r9,r3 // use r9 for dest ptr (must return r3 intact) - beq Lword0loop // dest is aligned - subfic r0,r0,4 // r0 <- #bytes to word align dest - mtctr r0 // set up byte loop - -// Loop over bytes looking for 0-byte marking end of dest, until dest is -// word aligned. -// r4 = source ptr (unaligned) -// r5 = count (unchanged so far) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = byte count - -Lbyte0loop: - lbz r8,0(r9) // r8 <- next dest byte - addi r9,r9,1 - cmpwi r8,0 // test for 0 - bdnzf eq,Lbyte0loop // loop until (ctr==0) | (r8==0) - - bne Lword0loop // haven't found 0, so enter word-aligned loop - andi. r0,r4,3 // is source aligned? - subi r9,r9,1 // point to the 0-byte we just stored - beq Laligned // source is already aligned - subfic r0,r0,4 // r0 <- #bytes to word align source - b Lbyteloop // must align source - -// Loop over words looking for 0-byte marking end of dest. -// r4 = source ptr (unaligned) -// r5 = count (unchanged so far) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (word aligned) - - .align 5 // align inner loops for speed -Lword0loop: - lwz r8,0(r9) // r8 <- next dest word - addi r9,r9,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r11,r10,r12 // r11 <- nonzero iff word has a 0-byte - beq Lword0loop // loop until 0 found - - slwi r10,r8,7 // move 0x01 bits (false hits) into 0x80 position - andi. r0,r4,3 // is source aligned? - andc r11,r11,r10 // mask out false hits - subi r9,r9,4 // back up r9 to the start of the word - cntlzw r10,r11 // find 0 byte (r0 = 0, 8, 16, or 24) - srwi r10,r10,3 // now r10 = 0, 1, 2, or 3 - add r9,r9,r10 // now r9 points to the 0-byte in dest - beq Laligned // skip if source already aligned - subfic r0,r0,4 // r0 <- #bytes to word align source - -// Copy min(r0,r5) bytes, until 0-byte. -// r0 = #bytes we propose to copy (NOTE: must be >0) -// r4 = source ptr (unaligned) -// r5 = length remaining in buffer (may be 0) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -Lbyteloop: - cmpwi r5,0 // buffer empty? (note: unsigned) - beq-- L0notfound // buffer full but 0 not found - lbz r8,0(r4) // r8 <- next source byte - subic. r0,r0,1 // decrement count of bytes to move - addi r4,r4,1 - subi r5,r5,1 // decrement buffer length remaining - stb r8,0(r9) // pack into dest - cmpwi cr1,r8,0 // 0-byte? - addi r9,r9,1 - beqlr cr1 // byte was 0, so done - bne Lbyteloop // r0!=0, source not yet aligned - -// Source is word aligned. Loop over words until 0-byte found or end -// of buffer. -// r4 = source ptr (word aligned) -// r5 = length remaining in buffer -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -Laligned: - srwi. r8,r5,2 // get #words in buffer - addi r0,r5,1 // if no words, copy rest of buffer - beq-- Lbyteloop // fewer than 4 bytes in buffer - mtctr r8 // set up word loop count - rlwinm r5,r5,0,0x3 // mask buffer length down to leftover bytes - b LwordloopEnter - -// Inner loop: move a word at a time, until one of two conditions: -// - a zero byte is found -// - end of buffer -// At this point, registers are as follows: -// r4 = source ptr (word aligned) -// r5 = bytes leftover in buffer (0..3) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = whole words left in buffer - - .align 5 // align inner loop, which is 8 words long -Lwordloop: - stw r8,0(r9) // pack word into destination - addi r9,r9,4 -LwordloopEnter: - lwz r8,0(r4) // r8 <- next 4 source bytes - addi r4,r4,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r11,r10,r12 // r11 <- nonzero iff word has a 0-byte - bdnzt eq,Lwordloop // loop if ctr!=0 and cr0_eq - - beq-- LcheckLeftovers // skip if 0-byte not found - -// Found a 0-byte. Store last word up to and including the 0, a byte at a time. -// r8 = last word, known to have a 0-byte -// r9 = dest ptr - -Lstorelastbytes: - srwi. r0,r8,24 // right justify next byte and test for 0 - slwi r8,r8,8 // shift next byte into position - stb r0,0(r9) // pack into dest - addi r9,r9,1 - bne Lstorelastbytes // loop until 0 stored - - blr - -// 0-byte not found while appending words to source. There might be up to -// 3 "leftover" bytes to append, hopefully the 0-byte is in there. -// r4 = source ptr (past word in r8) -// r5 = bytes leftover in buffer (0..3) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r8 = last word of source, with no 0-byte -// r9 = dest ptr (unaligned) - -LcheckLeftovers: - stw r8,0(r9) // store last whole word of source - addi r9,r9,4 - addi r0,r5,1 // let r5 (not r0) terminate byte loop - b Lbyteloop // append last few bytes - -// 0-byte not found in source. We append a 0 anyway, even though it will -// be past the end of the buffer. That's the way it's defined. -// r9 = dest ptr - -L0notfound: - li r0,0 - stb r0,0(r9) // add a 0, past end of buffer - blr - diff --git a/ppc/string/strncmp.s b/ppc/string/strncmp.s deleted file mode 100644 index 6fee8ce..0000000 --- a/ppc/string/strncmp.s +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER // we need the defs for cr7_eq etc -#include -#undef ASSEMBLER - -// ***************** -// * S T R N C M P * -// ***************** -// -// int strncmp(const char *s1, const char *s2, size_t len); -// -// We optimize the compare by doing it word parallel. This introduces -// a complication: if we blindly did word loads from both sides until -// finding a difference (or 0), we might get a spurious page fault by -// reading bytes past the difference. To avoid this, we never do a "lwz" -// that crosses a page boundary. -// -// The test for 0s relies on the following inobvious but very efficient -// word-parallel test: -// x = dataWord + 0xFEFEFEFF -// y = ~dataWord & 0x80808080 -// if (x & y) == 0 then no zero found -// The test maps any non-zero byte to zero, and any zero byte to 0x80, -// with one exception: 0x01 bytes preceeding the first zero are also -// mapped to 0x80. - - .text - .globl EXT(strncmp) - - .align 5 -LEXT(strncmp) // int strncmp(const char *s1,const char *s2,size_t len); - cmplwi cr1,r5,8 // is buffer too short to bother with word compares? - andi. r0,r3,3 // is LHS aligned? - dcbt 0,r3 // touch in LHS - subi r3,r3,4 // we use "lwzu" in the word inner loop - subi r4,r4,4 - blt cr1,Lshort // short buffer, just compare a byte at a time - lis r2,hi16(0xFEFEFEFF) // start to load magic constants - lis r6,hi16(0x80808080) - ori r2,r2,lo16(0xFEFEFEFF) - ori r6,r6,lo16(0x80808080) - beq Laligned // LHS is aligned - subfic r0,r0,4 // r0 <- #bytes to word align LHS - mtctr r0 // set up for byte loop - sub r5,r5,r0 // adjust length - b Lbyteloop - -// Handle short operands or end-of-buffer. -// r3 = LHS ptr - 4 (unaligned) -// r4 = RHS ptr - 4 (unaligned) -// r5 = length remaining in buffer (0..7) -// cr1 = blt set - -Lshort: - cmpwi r5,0 // buffer null? - mtctr r5 // assume not null, set up for loop - bne Lbyteloop // buffer not null - li r3,0 // if buffer null, say "equal" - blr - -// We're at a RHS page boundary. Compare 4 bytes in order to cross the page -// but still keep the LHS ptr word-aligned. -// r2 = 0xFEFEFEFF -// r3 = LHS ptr - 4 (aligned) -// r4 = RHS ptr - 4 (unaligned) -// r5 = length remaining in buffer (may be 0) -// r6 = 0x80808080 - -Lcrosspage: - cmplwi cr1,r5,8 // not enough left in buffer for word compares? - li r0,4 // get #bytes to cross RHS page - blt cr1,Lshort // buffer is about to end - mtctr r0 // set up to compare 4 bytes - sub r5,r5,r0 // adjust length - b Lbyteloop - -// Compare bytes, until 0-byte or difference found. -// r2 = 0xFEFEFEFF (if cr1 bge) -// r3 = LHS ptr - 4 (unaligned) -// r4 = RHS ptr - 4 (unaligned) -// r5 = length remaining in buffer (may be 0) -// r6 = 0x80808080 (if cr1 bge) -// cr1 = blt if this is end of buffer - - .align 5 // align inner loop, which is 8 words long -Lbyteloop: - lbz r7,4(r3) // next LHS byte - addi r3,r3,1 - lbz r8,4(r4) // next RHS byte - addi r4,r4,1 - cmpwi cr0,r7,0 // zero? - cmpw cr7,r7,r8 // equal? - crandc cr0_eq,cr7_eq,cr0_eq// set cr0_eq if equal and not 0 - bdnzt eq,Lbyteloop // loop until different, 0, or (ctr==0) - - bne Ldifferent // done if bytes differ or are 0 - blt cr1,Ldifferent // done if buffer end (ie, if r5==0) - -// LHS is now word aligned. Loop over words until end of RHS page or buffer. -// When we get to the end of the page, we compare 4 bytes, so that we keep -// the LHS word aligned. -// r2 = 0xFEFEFEFF -// r3 = LHS ptr - 4 (aligned) -// r4 = RHS ptr - 4 (unaligned) -// r5 = length remaining in buffer (may be 0) -// r6 = 0x80808080 - -Laligned: - addi r9,r4,4 // restore true address of next RHS byte - rlwinm r9,r9,0,0xFFF // get RHS offset in page - subfic r0,r9,4096 // get #bytes left in RHS page - subfc r7,r0,r5 // *** - subfe r8,r5,r5 // * r9 <- min(r0,r5), - and r7,r7,r8 // * using algorithm in Compiler Writer's Guide - add r9,r0,r7 // *** - srwi. r8,r9,2 // get #words we can compare - beq-- Lcrosspage // no words so advance to next RHS page - slwi r9,r8,2 // convert #words to #bytes - mtctr r8 // set up loop count - sub r5,r5,r9 // decrement length remaining - b Lwordloop - -// Inner loop: compare a word at a time, until one of three conditions: -// - a difference is found -// - a zero byte is found -// - end of count (ie, end of buffer or RHS page, whichever is first) -// At this point, registers are as follows: -// r2 = 0xFEFEFEFF -// r3 = LHS ptr - 4 (aligned) -// r4 = RHS ptr - 4 (unaligned) -// r5 = length remaining in buffer (may be 0) -// r6 = 0x80808080 -// ctr = count of words until end of buffer or RHS page - - .align 5 // align inner loop, which is 8 words long -Lwordloop: - lwzu r7,4(r3) // r7 <- next 4 LHS bytes - lwzu r8,4(r4) // r8 <- next 4 RHS bytes - add r10,r7,r2 // r10 <- LHS + 0xFEFEFEFF - andc r12,r6,r7 // r12 <- ~LHS & 0x80808080 - xor r11,r7,r8 // r11 <- compare the words - and r9,r10,r12 // r9 <- nonzero iff LHS has a 0-byte - or. r12,r9,r11 // combine difference and 0-test vectors - bdnzt eq,Lwordloop // loop if ctr!=0 and cr0_eq - - beq-- Lcrosspage // skip if buffer or page end reached - -// Found differing bytes and/or a 0-byte. Determine which comes first, and -// subtract the bytes to compute the return value. We also need to mask out the -// false hits in the 0-byte test, which consist of 0x01 bytes that preceed -// the 0-byte. - - slwi r0,r7,7 // move 0x01 bits in LHS into position 0x80 - andc r9,r9,r0 // mask out the false 0-hits from 0x01 bytes - or r11,r11,r9 // recompute difference vector - cntlzw r0,r11 // find 1st difference (r0 = 0..31) - rlwinm r9,r0,0,0x18 // byte align bit offset (r9 = 0,8,16, or 24) - addi r0,r9,8 // now, r0 = 8, 16, 24, or 32 - rlwnm r7,r7,r0,24,31 // right justify differing bytes and mask off rest - rlwnm r8,r8,r0,24,31 - -Ldifferent: // bytes in r7 and r8 differ or are 0 - sub r3,r7,r8 // compute return value - blr - diff --git a/ppc/string/strncpy.s b/ppc/string/strncpy.s deleted file mode 100644 index c703b66..0000000 --- a/ppc/string/strncpy.s +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#define ASSEMBLER -#include -#undef ASSEMBLER - -// ***************** -// * S T R N C P Y * -// ***************** -// -// char* strncpy(const char *dst, const char *src, size_t len)); -// -// We optimize the move by doing it word parallel. This introduces -// a complication: if we blindly did word load/stores until finding -// a 0, we might get a spurious page fault by touching bytes past it. -// To avoid this, we never do a "lwz" that crosses a page boundary, -// or store unnecessary bytes. -// -// The test for 0s relies on the following inobvious but very efficient -// word-parallel test: -// x = dataWord + 0xFEFEFEFF -// y = ~dataWord & 0x80808080 -// if (x & y) == 0 then no zero found -// The test maps any non-zero byte to zero, and any zero byte to 0x80, -// with one exception: 0x01 bytes preceeding the first zero are also -// mapped to 0x80. - - .text - .globl EXT(strncpy) - - .align 5 -LEXT(strncpy) - andi. r0,r4,3 // is source aligned? - dcbt 0,r4 // touch in source - lis r6,hi16(0xFEFEFEFF) // start to load magic constants - lis r7,hi16(0x80808080) - dcbtst 0,r3 // touch in dst - ori r6,r6,lo16(0xFEFEFEFF) - ori r7,r7,lo16(0x80808080) - mr r9,r3 // use r9 for dest ptr (must return r3 intact) - add r2,r3,r5 // remember where end of buffer is - beq Laligned // source is aligned - subfic r0,r0,4 // r0 <- #bytes to word align source - -// Copy min(r0,r5) bytes, until 0-byte. -// r0 = #bytes we propose to copy (NOTE: must be >0) -// r2 = ptr to 1st byte not in buffer -// r4 = source ptr (unaligned) -// r5 = length remaining in buffer (may be 0) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -Lbyteloop: - cmpwi r5,0 // buffer empty? (note: unsigned) - beqlr-- // buffer full but 0 not found - lbz r8,0(r4) // r8 <- next source byte - subic. r0,r0,1 // decrement count of bytes to move - addi r4,r4,1 - subi r5,r5,1 // decrement buffer length remaining - stb r8,0(r9) // pack into dest - cmpwi cr1,r8,0 // 0-byte? - addi r9,r9,1 - beq cr1,L0found // byte was 0 - bne Lbyteloop // r0!=0, source not yet aligned - -// Source is word aligned. Loop over words until end of buffer. Note that we -// have aligned the source, rather than the dest, in order to avoid spurious -// page faults. -// r2 = ptr to 1st byte not in buffer -// r4 = source ptr (word aligned) -// r5 = length remaining in buffer -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) - -Laligned: - srwi. r8,r5,2 // get #words in buffer - addi r0,r5,1 // if no words, compare rest of buffer - beq-- Lbyteloop // r8==0, no words - mtctr r8 // set up word loop count - rlwinm r5,r5,0,0x3 // mask buffer length down to leftover bytes - b LwordloopEnter - -// Move a word at a time, until one of two conditions: -// - a zero byte is found -// - end of buffer -// At this point, registers are as follows: -// r2 = ptr to 1st byte not in buffer -// r4 = source ptr (word aligned) -// r5 = leftover bytes in buffer (0..3) -// r6 = 0xFEFEFEFF -// r7 = 0x80808080 -// r9 = dest ptr (unaligned) -// ctr = whole words left in buffer - - .align 5 // align inner loop, which is 8 words long -Lwordloop: - stw r8,0(r9) // pack word into destination - addi r9,r9,4 -LwordloopEnter: - lwz r8,0(r4) // r8 <- next 4 source bytes - addi r4,r4,4 - add r10,r8,r6 // r10 <- word + 0xFEFEFEFF - andc r12,r7,r8 // r12 <- ~word & 0x80808080 - and. r11,r10,r12 // r11 <- nonzero iff word has a 0-byte - bdnzt eq,Lwordloop // loop if ctr!=0 and cr0_eq - - stw r8,0(r9) // pack in last word - addi r9,r9,4 - addi r0,r5,1 // if no 0-byte found... - beq-- Lbyteloop // ...fill rest of buffer a byte at a time - -// Found a 0-byte, point to following byte with r9. - - slwi r0,r8,7 // move 0x01 false hit bits to 0x80 position - andc r11,r11,r0 // mask out false hits - cntlzw r0,r11 // find the 0-byte (r0 = 0,8,16, or 24) - srwi r0,r0,3 // now r0 = 0, 1, 2, or 3 - subfic r0,r0,3 // now r0 = 3, 2, 1, or 0 - sub r9,r9,r0 // now r9 points one past the 0-byte - -// Zero rest of buffer, if any. We don't simply branch to bzero or memset, because -// r3 is set up incorrectly, and there is a fair amt of overhead involved in using them. -// Instead we use a simpler routine, which will nonetheless be faster unless the number -// of bytes to 0 is large and we're on a 64-bit machine. -// r2 = ptr to 1st byte not in buffer -// r9 = ptr to 1st byte to zero - -L0found: - sub r5,r2,r9 // r5 <- #bytes to zero (ie, rest of buffer) - cmplwi r5,32 // how many? - neg r8,r9 // start to compute #bytes to align ptr - li r0,0 // get a 0 - blt Ltail // skip if <32 bytes - andi. r10,r8,31 // get #bytes to 32-byte align - sub r5,r5,r10 // adjust buffer length - srwi r11,r5,5 // get #32-byte chunks - cmpwi cr1,r11,0 // any chunks? - mtctr r11 // set up dcbz loop count - beq 1f // skip if already 32-byte aligned - -// 32-byte align. We just store 32 0s, rather than test and use conditional -// branches. - - stw r0,0(r9) // zero next 32 bytes - stw r0,4(r9) - stw r0,8(r9) - stw r0,12(r9) - stw r0,16(r9) - stw r0,20(r9) - stw r0,24(r9) - stw r0,28(r9) - add r9,r9,r10 // now r9 is 32-byte aligned - beq cr1,Ltail // skip if no 32-byte chunks - b 1f - -// Loop doing 32-byte version of DCBZ instruction. - - .align 4 // align the inner loop -1: - dcbz 0,r9 // zero another 32 bytes - addi r9,r9,32 - bdnz 1b - -// Store trailing bytes. -// r0 = 0 -// r5 = #bytes to store (<32) -// r9 = address - -Ltail: - mtcrf 0x02,r5 // remaining byte count to cr6 and cr7 - mtcrf 0x01,r5 - bf 27,2f // 16-byte chunk? - stw r0,0(r9) - stw r0,4(r9) - stw r0,8(r9) - stw r0,12(r9) - addi r9,r9,16 -2: - bf 28,4f // 8-byte chunk? - stw r0,0(r9) - stw r0,4(r9) - addi r9,r9,8 -4: - bf 29,5f // word? - stw r0,0(r9) - addi r9,r9,4 -5: - bf 30,6f // halfword? - sth r0,0(r9) - addi r9,r9,2 -6: - bflr 31 // byte? - stb r0,0(r9) - blr - - - diff --git a/ppc/sys/ur_cthread.s b/ppc/sys/ur_cthread.s index 02e1955..8cbd330 100644 --- a/ppc/sys/ur_cthread.s +++ b/ppc/sys/ur_cthread.s @@ -22,13 +22,10 @@ * * @APPLE_LICENSE_HEADER_END@ */ - -#define __APPLE_API_PRIVATE -#include -#undef __APPLE_API_PRIVATE - .text .align 2 .globl _pthread_self _pthread_self: - ba _COMM_PAGE_PTHREAD_SELF + li r0, 0x7FF2 + sc + blr diff --git a/pthreads/Makefile.inc b/pthreads/Makefile.inc index 39af25d..acd3d05 100644 --- a/pthreads/Makefile.inc +++ b/pthreads/Makefile.inc @@ -3,11 +3,6 @@ SRCS += pthread_cond.c pthread_tsd.c pthread.c \ pthread_mutex.c thread_setup.c lock.s stack.s pthread_rwlock.c -# machine-dependent pthreads sources -.if exists(${.CURDIR}/${MACHINE_ARCH}/pthreads/Makefile.inc) -.include "${.CURDIR}/${MACHINE_ARCH}/pthreads/Makefile.inc" -.endif - PTHREADS_INSTHDRS += pthread.h pthread_impl.h sched.h PTHREADS_INSTHDRS := ${PTHREADS_INSTHDRS:S/^/${.CURDIR}\/pthreads\//} INSTHDRS += ${PTHREADS_INSTHDRS} diff --git a/pthreads/lock.s b/pthreads/lock.s index 3465321..257bc1b 100644 --- a/pthreads/lock.s +++ b/pthreads/lock.s @@ -25,11 +25,6 @@ #if defined(__ppc__) - -#define __APPLE_API_PRIVATE -#include -#undef __APPLE_API_PRIVATE - #import #import @@ -40,14 +35,37 @@ */ .text + LEAF(__spin_lock_try) - ba _COMM_PAGE_SPINLOCK_TRY +1: + lwarx r5,0,r3 // Read the lock + addi r4,0,0x1 // Lock value + cmpwi r5,0x0 // Is it busy? + bne- 2f // Yes, return 0 + stwcx. r4,0,r3 // Try to lock the lock + bne- 1b // Lost reservation, try again + addi r3,0,1 // Got the lock + isync // Sync instruction stream + blr // Return 1 +2: addi r3,0,0 // Could not get the lock + blr // Return 0 END(__spin_lock_try) .globl _spin_lock LEAF(__spin_lock) _spin_lock: - ba _COMM_PAGE_SPINLOCK_LOCK +1: + lwarx r5,0,r3 // Read the lock + addi r4,0,0x1 // Lock value + cmpwi r5,0x0 // Is it busy? + bne- 2f // Yes, goto retry logic + stwcx. r4,0,r3 // Try to lock the lock + bne- 1b // Lost reservation, try again + isync // Sync instruction stream + blr // Return +2: + CALL_EXTERN(__spin_lock_retry) + blr // Return END(__spin_lock) /* void spin_unlock(int *p); @@ -57,7 +75,10 @@ END(__spin_lock) .globl _spin_unlock LEAF(__spin_unlock) _spin_unlock: - ba _COMM_PAGE_SPINLOCK_UNLOCK + sync + li32 r4,0 + stw r4,0(r3) + blr END(__spin_unlock) #elif defined(__i386__) diff --git a/pthreads/pthread.c b/pthreads/pthread.c index 0b9edad..c067de1 100644 --- a/pthreads/pthread.c +++ b/pthreads/pthread.c @@ -38,8 +38,6 @@ #include #include #include -#define __APPLE_API_PRIVATE -#include #include "pthread_internals.h" @@ -47,9 +45,6 @@ extern void _pthread_set_self(pthread_t); extern void mig_init(int); -/* Get CPU capabilities from the kernel */ -__private_extern__ void _init_cpu_capabilities(void); - /* Needed to tell the malloc subsystem we're going multithreaded */ extern void set_malloc_singlethreaded(int); @@ -81,6 +76,9 @@ __private_extern__ void _spin_lock_retry(pthread_lock_t *lock) } while(!_spin_lock_try(lock)); } +/* Apparently, bcopy doesn't declare _cpu_has_altivec anymore */ +int _cpu_has_altivec = 0; + extern mach_port_t thread_recycle_port; /* These are used to keep track of a semaphore pool shared by mutexes and condition @@ -1289,6 +1287,12 @@ pthread_setconcurrency(int new_level) * Perform package initialization - called automatically when application starts */ +extern int _cpu_capabilities; + +#define kHasAltivec 0x01 +#define kCache32 0x04 +#define kUseDcba 0x20 + static int pthread_init(void) { @@ -1304,6 +1308,9 @@ pthread_init(void) int mib[2]; size_t len; int numcpus; + + extern int _bcopy_initialize(void); + count = HOST_PRIORITY_INFO_COUNT; info = (host_info_t)&priority_info; @@ -1314,8 +1321,8 @@ pthread_init(void) printf("host_info failed (%d); probably need privilege.\n", kr); else { default_priority = priority_info.user_priority; - min_priority = priority_info.minimum_priority; - max_priority = priority_info.maximum_priority; + min_priority = priority_info.minimum_priority; + max_priority = priority_info.maximum_priority; } attrs = &_pthread_attr_default; pthread_attr_init(attrs); @@ -1343,26 +1350,18 @@ pthread_init(void) else { if (basic_info.avail_cpus > 1) _spin_tries = MP_SPIN_TRIES; + /* This is a crude test */ + if (basic_info.cpu_subtype >= CPU_SUBTYPE_POWERPC_7400) + _cpu_has_altivec = 1; } } - mach_port_deallocate(mach_task_self(), host); - - _init_cpu_capabilities(); /* check for vector unit, cache line size etc */ -#if defined(__ppc__) - /* Use fsqrt instruction in sqrt() if available. */ - if (_cpu_capabilities & kHasFsqrt) { - extern size_t hw_sqrt_len; - extern double sqrt( double ); - extern double hw_sqrt( double ); - extern void sys_icache_invalidate(void *, size_t); - - memcpy ( (void *)sqrt, (void *)hw_sqrt, hw_sqrt_len ); - sys_icache_invalidate((void *)sqrt, hw_sqrt_len); - } -#endif - + len = sizeof(_cpu_capabilities); + sysctlbyname("hw._cpu_capabilities", &_cpu_capabilities, &len, NULL, 0); + + _bcopy_initialize(); + mig_init(1); /* enable multi-threaded mig interfaces */ return 0; } diff --git a/i386/string/strcmp.s b/string/strcmp.s similarity index 92% rename from i386/string/strcmp.s rename to string/strcmp.s index 781f6de..94c82b1 100644 --- a/i386/string/strcmp.s +++ b/string/strcmp.s @@ -25,6 +25,7 @@ .text .globl _strcmp _strcmp: +#if defined(__i386__) movl 0x04(%esp),%eax movl 0x08(%esp),%edx jmp L2 /* Jump into the loop! */ @@ -91,3 +92,17 @@ L3: movzbl (%eax),%eax /* unsigned comparison */ movzbl (%edx),%edx subl %edx,%eax ret +#elif defined(__ppc__) + mr r5,r3 +1: lbz r3,0(r5) + addi r5,r5,1 + cmpwi cr1,r3,0 + lbz r0,0(r4) + addi r4,r4,1 + subf. r3,r0,r3 + beqlr+ cr1 + beq- 1b + blr +#else +#error strcmp is not defined for this architecture +#endif diff --git a/sys/gettimeofday.c b/sys/gettimeofday.c index eb5bca7..1a0e4bb 100644 --- a/sys/gettimeofday.c +++ b/sys/gettimeofday.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -35,58 +35,38 @@ #include #include -#define __APPLE_API_PRIVATE -#include -#undef __APPLE_API_PRIVATE - -#ifdef __ppc__ -#define expand(arg) strgfy(arg) -#define strgfy(arg) #arg - -static __attribute__ ((noinline)) -int commpage_gettimeofday(struct timeval *tp) -{ - asm volatile("ba " expand(_COMM_PAGE_GETTIMEOFDAY) ); - return 1; -} -#endif /* __ppc__ */ - int gettimeofday (struct timeval *tp, struct timezone *tzp) { static int validtz = 0; static struct timezone cached_tz = {0}; struct timeval localtv; +#ifdef __ppc__ + extern __ppc_gettimeofday(struct timeval *, struct timezone *); +#endif - if (tp == NULL) { - tp = &localtv; + if (tzp && (tp == NULL) && (validtz == 0)) { + tp = &localtv; } #ifdef __ppc__ - { - extern int __ppc_gettimeofday(struct timeval *, struct timezone *); - - if (commpage_gettimeofday(tp)) { /* first try commpage */ - if (__ppc_gettimeofday(tp,tzp)) { /* if it fails, use syscall */ - return (-1); - } - } - } + if(__ppc_gettimeofday(tp, tzp)) + return(-1); #else if (syscall (SYS_gettimeofday, tp, tzp) < 0) { return (-1); } #endif if (tzp) { - if (validtz == 0) { - struct tm *localtm = localtime ((time_t *)&tp->tv_sec); - cached_tz.tz_dsttime = localtm->tm_isdst; - cached_tz.tz_minuteswest = - (-localtm->tm_gmtoff / SECSPERMIN) + - (localtm->tm_isdst * MINSPERHOUR); - validtz = 1; - } - tzp->tz_dsttime = cached_tz.tz_dsttime; - tzp->tz_minuteswest = cached_tz.tz_minuteswest; + if (validtz == 0) { + struct tm *localtm = localtime ((time_t *)&tp->tv_sec); + cached_tz.tz_dsttime = localtm->tm_isdst; + cached_tz.tz_minuteswest = + (-localtm->tm_gmtoff / SECSPERMIN) + + (localtm->tm_isdst * MINSPERHOUR); + validtz = 1; + } + tzp->tz_dsttime = cached_tz.tz_dsttime; + tzp->tz_minuteswest = cached_tz.tz_minuteswest; } return (0); } diff --git a/sys/sigtramp.c b/sys/sigtramp.c index 373d1ff..cb1ad53 100644 --- a/sys/sigtramp.c +++ b/sys/sigtramp.c @@ -41,117 +41,6 @@ int __in_sigtramp = 0; #endif -/* These defn should match the kernel one */ -#define UC_TRAD 1 -#ifdef __ppc__ -#define UC_TRAD64 20 -#define UC_TRAD64_VEC 25 -#define UC_FLAVOR 30 -#define UC_FLAVOR_VEC 35 -#define UC_FLAVOR64 40 -#define UC_FLAVOR64_VEC 45 -#define UC_DUAL 50 -#define UC_DUAL_VEC 55 - - /* The following are valid mcontext sizes */ -#define UC_FLAVOR_SIZE ((PPC_THREAD_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_FLOAT_STATE_COUNT) * sizeof(int)) - -#define UC_FLAVOR_VEC_SIZE ((PPC_THREAD_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_FLOAT_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int)) - -#define UC_FLAVOR64_SIZE ((PPC_THREAD_STATE64_COUNT + PPC_EXCEPTION_STATE64_COUNT + PPC_FLOAT_STATE_COUNT) * sizeof(int)) - -#define UC_FLAVOR64_VEC_SIZE ((PPC_THREAD_STATE64_COUNT + PPC_EXCEPTION_STATE64_COUNT + PPC_FLOAT_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int)) -#endif - -#ifdef __ppc__ -/* This routine will be replaced by an assembly soon */ -static int -restore64_state(mcontext_t mctx, mcontext64_t mctx64) -{ - if (mctx->ss.srr0 != (unsigned int)mctx64->ss.srr0) - return(0); - if (mctx->ss.srr1 != (unsigned int)mctx64->ss.srr1) - return(0); - if (mctx->ss.r0 != (unsigned int)mctx64->ss.r0) - return(0); - if (mctx->ss.r1 != (unsigned int)mctx->ss.r1) - return(0); - if (mctx->ss.r2 != (unsigned int)mctx->ss.r2) - return(0); - if (mctx->ss.r3 != (unsigned int)mctx->ss.r3) - return(0); - if (mctx->ss.r4 != (unsigned int)mctx->ss.r4) - return(0); - if (mctx->ss.r5 != (unsigned int)mctx->ss.r5) - return(0); - if (mctx->ss.r6 != (unsigned int)mctx->ss.r6) - return(0); - if (mctx->ss.r7 != (unsigned int)mctx->ss.r7) - return(0); - if (mctx->ss.r8 != (unsigned int)mctx->ss.r8) - return(0); - if (mctx->ss.r9 != (unsigned int)mctx->ss.r9) - return(0); - if (mctx->ss.r10 != (unsigned int)mctx->ss.r10) - return(0); - if (mctx->ss.r11 != (unsigned int)mctx->ss.r11) - return(0); - if (mctx->ss.r12 != (unsigned int)mctx->ss.r12) - return(0); - if (mctx->ss.r13 != (unsigned int)mctx->ss.r13) - return(0); - if (mctx->ss.r14 != (unsigned int)mctx->ss.r14) - return(0); - if (mctx->ss.r15 != (unsigned int)mctx->ss.r15) - return(0); - if (mctx->ss.r16 != (unsigned int)mctx->ss.r16) - return(0); - if (mctx->ss.r17 != (unsigned int)mctx->ss.r17) - return(0); - if (mctx->ss.r18 != (unsigned int)mctx->ss.r18) - return(0); - if (mctx->ss.r19 != (unsigned int)mctx->ss.r19) - return(0); - if (mctx->ss.r20 != (unsigned int)mctx->ss.r20) - return(0); - if (mctx->ss.r21 != (unsigned int)mctx->ss.r21) - return(0); - if (mctx->ss.r22 != (unsigned int)mctx64->ss.r22) - return(0); - if (mctx->ss.r23 != (unsigned int)mctx64->ss.r23) - return(0); - if (mctx->ss.r24 != (unsigned int)mctx64->ss.r24) - return(0); - if (mctx->ss.r25 != (unsigned int)mctx64->ss.r25) - return(0); - if (mctx->ss.r26 != (unsigned int)mctx64->ss.r26) - return(0); - if (mctx->ss.r27 != (unsigned int)mctx64->ss.r27) - return(0); - if (mctx->ss.r28 != (unsigned int)mctx64->ss.r28) - return(0); - if (mctx->ss.r29 != (unsigned int)mctx64->ss.r29) - return(0); - if (mctx->ss.r30 != (unsigned int)mctx64->ss.r30) - return(0); - if (mctx->ss.r31 != (unsigned int)mctx64->ss.r31) - return(0); - - if (mctx->ss.cr != mctx64->ss.cr) - return(0); - if (mctx->ss.xer != (unsigned int)mctx64->ss.xer) - return(0); - if (mctx->ss.lr != (unsigned int)mctx64->ss.lr) - return(0); - if (mctx->ss.ctr != (unsigned int)mctx64->ss.ctr) - return(0); - - return(1); - -} - -#endif - void _sigtramp( union __sigaction_u __sigaction_u, @@ -160,59 +49,19 @@ _sigtramp( siginfo_t *sinfo, struct ucontext *uctx ) { -#ifdef __ppc__ - int ctxstyle = UC_FLAVOR; -#endif - mcontext_t mctx; - mcontext64_t mctx64; #if defined(__DYNAMIC__) __in_sigtramp++; #endif -#ifndef __ppc__ - if (sigstyle == UC_TRAD) + if (sigstyle == 1) sa_handler(sig); -#else /* __ppc__ */ - if ((sigstyle == UC_TRAD) || (sigstyle == UC_TRAD64) || (sigstyle == UC_TRAD64_VEC)) - sa_handler(sig); - +#ifdef __ppc__ else sa_sigaction(sig, sinfo, uctx); - - if ((sigstyle == UC_DUAL) || (sigstyle == UC_DUAL_VEC)) { - mctx = uctx->uc_mcontext; - mctx64 = (mcontext64_t)((char *)(uctx->uc_mcontext) + sizeof(struct mcontext)); - /* restore 64bit state ? */ - if (restore64_state(mctx, mctx64)) { - uctx->uc_mcontext = (void *)mctx64; - if (sigstyle == UC_DUAL) { - uctx->uc_mcsize = UC_FLAVOR64_SIZE; - ctxstyle = UC_FLAVOR64; - } else { - uctx->uc_mcsize = UC_FLAVOR64_VEC_SIZE; - ctxstyle = UC_FLAVOR64_VEC; - } - } else { - if (sigstyle == UC_DUAL) - ctxstyle = UC_FLAVOR; - else - ctxstyle = UC_FLAVOR_VEC; - } - } else - ctxstyle = sigstyle; #endif /* __ppc__ */ - + #if defined(__DYNAMIC__) __in_sigtramp--; #endif -#ifdef __ppc__ - { - /* sigreturn(uctx, ctxstyle); */ - /* syscall (SYS_SIGRETURN, uctx, ctxstyle); */ - syscall (184, uctx, ctxstyle); - } -#else - sigreturn(uctx); -#endif /* __ppc__ */ + sigreturn(uctx); } - -- 2.45.2