From 7c78c5292a74d7cc20fc03e02fbfc976e072f928 Mon Sep 17 00:00:00 2001 From: Apple Date: Mon, 20 Feb 2006 22:46:08 +0000 Subject: [PATCH] Libc-391.1.21.tar.gz --- Makefile | 5 +- darwin/copyfile.c | 35 +- gdtoa/FreeBSD/_ldtoa.c.patch | 12 +- gdtoa/FreeBSD/gdtoa-strtod.c | 18 +- gdtoa/FreeBSD/gdtoa-strtod.c.patch | 26 +- gdtoa/FreeBSD/gdtoa-strtodg.c | 22 +- gdtoa/FreeBSD/gdtoa-strtodg.c.patch | 6 +- gdtoa/FreeBSD/gdtoa_strtopx.c.patch | 12 +- gen/FreeBSD/popen.c.patch | 13 +- gen/FreeBSD/setprogname.c.patch | 30 +- gen/filesec.c | 9 +- gen/malloc.c | 21 +- gen/scalable_malloc.c | 7 +- gen/stack_logging.c | 16 +- i386/pthreads/pthread_set_self.s | 4 +- i386/string/Makefile.inc | 9 +- i386/string/bcmp.s | 29 ++ i386/string/memcmp.s | 185 ++++++++++ i386/string/memset.s | 241 +++++++++++++ i386/string/strcmp.s | 194 +++++++---- i386/string/strcpy.s | 151 +++++++++ i386/string/strlen.s | 78 +++++ i386/string/strncmp.s | 191 +++++++++++ i386/string/strncpy.s | 190 +++++++++++ i386/sys/Makefile.inc | 5 + i386/sys/OSAtomic.s | 10 + i386/sys/SYS.h | 30 +- i386/sys/__sysenter_trap.s | 30 ++ i386/sys/_setjmp.s | 3 +- i386/sys/cerror.s | 8 +- i386/sys/commpage.c | 23 ++ i386/sys/fork.s | 74 ++-- i386/sys/getpid.s | 1 + i386/sys/i386_get_ldt.s | 35 ++ i386/sys/i386_gettimeofday.s | 44 +++ i386/sys/i386_set_ldt.s | 35 ++ i386/sys/lseek.s | 2 +- i386/sys/pipe.s | 2 +- i386/sys/setjmp.s | 92 ++--- i386/sys/sigaltstack.s | 2 +- i386/sys/sigreturn.s | 2 +- include/asl.h | 8 +- include/assert.h | 2 +- include/sys/acl.h | 4 +- mach/panic.c | 3 + posix1e/acl_translate.c | 153 +++++++-- ppc/sys/ppc_gettimeofday.s | 7 +- pthreads/lock.s | 2 - pthreads/pthread_tsd.c | 14 +- stdio/FreeBSD/printf.3.patch | 22 +- stdio/FreeBSD/vfprintf.c.patch | 484 ++++++++++++++------------ stdio/FreeBSD/vfwprintf.c.patch | 505 ++++++++++++++++------------ stdio/Makefile.inc | 6 +- sys/Makefile.inc | 4 + sys/OpenBSD/stack_protector.c | 75 +++++ sys/gettimeofday.c | 30 +- sys/sigtramp.c | 15 +- 57 files changed, 2438 insertions(+), 798 deletions(-) create mode 100644 i386/string/bcmp.s create mode 100644 i386/string/memcmp.s create mode 100644 i386/string/memset.s create mode 100644 i386/string/strcpy.s create mode 100644 i386/string/strlen.s create mode 100644 i386/string/strncmp.s create mode 100644 i386/string/strncpy.s create mode 100644 i386/sys/__sysenter_trap.s create mode 100644 i386/sys/commpage.c create mode 100644 i386/sys/i386_get_ldt.s create mode 100644 i386/sys/i386_gettimeofday.s create mode 100644 i386/sys/i386_set_ldt.s create mode 100644 sys/OpenBSD/stack_protector.c diff --git a/Makefile b/Makefile index 64c7f8b..b190072 100644 --- a/Makefile +++ b/Makefile @@ -19,13 +19,16 @@ SHLIB_MINOR= 0 .if (${MACHINE_ARCH} == unknown) MACHINE_ARCH != /usr/bin/arch .endif -CC = gcc-3.5 +CC = gcc-4.0 # always set __DARWIN_UNIX03 to zero (variant will set to one) except for ppc64 .if (${MACHINE_ARCH} == ppc64) CFLAGS += -D__DARWIN_UNIX03=1 .else CFLAGS += -D__DARWIN_UNIX03=0 .endif +.if (${MACHINE_ARCH} == i386) +CFLAGS += -march=prescott -msse3 +.endif CFLAGS += -D__LIBC__ -DNOID -I${.CURDIR}/include .ifdef ALTLIBCHEADERS INCLUDEDIR = ${ALTLIBCHEADERS} diff --git a/darwin/copyfile.c b/darwin/copyfile.c index bb28d55..3e5fc22 100644 --- a/darwin/copyfile.c +++ b/darwin/copyfile.c @@ -835,7 +835,7 @@ typedef struct apple_double_entry u_int32_t type; /* entry type: see list, 0 invalid */ u_int32_t offset; /* entry data offset from the beginning of the file. */ u_int32_t length; /* entry data length in bytes. 
*/ -} apple_double_entry_t; +} __attribute__((packed)) apple_double_entry_t; typedef struct apple_double_header @@ -847,7 +847,7 @@ typedef struct apple_double_header apple_double_entry_t entries[2]; /* 'finfo' & 'rsrc' always exist */ u_int8_t finfo[FINDERINFOSIZE]; /* Must start with Finder Info (32 bytes) */ u_int8_t pad[2]; /* get better alignment inside attr_header */ -} apple_double_header_t; +} __attribute__((packed)) apple_double_header_t; /* Entries are aligned on 4 byte boundaries */ @@ -858,7 +858,7 @@ typedef struct attr_entry u_int16_t flags; u_int8_t namelen; /* length of name including NULL termination char */ u_int8_t name[1]; /* NULL-terminated UTF-8 name (up to 128 bytes max) */ -} attr_entry_t; +} __attribute__((packed)) attr_entry_t; /* Header + entries must fit into 64K */ @@ -873,7 +873,7 @@ typedef struct attr_header u_int32_t reserved[3]; u_int16_t flags; u_int16_t num_attrs; -} attr_header_t; +} __attribute__((packed)) attr_header_t; #pragma options align=reset @@ -1217,23 +1217,23 @@ static int copyfile_pack(copyfile_state_t s) /* * Fill in the Apple Double Header defaults. */ - filehdr->appledouble.magic = SWAP32 (ADH_MAGIC); - filehdr->appledouble.version = SWAP32 (ADH_VERSION); - filehdr->appledouble.numEntries = SWAP16 (2); - filehdr->appledouble.entries[0].type = SWAP32 (AD_FINDERINFO); - filehdr->appledouble.entries[0].offset = SWAP32 (offsetof(apple_double_header_t, finfo)); - filehdr->appledouble.entries[0].length = SWAP32 (FINDERINFOSIZE); - filehdr->appledouble.entries[1].type = SWAP32 (AD_RESOURCE); - filehdr->appledouble.entries[1].offset = SWAP32 (offsetof(apple_double_header_t, pad)); + filehdr->appledouble.magic = ADH_MAGIC; + filehdr->appledouble.version = ADH_VERSION; + filehdr->appledouble.numEntries = 2; + filehdr->appledouble.entries[0].type = AD_FINDERINFO; + filehdr->appledouble.entries[0].offset = offsetof(apple_double_header_t, finfo); + filehdr->appledouble.entries[0].length = FINDERINFOSIZE; + filehdr->appledouble.entries[1].type = AD_RESOURCE; + filehdr->appledouble.entries[1].offset = offsetof(apple_double_header_t, pad); filehdr->appledouble.entries[1].length = 0; bcopy(ADH_MACOSX, filehdr->appledouble.filler, sizeof(filehdr->appledouble.filler)); /* * Fill in the initial Attribute Header. */ - filehdr->magic = SWAP32 (ATTR_HDR_MAGIC); - filehdr->debug_tag = SWAP32 (s->sb.st_ino); - filehdr->data_start = SWAP32 (sizeof(attr_header_t)); + filehdr->magic = ATTR_HDR_MAGIC; + filehdr->debug_tag = s->sb.st_ino; + filehdr->data_start = sizeof(attr_header_t); /* * Collect the attribute names. @@ -1380,7 +1380,7 @@ next: filehdr->appledouble.entries[0].length = filehdr->appledouble.entries[1].offset - filehdr->appledouble.entries[0].offset; - filehdr->total_size = SWAP32 (filehdr->appledouble.entries[1].offset); + filehdr->total_size = filehdr->appledouble.entries[1].offset; } /* Copy Resource Fork. */ @@ -1390,6 +1390,9 @@ next: /* Write the header to disk. 
*/ datasize = filehdr->appledouble.entries[1].offset; + swap_adhdr(&filehdr->appledouble); + swap_attrhdr(filehdr); + if (pwrite(s->dst_fd, filehdr, datasize, 0) != datasize) { if (COPYFILE_VERBOSE & s->flags) diff --git a/gdtoa/FreeBSD/_ldtoa.c.patch b/gdtoa/FreeBSD/_ldtoa.c.patch index c4a62ed..e7a5319 100644 --- a/gdtoa/FreeBSD/_ldtoa.c.patch +++ b/gdtoa/FreeBSD/_ldtoa.c.patch @@ -1,5 +1,5 @@ ---- _ldtoa.c.orig 2004-12-08 22:50:28.000000000 -0800 -+++ _ldtoa.c 2004-12-08 22:52:58.000000000 -0800 +--- _ldtoa.c.orig 2004-06-03 15:17:18.000000000 -0700 ++++ _ldtoa.c 2005-10-08 22:43:25.000000000 -0700 @@ -61,14 +61,34 @@ char *ret; union IEEEl2bits u; @@ -35,22 +35,22 @@ kind = STRTOG_Normal; #ifdef LDBL_IMPLICIT_NBIT bits[LDBL_MANT_DIG / 32] |= 1 << ((LDBL_MANT_DIG - 1) % 32); -@@ -77,12 +97,14 @@ +@@ -77,12 +97,12 @@ case FP_ZERO: kind = STRTOG_Zero; break; +#if !defined(__ppc__) && !defined(__ppc64__) case FP_SUBNORMAL: kind = STRTOG_Denormal; - #ifdef LDBL_IMPLICIT_NBIT +-#ifdef LDBL_IMPLICIT_NBIT be++; - #endif +-#endif break; +#endif /* !defined(__ppc__) && !defined(__ppc64__) */ case FP_INFINITE: kind = STRTOG_Infinite; break; -@@ -96,5 +118,9 @@ +@@ -96,5 +116,9 @@ ret = gdtoa(&fpi, be, (ULong *)bits, &kind, mode, ndigits, decpt, rve); if (*decpt == -32768) *decpt = INT_MAX; diff --git a/gdtoa/FreeBSD/gdtoa-strtod.c b/gdtoa/FreeBSD/gdtoa-strtod.c index bc06bfe..66a3baa 100644 --- a/gdtoa/FreeBSD/gdtoa-strtod.c +++ b/gdtoa/FreeBSD/gdtoa-strtod.c @@ -30,6 +30,9 @@ THIS SOFTWARE. * with " at " changed at "@" and " dot " changed to "."). */ #include "gdtoaimp.h" +#ifndef NO_FENV_H +#include +#endif #ifdef USE_LOCALE #include "locale.h" @@ -112,7 +115,18 @@ strtod switch(s[1]) { case 'x': case 'X': - switch((i = gethex(&s, &fpi, &exp, &bb, sign)) & STRTOG_Retmask) { + { +#if defined(FE_DOWNWARD) && defined(FE_TONEAREST) && defined(FE_TOWARDZERO) && defined(FE_UPWARD) + FPI fpi1 = fpi; + switch(fegetround()) { + case FE_TOWARDZERO: fpi1.rounding = 0; break; + case FE_UPWARD: fpi1.rounding = 2; break; + case FE_DOWNWARD: fpi1.rounding = 3; + } +#else +#define fpi1 fpi +#endif + switch((i = gethex(&s, &fpi1, &exp, &bb, sign)) & STRTOG_Retmask) { case STRTOG_NoNumber: s = s00; sign = 0; @@ -124,7 +138,7 @@ strtod Bfree(bb); } ULtod(((U*)&rv)->L, bits, exp, i); - } + }} goto ret; } } diff --git a/gdtoa/FreeBSD/gdtoa-strtod.c.patch b/gdtoa/FreeBSD/gdtoa-strtod.c.patch index 4a87763..99db605 100644 --- a/gdtoa/FreeBSD/gdtoa-strtod.c.patch +++ b/gdtoa/FreeBSD/gdtoa-strtod.c.patch @@ -1,5 +1,5 @@ ---- gdtoa-strtod.c.orig 2005-01-20 20:12:37.000000000 -0800 -+++ gdtoa-strtod.c 2005-02-17 01:31:26.000000000 -0800 +--- gdtoa-strtod.c.orig 2005-10-08 11:32:33.000000000 -0700 ++++ gdtoa-strtod.c 2005-10-08 11:38:17.000000000 -0700 @@ -29,6 +29,8 @@ /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ @@ -7,9 +7,9 @@ +#include "xlocale_private.h" + #include "gdtoaimp.h" - - #ifdef USE_LOCALE -@@ -56,11 +58,11 @@ + #ifndef NO_FENV_H + #include +@@ -59,11 +61,11 @@ #endif double @@ -24,16 +24,16 @@ #endif { #ifdef Avoid_Underflow -@@ -112,7 +114,7 @@ - switch(s[1]) { - case 'x': - case 'X': -- switch((i = gethex(&s, &fpi, &exp, &bb, sign)) & STRTOG_Retmask) { -+ switch((i = gethex(&s, &fpi, &exp, &bb, sign, loc)) & STRTOG_Retmask) { +@@ -126,7 +128,7 @@ + #else + #define fpi1 fpi + #endif +- switch((i = gethex(&s, &fpi1, &exp, &bb, sign)) & STRTOG_Retmask) { ++ switch((i = gethex(&s, &fpi1, &exp, &bb, sign, loc)) & STRTOG_Retmask) { case STRTOG_NoNumber: s = s00; sign = 0; -@@ -142,8 +144,9 @@ +@@ -156,8 +158,9 @@ else if (nd < 16) z = 10*z + c - '0'; nd0 = nd; @@ -44,7 +44,7 @@ #else if (c == '.') #endif -@@ -966,3 +969,13 @@ +@@ -980,3 +983,13 @@ return sign ? -dval(rv) : dval(rv); } diff --git a/gdtoa/FreeBSD/gdtoa-strtodg.c b/gdtoa/FreeBSD/gdtoa-strtodg.c index a7d25e9..cbdf4aa 100644 --- a/gdtoa/FreeBSD/gdtoa-strtodg.c +++ b/gdtoa/FreeBSD/gdtoa-strtodg.c @@ -649,16 +649,8 @@ strtodg } bb0 = 0; /* trailing zero bits in rvb */ e2 = rve + rvbits - nbits; - if (e2 > fpi->emax) { - rvb->wds = 0; - irv = STRTOG_Infinite | STRTOG_Overflow | STRTOG_Inexhi; -#ifndef NO_ERRNO - errno = ERANGE; -#endif - infnanexp: - *exp = fpi->emax + 1; - goto ret; - } + if (e2 > fpi->emax + 1) + goto huge; rve1 = rve + rvbits - nbits; if (e2 < (emin = fpi->emin)) { denorm = 1; @@ -985,6 +977,16 @@ strtodg Bfree(bs); Bfree(bd0); Bfree(delta); + if (rve > fpi->emax) { + huge: + rvb->wds = 0; + irv = STRTOG_Infinite | STRTOG_Overflow | STRTOG_Inexhi; +#ifndef NO_ERRNO + errno = ERANGE; +#endif + infnanexp: + *exp = fpi->emax + 1; + } ret: if (denorm) { if (sudden_underflow) { diff --git a/gdtoa/FreeBSD/gdtoa-strtodg.c.patch b/gdtoa/FreeBSD/gdtoa-strtodg.c.patch index 977a233..d60d887 100644 --- a/gdtoa/FreeBSD/gdtoa-strtodg.c.patch +++ b/gdtoa/FreeBSD/gdtoa-strtodg.c.patch @@ -1,5 +1,5 @@ ---- gdtoa-strtodg.c.orig 2005-01-20 20:12:37.000000000 -0800 -+++ gdtoa-strtodg.c 2005-02-17 01:32:24.000000000 -0800 +--- gdtoa-strtodg.c.orig 2005-10-08 11:33:23.000000000 -0700 ++++ gdtoa-strtodg.c 2005-10-08 11:40:57.000000000 -0700 @@ -29,6 +29,8 @@ /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ @@ -43,7 +43,7 @@ #else if (c == '.') #endif -@@ -676,6 +679,9 @@ +@@ -668,6 +671,9 @@ rvb->x[0] = 0; *exp = emin; irv = STRTOG_Underflow | STRTOG_Inexlo; diff --git a/gdtoa/FreeBSD/gdtoa_strtopx.c.patch b/gdtoa/FreeBSD/gdtoa_strtopx.c.patch index 2bd3ec1..f810e57 100644 --- a/gdtoa/FreeBSD/gdtoa_strtopx.c.patch +++ b/gdtoa/FreeBSD/gdtoa_strtopx.c.patch @@ -1,5 +1,5 @@ --- gdtoa_strtopx.c.orig 2005-01-20 20:12:37.000000000 -0800 -+++ gdtoa_strtopx.c 2005-02-17 01:54:02.000000000 -0800 ++++ gdtoa_strtopx.c 2005-10-08 17:10:15.000000000 -0700 @@ -29,6 +29,8 @@ /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). 
*/ @@ -30,3 +30,13 @@ switch(k & STRTOG_Retmask) { case STRTOG_NoNumber: case STRTOG_Zero: +@@ -87,7 +89,8 @@ + + case STRTOG_Infinite: + L[_0] = 0x7fff; +- L[_1] = L[_2] = L[_3] = L[_4] = 0; ++ L[_1] = 0x8000; ++ L[_2] = L[_3] = L[_4] = 0; + break; + + case STRTOG_NaN: diff --git a/gen/FreeBSD/popen.c.patch b/gen/FreeBSD/popen.c.patch index e0ce5b8..9a2785c 100644 --- a/gen/FreeBSD/popen.c.patch +++ b/gen/FreeBSD/popen.c.patch @@ -1,5 +1,5 @@ ---- popen.c.orig Mon May 24 23:50:41 2004 -+++ popen.c Tue May 25 00:09:39 2004 +--- popen.c.orig 2003-05-20 15:21:02.000000000 -0700 ++++ popen.c 2005-09-17 16:08:55.000000000 -0700 @@ -43,6 +43,7 @@ #include "namespace.h" #include @@ -49,6 +49,15 @@ if ((cur = malloc(sizeof(struct pid))) == NULL) { (void)_close(pdes[0]); +@@ -104,7 +106,7 @@ + argv[3] = NULL; + + THREAD_LOCK(); +- switch (pid = vfork()) { ++ switch (pid = fork()) { + case -1: /* Error. */ + THREAD_UNLOCK(); + (void)_close(pdes[0]); @@ -138,7 +140,7 @@ (void)_close(pdes[1]); } diff --git a/gen/FreeBSD/setprogname.c.patch b/gen/FreeBSD/setprogname.c.patch index 6ca8162..19cc554 100644 --- a/gen/FreeBSD/setprogname.c.patch +++ b/gen/FreeBSD/setprogname.c.patch @@ -1,21 +1,37 @@ ---- setprogname.c.orig Mon Apr 28 15:05:02 2003 -+++ setprogname.c Fri May 16 14:13:59 2003 -@@ -3,6 +3,8 @@ +--- setprogname.c.orig 2003-05-20 15:21:02.000000000 -0700 ++++ setprogname.c 2005-10-26 00:58:44.000000000 -0700 +@@ -3,6 +3,10 @@ #include #include ++#include ++#include +#include +#define __progname (*_NSGetProgname()) #include "libc_private.h" -@@ -13,7 +15,7 @@ - +@@ -10,10 +14,20 @@ + setprogname(const char *progname) + { + const char *p; +- ++ char buf[2*MAXCOMLEN+1]; ++ int mib[2]; ++ p = strrchr(progname, '/'); if (p != NULL) - __progname = p + 1; -+ __progname = (char *)(p + 1); ++ __progname = (char *)(++p); else - __progname = progname; -+ __progname = (char *)progname; ++ __progname = (char *)(p = progname); ++ ++ strlcpy(&buf[0], p, sizeof(buf)); ++ ++ mib[0] = CTL_KERN; ++ mib[1] = KERN_PROCNAME; ++ ++ /* ignore errors as this is not a hard error */ ++ sysctl(mib, 2, NULL, NULL, &buf[0], 2*MAXCOMLEN); } diff --git a/gen/filesec.c b/gen/filesec.c index 55d29d5..343839f 100644 --- a/gen/filesec.c +++ b/gen/filesec.c @@ -29,6 +29,11 @@ #include #include +/* + * Versions of copy_int/copy_ext that retain native endianity. + */ +extern ssize_t acl_copy_ext_native(void *buf_p, acl_t acl, ssize_t size); +extern acl_t acl_copy_int_native(const void *buf_p); struct _filesec { int fs_valid; @@ -149,7 +154,7 @@ filesec_get_property(filesec_t fsec, filesec_property_t property, void *propptr) if (fsec->fs_aclbuf == _FILESEC_REMOVE_ACL) { *(acl_t *)propptr = _FILESEC_REMOVE_ACL; } else { - *(acl_t *)propptr = acl_copy_int(fsec->fs_aclbuf); + *(acl_t *)propptr = acl_copy_int_native(fsec->fs_aclbuf); if (*(acl_t *)propptr == NULL) error = errno; } @@ -252,7 +257,7 @@ filesec_set_property(filesec_t fsec, filesec_property_t property, const void *pr error = errno; break; } - copysize = acl_copy_ext(aclbuf, acl, aclsize); + copysize = acl_copy_ext_native(aclbuf, acl, aclsize); if (copysize < 0) { free(aclbuf); error = EINVAL; diff --git a/gen/malloc.c b/gen/malloc.c index a0837c3..44e7705 100644 --- a/gen/malloc.c +++ b/gen/malloc.c @@ -129,8 +129,8 @@ set_flags_from_environment(void) { if (flag) { fd = open(flag, O_WRONLY|O_APPEND|O_CREAT, 0644); if (fd >= 0) { - malloc_debug_file = fd; - fcntl(fd, F_SETFD, 0); // clear close-on-exec flag XXX why? 
+ malloc_debug_file = fd; + fcntl(fd, F_SETFD, 0); // clear close-on-exec flag XXX why? } else { malloc_printf("Could not open %s, using stderr\n", flag); } @@ -222,21 +222,30 @@ set_flags_from_environment(void) { } malloc_zone_t * -malloc_create_zone(vm_size_t start_size, unsigned flags) { +malloc_create_zone(vm_size_t start_size, unsigned flags) +{ malloc_zone_t *zone; + if (!malloc_num_zones) { char **env = * _NSGetEnviron(); char **p; char *c; - /* Given that all environment variables start with "Malloc" we optimize by scanning quickly first the environment, therefore avoiding repeated calls to getenv() */ + malloc_debug_file = STDERR_FILENO; + + /* + * Given that all environment variables start with "Malloc" we optimize by scanning quickly + * first the environment, therefore avoiding repeated calls to getenv(). + * If we are setu/gid these flags are ignored to prevent a malicious invoker from changing + * our behaviour. + */ for (p = env; (c = *p) != NULL; ++p) { if (!strncmp(c, "Malloc", 6)) { - set_flags_from_environment(); + if (!issetugid()) + set_flags_from_environment(); break; } } - } zone = create_scalable_zone(start_size, malloc_debug_flags); malloc_zone_register(zone); diff --git a/gen/scalable_malloc.c b/gen/scalable_malloc.c index b47e48e..386aab4 100644 --- a/gen/scalable_malloc.c +++ b/gen/scalable_malloc.c @@ -603,7 +603,7 @@ allocate_pages(szone_t *szone, size_t size, unsigned char align, unsigned debug_ boolean_t add_guard_pages = debug_flags & SCALABLE_MALLOC_ADD_GUARD_PAGES; size_t allocation_size = round_page(size); size_t delta; - + if (align) add_guard_pages = 0; // too cumbersome to deal with that if (!allocation_size) allocation_size = 1 << vm_page_shift; if (add_guard_pages) allocation_size += 2 * (1 << vm_page_shift); @@ -3075,7 +3075,10 @@ szone_malloc_should_clear(szone_t *szone, size_t size, boolean_t cleared_request } else { // large or huge num_pages = round_page(size) >> vm_page_shift; - ptr = large_and_huge_malloc(szone, num_pages); + if (num_pages == 0) /* Overflowed */ + ptr = 0; + else + ptr = large_and_huge_malloc(szone, num_pages); } #if DEBUG_MALLOC if (LOG(szone, ptr)) diff --git a/gen/stack_logging.c b/gen/stack_logging.c index cd96a7c..ff59ae4 100644 --- a/gen/stack_logging.c +++ b/gen/stack_logging.c @@ -57,9 +57,17 @@ static inline void copy_pages(const void *source, void *dest, unsigned bytes) { /*************** Recording stack ***********/ -static void *first_frame_address(void) { +// The three functions below are marked as noinline to ensure consistent inlining across +// all versions of GCC and all compiler flags. The malloc stack logging code expects +// these functions to not be inlined. +// For details, see . +// +// The performance cost of not inlining these functions is negligible, and they're only +// called when MallocStackLogging is set anyway, so they won't affect normal usage. 
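The noinline attributes matter because the stack walker below starts a fixed number of frames above itself; if the compiler inlined these helpers, that frame count would shift with optimization settings. As a rough illustration only (hypothetical name, assuming the usual i386 frame layout of a saved %ebp followed by the caller's return address), the walk that thread_stack_pcs() performs amounts to:

#include <stddef.h>

struct frame {
    struct frame *next;              /* saved frame pointer of the caller */
    void         *ret;               /* return address in the caller */
};

static __attribute__((noinline)) unsigned
sketch_stack_pcs(void **buffer, unsigned max)
{
    struct frame *fp = (struct frame *)__builtin_frame_address(0);
    unsigned nb = 0;

    while (fp != NULL && nb < max) {
        buffer[nb++] = fp->ret;      /* record the caller's PC */
        fp = fp->next;               /* step to the next saved frame */
    }
    return nb;                       /* a real walker also sanity-checks fp */
}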
+ +static __attribute__((noinline)) void *first_frame_address(void) { #if defined(__i386__) - return __builtin_frame_address(1); + return __builtin_frame_address(0); #elif defined(__ppc__) || defined(__ppc64__) void *addr; #warning __builtin_frame_address IS BROKEN IN BEAKER: RADAR #2340421 @@ -71,7 +79,7 @@ static void *first_frame_address(void) { #endif } -static void *next_frame_address(void *addr) { +static __attribute__((noinline)) void *next_frame_address(void *addr) { void *ret; #if defined(__MACH__) && defined(__i386__) __asm__ volatile("movl (%1),%0" : "=r" (ret) : "r" (addr)); @@ -100,7 +108,7 @@ static void *next_frame_address(void *addr) { #error ********** Unimplemented architecture #endif -void thread_stack_pcs(vm_address_t *buffer, unsigned max, unsigned *nb) { +__attribute__((noinline)) void thread_stack_pcs(vm_address_t *buffer, unsigned max, unsigned *nb) { void *addr; addr = first_frame_address(); *nb = 0; diff --git a/i386/pthreads/pthread_set_self.s b/i386/pthreads/pthread_set_self.s index 3e3b519..82aab3a 100644 --- a/i386/pthreads/pthread_set_self.s +++ b/i386/pthreads/pthread_set_self.s @@ -21,6 +21,8 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include + .text .align 2, 0x90 .globl ___pthread_set_self @@ -28,7 +30,7 @@ ___pthread_set_self: pushl 4(%esp) pushl $0 movl $3,%eax - lcall $0x3b,$0 + MACHDEP_SYSCALL_TRAP addl $8,%esp movw %ax,%gs ret diff --git a/i386/string/Makefile.inc b/i386/string/Makefile.inc index 4c6264d..bfccaae 100644 --- a/i386/string/Makefile.inc +++ b/i386/string/Makefile.inc @@ -9,4 +9,11 @@ MDSRCS += bcopy.s \ bzero.s \ memcpy.s \ memmove.s \ - strcmp.s + strlen.s \ + strcpy.s \ + strcmp.s \ + strncpy.s \ + strncmp.s \ + memcmp.s \ + bcmp.s \ + memset.s diff --git a/i386/string/bcmp.s b/i386/string/bcmp.s new file mode 100644 index 0000000..8f2cd80 --- /dev/null +++ b/i386/string/bcmp.s @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * bcmp() is implemented in memcmp.s, as it is equivalent to memcmp() in OSX. + * (The two symbols, bcmp and memcmp, have the same value.) + * This empty file is here to prevent the Free BSD machine independent version + * from building. + */ diff --git a/i386/string/memcmp.s b/i386/string/memcmp.s new file mode 100644 index 0000000..a69e3ea --- /dev/null +++ b/i386/string/memcmp.s @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +// *************** *********** +// * M E M C M P * and * B C M P * +// *************** *********** +// +// int memcmp(const char *s1, const char *s2, size_t len); +// int bcmp(const char *s1, const char *s2, size_t len); +// +// Bcmp returns (+,0,-), whereas memcmp returns the true difference +// between the first differing bytes, but we treat them identically. +// +// We optimize the compare by doing it with SSE. This introduces +// a complication: if we blindly did vector loads from both sides until +// finding a difference, we might get a spurious page fault by +// reading bytes past the difference. To avoid this, we never do a load +// that crosses a page boundary. + +#define kShort 18 // too short for vectors (must be >16) + + .text + .align 4 + + .globl _memcmp + .globl _bcmp + +_memcmp: // int memcmp(const char *s1,const char *s2,size_t len); +_bcmp: // int bcmp(const char *s1,const char *s2,size_t len); + pushl %esi + pushl %edi + movl 20(%esp),%ecx // get length + movl 12(%esp),%esi // get LHS ptr + movl 16(%esp),%edi // get RHS ptr + cmpl $(kShort),%ecx // worth accelerating? + ja LNotShort // yes + + +// Too short to bother with parallel compares. Loop over bytes. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length (<= kShort) + +LShort: + testl %ecx,%ecx // 0-length? + jnz LShortLoop // no + xorl %eax,%eax // return 0 + jmp LExit + .align 4,0x90 // align inner loops to optimize I-fetch +LShortLoop: // loop over bytes + movzb (%esi),%eax // get LHS byte + movzb (%edi),%edx // get RHS byte + incl %esi + incl %edi + subl %edx,%eax // compare them + jnz LExit // done if not equal + decl %ecx // decrement length + jnz LShortLoop +LExit: // return value is in %eax + popl %edi + popl %esi + ret + +LNotEqual: // here from LLoopOverBytes with LHS in eax + movzb (%edi),%edx // get RHS byte + subl %edx,%eax // generate return value (nonzero) + popl %edi + popl %esi + ret + + +// Loop over bytes until we reach end of a page. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length remaining after end of loop (ie, already adjusted) +// %edx = #bytes until next page (1..15) + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%esi),%eax // get LHS byte + inc %esi + cmpb (%edi),%al // compare to RHS byte + jnz LNotEqual // done if not equal + inc %edi + dec %edx // more to go? + jnz LLoopOverBytes + + +// Long enough to justify overhead of setting up vector compares. In order to +// avoid spurious page faults, we loop over: +// +// min( length, bytes_in_LHS_page, bytes_in_RHS_page) >> 4 +// +// 16-byte chunks. 
When we near a page end, we have to revert to a byte-by-byte +// comparison until reaching the next page, then resume the vector comparison. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length (> kShort) + +LNotShort: + movl %esi,%eax // copy ptrs + movl %edi,%edx + andl $4095,%eax // mask down to page offsets + andl $4095,%edx + cmpl %eax,%edx // which is bigger? + cmova %edx,%eax // %eax = max(LHS offset, RHS offset); + movl $4096,%edx + subl %eax,%edx // get #bytes to next page crossing + cmpl %ecx,%edx // will operand run out first? + cmova %ecx,%edx // get min(length remaining, bytes to page end) + movl %edx,%eax + shrl $4,%edx // get #chunks till end of operand or page + jnz LLoopOverChunks // enter vector loop + +// Too near page end for vectors. + + subl %eax,%ecx // adjust length remaining + movl %eax,%edx // %edx <- #bytes to page end + cmpl $(kShort),%ecx // will there be enough after we cross page for vectors? + ja LLoopOverBytes // yes + addl %eax,%ecx // no, restore total length remaining + jmp LShortLoop // compare rest byte-by-byte (%ecx != 0) + + +// Loop over 16-byte chunks. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length remaining +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%esi),%xmm0 // get LHS + movdqu (%edi),%xmm1 // get RHS + addl $16,%esi + pcmpeqb %xmm1,%xmm0 // compare LHS to RHS + addl $16,%edi + pmovmskb %xmm0,%eax // collect comparison result bits (1 if equal) + subl $16,%ecx // adjust length remaining + xorl $0xFFFF,%eax // all equal? + jne LDifferent // no, we found differing bytes + dec %edx // more to go? + jnz LLoopOverChunks + + cmpl $(kShort),%ecx // a lot more to compare? + jbe LShort // no + jmp LNotShort // compute distance to next page crossing etc + + +// Found a difference. +// %esi = LHS ptr, already advanced by 16 +// %edi = RHS ptr, already advanced by 16 +// %eax = complemented compare vector (ie, 0 == equal) + +LDifferent: + bsf %eax,%edx // which byte differed? + subl $16,%esi // point to byte 0 while we wait for bit scan + subl $16,%edi + movzb (%esi,%edx),%eax // get LHS byte + movzb (%edi,%edx),%ecx // get RHS byte + subl %ecx,%eax // compute difference (ie, return value) + popl %edi + popl %esi + ret diff --git a/i386/string/memset.s b/i386/string/memset.s new file mode 100644 index 0000000..0e0ae2a --- /dev/null +++ b/i386/string/memset.s @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + + +/* This file contains the following functions: + * + * void *memset(void *b, int c, size_t len); + * void memset_pattern4(void *b, const void *c4, size_t len); + * void memset_pattern8(void *b, const void *c8, size_t len); + * void memset_pattern16(void *b, const void *c16, size_t len); + * + * Calls of memset() with c==0 are routed to the bzero() routine. Most of the + * others go to _COMM_PAGE_MEMSET_PATTERN, which is entered as follows: + * %edi = ptr to memory to set (aligned) + * %edx = length (which can be short, though we bias in favor of long operands) + * %xmm0 = the pattern to store + * Return conditions: + * %eax, %edi, %esi, %ecx, and %edx all trashed + * + * NB: we avoid "stos" family of instructions (stosl, stosb), as they are very slow + * on P4s and probably other processors. + */ + + #define kShort 255 // for nonzero memset(), too short for commpage + + + .text + .globl _memset + .align 2 +_memset: // void *memset(void *b, int c, size_t len); + movl 8(%esp),%eax // get 1-byte pattern + movl 12(%esp),%edx // get length + andl $0xFF,%eax // (c==0) ? + jnz LNonzero // not a bzero + + movl $(_COMM_PAGE_BZERO),%eax// map memset(p,0,n) into bzero(p,n) + movl %edx,8(%esp) // put count where bzero() expects it + jmp %eax // enter commpage + + + // Handle memset of a nonzero value. + +LNonzero: + pushl %edi // save a few nonvolatiles + pushl %esi + movl %eax,%esi // replicate byte in %al into all four bytes + movl 12(%esp),%edi // point to operand + shll $8,%esi + orl %esi,%eax + movl %eax,%esi + shll $16,%esi + orl %esi,%eax // now %eax has "c" in all 4 bytes + cmpl $(kShort),%edx // is operand too short for SSE? + ja LCallCommpage // no + +// Nonzero memset() too short to call commpage. +// %eax = replicated 4-byte pattern +// %edi = ptr +// %edx = length (<= kShort) + + cmpl $16,%edx // long enough to word align? + jge 3f // yes + test %edx,%edx // length==0? + jz 6f +1: + movb %al,(%edi) // pack in a byte + inc %edi + dec %edx + jnz 1b + jmp 6f +2: + movb %al,(%edi) // pack in a byte + inc %edi + dec %edx +3: + test $3,%edi // is ptr doubleword aligned? + jnz 2b // no + movl %edx,%ecx // copy length + shrl $2,%edx // #doublewords to store +4: + movl %eax,(%edi) // store aligned doubleword + addl $4,%edi + dec %edx + jnz 4b + andl $3,%ecx // any leftover bytes? + jz 6f // no +5: + movb %al,(%edi) // pack in a byte + inc %edi + dec %ecx + jnz 5b +6: + movl 12(%esp),%eax // get return value (ie, original ptr) + popl %esi + popl %edi + ret + +// Nonzero memset() is long enough to call commpage. +// %eax = replicated 4-byte pattern +// %edi = ptr +// %edx = length (> kShort) + +LCallCommpage: + movd %eax,%xmm0 // move %eax to low 4 bytes of %xmm0 + pshufd $(0x00),%xmm0,%xmm0 // replicate across the vector + movl %edi,%ecx // copy dest ptr + negl %ecx + andl $15,%ecx // get #bytes to align ptr + jz 2f // skip if already aligned + subl %ecx,%edx // decrement length +1: + movb %al,(%edi) // pack in a byte + inc %edi + dec %ecx + jnz 1b +2: // ptr aligned, length long enough to justify + movl $(_COMM_PAGE_MEMSET_PATTERN),%eax + call %eax // call commpage to do the heavy lifting + movl 12(%esp),%eax // get return value (ie, original ptr) + popl %esi + popl %edi + ret + + +// Handle memset of a 16-byte pattern. 
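Before the pattern variants, a rough C model of the plain memset() path above may help: the fill byte is widened into a 4-byte pattern, the pointer is aligned, and the bulk of the buffer is stored a word at a time (the bzero()/commpage hand-offs and the SSE replication are left out; the name is hypothetical):

#include <stddef.h>
#include <stdint.h>

void *sketch_memset(void *b, int c, size_t len)
{
    unsigned char *p   = b;
    uint32_t       pat = (uint32_t)c & 0xFF;

    pat |= pat << 8;                          /* replicate the byte ...   */
    pat |= pat << 16;                         /* ... into all four bytes  */

    while (len > 0 && ((uintptr_t)p & 3)) {   /* align to a word boundary */
        *p++ = (unsigned char)pat;
        len--;
    }
    for (; len >= 4; len -= 4, p += 4)        /* aligned word stores      */
        *(uint32_t *)(void *)p = pat;
    while (len-- > 0)                         /* trailing bytes           */
        *p++ = (unsigned char)pat;
    return b;
}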
+ + .globl _memset_pattern16 + .align 2, 0x90 +_memset_pattern16: // void memset_pattern16(void *b, const void *c16, size_t len); + pushl %edi + pushl %esi + movl 20(%esp),%edx // get length + movl 16(%esp),%esi // get ptr to 16-byte pattern + movl 12(%esp),%edi // point to operand + movdqu (%esi),%xmm0 // load the pattern + jmp LAlignPtr + + +// Handle memset of an 8-byte pattern. + + .globl _memset_pattern8 + .align 2, 0x90 +_memset_pattern8: // void memset_pattern8(void *b, const void *c8, size_t len); + pushl %edi + pushl %esi + movl 20(%esp),%edx // get length + movl 16(%esp),%esi // get ptr to 8-byte pattern + movl 12(%esp),%edi // point to operand + movq (%esi),%xmm0 // load pattern into low 8 bytes + punpcklqdq %xmm0,%xmm0 // replicate into all 16 + jmp LAlignPtr + +// Handle memset of a 4-byte pattern. + + .globl _memset_pattern4 + .align 2, 0x90 +_memset_pattern4: // void memset_pattern4(void *b, const void *c4, size_t len); + pushl %edi + pushl %esi + movl 20(%esp),%edx // get length + movl 16(%esp),%esi // get ptr to 4-byte pattern + movl 12(%esp),%edi // point to operand + movd (%esi),%xmm0 // load pattern into low 4 bytes + pshufd $(0x00),%xmm0,%xmm0 // replicate the 4 bytes across the vector + + +// Align ptr if necessary. We must rotate the pattern right for each byte we +// store while aligning the ptr. Since there is no rotate instruction in SSE3, +// we have to synthesize the rotates. +// %edi = ptr +// %edx = length +// %xmm0 = pattern + +LAlignPtr: // NB: can drop down to here! + cmpl $100,%edx // long enough to bother aligning ptr? + movl %edi,%ecx // copy ptr + jb LReady // not long enough + negl %ecx + andl $15,%ecx // get #bytes to align ptr + jz LReady // already aligned + subl %ecx,%edx // adjust length + + test $1,%cl // 1-byte store required? + movd %xmm0,%eax // get 4 low bytes in %eax + jz 2f // no + movdqa %xmm0,%xmm1 // copy pattern so we can shift in both directions + movb %al,(%edi) // pack in the low-order byte + psrldq $1,%xmm0 // shift pattern right 1 byte + inc %edi + pslldq $15,%xmm1 // shift pattern left 15 bytes + shrl $8,%eax // in case 2-byte store is required + por %xmm1,%xmm0 // complete right rotate of pattern by 1 byte +2: + test $2,%cl // 2-byte store required? + jz 4f // no + psrldq $2,%xmm0 // shift pattern down 2 bytes + movw %ax,(%edi) // pack in next two bytes + pinsrw $7,%eax,%xmm0 // insert low word of %eax into high word of %xmm0 + addl $2,%edi // adjust ptr +4: + test $4,%cl // 4-byte store required? + jz 8f // no + movd %xmm0,(%edi) // store low 4 bytes of %xmm0 + pshufd $(0x39),%xmm0,%xmm0 // rotate %xmm0 right 4 bytes (mask == 00 11 10 01) + addl $4,%edi // adjust ptr +8: + test $8,%cl // 8-byte store required? + jz LReady // no + movq %xmm0,(%edi) // store low 8 bytes of %xmm0 + pshufd $(0x4e),%xmm0,%xmm0 // rotate %xmm0 right 8 bytes (mask == 01 00 11 10) + addl $8,%edi // adjust ptr + +// Ptr is aligned if practical, we're ready to call commpage to do the heavy lifting. + +LReady: + movl $(_COMM_PAGE_MEMSET_PATTERN),%eax + call %eax // call commpage to do the heavy lifting + popl %esi + popl %edi + ret diff --git a/i386/string/strcmp.s b/i386/string/strcmp.s index a21cea6..fb1047f 100644 --- a/i386/string/strcmp.s +++ b/i386/string/strcmp.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -20,72 +20,126 @@ * * @APPLE_LICENSE_HEADER_END@ */ -.text -.globl _strcmp -_strcmp: - movl 0x04(%esp),%eax - movl 0x08(%esp),%edx - jmp L2 /* Jump into the loop! */ - - .align 2,0x90 -L1: incl %eax - incl %edx -L2: movb (%eax),%cl - testb %cl,%cl /* null terminator??? */ - jz L3 - cmpb %cl,(%edx) /* chars match??? */ - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - je L1 - .align 2, 0x90 -L3: movzbl (%eax),%eax /* unsigned comparison */ - movzbl (%edx),%edx - subl %edx,%eax - ret + + +// *************** +// * S T R C M P * +// *************** +// +// int strcmp(const char *s1, const char *s2); +// +// We optimize the compare by doing it in parallel, using SSE. This introduces +// a complication: if we blindly did vector loads from both sides until +// finding a difference (or 0), we might get a spurious page fault by +// reading bytes past the difference. To avoid this, we never do a load +// that crosses a page boundary. + + .text + .globl _strcmp + + .align 4 +_strcmp: // int strcmp(const char *s1,const char *s2); + pushl %esi + pushl %edi + movl 12(%esp),%esi // get LHS ptr + movl 16(%esp),%edi // get RHS ptr + + +// In order to avoid spurious page faults, we loop over: +// +// min( bytes_in_LHS_page, bytes_in_RHS_page) >> 4 +// +// 16-byte chunks. When we near a page end, we have to revert to a byte-by-byte +// comparison until reaching the next page, then resume the vector comparison. +// %esi = LHS ptr +// %edi = RHS ptr + +LNextChunk: + movl %esi,%eax // copy ptrs + movl %edi,%edx + andl $4095,%eax // mask down to page offsets + andl $4095,%edx + cmpl %eax,%edx // which is bigger? + cmova %edx,%eax // %eax = max(LHS offset, RHS offset); + movl $4096,%edx + subl %eax,%edx // get #bytes to next page crossing + movl %edx,%eax + shrl $4,%edx // get #chunks till end of operand or page + jnz LLoopOverChunks // enter vector loop + movl %eax,%edx // no chunks... + jmp LLoopOverBytes // ...so loop over bytes until page end + + +// Loop over bytes. +// %esi = LHS ptr +// %edi = RHS ptr +// %edx = byte count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%esi),%eax // get LHS byte + movzb (%edi),%ecx // get RHS byte + inc %esi + inc %edi + testl %eax,%eax // 0? + jz LExit0 // yes, we're done + subl %ecx,%eax // compare them + jnz LExit // done if not equal + dec %edx // more to go? + jnz LLoopOverBytes + + jmp LNextChunk // we've come to end of page + + +// Loop over 16-byte chunks. 
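The page-crossing guard computed at LNextChunk above reduces to a small calculation: take the larger of the two operands' offsets within their 4 KB pages, and only run the vector loop for the bytes left before that page ends. A C sketch of just that calculation (illustrative name; 4096 matches the constants used in this file):

#include <stddef.h>
#include <stdint.h>

/* Bytes that can be read from both operands before either pointer's
 * 16-byte loads would cross a 4 KB page boundary. */
static size_t bytes_before_page_cross(const void *lhs, const void *rhs)
{
    size_t off_l = (uintptr_t)lhs & 4095;     /* offset of LHS in its page */
    size_t off_r = (uintptr_t)rhs & 4095;     /* offset of RHS in its page */
    size_t off   = (off_l > off_r) ? off_l : off_r;

    return 4096 - off;                        /* >>4 of this is the chunk count */
}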
+// %esi = LHS ptr +// %edi = RHS ptr +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%esi),%xmm1 // get LHS + movdqu (%edi),%xmm2 // get RHS + pxor %xmm0,%xmm0 // get some 0s in the shadow of the loads + addl $16,%esi + pcmpeqb %xmm1,%xmm2 // compare LHS to RHS + pcmpeqb %xmm1,%xmm0 // compare LHS to 0s + addl $16,%edi + pmovmskb %xmm2,%eax // get result mask for comparison of LHS and RHS + pmovmskb %xmm0,%ecx // get result mask for 0 check + xorl $0xFFFF,%eax // complement compare mask so 1 means "not equal" + orl %ecx,%eax // combine the masks and check for 1-bits + jnz LFoundDiffOr0 // we found differing bytes or a 0-byte + dec %edx // more to go? + jnz LLoopOverChunks + + jmp LNextChunk // compare up to next page boundary + + +// Found a zero and/or a difference in vector compare. +// %esi = LHS ptr, already advanced by 16 +// %edi = RHS ptr, already advanced by 16 +// %eax = bit n set if bytes n differed or were 0 + +LFoundDiffOr0: + bsf %eax,%edx // which byte differed or was 0? + subl $16,%esi // point to start of vectors while we wait for bit scan + subl $16,%edi + movzb (%esi,%edx),%eax // get LHS byte + movzb (%edi,%edx),%ecx // get RHS byte + subl %ecx,%eax // compute difference (ie, return value) + popl %edi + popl %esi + ret + + +// Found a zero and/or difference in byte loop. +// %eax = LHS byte +// %ecx = RHS byte + +LExit0: + subl %ecx,%eax // compute difference (ie, return value) +LExit: // here with difference already in %eax + popl %edi + popl %esi + ret diff --git a/i386/string/strcpy.s b/i386/string/strcpy.s new file mode 100644 index 0000000..771eec4 --- /dev/null +++ b/i386/string/strcpy.s @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +// *************** +// * S T R C P Y * +// *************** +// +// char *strcpy(const char *dst, const char *src); +// +// We optimize the move by doing it vector parallel. This introduces +// a complication: if we blindly did vector load/stores until finding +// a 0, we might get a spurious page fault by touching bytes past it. +// To avoid this, we never do a load that crosses a page boundary, +// and never store a byte we don't have to. +// +// We align the destination, because unaligned vector stores are slow. + + .text + .globl _strcpy + + .align 4 +_strcpy: // char *strcpy(const char *dst, const char *src); + pushl %edi + movl 8(%esp),%edi // get dest ptr + movl 12(%esp),%ecx // get source ptr + movl %edi,%edx // copy dest ptr + negl %edx + andl $15,%edx // how many bytes to align dest ptr? 
+ jnz LLoopOverBytes // not aligned, so go do so + + +// In order to avoid spurious page faults, we loop until nearing the source page +// end. Then we revert to a byte-by-byte loop for 16 bytes until the page is crossed, +// then resume the vector loop. +// %ecx = source ptr (unaligned) +// %edi = dest ptr (aligned) + +LNextChunk: + movl %ecx,%eax // copy source ptr + movl $4096,%edx + andl $4095,%eax // get offset into source page + subl %eax,%edx // get #bytes remaining in source page + shrl $4,%edx // get #chunks till end of page + jnz LLoopOverChunks // enter vector loop + movl $16,%edx // move 16 bytes to cross page but keep dest aligned + jmp LLoopOverBytes + + +// Loop over bytes. +// %ecx = source ptr +// %edi = dest ptr +// %edx = byte count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%ecx),%eax // get source byte + inc %ecx + movb %al,(%edi) // pack into dest + inc %edi + testl %eax,%eax // 0? + jz LDone // yes, we're done + dec %edx // more to go? + jnz LLoopOverBytes + + jmp LNextChunk // we've come to end of page + + +// Loop over 16-byte chunks. +// %ecx = source ptr (unaligned) +// %edi = dest ptr (aligned) +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%ecx),%xmm1 // get source + pxor %xmm0,%xmm0 // get some 0s + addl $16,%ecx + pcmpeqb %xmm1,%xmm0 // compare source to 0s + pmovmskb %xmm0,%eax // get result mask for 0 check + testl %eax,%eax // any 0s? + jnz LFound0 // yes, exit loop + movdqa %xmm1,(%edi) // no 0s so do aligned store into destination + addl $16,%edi + dec %edx // more to go? + jnz LLoopOverChunks + + movl $16,%edx // move 16 bytes + jmp LLoopOverBytes // cross page but keep dest aligned + + +// Found a zero in the vector. Figure out where it is, and store the bytes +// up to it. +// %edi = dest ptr (aligned) +// %eax = result mask +// %xmm1 = source vector + +LFound0: + bsf %eax,%edx // find first 0 + inc %edx // we need to store the 0 too + test $16,%dl // was 0 last byte? + jz 8f // no + movdqa %xmm1,(%edi) // yes, store entire vector + jmp LDone +8: + test $8,%dl // 8-byte store required? + jz 4f // no + movq %xmm1,(%edi) // pack in 8 low bytes + psrldq $8,%xmm1 // then shift vector down 8 bytes + addl $8,%edi +4: + test $4,%dl // 4-byte store required? + jz 3f // no + movd %xmm1,(%edi) // pack in 4 low bytes + psrldq $4,%xmm1 // then shift vector down 4 bytes + addl $4,%edi +3: + andl $3,%edx // more to go? + jz LDone // no + movd %xmm1,%eax // move remainders out of vector into %eax +1: // loop on up to three bytes + movb %al,(%edi) // pack in next byte + shrl $8,%eax // shift next byte into position + inc %edi + dec %edx + jnz 1b + +LDone: + movl 8(%esp),%eax // original dest ptr is return value + popl %edi + ret diff --git a/i386/string/strlen.s b/i386/string/strlen.s new file mode 100644 index 0000000..b21bee5 --- /dev/null +++ b/i386/string/strlen.s @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * Strlen, for processors with SSE3. + * + * Note that all memory references must be aligned, in order to avoid spurious + * page faults. Thus we have to load the aligned 16-byte chunk containing the + * first byte of the operand, then mask out false 0s that may occur before the + * first byte. + * + * We favor the fall-through (ie, short operand) path. + */ + + .text + .globl _strlen + .align 4, 0x90 +_strlen: // size_t strlen(char *b); + movl 4(%esp),%edx // get ptr to string + pxor %xmm0,%xmm0 // zero %xmm0 + movl %edx,%ecx // copy ptr + andl $(-16),%edx // 16-byte align ptr + orl $(-1),%eax + pcmpeqb (%edx),%xmm0 // check whole qw for 0s + andl $15,%ecx // get #bytes in aligned dq before operand + shl %cl,%eax // create mask for the bytes of aligned dq in operand + pmovmskb %xmm0,%ecx // collect mask of 0-bytes + andl %eax,%ecx // mask out any 0s that occur before 1st byte + jz LEnterLoop // no 0-bytes (ie, 1-bits), so enter by-16 loop + +// We've found a 0-byte. +// %edx = aligned address of 16-byte block containing the terminating 0-byte +// %ecx = compare bit vector + +LFoundIt: + bsf %ecx,%eax // find first 1-bit (ie, first 0-byte) + movl 4(%esp),%ecx // recover ptr to 1st byte in string + addl %edx,%eax // get address of the 0-byte + subl %ecx,%eax // subtract address of 1st byte to get string length + ret + +// Loop over aligned 16-byte blocks: +// %edx = address of previous block + +LEnterLoop: + pxor %xmm0,%xmm0 // get some 0-bytes + addl $16,%edx // advance ptr +LLoop: + movdqa (%edx),%xmm1 // get next chunk + addl $16,%edx + pcmpeqb %xmm0,%xmm1 // check for 0s + pmovmskb %xmm1,%ecx // collect mask of 0-bytes + test %ecx,%ecx // any 0-bytes? + jz LLoop // no 0-bytes, so get next dq + + subl $16,%edx // back up ptr + jmp LFoundIt + + \ No newline at end of file diff --git a/i386/string/strncmp.s b/i386/string/strncmp.s new file mode 100644 index 0000000..a833be7 --- /dev/null +++ b/i386/string/strncmp.s @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + + +// ***************** +// * S T R N C M P * +// ***************** +// +// int strncmp(const char *s1, const char *s2, size_t len); +// +// We optimize the compare by doing it vector parallel. This introduces +// a complication: if we blindly did vector loads from both sides until +// finding a difference (or 0), we might get a spurious page fault by +// reading bytes past the difference. To avoid this, we never do a load +// that crosses a page boundary. + +#define kShort 20 // too short for vectors (must be >16) + + .text + .globl _strncmp + + .align 4 +_strncmp: // int strncmp(const char *s1, const char *s2, size_t len); + pushl %esi + pushl %edi + movl 20(%esp),%ecx // get length + movl 12(%esp),%esi // get LHS ptr + movl 16(%esp),%edi // get RHS ptr + push %ebx + cmpl $(kShort),%ecx // worth accelerating? + ja LNotShort // yes + + +// Too short to bother with parallel compares. Loop over bytes. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length (<= kShort) + +LShort: + testl %ecx,%ecx // 0-length? + jnz LShortLoop // no + jmp LReturn0 // yes, return 0 + .align 4,0x90 // align inner loops to optimize I-fetch +LShortLoop: // loop over bytes + movzb (%esi),%eax // get LHS byte + movzb (%edi),%ebx // get RHS byte + incl %esi + incl %edi + testl %eax,%eax // LHS==0 ? + jz LNotEqual // yes, this terminates comparison + subl %ebx,%eax // compare them + jnz LExit // done if not equal + decl %ecx // decrement length + jnz LShortLoop +LReturn0: + xorl %eax,%eax // all bytes equal, so return 0 +LExit: // return value is in %eax + popl %ebx + popl %edi + popl %esi + ret + +LNotEqual: // LHS in eax, RHS in ebx + subl %ebx,%eax // generate return value (nonzero) + popl %ebx + popl %edi + popl %esi + ret + + +// Loop over bytes until we reach end of a page. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length remaining after end of loop (ie, already adjusted) +// %edx = #bytes until next page (1..15) + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%esi),%eax // get LHS byte + movzb (%edi),%ebx // get RHS byte + inc %esi + inc %edi + testl %eax,%eax // LHS==0 ? + jz LNotEqual // yes, this terminates comparison + subl %ebx,%eax // compare them + jnz LExit // done if not equal + dec %edx // more to go? + jnz LLoopOverBytes + + +// Long enough to justify overhead of setting up vector compares. In order to +// avoid spurious page faults, we loop over: +// +// min( length, bytes_in_LHS_page, bytes_in_RHS_page) >> 4 +// +// 16-byte chunks. When we near a page end, we have to revert to a byte-by-byte +// comparison until reaching the next page, then resume the vector comparison. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length (> kShort) + +LNotShort: + movl %esi,%eax // copy ptrs + movl %edi,%edx + andl $4095,%eax // mask down to page offsets + andl $4095,%edx + cmpl %eax,%edx // which is bigger? + cmova %edx,%eax // %eax = max(LHS offset, RHS offset); + movl $4096,%edx + subl %eax,%edx // get #bytes to next page crossing + cmpl %ecx,%edx // will operand run out first? + cmova %ecx,%edx // get min(length remaining, bytes to page end) + movl %edx,%eax + shrl $4,%edx // get #chunks till end of operand or page + jnz LLoopOverChunks // enter vector loop + +// Too near page end for vectors. + + subl %eax,%ecx // adjust length remaining + movl %eax,%edx // %edx <- #bytes to page end + cmpl $(kShort),%ecx // will there be enough after we cross page for vectors? 
+ ja LLoopOverBytes // yes + addl %eax,%ecx // no, restore total length remaining + jmp LShortLoop // compare rest byte-by-byte (%ecx != 0) + + +// Loop over 16-byte chunks. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length remaining +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%esi),%xmm1 // get LHS + movdqu (%edi),%xmm2 // get RHS + pxor %xmm0,%xmm0 // get some 0s in the shadow of the loads + addl $16,%esi + pcmpeqb %xmm1,%xmm2 // compare LHS to RHS + pcmpeqb %xmm1,%xmm0 // compare LHS to 0s + addl $16,%edi + pmovmskb %xmm2,%eax // get result mask for comparison of LHS and RHS + pmovmskb %xmm0,%ebx // get result mask for 0 check + subl $16,%ecx // decrement length remaining + xorl $0xFFFF,%eax // complement compare mask so 1 means "not equal" + orl %ebx,%eax // combine the masks and check for 1-bits + jnz LFoundDiffOr0 // we found differing bytes or a 0-byte + dec %edx // more to go? + jnz LLoopOverChunks // yes + + cmpl $(kShort),%ecx // a lot more to compare? + jbe LShort // no + jmp LNotShort // compute distance to next page crossing etc + + +// Found a zero and/or a difference in vector compare. +// %esi = LHS ptr, already advanced by 16 +// %edi = RHS ptr, already advanced by 16 +// %eax = bit n set if bytes n differed or were 0 + +LFoundDiffOr0: + bsf %eax,%edx // which byte differed or was 0? + subl $16,%esi // point to start of vectors while we wait for bit scan + subl $16,%edi + movzb (%esi,%edx),%eax // get LHS byte + movzb (%edi,%edx),%ecx // get RHS byte + popl %ebx + popl %edi + subl %ecx,%eax // compute difference (ie, return value) + popl %esi + ret diff --git a/i386/string/strncpy.s b/i386/string/strncpy.s new file mode 100644 index 0000000..dced178 --- /dev/null +++ b/i386/string/strncpy.s @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + + +// ***************** +// * S T R N C P Y * +// ***************** +// +// char *strncpy(const char *dst, const char *src, size_t n); +// +// We optimize the move by doing it vector parallel. This introduces +// a complication: if we blindly did vector load/stores until finding +// a 0, we might get a spurious page fault by touching bytes past it. +// To avoid this, we never do a load that crosses a page boundary, +// and never store a byte we don't have to. +// +// We align the destination, because unaligned vector stores are slow. +// +// Recall that strncpy() zero fills the remainder of the dest buffer, +// and does not terminate the string if it's length is greater than or +// equal to n. 
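For reference, the semantics described above (zero fill the remainder, no NUL terminator once the source reaches n bytes) are those of the plain C loop below; it only pins down the behavior the vector code has to reproduce and is not how this file implements it:

#include <stddef.h>

char *plain_strncpy(char *dst, const char *src, size_t n)
{
    size_t i = 0;

    while (i < n && src[i] != '\0') {         /* copy at most n bytes of the string */
        dst[i] = src[i];
        i++;
    }
    while (i < n)                             /* zero fill the rest of the buffer */
        dst[i++] = '\0';
    return dst;                               /* no NUL added when strlen(src) >= n */
}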
+ +#define kShort 31 // too short to bother with vector loop + + .text + .globl _strncpy + + .align 4 +_strncpy: // char *strncpy(const char *dst, const char *src, size_t n); + pushl %edi + pushl %esi + movl 12(%esp),%edi // get dest ptr + movl 16(%esp),%esi // get source ptr + movl 20(%esp),%ecx // get length + movl %edi,%edx // copy dest ptr + negl %edx + andl $15,%edx // how many bytes to align dest ptr? + jnz LCheckShortCopy // align destination first + + +// In order to avoid spurious page faults, we loop until nearing the source page +// end. Then we revert to a byte-by-byte loop for 16 bytes until the page is crossed, +// then resume the vector loop. +// %esi = source ptr (unaligned) +// %edi = dest ptr (aligned) +// %ecx = buffer length remaining + +LNextChunk: // NB: can drop down to here + movl %esi,%eax // copy source ptr + movl $4096,%edx + andl $4095,%eax // get offset into source page + subl %eax,%edx // get #bytes remaining in source page + cmpl %ecx,%edx // will buffer run out before the page end? + cmova %ecx,%edx // get min(length remaining, bytes to page end) + shrl $4,%edx // get #chunks till end of page + jnz LLoopOverChunks // enter vector loop + +// We can't use the chunk loop yet. Check for short and empty buffers, then use byte loop. + +LCrossPage: // if buffer is large enough, cross source page + movl $16,%edx // move 16 bytes to cross page but keep dest aligned +LCheckShortCopy: // we propose to copy %edx bytes in byte loop + cmpl $(kShort),%ecx // much left? + ja LLoopOverBytes // yes, loop over bytes then more chunks + movl %ecx,%edx // no, use the byte loop for everything + testl %ecx,%ecx // have we filled buffer? + jnz LLoopOverBytes // no + jmp LDone + + +// Loop over bytes. +// %esi = source ptr +// %edi = dest ptr +// %ecx = buffer length remaining +// %edx = count of bytes to loop over (<= buffer length) + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%esi),%eax // get source byte + inc %esi + dec %ecx // decrement length + movb %al,(%edi) // pack into dest + inc %edi + testl %eax,%eax // 0? + jz LZeroBuffer // yes, we're done copying string + dec %edx // more to go? + jnz LLoopOverBytes + + testl %ecx,%ecx // at end of buffer? + jnz LNextChunk // no, xfer chunks + jmp LDone // yes + + +// Loop over 16-byte chunks. +// %esi = source ptr (unaligned) +// %edi = dest ptr (aligned) +// %ecx = buffer length remaining +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%esi),%xmm1 // get source + pxor %xmm0,%xmm0 // get some 0s + addl $16,%esi + pcmpeqb %xmm1,%xmm0 // compare source to 0s + pmovmskb %xmm0,%eax // get result mask for 0 check + testl %eax,%eax // any 0s? + jnz LFound0 // yes, exit loop + movdqa %xmm1,(%edi) // no 0s so do aligned store into destination + addl $16,%edi + subl $16,%ecx // decrement length remaining + dec %edx // more to go? + jnz LLoopOverChunks + + jmp LCrossPage // cross page but keep dest aligned + + +// Found a zero in the vector. Figure out where it is, and store the bytes +// up to it. It is possible that we should check to be sure (%ecx >= 16), and +// just do an aligned store of %xmm1 if so. But if we did, we'd be doing byte +// stores into the same double quadword in bzero(), which might hit a hazard. +// Experimentation needed. 
+// %edi = dest ptr (aligned) +// %eax = result mask +// %ecx = buffer length remaining +// %xmm1 = source vector + +LFound0: + bsf %eax,%edx // find first 0 + subl %edx,%ecx // decrement remaining buffer length + test $8,%dl // 8-byte store required? + jz 4f // no + movq %xmm1,(%edi) // pack in 8 low bytes + psrldq $8,%xmm1 // then shift vector down 8 bytes + addl $8,%edi +4: + test $4,%dl // 4-byte store required? + jz 3f // no + movd %xmm1,(%edi) // pack in 4 low bytes + psrldq $4,%xmm1 // then shift vector down 4 bytes + addl $4,%edi +3: + andl $3,%edx // more to go? + jz LZeroBuffer // no + movd %xmm1,%eax // move remainders out of vector into %eax +1: // loop on up to three bytes + movb %al,(%edi) // pack in next byte + shrl $8,%eax // shift next byte into position + inc %edi + dec %edx + jnz 1b + +// We've copied the string. Now zero the rest of the buffer, using commpage bzero(). +// %edi = dest ptr +// %ecx = buffer length remaining + +LZeroBuffer: + pushl %ecx // remaining buffer size + pushl %edi // ptr to 1st unstored byte + movl $(_COMM_PAGE_BZERO),%eax + call %eax + addl $8,%esp // pop off the arguments + +LDone: + movl 12(%esp),%eax // original dest ptr is return value + popl %esi + popl %edi + ret diff --git a/i386/sys/Makefile.inc b/i386/sys/Makefile.inc index c0dcbf4..ee1e06a 100644 --- a/i386/sys/Makefile.inc +++ b/i386/sys/Makefile.inc @@ -13,6 +13,7 @@ MDSRCS+= ATPgetreq.s \ __pthread_canceled.s \ __pthread_markcancel.s \ __semwait_signal.s \ + __sysenter_trap.s \ _setjmp.s \ _setlogin.s \ _sysctl.s \ @@ -38,6 +39,7 @@ MDSRCS+= ATPgetreq.s \ chflags.s \ chmod.s \ chown.s \ + commpage.c \ chroot.s \ close.s \ connect.s \ @@ -92,6 +94,9 @@ MDSRCS+= ATPgetreq.s \ getsockopt.s \ getuid.s \ getxattr.s \ + i386_gettimeofday.s \ + i386_get_ldt.s \ + i386_set_ldt.s \ ioctl.s \ issetugid.s \ kevent.s \ diff --git a/i386/sys/OSAtomic.s b/i386/sys/OSAtomic.s index f43b40b..c6dd151 100644 --- a/i386/sys/OSAtomic.s +++ b/i386/sys/OSAtomic.s @@ -117,6 +117,11 @@ DECLARE(_OSAtomicAdd64) DECLARE(_OSAtomicTestAndSet) movl 4(%esp), %eax movl 8(%esp), %edx + movl %eax, %ecx + andl $-8, %ecx + notl %eax + andl $7, %eax + orl %ecx, %eax call *_COMM_PAGE_BTS setc %al ret @@ -124,6 +129,11 @@ DECLARE(_OSAtomicTestAndSet) DECLARE(_OSAtomicTestAndClear) movl 4(%esp), %eax movl 8(%esp), %edx + movl %eax, %ecx + andl $-8, %ecx + notl %eax + andl $7, %eax + orl %ecx, %eax call *_COMM_PAGE_BTC setc %al ret diff --git a/i386/sys/SYS.h b/i386/sys/SYS.h index 1212eb0..60c70e8 100644 --- a/i386/sys/SYS.h +++ b/i386/sys/SYS.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -40,10 +40,13 @@ #include #include +/* + * We have two entry points. int's is used for syscalls which need to preserve + * %ecx across the call, or return a 64-bit value in %eax:%edx. sysenter is used + * for the majority of syscalls which just return a value in %eax. 
+ */ -#define UNIX_SYSCALL_TRAP lcall $0x2b, $0 -#define MACHDEP_SYSCALL_TRAP lcall $0x7, $0 - +#define UNIX_SYSCALL_SYSENTER SYSENTER_PAD call __sysenter_trap /* * This is the same as UNIX_SYSCALL, but it can call an alternate error @@ -53,13 +56,22 @@ .globl error_ret ;\ LEAF(_##name, 0) ;\ movl $ SYS_##name, %eax ;\ - UNIX_SYSCALL_TRAP ;\ + UNIX_SYSCALL_SYSENTER ;\ jnb 2f ;\ BRANCH_EXTERN(error_ret) ;\ 2: #define UNIX_SYSCALL(name, nargs) \ .globl cerror ;\ +LEAF(_##name, 0) ;\ + movl $ SYS_##name, %eax ;\ + UNIX_SYSCALL_SYSENTER ;\ + jnb 2f ;\ + BRANCH_EXTERN(cerror) ;\ +2: + +#define UNIX_SYSCALL_INT(name, nargs) \ + .globl cerror ;\ LEAF(_##name, 0) ;\ movl $ SYS_##name, %eax ;\ UNIX_SYSCALL_TRAP ;\ @@ -68,6 +80,14 @@ LEAF(_##name, 0) ;\ 2: #define UNIX_SYSCALL_NONAME(name, nargs) \ + .globl cerror ;\ + movl $ SYS_##name, %eax ;\ + UNIX_SYSCALL_SYSENTER ;\ + jnb 2f ;\ + BRANCH_EXTERN(cerror) ;\ +2: + +#define UNIX_SYSCALL_INT_NONAME(name, nargs) \ .globl cerror ;\ movl $ SYS_##name, %eax ;\ UNIX_SYSCALL_TRAP ;\ diff --git a/i386/sys/__sysenter_trap.s b/i386/sys/__sysenter_trap.s new file mode 100644 index 0000000..53c854e --- /dev/null +++ b/i386/sys/__sysenter_trap.s @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +.text +.align 2,0x90 +.private_extern __sysenter_trap +__sysenter_trap: + popl %edx + movl %esp, %ecx + sysenter diff --git a/i386/sys/_setjmp.s b/i386/sys/_setjmp.s index 8513cad..0a09c63 100644 --- a/i386/sys/_setjmp.s +++ b/i386/sys/_setjmp.s @@ -61,6 +61,7 @@ #define JB_FS 64 #define JB_GS 68 +#define SAVE_SEG_REGS 1 LEAF(__setjmp, 0) movl 4(%esp), %ecx // jmp_buf (struct sigcontext *) @@ -122,7 +123,7 @@ LEAF(__longjmp, 0) #if SAVE_SEG_REGS // segment registers mov JB_SS(%ecx), %ss - mov JB_CS(%ecx), %cs + // mov JB_CS(%ecx), %cs // can't set cs? mov JB_DS(%ecx), %ds mov JB_ES(%ecx), %es mov JB_FS(%ecx), %fs diff --git a/i386/sys/cerror.s b/i386/sys/cerror.s index dee3c4a..b071f0a 100644 --- a/i386/sys/cerror.s +++ b/i386/sys/cerror.s @@ -33,9 +33,13 @@ LABEL(cerror_cvt) movl $45, %eax /* Yes; make ENOTSUP for compatibility */ LABEL(cerror) REG_TO_EXTERN(%eax, _errno) - pushl %eax + mov %esp,%edx + andl $0xfffffff0,%esp + subl $16,%esp + movl %edx,4(%esp) + movl %eax,(%esp) CALL_EXTERN(_cthread_set_errno_self) - addl $4,%esp + movl 4(%esp),%esp movl $-1,%eax movl $-1,%edx /* in case a 64-bit value is returned */ ret diff --git a/i386/sys/commpage.c b/i386/sys/commpage.c new file mode 100644 index 0000000..fa1d6fe --- /dev/null +++ b/i386/sys/commpage.c @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. 
All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + diff --git a/i386/sys/fork.s b/i386/sys/fork.s index ca4506e..8dc35d2 100644 --- a/i386/sys/fork.s +++ b/i386/sys/fork.s @@ -25,7 +25,8 @@ */ #include "SYS.h" -LEAF(_fork, 0) +LEAF(_fork, 0) + subl $28, %esp // Align the stack, with 16 bytes of extra padding that we'll need CALL_EXTERN(__cthread_fork_prepare) #if defined(__DYNAMIC__) // Just like __cthread_fork_prepare we need to prevent threads on the child's @@ -36,22 +37,20 @@ LEAF(_fork, 0) LC1: .ascii "__dyld_fork_prepare\0" .text - subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + // Put a pointer to 8(%esp) in 4(%esp) for _dyld_func_lookup to fill in. + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the pointer call 1f 1: popl %eax leal LC1-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_prepare indirectly #endif movl $ SYS_fork,%eax; // code for fork -> eax - UNIX_SYSCALL_TRAP; // do the system call + UNIX_SYSCALL_TRAP // do the system call jnc L1 // jump if CF==0 #if defined(__DYNAMIC__) @@ -63,24 +62,22 @@ LC1: LC2: .ascii "__dyld_fork_parent\0" .text - pushl %eax // save the return value (errno) - subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + movl %eax, 0xc(%esp) // save the return value (errno) + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the pointer call 1f 1: popl %eax leal LC2-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_parent indirectly - popl %eax // restore the return value (errno) + movl 
0xc(%esp), %eax // restore the return value (errno) #endif CALL_EXTERN(cerror) CALL_EXTERN(__cthread_fork_parent) movl $-1,%eax + addl $28, %esp // restore the stack ret L1: @@ -91,7 +88,7 @@ L1: #if defined(__DYNAMIC__) // Here on the child side of the fork we need to tell the dynamic linker that // we have forked. To do this we call __dyld_fork_child in the dyanmic -// linker. But since we can't dynamicly bind anything until this is done we +// linker. But since we can't dynamically bind anything until this is done we // do this by using the private extern __dyld_func_lookup() function to get the // address of __dyld_fork_child (the 'C' code equivlent): // @@ -103,17 +100,14 @@ LC0: .ascii "__dyld_fork_child\0" .text - subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the pointer call 1f 1: popl %eax leal LC0-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_child indirectly #endif xorl %eax, %eax @@ -125,42 +119,38 @@ LC10: .ascii "__dyld_fork_child_final\0" .text - subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the pointer call 1f 1: popl %eax leal LC10-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_child_final indirectly #endif xorl %eax,%eax // zero eax + addl $28, %esp // restore the stack ret //parent here... L2: - push %eax // save pid + movl %eax, 0xc(%esp) // save pid #if defined(__DYNAMIC__) // __dyld_fork_parent() is called by the parent process after a fork syscall. // This releases the dyld lock acquired by __dyld_fork_prepare(). 
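The calling pattern used throughout this file — look up a dyld hook by name with __dyld_func_lookup, then call it through the returned pointer — is roughly the following C. The prototype shown is an assumption for illustration; the assembly simply fills a stack slot with the looked-up address and does "call *%eax" without a NULL check.

extern int _dyld_func_lookup(const char *name, void **address);	/* assumed prototype */

static void
call_dyld_hook(const char *name)
{
	void *addr = 0;

	_dyld_func_lookup(name, &addr);		/* fills the slot, like 8(%esp) above */
	if (addr != 0)
		((void (*)(void))addr)();	/* indirect call, i.e. "call *%eax" */
}

/* e.g. call_dyld_hook("__dyld_fork_prepare") before the fork,
 * call_dyld_hook("__dyld_fork_parent") or "__dyld_fork_child" after it */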
- subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the allocated space call 1f 1: popl %eax leal LC2-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_parent indirectly #endif CALL_EXTERN_AGAIN(__cthread_fork_parent) - pop %eax + movl 0xc(%esp), %eax // return pid + addl $28, %esp // restore the stack ret diff --git a/i386/sys/getpid.s b/i386/sys/getpid.s index 4a4e4d1..d08bd95 100644 --- a/i386/sys/getpid.s +++ b/i386/sys/getpid.s @@ -59,6 +59,7 @@ LEAF(_getpid, 0) UNIX_SYSCALL_NONAME(getpid, 0) movl %eax, %edx xorl %eax, %eax + GET_CURRENT_PID lock cmpxchgl %edx, __current_pid movl %edx, %eax diff --git a/i386/sys/i386_get_ldt.s b/i386/sys/i386_get_ldt.s new file mode 100644 index 0000000..4a6611f --- /dev/null +++ b/i386/sys/i386_get_ldt.s @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +.text +.globl cerror +LEAF(_i386_get_ldt, 0) + movl $6,%eax + MACHDEP_SYSCALL_TRAP + jnb 2f + BRANCH_EXTERN(cerror) +2: ret diff --git a/i386/sys/i386_gettimeofday.s b/i386/sys/i386_gettimeofday.s new file mode 100644 index 0000000..5de6ca7 --- /dev/null +++ b/i386/sys/i386_gettimeofday.s @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright 1998 Apple Computer, Inc. */ + +#include "SYS.h" + +#define __APPLE_API_PRIVATE +#include +#undef __APPLE_API_PRIVATE + +LABEL(___commpage_gettimeofday) + mov $ _COMM_PAGE_GETTIMEOFDAY,%eax + jmp %eax + +/* + * This syscall is special cased: the timeval is returned in eax/edx. + */ +LABEL(___gettimeofday) + UNIX_SYSCALL_INT_NONAME(gettimeofday,0) + mov 4(%esp),%ecx + mov %eax,(%ecx) + mov %edx,4(%ecx) + xor %eax,%eax + ret diff --git a/i386/sys/i386_set_ldt.s b/i386/sys/i386_set_ldt.s new file mode 100644 index 0000000..3eeb2a5 --- /dev/null +++ b/i386/sys/i386_set_ldt.s @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +.text +.globl cerror +LEAF(_i386_set_ldt, 0) + movl $5,%eax + MACHDEP_SYSCALL_TRAP + jnb 2f + BRANCH_EXTERN(cerror) +2: ret diff --git a/i386/sys/lseek.s b/i386/sys/lseek.s index dfa0b22..8dfc808 100644 --- a/i386/sys/lseek.s +++ b/i386/sys/lseek.s @@ -25,5 +25,5 @@ */ #include "SYS.h" -UNIX_SYSCALL(lseek, 3) +UNIX_SYSCALL_INT(lseek, 3) ret diff --git a/i386/sys/pipe.s b/i386/sys/pipe.s index bcb5883..e681240 100644 --- a/i386/sys/pipe.s +++ b/i386/sys/pipe.s @@ -25,7 +25,7 @@ */ #include "SYS.h" -UNIX_SYSCALL(pipe, 0) +UNIX_SYSCALL_INT(pipe, 0) movl 4(%esp),%ecx movl %eax,(%ecx) movl %edx,4(%ecx) diff --git a/i386/sys/setjmp.s b/i386/sys/setjmp.s index 2727458..ac7b28b 100644 --- a/i386/sys/setjmp.s +++ b/i386/sys/setjmp.s @@ -71,85 +71,37 @@ LEAF(_sigsetjmp, 0) movl %ecx, JB_SAVEMASK(%eax) // jmpbuf[_JBLEN] = savemask; cmpl $0, %ecx // if savemask != 0 jne _setjmp // setjmp(jmpbuf); - BRANCH_EXTERN(__setjmp) // else - // _setjmp(jmpbuf); + jmp L_do__setjmp // else _setjmp(jmpbuf); LEAF(_setjmp, 0) + subl $4, %esp // make space for return from sigprocmask + pushl %esp // oset + pushl $0 // set = NULL + pushl $1 // how = SIG_BLOCK + CALL_EXTERN(_sigprocmask) + movl 12(%esp),%eax // save the mask + addl $16, %esp // restore original esp movl 4(%esp), %ecx // jmp_buf (struct sigcontext *) - pushl %ecx // save ecx - - // call sigstack to get the current signal stack - subl $12, %esp // space for return structure - pushl %esp - pushl $0 - CALL_EXTERN(_sigaltstack) - movl 12(%esp), %eax // save stack pointer - movl %eax, JB_ONSTACK(%ecx) - addl $20, %esp - - // call sigblock to get signal mask - pushl $0 - CALL_EXTERN(_sigblock) - addl $4, %esp - popl %ecx // restore ecx movl %eax, JB_MASK(%ecx) - - // now build sigcontext - movl %ebx, JB_EBX(%ecx) - movl %edi, JB_EDI(%ecx) - movl %esi, JB_ESI(%ecx) - movl %ebp, JB_EBP(%ecx) - - // EIP is set to the frame return address value - movl (%esp), %eax - movl %eax, JB_EIP(%ecx) - // ESP is set to the frame return address plus 4 - movl %esp, %eax - addl $4, %eax - movl %eax, JB_ESP(%ecx) - - // segment registers - movl $0, JB_SS(%ecx) - mov %ss, JB_SS(%ecx) - movl $0, JB_CS(%ecx) - mov %cs, JB_CS(%ecx) - movl $0, JB_DS(%ecx) - mov %ds, JB_DS(%ecx) - movl $0, JB_ES(%ecx) - mov %es, JB_ES(%ecx) - movl $0, JB_FS(%ecx) - mov %fs, JB_FS(%ecx) - movl $0, JB_GS(%ecx) - mov %gs, JB_GS(%ecx) - - // save eflags - you can't use movl - pushf - popl %eax - movl %eax, JB_EFLAGS(%ecx) - - // return 0 - xorl %eax, %eax - ret +L_do__setjmp: + BRANCH_EXTERN(__setjmp) LEAF(_siglongjmp, 0) movl 4(%esp), %eax // sigjmp_buf * jmpbuf; cmpl $0, JB_SAVEMASK(%eax) // if jmpbuf[_JBLEN] != 0 jne _longjmp // longjmp(jmpbuf, var); - BRANCH_EXTERN(__longjmp) // else - // _longjmp(jmpbuf, var); + jmp L_do__longjmp // else _longjmp(jmpbuf, var); LEAF(_longjmp, 0) - subl $2,%esp - fnstcw (%esp) // save FP control word - fninit // reset FP coprocessor - fldcw (%esp) // restore FP control word - addl $2,%esp - movl 4(%esp), %eax // address of jmp_buf (saved context) - movl 8(%esp), %edx // return value - movl %edx, JB_EAX(%eax) // return value into saved context - movl $ SYS_sigreturn, %eax // sigreturn system call - UNIX_SYSCALL_TRAP - addl $8, %esp - CALL_EXTERN(_longjmperror) - CALL_EXTERN(_abort) + movl 4(%esp), %ecx // address of jmp_buf (saved context) + movl JB_MASK(%ecx),%eax // get the mask + pushl %eax // store the mask + movl %esp, %edx // save the address where we stored the mask + pushl $0 // oset = 
NULL + pushl %edx // set + pushl $3 // how = SIG_SETMASK + CALL_EXTERN_AGAIN(_sigprocmask) + addl $16, %esp // restore original esp +L_do__longjmp: + BRANCH_EXTERN(__longjmp) // else END(_longjmp) diff --git a/i386/sys/sigaltstack.s b/i386/sys/sigaltstack.s index baa6979..724180a 100644 --- a/i386/sys/sigaltstack.s +++ b/i386/sys/sigaltstack.s @@ -25,5 +25,5 @@ */ #include "SYS.h" -UNIX_SYSCALL(sigaltstack, 3) +UNIX_SYSCALL_INT(sigaltstack, 3) ret diff --git a/i386/sys/sigreturn.s b/i386/sys/sigreturn.s index 9453116..76560ee 100644 --- a/i386/sys/sigreturn.s +++ b/i386/sys/sigreturn.s @@ -25,5 +25,5 @@ */ #include "SYS.h" -UNIX_SYSCALL(sigreturn, 1) +UNIX_SYSCALL_INT(sigreturn, 2) ret diff --git a/include/asl.h b/include/asl.h index 3eb751d..688132e 100644 --- a/include/asl.h +++ b/include/asl.h @@ -213,8 +213,8 @@ const char *asl_get(aslmsg msg, const char *key); * format: A formating string followed by a list of arguments, like printf() * returns 0 for success, non-zero for failure */ -#ifdef __DARWIN_LDBL_COMPAT -int asl_log(aslclient asl, aslmsg msg, int level, const char *format, ...) __DARWIN_LDBL_COMPAT(asl_log); +#ifdef __DARWIN_LDBL_COMPAT2 +int asl_log(aslclient asl, aslmsg msg, int level, const char *format, ...) __DARWIN_LDBL_COMPAT2(asl_log); #else int asl_log(aslclient asl, aslmsg msg, int level, const char *format, ...); #endif @@ -229,8 +229,8 @@ int asl_log(aslclient asl, aslmsg msg, int level, const char *format, ...); * format: A formating string followed by a list of arguments, like vprintf() * returns 0 for success, non-zero for failure */ -#ifdef __DARWIN_LDBL_COMPAT -int asl_vlog(aslclient asl, aslmsg msg, int level, const char *format, va_list ap) __DARWIN_LDBL_COMPAT(asl_vlog); +#ifdef __DARWIN_LDBL_COMPAT2 +int asl_vlog(aslclient asl, aslmsg msg, int level, const char *format, va_list ap) __DARWIN_LDBL_COMPAT2(asl_vlog); #else int asl_vlog(aslclient asl, aslmsg msg, int level, const char *format, va_list ap); #endif diff --git a/include/assert.h b/include/assert.h index 804cbbc..a9c9b55 100644 --- a/include/assert.h +++ b/include/assert.h @@ -68,7 +68,7 @@ __END_DECLS #define assert(e) \ ((void) ((e) ? 0 : __assert (#e, __FILE__, __LINE__))) #define __assert(e, file, line) \ - (printf ("%s:%u: failed assertion `%s'\n", file, line, e), abort (), 0) + ((void)printf ("%s:%u: failed assertion `%s'\n", file, line, e), abort(), 0) #else /* __GNUC__ */ diff --git a/include/sys/acl.h b/include/sys/acl.h index 55e82d0..15ef22b 100644 --- a/include/sys/acl.h +++ b/include/sys/acl.h @@ -161,10 +161,12 @@ extern int acl_set_link(const char *path_p, acl_type_t type, acl_t acl); /* 23.1.6.4 ACL Format translation */ extern ssize_t acl_copy_ext(void *buf_p, acl_t acl, ssize_t size); +extern ssize_t acl_copy_ext_native(void *buf_p, acl_t acl, ssize_t size); extern acl_t acl_copy_int(const void *buf_p); +extern acl_t acl_copy_int_native(const void *buf_p); extern acl_t acl_from_text(const char *buf_p); extern ssize_t acl_size(acl_t acl); extern char *acl_to_text(acl_t acl, ssize_t *len_p); __END_DECLS -#endif _SYS_ACL_H +#endif /* _SYS_ACL_H */ diff --git a/mach/panic.c b/mach/panic.c index b6e91a2..363739f 100644 --- a/mach/panic.c +++ b/mach/panic.c @@ -77,4 +77,7 @@ panic(const char *s, ...) 
#define RB_DEBUGGER 0x1000 /* enter debugger NOW */ (void) host_reboot(master_host_port, RB_DEBUGGER); + + /* 4279008 - don't return */ + abort(); } diff --git a/posix1e/acl_translate.c b/posix1e/acl_translate.c index 6ac2841..960a6d1 100644 --- a/posix1e/acl_translate.c +++ b/posix1e/acl_translate.c @@ -33,8 +33,19 @@ #include #include +#include + #include "aclvar.h" +/* + * NOTE: the copy_int/copy_ext functions are duplicated here, one version of each for + * each of native and portable endianity. A more elegant solution might be called for + * if the functions become much more complicated. + */ + +/* + * acl_t -> external representation, portable endianity + */ ssize_t acl_copy_ext(void *buf, acl_t acl, ssize_t size) { @@ -51,6 +62,42 @@ acl_copy_ext(void *buf, acl_t acl, ssize_t size) return(-1); } + /* export the header */ + ext->fsec_magic = OSSwapHostToBigInt32(KAUTH_FILESEC_MAGIC); + ext->fsec_entrycount = OSSwapHostToBigInt32(acl->a_entries); + ext->fsec_flags = OSSwapHostToBigInt32(acl->a_flags); + + /* copy ACEs */ + for (i = 0; i < acl->a_entries; i++) { + /* ACE contents are almost identical */ + ext->fsec_ace[i].ace_applicable = acl->a_ace[i].ae_applicable; + ext->fsec_ace[i].ace_flags = + OSSwapHostToBigInt32((acl->a_ace[i].ae_tag & KAUTH_ACE_KINDMASK) | (acl->a_ace[i].ae_flags & ~KAUTH_ACE_KINDMASK)); + ext->fsec_ace[i].ace_rights = OSSwapHostToBigInt32(acl->a_ace[i].ae_perms); + } + + return(reqsize); +} + +/* + * acl_t -> external representation, native system endianity + */ +ssize_t +acl_copy_ext_native(void *buf, acl_t acl, ssize_t size) +{ + struct kauth_filesec *ext = (struct kauth_filesec *)buf; + ssize_t reqsize; + int i; + + /* validate arguments, compute required size */ + reqsize = acl_size(acl); + if (reqsize < 0) + return(-1); + if (reqsize > size) { + errno = ERANGE; + return(-1); + } + /* export the header */ ext->fsec_magic = KAUTH_FILESEC_MAGIC; ext->fsec_entrycount = acl->a_entries; @@ -70,6 +117,11 @@ acl_copy_ext(void *buf, acl_t acl, ssize_t size) return(reqsize); } +/* + * external representation, portable system endianity -> acl_t + * + * Unlike acl_copy_ext, we can't mung the buffer as it doesn't belong to us. 
+ */ acl_t acl_copy_int(const void *buf) { @@ -77,6 +129,38 @@ acl_copy_int(const void *buf) acl_t ap; int i; + if (ext->fsec_magic != OSSwapHostToBigInt32(KAUTH_FILESEC_MAGIC)) { + errno = EINVAL; + return(NULL); + } + + if ((ap = acl_init(OSSwapBigToHostInt32(ext->fsec_entrycount))) != NULL) { + /* copy useful header fields */ + ap->a_flags = OSSwapBigToHostInt32(ext->fsec_flags); + ap->a_entries = OSSwapBigToHostInt32(ext->fsec_entrycount); + /* copy ACEs */ + for (i = 0; i < ap->a_entries; i++) { + /* ACE contents are literally identical */ + ap->a_ace[i].ae_magic = _ACL_ENTRY_MAGIC; + ap->a_ace[i].ae_applicable = ext->fsec_ace[i].ace_applicable; + ap->a_ace[i].ae_flags = OSSwapBigToHostInt32(ext->fsec_ace[i].ace_flags) & ~KAUTH_ACE_KINDMASK; + ap->a_ace[i].ae_tag = OSSwapBigToHostInt32(ext->fsec_ace[i].ace_flags) & KAUTH_ACE_KINDMASK; + ap->a_ace[i].ae_perms = OSSwapBigToHostInt32(ext->fsec_ace[i].ace_rights); + } + } + return(ap); +} + +/* + * external representation, native system endianity -> acl_t + */ +acl_t +acl_copy_int_native(const void *buf) +{ + struct kauth_filesec *ext = (struct kauth_filesec *)buf; + acl_t ap; + int i; + if (ext->fsec_magic != KAUTH_FILESEC_MAGIC) { errno = EINVAL; return(NULL); @@ -89,9 +173,6 @@ acl_copy_int(const void *buf) /* copy ACEs */ for (i = 0; i < ap->a_entries; i++) { /* ACE contents are literally identical */ -/* XXX Consider writing the magic out to the persistent store - * to detect corruption - */ ap->a_ace[i].ae_magic = _ACL_ENTRY_MAGIC; ap->a_ace[i].ae_applicable = ext->fsec_ace[i].ace_applicable; ap->a_ace[i].ae_flags = ext->fsec_ace[i].ace_flags & ~KAUTH_ACE_KINDMASK; @@ -136,6 +217,7 @@ static struct { char *name; int type; } acl_flags[] = { + {ACL_ENTRY_INHERITED, "inherited", ACL_TYPE_FILE | ACL_TYPE_DIR}, {ACL_FLAG_DEFER_INHERIT, "defer_inherit", ACL_TYPE_ACL}, {ACL_ENTRY_FILE_INHERIT, "file_inherit", ACL_TYPE_DIR}, {ACL_ENTRY_DIRECTORY_INHERIT, "directory_inherit", ACL_TYPE_DIR}, @@ -161,16 +243,16 @@ raosnprintf(char **buf, size_t *size, ssize_t *offset, char *fmt, ...) 
va_start(ap, fmt); ret = vsnprintf(*buf + *offset, *size - *offset, fmt, ap); va_end(ap); - if (ret < *size) + if (ret < (*size - *offset)) { *offset += ret; return ret; } } - *buf = realloc(*buf, (*size *= 2)); + *buf = reallocf(*buf, (*size *= 2)); } while (*buf); - //warn("realloc failure"); + //warn("reallocf failure"); return 0; } @@ -198,7 +280,7 @@ uuid_to_name(uuid_t *uu, uid_t *id, int *isgid) errout: ; //warn("Unable to translate qualifier on ACL\n"); } } - return ""; + return strdup(""); } acl_t @@ -306,7 +388,9 @@ acl_from_text(const char *buf_p) need_tag = 0; } /* name */ - if ((field = strtok_r(NULL, ":", &last_field)) != NULL && need_tag) + if (*last_field == ':') // empty username field + last_field++; + else if ((field = strtok_r(NULL, ":", &last_field)) != NULL && need_tag) { switch(ug_tag) { @@ -330,7 +414,9 @@ acl_from_text(const char *buf_p) need_tag = 0; } /* uid */ - if ((field = strtok_r(NULL, ":", &last_field)) != NULL && need_tag) + if (*last_field == ':') // empty uid field + last_field++; + else if ((field = strtok_r(NULL, ":", &last_field)) != NULL && need_tag) { uid_t id; error = 0; @@ -404,29 +490,25 @@ acl_from_text(const char *buf_p) } } - if((field = strtok_r(NULL, ":", &last_field)) == NULL) - { - error = EINVAL; - goto exit; - } - - for (sub = strtok_r(field, ",", &last_sub); sub; - sub = strtok_r(NULL, ",", &last_sub)) - { - for (i = 0; acl_perms[i].name != NULL; i++) + if((field = strtok_r(NULL, ":", &last_field)) != NULL) { + for (sub = strtok_r(field, ",", &last_sub); sub; + sub = strtok_r(NULL, ",", &last_sub)) { - if (acl_perms[i].type & (ACL_TYPE_FILE | ACL_TYPE_DIR) - && !strcmp(acl_perms[i].name, sub)) + for (i = 0; acl_perms[i].name != NULL; i++) { - acl_add_perm(perms, acl_perms[i].perm); - break; + if (acl_perms[i].type & (ACL_TYPE_FILE | ACL_TYPE_DIR) + && !strcmp(acl_perms[i].name, sub)) + { + acl_add_perm(perms, acl_perms[i].perm); + break; + } + } + if (acl_perms[i].name == NULL) + { + /* couldn't find perm */ + error = EINVAL; + goto exit; } - } - if (acl_perms[i].name == NULL) - { - /* couldn't find perm */ - error = EINVAL; - goto exit; } } acl_set_tag_type(acl_entry, tag); @@ -455,16 +537,21 @@ acl_to_text(acl_t acl, ssize_t *len_p) char *str, uu_str[256]; int i, first; int isgid; - size_t bufsize = 1024; - char *buf = malloc(bufsize); + char *buf; + + if (!_ACL_VALID_ACL(acl)) { + errno = EINVAL; + return NULL; + } + buf = malloc(bufsize); if (len_p == NULL) len_p = alloca(sizeof(ssize_t)); *len_p = 0; - if(!raosnprintf(&buf, &bufsize, len_p, "!#acl %d", 1)) + if (!raosnprintf(&buf, &bufsize, len_p, "!#acl %d", 1)) return NULL; if (acl_get_flagset_np(acl, &flags) == 0) @@ -530,7 +617,7 @@ acl_to_text(acl_t acl, ssize_t *len_p) } } buf[(*len_p)++] = '\n'; - buf[(*len_p)++] = 0; + buf[(*len_p)] = 0; return buf; } diff --git a/ppc/sys/ppc_gettimeofday.s b/ppc/sys/ppc_gettimeofday.s index e1dab52..9d84417 100644 --- a/ppc/sys/ppc_gettimeofday.s +++ b/ppc/sys/ppc_gettimeofday.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -36,14 +36,11 @@ MI_ENTRY_POINT(___commpage_gettimeofday) * Note also that the "seconds" field of the timeval is a long, so * it's size is mode dependent. */ -MI_ENTRY_POINT(___ppc_gettimeofday) +MI_ENTRY_POINT(___gettimeofday) mr r12,r3 // save ptr to timeval SYSCALL_NONAME(gettimeofday,0) - mr. r12,r12 // was timeval ptr null? 
- beq 3f stg r3,0(r12) // "stw" in 32-bit mode, "std" in 64-bit mode stw r4,GPR_BYTES(r12) li r3,0 -3: blr diff --git a/pthreads/lock.s b/pthreads/lock.s index 8658051..6ff476f 100644 --- a/pthreads/lock.s +++ b/pthreads/lock.s @@ -95,9 +95,7 @@ END(__spin_unlock) TEXT ALIGN -.globl _spin_lock_try LEAF(__spin_lock_try, 0) -_spin_lock_try: movl $(_COMM_PAGE_SPINLOCK_TRY), %eax jmpl %eax diff --git a/pthreads/pthread_tsd.c b/pthreads/pthread_tsd.c index df1ee6f..91cdf3c 100644 --- a/pthreads/pthread_tsd.c +++ b/pthreads/pthread_tsd.c @@ -59,6 +59,12 @@ static struct void (*destructor)(void *); } _pthread_keys[_POSIX_THREAD_KEYS_MAX]; static pthread_lock_t tds_lock = LOCK_INITIALIZER; +/* + * Partition _pthread_keys in a lower part that dyld can use, and an upper + * part for libSystem. The libSystem part starts at __pthread_tsd_first = 4. + * dyld will set this value to 1. + */ +__private_extern__ int __pthread_tsd_first = 4; /* * Create a new key for thread specific data @@ -71,7 +77,7 @@ pthread_key_create(pthread_key_t *key, LOCK(tds_lock); res = ENOMEM; /* No 'free' keys */ /* The first slot is reserved for pthread_self() */ - for (i = 1; i < _POSIX_THREAD_KEYS_MAX; i++) + for (i = __pthread_tsd_first; i < _POSIX_THREAD_KEYS_MAX; i++) { if (_pthread_keys[i].created == FALSE) { @@ -95,7 +101,7 @@ pthread_key_delete(pthread_key_t key) int res; LOCK(tds_lock); /* The first slot is reserved for pthread_self() */ - if ((key > 0) && (key < _POSIX_THREAD_KEYS_MAX)) + if ((key >= __pthread_tsd_first) && (key < _POSIX_THREAD_KEYS_MAX)) { if (_pthread_keys[key].created) { @@ -137,7 +143,7 @@ pthread_setspecific(pthread_key_t key, int res; pthread_t self; /* The first slot is reserved for pthread_self() */ - if ((key > 0) && (key < _POSIX_THREAD_KEYS_MAX)) + if ((key >= __pthread_tsd_first) && (key < _POSIX_THREAD_KEYS_MAX)) { if (_pthread_keys[key].created) { @@ -166,7 +172,7 @@ _pthread_tsd_cleanup(pthread_t self) for (j = 0; j < PTHREAD_DESTRUCTOR_ITERATIONS; j++) { /* The first slot is reserved for pthread_self() */ - for (i = 1; i < _POSIX_THREAD_KEYS_MAX; i++) + for (i = __pthread_tsd_first; i < _POSIX_THREAD_KEYS_MAX; i++) { if (_pthread_keys[i].created && (param = self->tsd[i])) { diff --git a/stdio/FreeBSD/printf.3.patch b/stdio/FreeBSD/printf.3.patch index a8da314..ca5226e 100644 --- a/stdio/FreeBSD/printf.3.patch +++ b/stdio/FreeBSD/printf.3.patch @@ -1,5 +1,5 @@ ---- printf.3.orig Fri Mar 11 17:08:43 2005 -+++ printf.3 Fri Mar 11 17:04:50 2005 +--- printf.3.orig 2004-11-25 11:38:35.000000000 -0800 ++++ printf.3 2005-08-09 22:37:08.000000000 -0700 @@ -101,6 +101,12 @@ dynamically allocate a new string with .Xr malloc 3 . @@ -19,10 +19,10 @@ .It +An optional separator character ( +.Cm \ , | \; | \ : | _ -+) used for separating multiple values when printing an AltiVec vector, ++) used for separating multiple values when printing an AltiVec or SSE vector, +or other multi-value unit. +.Pp -+NOTE: This is an AltiVec only extension onto the ++NOTE: This is an extension to the +.Fn printf +specification. +Behaviour of these values for @@ -34,13 +34,13 @@ An optional decimal digit string specifying a minimum field width. 
If the converted value has fewer characters than the field width, it will be padded with spaces on the left (or right, if the left-adjustment -@@ -379,6 +399,28 @@ +@@ -379,6 +399,34 @@ .It Sy Modifier Ta Cm c Ta Cm s .It Cm l No (ell) Ta Vt wint_t Ta Vt "wchar_t *" .El +.Pp +The AltiVec Technology Programming Interface Manual also defines five additional length modifiers -+which can be used (in place of the conventional length modifiers) for the printing of AltiVec vectors: ++which can be used (in place of the conventional length modifiers) for the printing of AltiVec or SSE vectors: +.Bl -tag -compact +.It Cm v +Treat the argument as a vector value, unit length will be determined by the conversion @@ -52,7 +52,7 @@ +Treat the argument as a vector of 4 32-bit units. +.El +.Pp -+NOTE: The vector length specifiers are AltiVec only extensions onto the ++NOTE: The vector length specifiers are extensions to the +.Fn printf +specification. +Behaviour of these values for @@ -60,10 +60,16 @@ +is only defined for operating systems conforming to the +AltiVec Technology Programming Interface Manual. +(At time of writing this includes only Mac OS X 10.2 and later.) ++.Pp ++As a further extension, for SSE2 64-bit units: ++.Bl -tag -compact ++.It Cm vll, llv ++Treat the argument as a vector of 2 64-bit units. ++.El .It A character that specifies the type of conversion to be applied. .El -@@ -792,12 +834,8 @@ +@@ -792,12 +840,8 @@ .Xr fmtcheck 3 , .Xr scanf 3 , .Xr setlocale 3 , diff --git a/stdio/FreeBSD/vfprintf.c.patch b/stdio/FreeBSD/vfprintf.c.patch index 82cc3c6..88df771 100644 --- a/stdio/FreeBSD/vfprintf.c.patch +++ b/stdio/FreeBSD/vfprintf.c.patch @@ -1,5 +1,5 @@ --- vfprintf.c.orig 2004-11-25 11:38:35.000000000 -0800 -+++ vfprintf.c 2005-02-24 15:16:20.000000000 -0800 ++++ vfprintf.c 2005-11-08 22:43:11.000000000 -0800 @@ -40,6 +40,8 @@ #include __FBSDID("$FreeBSD: src/lib/libc/stdio/vfprintf.c,v 1.68 2004/08/26 06:25:28 des Exp $"); @@ -17,24 +17,25 @@ #include #include "un-namespace.h" -@@ -66,6 +69,12 @@ +@@ -66,6 +69,13 @@ #include "local.h" #include "fvwrite.h" -+#ifdef ALTIVEC -+#include -+ -+#define VECTORTYPE vector unsigned char -+#endif /* ALTIVEC */ ++#ifdef VECTORS ++typedef __attribute__ ((vector_size(16))) unsigned char VECTORTYPE; ++#ifdef __SSE2__ ++#define V64TYPE ++#endif /* __SSE2__ */ ++#endif /* VECTORS */ + union arg { int intarg; u_int uintarg; -@@ -93,6 +102,16 @@ +@@ -93,6 +103,21 @@ #endif wint_t wintarg; wchar_t *pwchararg; -+#ifdef ALTIVEC ++#ifdef VECTORS + VECTORTYPE vectorarg; + unsigned char vuchararg[16]; + signed char vchararg[16]; @@ -43,19 +44,24 @@ + unsigned int vuintarg[4]; + signed int vintarg[4]; + float vfloatarg[4]; -+#endif /* ALTIVEC */ ++#ifdef V64TYPE ++ double vdoublearg[2]; ++ unsigned long long vulonglongarg[2]; ++ long long vlonglongarg[2]; ++#endif /* V64TYPE */ ++#endif /* VECTORS */ }; /* -@@ -103,19 +122,56 @@ +@@ -103,16 +128,20 @@ T_LONG, T_U_LONG, TP_LONG, T_LLONG, T_U_LLONG, TP_LLONG, T_PTRDIFFT, TP_PTRDIFFT, T_SIZET, TP_SIZET, T_INTMAXT, T_UINTMAXT, TP_INTMAXT, TP_VOID, TP_CHAR, TP_SCHAR, -+#ifdef ALTIVEC ++#ifdef VECTORS + T_DOUBLE, T_LONG_DOUBLE, T_WINT, TP_WCHAR, T_VECTOR -+#else /* ! ALTIVEC */ ++#else /* ! 
VECTORS */ T_DOUBLE, T_LONG_DOUBLE, T_WINT, TP_WCHAR -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ }; static int __sprint(FILE *, struct __suio *); @@ -70,43 +76,7 @@ static void __find_arguments(const char *, va_list, union arg **); static void __grow_type_table(int, enum typeid **, int *); -+ /* -+ * Get the argument indexed by nextarg. If the argument table is -+ * built, use it to get the argument. If its not, get the next -+ * argument (and arguments must be gotten sequentially). -+ */ -+#define GETARG(type) \ -+ ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ -+ (nextarg++, va_arg(ap, type))) -+ -+#ifdef ALTIVEC -+#define hasAltivec (_cpu_capabilities & kHasAltivec) -+/*----------------------------------------------------------------------- -+ * getvec() must be a real subroutine. If it is a #define, then __vfprintf() -+ * would have its calling sequence changed by Altivec so that a non-Altivec -+ * processor would crash on illegal instruction. By isolating the calling -+ * sequence in getvec(), __vprintf() is callable by a non-Altivec processor. -+ *-----------------------------------------------------------------------*/ -+static va_list getvec(union arg *, const union arg *, int, va_list) __attribute__((noinline)); -+ -+static va_list -+getvec(union arg *dst, const union arg *argtable, int nextarg, va_list ap) -+{ -+ dst->vectorarg = GETARG(VECTORTYPE); -+ return ap; -+} -+ -+#define SETVEC(dst) \ -+{ \ -+ ap = getvec(&dst, argtable, nextarg, ap); \ -+ nextarg++; \ -+} -+#endif /* ALTIVEC */ -+ - /* - * Flush out all the vectors defined by the given uio, - * then reset it so that it can be reused. -@@ -141,7 +197,7 @@ +@@ -141,7 +170,7 @@ * worries about ungetc buffers and so forth. */ static int @@ -115,7 +85,7 @@ { int ret; FILE fake; -@@ -160,7 +216,7 @@ +@@ -160,7 +189,7 @@ fake._lbfsize = 0; /* not actually used, but Just In Case */ /* do the work, then copy any error status */ @@ -124,7 +94,7 @@ if (ret >= 0 && __fflush(&fake)) ret = EOF; if (fake._flags & __SERR) -@@ -336,7 +392,7 @@ +@@ -336,7 +365,7 @@ * that the wide char. string ends in a null character. */ static char * @@ -133,7 +103,7 @@ { static const mbstate_t initial; mbstate_t mbs; -@@ -354,7 +410,7 @@ +@@ -354,7 +383,7 @@ p = wcsarg; mbs = initial; for (;;) { @@ -142,7 +112,7 @@ if (clen == 0 || clen == (size_t)-1 || nbytes + clen > prec) break; -@@ -363,7 +419,7 @@ +@@ -363,7 +392,7 @@ } else { p = wcsarg; mbs = initial; @@ -151,7 +121,7 @@ if (nbytes == (size_t)-1) return (NULL); } -@@ -378,7 +434,7 @@ +@@ -378,7 +407,7 @@ p = wcsarg; mbs = initial; while (mbp - convbuf < nbytes) { @@ -160,7 +130,7 @@ if (clen == 0 || clen == (size_t)-1) break; mbp += clen; -@@ -402,7 +458,21 @@ +@@ -402,7 +431,21 @@ int ret; FLOCKFILE(fp); @@ -183,13 +153,13 @@ FUNLOCKFILE(fp); return (ret); } -@@ -451,12 +521,15 @@ +@@ -451,12 +494,15 @@ #define PTRDIFFT 0x800 /* ptrdiff_t */ #define INTMAXT 0x1000 /* intmax_t */ #define CHARINT 0x2000 /* print char using int format */ -+#ifdef ALTIVEC -+#define VECTOR 0x4000 /* Altivec vector */ -+#endif /* ALTIVEC */ ++#ifdef VECTORS ++#define VECTOR 0x4000 /* Altivec or SSE vector */ ++#endif /* VECTORS */ /* * Non-MT-safe version @@ -201,11 +171,11 @@ { char *fmt; /* format string */ int ch; /* character from fmt */ -@@ -502,6 +575,11 @@ +@@ -502,6 +548,11 @@ int nseps; /* number of group separators with ' */ int nrepeats; /* number of repeats of the last group */ #endif -+#ifdef ALTIVEC ++#ifdef VECTORS + union arg vval; /* Vector argument. 
*/ + char *pct; /* Pointer to '%' at beginning of specifier. */ + char vsep; /* Vector separator character. */ @@ -213,23 +183,7 @@ u_long ulval; /* integer arguments %[diouxX] */ uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */ int base; /* base for [diouxX] conversion */ -@@ -574,15 +652,6 @@ - } - - /* -- * Get the argument indexed by nextarg. If the argument table is -- * built, use it to get the argument. If its not, get the next -- * argument (and arguments must be gotten sequentially). -- */ --#define GETARG(type) \ -- ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ -- (nextarg++, va_arg(ap, type))) -- -- /* - * To extend shorts properly, we need both signed and unsigned - * argument extraction methods. - */ -@@ -633,22 +702,23 @@ +@@ -633,22 +684,23 @@ val = GETARG (int); \ } @@ -257,39 +211,39 @@ fmt = (char *)fmt0; argtable = NULL; -@@ -675,6 +745,9 @@ +@@ -675,6 +727,9 @@ } if (ch == '\0') goto done; -+#ifdef ALTIVEC ++#ifdef VECTORS + pct = fmt; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ fmt++; /* skip over '%' */ flags = 0; -@@ -683,6 +756,9 @@ +@@ -683,6 +738,9 @@ prec = -1; sign = '\0'; ox[1] = '\0'; -+#ifdef ALTIVEC ++#ifdef VECTORS + vsep = 'X'; /* Illegal value, changed to defaults later. */ -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ rflag: ch = *fmt++; reswitch: switch (ch) { -@@ -698,6 +774,11 @@ +@@ -698,6 +756,11 @@ case '#': flags |= ALT; goto rflag; -+#ifdef ALTIVEC ++#ifdef VECTORS + case ',': case ';': case ':': case '_': + vsep = ch; + goto rflag; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ case '*': /*- * ``A negative field width argument is taken as a -@@ -718,8 +799,8 @@ +@@ -718,8 +781,8 @@ goto rflag; case '\'': flags |= GROUPING; @@ -300,16 +254,14 @@ goto rflag; case '.': if ((ch = *fmt++) == '*') { -@@ -793,14 +874,20 @@ +@@ -793,14 +856,18 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'c': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & LONGINT) { static const mbstate_t initial; mbstate_t mbs; @@ -323,34 +275,31 @@ if (mbseqlen == (size_t)-1) { fp->_flags |= __SERR; goto error; -@@ -817,6 +904,12 @@ +@@ -817,6 +884,10 @@ /*FALLTHROUGH*/ case 'd': case 'i': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) { ujval = SJARG(); if ((intmax_t)ujval < 0) { -@@ -835,6 +928,13 @@ +@@ -835,6 +906,12 @@ #ifndef NO_FLOATING_POINT case 'a': case 'A': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (ch == 'a') { ox[1] = 'x'; xdigs = xdigs_lower; -@@ -848,6 +948,12 @@ +@@ -848,6 +925,12 @@ prec++; if (dtoaresult != NULL) freedtoa(dtoaresult); @@ -363,7 +312,7 @@ if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = cp = -@@ -859,6 +965,7 @@ +@@ -859,6 +942,7 @@ __hdtoa(fparg.dbl, xdigs, prec, &expt, &signflag, &dtoaend); } @@ -371,46 +320,43 @@ if (prec < 0) prec = dtoaend - cp; if (expt == INT_MAX) -@@ -866,6 +973,13 @@ +@@ -866,6 +950,12 @@ goto fp_common; case 'e': case 'E': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = ch; if (prec < 0) /* account for digit before decpt */ prec = DEFPREC + 1; -@@ -874,10 +988,24 @@ +@@ -874,10 +964,22 @@ goto fp_begin; case 'f': 
case 'F': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = '\0'; goto fp_begin; case 'g': case 'G': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = ch - ('g' - 'e'); if (prec == 0) prec = 1; -@@ -886,6 +1014,14 @@ +@@ -886,6 +988,14 @@ prec = DEFPREC; if (dtoaresult != NULL) freedtoa(dtoaresult); @@ -425,7 +371,7 @@ if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = cp = -@@ -899,6 +1035,7 @@ +@@ -899,6 +1009,7 @@ if (expt == 9999) expt = INT_MAX; } @@ -433,33 +379,29 @@ fp_common: if (signflag) sign = '-'; -@@ -993,6 +1130,12 @@ +@@ -993,6 +1104,10 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'o': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1007,6 +1150,12 @@ +@@ -1007,6 +1122,10 @@ * defined manner.'' * -- ANSI X3J11 */ -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ujval = (uintmax_t)(uintptr_t)GETARG(void *); base = 16; xdigs = xdigs_lower; -@@ -1025,7 +1174,7 @@ +@@ -1025,7 +1144,7 @@ if ((wcp = GETARG(wchar_t *)) == NULL) cp = "(null)"; else { @@ -468,52 +410,45 @@ if (convbuf == NULL) { fp->_flags |= __SERR; goto error; -@@ -1056,6 +1205,12 @@ +@@ -1056,6 +1175,10 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'u': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1068,6 +1223,12 @@ +@@ -1068,6 +1191,10 @@ case 'x': xdigs = xdigs_lower; hex: -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1112,6 +1273,14 @@ +@@ -1112,6 +1239,11 @@ if (size > BUF) /* should never happen */ abort(); break; -+#ifdef ALTIVEC ++#ifdef VECTORS + case 'v': -+ if (hasAltivec) { -+ flags |= VECTOR; -+ goto rflag; -+ } -+ /* drap through */ -+#endif /* ALTIVEC */ ++ flags |= VECTOR; ++ goto rflag; ++#endif /* VECTORS */ default: /* "%?" prints ?, unless ? is NUL */ if (ch == '\0') goto done; -@@ -1123,6 +1292,184 @@ +@@ -1123,6 +1255,290 @@ break; } -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + /* + * Do the minimum amount of work necessary to construct @@ -523,24 +458,32 @@ + int i, j; /* Counter. */ + int vcnt; /* Number of elements in vector. */ + char *vfmt; /* Pointer to format specifier. */ -+ char vfmt_buf[32]; /* Static buffer for format spec. */ ++#define EXTRAHH 2 ++ char vfmt_buf[32 + EXTRAHH]; /* Static buffer for format spec. */ + int vwidth = 0; /* Width specified via '*'. */ + int vprec = 0; /* Precision specified via '*'. */ -+ union { /* Element. */ -+ int i; -+ float f; -+ } velm; + char *vstr; /* Used for asprintf(). */ + int vlen; /* Length returned by asprintf(). */ ++ enum { ++ V_CHAR, V_SHORT, V_INT, ++ V_PCHAR, V_PSHORT, V_PINT, ++ V_FLOAT, ++#ifdef V64TYPE ++ V_LONGLONG, V_PLONGLONG, ++ V_DOUBLE, ++#endif /* V64TYPE */ ++ } vtype; + ++ vval.vectorarg = GETARG(VECTORTYPE); + /* + * Set vfmt. 
If vfmt_buf may not be big enough, + * malloc() space, taking care to free it later. ++ * (EXTRAHH is for possible extra "hh") + */ -+ if (&fmt[-1] - pct < sizeof(vfmt_buf)) ++ if (&fmt[-1] - pct + EXTRAHH < sizeof(vfmt_buf)) + vfmt = vfmt_buf; + else -+ vfmt = (char *)malloc(&fmt[-1] - pct + 1); ++ vfmt = (char *)malloc(&fmt[-1] - pct + EXTRAHH + 1); + + /* Set the separator character, if not specified. */ + if (vsep == 'X') { @@ -573,13 +516,57 @@ + * finish up the format specifier. + */ + if (flags & SHORTINT) { -+ if (ch != 'c') ++ switch (ch) { ++ case 'c': ++ vtype = V_SHORT; ++ break; ++ case 'p': ++ vtype = V_PSHORT; ++ break; ++ default: + vfmt[j++] = 'h'; ++ vtype = V_SHORT; ++ break; ++ } + vcnt = 8; + } else if (flags & LONGINT) { -+ if (ch != 'c') -+ vfmt[j++] = 'l'; + vcnt = 4; ++ vtype = (ch == 'p') ? V_PINT : V_INT; ++#ifdef V64TYPE ++ } else if (flags & LLONGINT) { ++ switch (ch) { ++ case 'a': ++ case 'A': ++ case 'e': ++ case 'E': ++ case 'f': ++ case 'g': ++ case 'G': ++ vcnt = 2; ++ vtype = V_DOUBLE; ++ break; ++ case 'd': ++ case 'i': ++ case 'u': ++ case 'o': ++ case 'p': ++ case 'x': ++ case 'X': ++ vfmt[j++] = 'l'; ++ vfmt[j++] = 'l'; ++ vcnt = 2; ++ vtype = (ch == 'p') ? V_PLONGLONG : V_LONGLONG; ++ break; ++ default: ++ /* ++ * The default case should never ++ * happen. ++ */ ++ case 'c': ++ vcnt = 16; ++ vtype = V_CHAR; ++ } ++#endif /* V64TYPE */ + } else { + switch (ch) { + case 'a': @@ -590,96 +577,150 @@ + case 'g': + case 'G': + vcnt = 4; ++ vtype = V_FLOAT; + break; + default: + /* + * The default case should never + * happen. + */ -+ case 'c': + case 'd': + case 'i': + case 'u': + case 'o': -+ case 'p': + case 'x': + case 'X': ++ vfmt[j++] = 'h'; ++ vfmt[j++] = 'h'; ++ /* drop through */ ++ case 'p': ++ case 'c': + vcnt = 16; ++ vtype = (ch == 'p') ? V_PCHAR : V_CHAR; + } + } + vfmt[j++] = ch; + vfmt[j++] = '\0'; + +/* Get a vector element. */ -+#define VPRINT(cnt, ind, args...) do { \ -+ if (flags & FPT) { \ -+ velm.f = vval.vfloatarg[ind]; \ -+ vlen = asprintf_l(&vstr, loc, vfmt , ## args, velm.f); \ -+ } else { \ -+ switch (cnt) { \ -+ default: \ -+ /* The default case should never happen. */ \ -+ case 4: \ -+ velm.i = (unsigned)vval.vintarg[ind]; \ -+ break; \ -+ case 8: \ -+ velm.i = (unsigned short)vval.vshortarg[ind]; \ -+ break; \ -+ case 16: \ -+ velm.i = (unsigned char)vval.vchararg[ind]; \ -+ break; \ -+ } \ -+ vlen = asprintf_l(&vstr, loc, vfmt , ## args, velm.i); \ ++#ifdef V64TYPE ++#define VPRINT(type, ind, args...) 
do { \ ++ switch (type) { \ ++ case V_CHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuchararg[ind]); \ ++ break; \ ++ case V_PCHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuchararg[ind]); \ ++ break; \ ++ case V_SHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vushortarg[ind]); \ ++ break; \ ++ case V_PSHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vushortarg[ind]); \ ++ break; \ ++ case V_INT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuintarg[ind]); \ ++ break; \ ++ case V_PINT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuintarg[ind]); \ ++ break; \ ++ case V_LONGLONG: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vulonglongarg[ind]); \ ++ break; \ ++ case V_PLONGLONG: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vulonglongarg[ind]); \ ++ break; \ ++ case V_FLOAT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vfloatarg[ind]); \ ++ break; \ ++ case V_DOUBLE: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vdoublearg[ind]); \ ++ break; \ ++ } \ ++ ret += vlen; \ ++ PRINT(vstr, vlen); \ ++ FLUSH(); \ ++ free(vstr); \ ++} while (0) ++#else /* !V64TYPE */ ++#define VPRINT(type, ind, args...) do { \ ++ switch (type) { \ ++ case V_CHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuchararg[ind]); \ ++ break; \ ++ case V_PCHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuchararg[ind]); \ ++ break; \ ++ case V_SHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vushortarg[ind]); \ ++ break; \ ++ case V_PSHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vushortarg[ind]); \ ++ break; \ ++ case V_INT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuintarg[ind]); \ ++ break; \ ++ case V_PINT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuintarg[ind]); \ ++ break; \ ++ case V_FLOAT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vfloatarg[ind]); \ ++ break; \ + } \ + ret += vlen; \ + PRINT(vstr, vlen); \ + FLUSH(); \ + free(vstr); \ +} while (0) ++#endif /* V64TYPE */ + + /* Actually print. */ + if (vwidth == 0) { + if (vprec == 0) { + /* First element. */ -+ VPRINT(vcnt, 0); ++ VPRINT(vtype, 0); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i); ++ VPRINT(vtype, i); + } + } else { + /* First element. */ -+ VPRINT(vcnt, 0, prec); ++ VPRINT(vtype, 0, prec); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, prec); ++ VPRINT(vtype, i, prec); + } + } + } else { + if (vprec == 0) { + /* First element. */ -+ VPRINT(vcnt, 0, width); ++ VPRINT(vtype, 0, width); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, width); ++ VPRINT(vtype, i, width); + } + } else { + /* First element. */ -+ VPRINT(vcnt, 0, width, prec); ++ VPRINT(vtype, 0, width, prec); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, width, prec); ++ VPRINT(vtype, i, width, prec); + } + } + } @@ -690,27 +731,27 @@ + + continue; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ /* * All reasonable formats wind up here. 
At this point, `cp' * points to a string which (if not flags&LADJUST) should be -@@ -1406,6 +1753,11 @@ +@@ -1406,6 +1822,11 @@ if (flags & LONGINT) ADDTYPE(T_WINT); else -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDTYPE(T_INT); break; case 'D': -@@ -1413,6 +1765,11 @@ +@@ -1413,6 +1834,11 @@ /*FALLTHROUGH*/ case 'd': case 'i': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else @@ -718,51 +759,51 @@ ADDSARG(); break; #ifndef NO_FLOATING_POINT -@@ -1423,6 +1780,11 @@ +@@ -1423,6 +1849,11 @@ case 'f': case 'g': case 'G': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & LONGDBL) ADDTYPE(T_LONG_DOUBLE); else -@@ -1451,9 +1813,19 @@ +@@ -1451,9 +1882,19 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'o': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDUARG(); break; case 'p': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDTYPE(TP_VOID); break; case 'S': -@@ -1471,6 +1843,11 @@ +@@ -1471,6 +1912,11 @@ case 'u': case 'X': case 'x': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDUARG(); break; default: /* "%?" prints ?, unless ? is NUL */ -@@ -1537,7 +1914,7 @@ +@@ -1537,7 +1983,7 @@ (*argtable) [n].sizearg = va_arg (ap, size_t); break; case TP_SIZET: @@ -771,16 +812,15 @@ break; case T_INTMAXT: (*argtable) [n].intmaxarg = va_arg (ap, intmax_t); -@@ -1556,6 +1933,12 @@ +@@ -1556,6 +2002,11 @@ (*argtable) [n].longdoublearg = va_arg (ap, long double); break; #endif -+#ifdef ALTIVEC ++#ifdef VECTORS + case T_VECTOR: -+ if (hasAltivec) -+ ap = getvec( &((*argtable) [n]), NULL, 0, ap ); ++ (*argtable) [n].vectorarg = va_arg (ap, VECTORTYPE); + break; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ case TP_CHAR: (*argtable) [n].pchararg = va_arg (ap, char *); break; diff --git a/stdio/FreeBSD/vfwprintf.c.patch b/stdio/FreeBSD/vfwprintf.c.patch index 8327adc..a645ab8 100644 --- a/stdio/FreeBSD/vfwprintf.c.patch +++ b/stdio/FreeBSD/vfwprintf.c.patch @@ -1,5 +1,5 @@ --- vfwprintf.c.orig 2004-11-25 11:38:36.000000000 -0800 -+++ vfwprintf.c 2005-02-24 15:17:14.000000000 -0800 ++++ vfwprintf.c 2005-11-08 22:46:07.000000000 -0800 @@ -42,6 +42,8 @@ #include __FBSDID("$FreeBSD: src/lib/libc/stdio/vfwprintf.c,v 1.23 2004/08/26 06:25:28 des Exp $"); @@ -9,7 +9,7 @@ /* * Actual wprintf innards. 
* -@@ -63,12 +65,19 @@ +@@ -63,12 +65,20 @@ #include #include #include @@ -20,20 +20,21 @@ #include "local.h" #include "fvwrite.h" -+#ifdef ALTIVEC -+#include -+ -+#define VECTORTYPE vector unsigned char -+#endif /* ALTIVEC */ ++#ifdef VECTORS ++typedef __attribute__ ((vector_size(16))) unsigned char VECTORTYPE; ++#ifdef __SSE2__ ++#define V64TYPE ++#endif /* __SSE2__ */ ++#endif /* VECTORS */ + union arg { int intarg; u_int uintarg; -@@ -96,6 +105,16 @@ +@@ -96,6 +106,21 @@ #endif wint_t wintarg; wchar_t *pwchararg; -+#ifdef ALTIVEC ++#ifdef VECTORS + VECTORTYPE vectorarg; + unsigned char vuchararg[16]; + signed char vchararg[16]; @@ -42,19 +43,24 @@ + unsigned int vuintarg[4]; + signed int vintarg[4]; + float vfloatarg[4]; -+#endif /* ALTIVEC */ ++#ifdef V64TYPE ++ double vdoublearg[2]; ++ unsigned long long vulonglongarg[2]; ++ long long vlonglongarg[2]; ++#endif /* V64TYPE */ ++#endif /* VECTORS */ }; /* -@@ -106,26 +125,63 @@ +@@ -106,16 +131,20 @@ T_LONG, T_U_LONG, TP_LONG, T_LLONG, T_U_LLONG, TP_LLONG, T_PTRDIFFT, TP_PTRDIFFT, T_SIZET, TP_SIZET, T_INTMAXT, T_UINTMAXT, TP_INTMAXT, TP_VOID, TP_CHAR, TP_SCHAR, -+#ifdef ALTIVEC ++#ifdef VECTORS + T_DOUBLE, T_LONG_DOUBLE, T_WINT, TP_WCHAR, T_VECTOR -+#else /* ! ALTIVEC */ ++#else /* ! VECTORS */ T_DOUBLE, T_LONG_DOUBLE, T_WINT, TP_WCHAR -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ }; -static int __sbprintf(FILE *, const wchar_t *, va_list); @@ -70,42 +76,7 @@ static void __find_arguments(const wchar_t *, va_list, union arg **); static void __grow_type_table(int, enum typeid **, int *); -+ /* -+ * Get the argument indexed by nextarg. If the argument table is -+ * built, use it to get the argument. If its not, get the next -+ * argument (and arguments must be gotten sequentially). -+ */ -+#define GETARG(type) \ -+ ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ -+ (nextarg++, va_arg(ap, type))) -+ -+#ifdef ALTIVEC -+#define hasAltivec (_cpu_capabilities & kHasAltivec) -+/*----------------------------------------------------------------------- -+ * getvec() must be a real subroutine. If it is a #define, then __vfprintf() -+ * would have its calling sequence changed by Altivec so that a non-Altivec -+ * processor would crash on illegal instruction. By isolating the calling -+ * sequence in getvec(), __vprintf() is callable by a non-Altivec processor. -+ *-----------------------------------------------------------------------*/ -+static va_list getvec(union arg *, const union arg *, int, va_list) __attribute__((noinline)); -+ -+static va_list -+getvec(union arg *dst, const union arg *argtable, int nextarg, va_list ap) -+{ -+ dst->vectorarg = GETARG(VECTORTYPE); -+ return ap; -+} -+ -+#define SETVEC(dst) \ -+{ \ -+ ap = getvec(&dst, argtable, nextarg, ap); \ -+ nextarg++; \ -+} -+#endif /* ALTIVEC */ -+ - /* - * Helper function for `fprintf to unbuffered unix file': creates a - * temporary buffer. We only work on write-only files; this avoids +@@ -125,7 +154,7 @@ * worries about ungetc buffers and so forth. */ static int @@ -114,7 +85,7 @@ { int ret; FILE fake; -@@ -144,7 +200,7 @@ +@@ -144,7 +173,7 @@ fake._lbfsize = 0; /* not actually used, but Just In Case */ /* do the work, then copy any error status */ @@ -123,7 +94,7 @@ if (ret >= 0 && __fflush(&fake)) ret = WEOF; if (fake._flags & __SERR) -@@ -157,7 +213,7 @@ +@@ -157,7 +186,7 @@ * File must already be locked. 
*/ static wint_t @@ -132,7 +103,7 @@ { static const mbstate_t initial; mbstate_t mbs; -@@ -167,10 +223,10 @@ +@@ -167,10 +196,10 @@ size_t len; if ((fp->_flags & __SSTR) == 0) @@ -145,7 +116,7 @@ fp->_flags |= __SERR; return (WEOF); } -@@ -350,13 +406,14 @@ +@@ -350,13 +379,14 @@ * that the multibyte char. string ends in a null character. */ static wchar_t * @@ -161,7 +132,7 @@ if (mbsarg == NULL) return (NULL); -@@ -374,7 +431,7 @@ +@@ -374,7 +404,7 @@ insize = nchars = 0; mbs = initial; while (nchars != (size_t)prec) { @@ -170,7 +141,7 @@ if (nconv == 0 || nconv == (size_t)-1 || nconv == (size_t)-2) break; -@@ -399,7 +456,7 @@ +@@ -399,7 +429,7 @@ p = mbsarg; mbs = initial; while (insize != 0) { @@ -179,7 +150,7 @@ if (nconv == 0 || nconv == (size_t)-1 || nconv == (size_t)-2) break; wcp++; -@@ -425,7 +482,21 @@ +@@ -425,7 +455,21 @@ int ret; FLOCKFILE(fp); @@ -202,13 +173,13 @@ FUNLOCKFILE(fp); return (ret); } -@@ -474,12 +545,15 @@ +@@ -474,12 +518,15 @@ #define PTRDIFFT 0x800 /* ptrdiff_t */ #define INTMAXT 0x1000 /* intmax_t */ #define CHARINT 0x2000 /* print char using int format */ -+#ifdef ALTIVEC -+#define VECTOR 0x4000 /* Altivec vector */ -+#endif /* ALTIVEC */ ++#ifdef VECTORS ++#define VECTOR 0x4000 /* Altivec or SSE vector */ ++#endif /* VECTORS */ /* * Non-MT-safe version @@ -220,11 +191,11 @@ { wchar_t *fmt; /* format string */ wchar_t ch; /* character from fmt */ -@@ -524,6 +598,11 @@ +@@ -524,6 +571,11 @@ int nseps; /* number of group separators with ' */ int nrepeats; /* number of repeats of the last group */ #endif -+#ifdef ALTIVEC ++#ifdef VECTORS + union arg vval; /* Vector argument. */ + wchar_t *pct; /* Pointer to '%' at beginning of specifier. */ + wchar_t vsep; /* Vector separator character. */ @@ -232,7 +203,7 @@ u_long ulval; /* integer arguments %[diouxX] */ uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */ int base; /* base for [diouxX] conversion */ -@@ -560,7 +639,7 @@ +@@ -560,7 +612,7 @@ */ #define PRINT(ptr, len) do { \ for (n3 = 0; n3 < (len); n3++) \ @@ -241,23 +212,7 @@ } while (0) #define PAD(howmany, with) do { \ if ((n = (howmany)) > 0) { \ -@@ -581,15 +660,6 @@ - } while(0) - - /* -- * Get the argument indexed by nextarg. If the argument table is -- * built, use it to get the argument. If its not, get the next -- * argument (and arguments must be gotten sequentially). -- */ --#define GETARG(type) \ -- ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ -- (nextarg++, va_arg(ap, type))) -- -- /* - * To extend shorts properly, we need both signed and unsigned - * argument extraction methods. - */ -@@ -640,21 +710,22 @@ +@@ -640,21 +692,22 @@ val = GETARG (int); \ } @@ -284,39 +239,39 @@ fmt = (wchar_t *)fmt0; argtable = NULL; -@@ -678,6 +749,9 @@ +@@ -678,6 +731,9 @@ } if (ch == '\0') goto done; -+#ifdef ALTIVEC ++#ifdef VECTORS + pct = fmt; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ fmt++; /* skip over '%' */ flags = 0; -@@ -686,6 +760,9 @@ +@@ -686,6 +742,9 @@ prec = -1; sign = '\0'; ox[1] = '\0'; -+#ifdef ALTIVEC ++#ifdef VECTORS + vsep = 'X'; /* Illegal value, changed to defaults later. 
*/ -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ rflag: ch = *fmt++; reswitch: switch (ch) { -@@ -701,6 +778,11 @@ +@@ -701,6 +760,11 @@ case '#': flags |= ALT; goto rflag; -+#ifdef ALTIVEC ++#ifdef VECTORS + case ',': case ';': case ':': case '_': + vsep = ch; + goto rflag; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ case '*': /*- * ``A negative field width argument is taken as a -@@ -721,8 +803,8 @@ +@@ -721,8 +785,8 @@ goto rflag; case '\'': flags |= GROUPING; @@ -327,16 +282,14 @@ goto rflag; case '.': if ((ch = *fmt++) == '*') { -@@ -796,10 +878,16 @@ +@@ -796,10 +860,14 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'c': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & LONGINT) *(cp = buf) = (wchar_t)GETARG(wint_t); else @@ -345,20 +298,31 @@ size = 1; sign = '\0'; break; -@@ -808,6 +896,12 @@ +@@ -808,6 +876,10 @@ /*FALLTHROUGH*/ case 'd': case 'i': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) { ujval = SJARG(); if ((intmax_t)ujval < 0) { -@@ -837,6 +931,12 @@ +@@ -826,6 +898,12 @@ + #ifndef NO_FLOATING_POINT + case 'a': + case 'A': ++#ifdef VECTORS ++ if (flags & VECTOR) { ++ flags |= FPT; ++ break; ++ } ++#endif /* VECTORS */ + if (ch == 'a') { + ox[1] = 'x'; + xdigs = xdigs_lower; +@@ -837,6 +915,12 @@ } if (prec >= 0) prec++; @@ -371,7 +335,7 @@ if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = -@@ -848,6 +948,7 @@ +@@ -848,6 +932,7 @@ __hdtoa(fparg.dbl, xdigs, prec, &expt, &signflag, &dtoaend); } @@ -379,7 +343,7 @@ if (prec < 0) prec = dtoaend - dtoaresult; if (expt == INT_MAX) -@@ -855,7 +956,7 @@ +@@ -855,11 +940,17 @@ if (convbuf != NULL) free(convbuf); ndig = dtoaend - dtoaresult; @@ -388,32 +352,40 @@ freedtoa(dtoaresult); goto fp_common; case 'e': -@@ -868,10 +969,24 @@ + case 'E': ++#ifdef VECTORS ++ if (flags & VECTOR) { ++ flags |= FPT; ++ break; ++ } ++#endif /* VECTORS */ + expchar = ch; + if (prec < 0) /* account for digit before decpt */ + prec = DEFPREC + 1; +@@ -868,10 +959,22 @@ goto fp_begin; case 'f': case 'F': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = '\0'; goto fp_begin; case 'g': case 'G': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = ch - ('g' - 'e'); if (prec == 0) prec = 1; -@@ -880,6 +995,14 @@ +@@ -880,6 +983,14 @@ prec = DEFPREC; if (convbuf != NULL) free(convbuf); @@ -428,7 +400,7 @@ if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = -@@ -893,8 +1016,9 @@ +@@ -893,8 +1004,9 @@ if (expt == 9999) expt = INT_MAX; } @@ -439,33 +411,29 @@ freedtoa(dtoaresult); fp_common: if (signflag) -@@ -989,6 +1113,12 @@ +@@ -989,6 +1101,10 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'o': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1003,6 +1133,12 @@ +@@ -1003,6 +1119,10 @@ * defined manner.'' * -- ANSI X3J11 */ -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ujval = 
(uintmax_t)(uintptr_t)GETARG(void *); base = 16; xdigs = xdigs_lower; -@@ -1024,7 +1160,7 @@ +@@ -1024,7 +1144,7 @@ if ((mbp = GETARG(char *)) == NULL) cp = L"(null)"; else { @@ -474,52 +442,45 @@ if (convbuf == NULL) { fp->_flags |= __SERR; goto error; -@@ -1055,6 +1191,12 @@ +@@ -1055,6 +1175,10 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'u': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1067,6 +1209,12 @@ +@@ -1067,6 +1191,10 @@ case 'x': xdigs = xdigs_lower; hex: -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1111,6 +1259,14 @@ +@@ -1111,6 +1239,11 @@ if (size > BUF) /* should never happen */ abort(); break; -+#ifdef ALTIVEC ++#ifdef VECTORS + case 'v': -+ if (hasAltivec) { -+ flags |= VECTOR; -+ goto rflag; -+ } -+ /* drop through */ -+#endif /* ALTIVEC */ ++ flags |= VECTOR; ++ goto rflag; ++#endif /* VECTORS */ default: /* "%?" prints ?, unless ? is NUL */ if (ch == '\0') goto done; -@@ -1122,6 +1278,183 @@ +@@ -1122,6 +1255,288 @@ break; } -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + /* + * Do the minimum amount of work necessary to construct @@ -529,24 +490,32 @@ + int i, j; /* Counter. */ + int vcnt; /* Number of elements in vector. */ + char *vfmt; /* Pointer to format specifier. */ -+ char vfmt_buf[32]; /* Static buffer for format spec. */ ++#define EXTRAHH 2 ++ char vfmt_buf[32 + EXTRAHH]; /* Static buffer for format spec. */ + int vwidth = 0; /* Width specified via '*'. */ + int vprec = 0; /* Precision specified via '*'. */ -+ union { /* Element. */ -+ int i; -+ float f; -+ } velm; + char *vstr; /* Used for asprintf(). */ + int vlen; /* Length returned by asprintf(). */ ++ enum { ++ V_CHAR, V_SHORT, V_INT, ++ V_PCHAR, V_PSHORT, V_PINT, ++ V_FLOAT, ++#ifdef V64TYPE ++ V_LONGLONG, V_PLONGLONG, ++ V_DOUBLE, ++#endif /* V64TYPE */ ++ } vtype; + ++ vval.vectorarg = GETARG(VECTORTYPE); + /* + * Set vfmt. If vfmt_buf may not be big enough, + * malloc() space, taking care to free it later. ++ * (EXTRAHH is for possible extra "hh") + */ -+ if (&fmt[-1] - pct < sizeof(vfmt_buf)) ++ if (&fmt[-1] - pct + EXTRAHH < sizeof(vfmt_buf)) + vfmt = vfmt_buf; + else -+ vfmt = (char *)malloc(&fmt[-1] - pct + 1); ++ vfmt = (char *)malloc(&fmt[-1] - pct + EXTRAHH + 1); + + /* Set the separator character, if not specified. */ + if (vsep == 'X') { @@ -579,13 +548,57 @@ + * finish up the format specifier. + */ + if (flags & SHORTINT) { -+ if (ch != 'c') ++ switch (ch) { ++ case 'c': ++ vtype = V_SHORT; ++ break; ++ case 'p': ++ vtype = V_PSHORT; ++ break; ++ default: + vfmt[j++] = 'h'; ++ vtype = V_SHORT; ++ break; ++ } + vcnt = 8; + } else if (flags & LONGINT) { -+ if (ch != 'c') -+ vfmt[j++] = 'l'; + vcnt = 4; ++ vtype = (ch == 'p') ? V_PINT : V_INT; ++#ifdef V64TYPE ++ } else if (flags & LLONGINT) { ++ switch (ch) { ++ case 'a': ++ case 'A': ++ case 'e': ++ case 'E': ++ case 'f': ++ case 'g': ++ case 'G': ++ vcnt = 2; ++ vtype = V_DOUBLE; ++ break; ++ case 'd': ++ case 'i': ++ case 'u': ++ case 'o': ++ case 'p': ++ case 'x': ++ case 'X': ++ vfmt[j++] = 'l'; ++ vfmt[j++] = 'l'; ++ vcnt = 2; ++ vtype = (ch == 'p') ? V_PLONGLONG : V_LONGLONG; ++ break; ++ default: ++ /* ++ * The default case should never ++ * happen. 
++ */ ++ case 'c': ++ vcnt = 16; ++ vtype = V_CHAR; ++ } ++#endif /* V64TYPE */ + } else { + switch (ch) { + case 'a': @@ -596,95 +609,148 @@ + case 'g': + case 'G': + vcnt = 4; ++ vtype = V_FLOAT; + break; + default: + /* + * The default case should never + * happen. + */ -+ case 'c': + case 'd': + case 'i': + case 'u': + case 'o': -+ case 'p': + case 'x': + case 'X': ++ vfmt[j++] = 'h'; ++ vfmt[j++] = 'h'; ++ /* drop through */ ++ case 'p': ++ case 'c': + vcnt = 16; ++ vtype = (ch == 'p') ? V_PCHAR : V_CHAR; + } + } + vfmt[j++] = ch; + vfmt[j++] = '\0'; + +/* Get a vector element. */ -+#define VPRINT(cnt, ind, args...) do { \ -+ if (flags & FPT) { \ -+ velm.f = vval.vfloatarg[ind]; \ -+ vlen = asprintf_l(&vstr, loc, vfmt , ## args, velm.f); \ -+ } else { \ -+ switch (cnt) { \ -+ default: \ -+ /* The default case should never happen. */ \ -+ case 4: \ -+ velm.i = (unsigned)vval.vintarg[ind]; \ -+ break; \ -+ case 8: \ -+ velm.i = (unsigned short)vval.vshortarg[ind]; \ -+ break; \ -+ case 16: \ -+ velm.i = (unsigned char)vval.vchararg[ind]; \ -+ break; \ -+ } \ -+ vlen = asprintf_l(&vstr, loc, vfmt , ## args, velm.i); \ ++#ifdef V64TYPE ++#define VPRINT(type, ind, args...) do { \ ++ switch (type) { \ ++ case V_CHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuchararg[ind]); \ ++ break; \ ++ case V_PCHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuchararg[ind]); \ ++ break; \ ++ case V_SHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vushortarg[ind]); \ ++ break; \ ++ case V_PSHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vushortarg[ind]); \ ++ break; \ ++ case V_INT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuintarg[ind]); \ ++ break; \ ++ case V_PINT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuintarg[ind]); \ ++ break; \ ++ case V_LONGLONG: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vulonglongarg[ind]); \ ++ break; \ ++ case V_PLONGLONG: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vulonglongarg[ind]); \ ++ break; \ ++ case V_FLOAT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vfloatarg[ind]); \ ++ break; \ ++ case V_DOUBLE: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vdoublearg[ind]); \ ++ break; \ ++ } \ ++ ret += vlen; \ ++ PRINT(vstr, vlen); \ ++ free(vstr); \ ++} while (0) ++#else /* !V64TYPE */ ++#define VPRINT(type, ind, args...) do { \ ++ switch (type) { \ ++ case V_CHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuchararg[ind]); \ ++ break; \ ++ case V_PCHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuchararg[ind]); \ ++ break; \ ++ case V_SHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vushortarg[ind]); \ ++ break; \ ++ case V_PSHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vushortarg[ind]); \ ++ break; \ ++ case V_INT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuintarg[ind]); \ ++ break; \ ++ case V_PINT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuintarg[ind]); \ ++ break; \ ++ case V_FLOAT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vfloatarg[ind]); \ ++ break; \ + } \ + ret += vlen; \ + PRINT(vstr, vlen); \ + free(vstr); \ +} while (0) ++#endif /* V64TYPE */ + + /* Actually print. */ + if (vwidth == 0) { + if (vprec == 0) { + /* First element. */ -+ VPRINT(vcnt, 0); ++ VPRINT(vtype, 0); + for (i = 1; i < vcnt; i++) { + /* Separator. 
*/ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i); ++ VPRINT(vtype, i); + } + } else { + /* First element. */ -+ VPRINT(vcnt, 0, prec); ++ VPRINT(vtype, 0, prec); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, prec); ++ VPRINT(vtype, i, prec); + } + } + } else { + if (vprec == 0) { + /* First element. */ -+ VPRINT(vcnt, 0, width); ++ VPRINT(vtype, 0, width); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, width); ++ VPRINT(vtype, i, width); + } + } else { + /* First element. */ -+ VPRINT(vcnt, 0, width, prec); ++ VPRINT(vtype, 0, width, prec); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, width, prec); ++ VPRINT(vtype, i, width, prec); + } + } + } @@ -695,67 +761,79 @@ + + continue; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ /* * All reasonable formats wind up here. At this point, `cp' * points to a string which (if not flags&LADJUST) should be -@@ -1401,6 +1734,11 @@ +@@ -1401,6 +1816,11 @@ if (flags & LONGINT) ADDTYPE(T_WINT); else -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDTYPE(T_INT); break; case 'D': -@@ -1418,6 +1756,11 @@ +@@ -1408,6 +1828,11 @@ + /*FALLTHROUGH*/ + case 'd': + case 'i': ++#ifdef VECTORS ++ if (flags & VECTOR) ++ ADDTYPE(T_VECTOR); ++ else ++#endif /* VECTORS */ + ADDSARG(); + break; + #ifndef NO_FLOATING_POINT +@@ -1418,6 +1843,11 @@ case 'f': case 'g': case 'G': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & LONGDBL) ADDTYPE(T_LONG_DOUBLE); else -@@ -1446,9 +1789,19 @@ +@@ -1446,9 +1876,19 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'o': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDUARG(); break; case 'p': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDTYPE(TP_VOID); break; case 'S': -@@ -1466,6 +1819,11 @@ +@@ -1466,6 +1906,11 @@ case 'u': case 'X': case 'x': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDUARG(); break; default: /* "%?" prints ?, unless ? 
is NUL */ -@@ -1532,7 +1890,7 @@ +@@ -1532,7 +1977,7 @@ (*argtable) [n].sizearg = va_arg (ap, size_t); break; case TP_SIZET: @@ -764,16 +842,15 @@ break; case T_INTMAXT: (*argtable) [n].intmaxarg = va_arg (ap, intmax_t); -@@ -1551,6 +1909,12 @@ +@@ -1551,6 +1996,11 @@ (*argtable) [n].longdoublearg = va_arg (ap, long double); break; #endif -+#ifdef ALTIVEC ++#ifdef VECTORS + case T_VECTOR: -+ if (hasAltivec) -+ ap = getvec( &((*argtable) [n]), NULL, 0, ap ); ++ (*argtable) [n].vectorarg = va_arg (ap, VECTORTYPE); + break; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ case TP_CHAR: (*argtable) [n].pchararg = va_arg (ap, char *); break; diff --git a/stdio/Makefile.inc b/stdio/Makefile.inc index 548c169..0e6892a 100644 --- a/stdio/Makefile.inc +++ b/stdio/Makefile.inc @@ -29,11 +29,7 @@ LDBLSRCS += asprintf.c fprintf.c fscanf.c fwprintf.c fwscanf.c printf.c \ vswscanf.c vwprintf.c vwscanf.c wprintf.c wscanf.c .for _src in vfprintf-fbsd.c vfwprintf-fbsd.c -CFLAGS-${_src} += -fshort-enums -# add altivec options on per file basis, since it now disables inlining -.if (${MACHINE_ARCH} == ppc) || (${MACHINE_ARCH} == ppc64) -CFLAGS-${_src} += -faltivec -DALTIVEC -.endif +CFLAGS-${_src} += -fshort-enums -DVECTORS .endfor UNIX03SRCS+= freopen.c fwrite.c diff --git a/sys/Makefile.inc b/sys/Makefile.inc index b4aa2ae..9e6510b 100644 --- a/sys/Makefile.inc +++ b/sys/Makefile.inc @@ -34,6 +34,10 @@ MISRCS+= errno.c gettimeofday.c sigcatch.c sigsuspend.c \ CFLAGS-${_src} += -D__APPLE_PR3375657_HACK__ .endfor +.include "Makefile.obsd_begin" +OBSDMISRCS= stack_protector.c +.include "Makefile.obsd_end" + UNIX03SRCS += mmap.c mprotect.c msgctl.c msync.c munmap.c semctl.c shmctl.c # Add machine dependent asm sources: diff --git a/sys/OpenBSD/stack_protector.c b/sys/OpenBSD/stack_protector.c new file mode 100644 index 0000000..368e365 --- /dev/null +++ b/sys/OpenBSD/stack_protector.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2002 Hiroaki Etoh, Federico G. Schwindt, and Miodrag Vallat. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#if defined(LIBC_SCCS) && !defined(list) +static char rcsid[] = "$OpenBSD: stack_protector.c,v 1.3 2002/12/10 08:53:42 etoh Exp $"; +#endif + +#include +#include +#include + +long __guard[8] = {0, 0, 0, 0, 0, 0, 0, 0}; +static void __guard_setup(void) __attribute__ ((constructor)); +void __stack_smash_handler(char func[], int damaged __attribute__((unused))); + +static void +__guard_setup(void) +{ + int fd; + if (__guard[0]!=0) return; + fd = open ("/dev/urandom", 0); + if (fd != -1) { + ssize_t size = read (fd, (char*)&__guard, sizeof(__guard)); + close (fd) ; + if (size == sizeof(__guard)) return; + } + /* If a random generator can't be used, the protector switches the guard + to the "terminator canary" */ + ((char*)__guard)[0] = 0; ((char*)__guard)[1] = 0; + ((char*)__guard)[2] = '\n'; ((char*)__guard)[3] = 255; +} + +void +__stack_smash_handler(char func[], int damaged) +{ + const char message[] = "stack overflow in function %s"; + struct sigaction sa; + + /* this may fail on a chroot jail, though luck */ + syslog(LOG_CRIT, message, func); + + bzero(&sa, sizeof(struct sigaction)); + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = SIG_DFL; + sigaction(SIGABRT, &sa, NULL); + + kill(getpid(), SIGABRT); + + _exit(127); +} diff --git a/sys/gettimeofday.c b/sys/gettimeofday.c index c328715..959bbf5 100644 --- a/sys/gettimeofday.c +++ b/sys/gettimeofday.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -40,32 +40,24 @@ int gettimeofday (struct timeval *tp, struct timezone *tzp) { + extern int __gettimeofday(struct timeval *, struct timezone *); + extern int __commpage_gettimeofday(struct timeval *); static int validtz = 0; static struct timezone cached_tz = {0}; - struct timeval localtv; + struct timeval atv; if (tp == NULL) { if (tzp == NULL) return (0); - tp = &localtv; + tp = &atv; } -#if defined(__ppc__) || defined(__ppc64__) - { - extern int __ppc_gettimeofday(struct timeval *, struct timezone *); - extern int __commpage_gettimeofday(struct timeval *); - - if (__commpage_gettimeofday(tp)) { /* first try commpage */ - if (__ppc_gettimeofday(tp,tzp)) { /* if it fails, use syscall */ - return (-1); - } - } - } -#else - if (syscall (SYS_gettimeofday, tp, tzp) < 0) { - return (-1); - } -#endif + if (__commpage_gettimeofday(tp)) { /* first try commpage */ + if (__gettimeofday(tp, tzp) < 0) { /* if it fails, use syscall */ + return (-1); + } + } + if (tzp) { if (validtz == 0) { struct tm *localtm = localtime ((time_t *)&tp->tv_sec); diff --git a/sys/sigtramp.c b/sys/sigtramp.c index b387329..e960491 100644 --- a/sys/sigtramp.c +++ b/sys/sigtramp.c @@ -42,10 +42,10 @@ int __in_sigtramp = 0; /* These defn should match the kernel one */ #define UC_TRAD 1 +#define UC_FLAVOR 30 #if defined(__ppc__) || defined(__ppc64__) #define UC_TRAD64 20 #define UC_TRAD64_VEC 25 -#define UC_FLAVOR 30 #define UC_FLAVOR_VEC 35 #define UC_FLAVOR64 40 #define UC_FLAVOR64_VEC 45 @@ -164,11 +164,11 @@ _sigtramp( siginfo_t *sinfo, ucontext_t *uctx ) { -#if defined(__ppc__) || defined(__ppc64__) int ctxstyle = UC_FLAVOR; -#endif +#if defined(__ppc__) || defined(__ppc64__) mcontext_t mctx; mcontext64_t mctx64; +#endif #if defined(__DYNAMIC__) __in_sigtramp++; @@ -176,6 +176,9 @@ _sigtramp( #ifdef __i386__ if (sigstyle == UC_TRAD) sa_handler(sig); + else { + sa_sigaction(sig, sinfo, uctx); + } #elif defined(__ppc__) || defined(__ppc64__) if ((sigstyle == 
UC_TRAD) || (sigstyle == UC_TRAD64) || (sigstyle == UC_TRAD64_VEC)) sa_handler(sig); @@ -209,14 +212,8 @@ _sigtramp( #if defined(__DYNAMIC__) __in_sigtramp--; #endif -#if defined(__ppc__) || defined(__ppc64__) - { /* sigreturn(uctx, ctxstyle); */ /* syscall (SYS_SIGRETURN, uctx, ctxstyle); */ syscall (184, uctx, ctxstyle); - } -#else - sigreturn(uctx); -#endif /* __ppc__ || __ppc64__ */ } -- 2.45.2
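
Editor's usage note (not part of the patch): the VECTORS code added to vfprintf.c and vfwprintf.c above implements the '%v' family of conversions. The sketch below shows how they might be exercised; it is illustrative only, and assumes a Libc built with -DVECTORS as stdio/Makefile.inc now arranges, a compiler that accepts __attribute__((vector_size(16))) (the same construct the patch uses for VECTORTYPE), and that the elided vsep-default code falls back to a space separator. The typedef names and sample values are local to the example.

/*
 * %vd formats the 16 bytes as char-sized elements ("%hhd" each); a
 * ',', ';', ':' or '_' flag selects the separator; the 'h', 'l' and
 * 'll' length modifiers reinterpret the same 16 bytes as 8 shorts,
 * 4 ints or 2 long longs (the last only when V64TYPE is defined).
 */
#include <stdio.h>

typedef unsigned char v16qi __attribute__((vector_size(16)));
typedef float         v4sf  __attribute__((vector_size(16)));

int
main(void)
{
	v16qi bytes = { 0, 1, 2, 3, 4, 5, 6, 7,
			8, 9, 10, 11, 12, 13, 14, 15 };
	v4sf floats = { 0.5f, 1.5f, 2.5f, 3.5f };

	printf("%vd\n", bytes);		/* 16 elements, space-separated */
	printf("%,vd\n", bytes);	/* same elements, comma-separated */
	printf("%vld\n", bytes);	/* reinterpreted as 4 int-sized elements */
	printf("%.2vf\n", floats);	/* 4 floats, precision applied per element */
	return (0);
}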
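
A second editor's sketch, for sys/OpenBSD/stack_protector.c: it assumes a compiler with the ProPolice flavour of -fstack-protector, the one that references the __guard canary initialized above and calls __stack_smash_handler() from the checking epilogue (later GCC releases use __stack_chk_guard and __stack_chk_fail instead, which this file does not provide). The helper name and overflow string are illustrative, and the overrun is deliberate.

#include <string.h>

/*
 * Deliberately overrun an 8-byte buffer so that the canary the
 * compiler places between it and the saved frame is damaged.
 */
static void
smash(const char *src)
{
	char buf[8];

	strcpy(buf, src);	/* writes far past the end of buf */
}

int
main(void)
{
	/*
	 * Build with: cc -fstack-protector smash.c
	 * On return from smash(), the checking epilogue sees a changed
	 * canary and calls __stack_smash_handler("smash", ...), which,
	 * per the handler added in this patch, syslog()s at LOG_CRIT,
	 * resets SIGABRT to SIG_DFL, sends the process SIGABRT, and
	 * falls back to _exit(127).
	 */
	smash("this string is much longer than eight bytes");
	return (0);
}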