From eb1cde05bb040f65c511ae4fa854abf1628afdf2 Mon Sep 17 00:00:00 2001 From: Apple Date: Mon, 20 Feb 2006 23:25:56 +0000 Subject: [PATCH] Libc-391.4.1.tar.gz --- Makefile | 3 + darwin/copyfile.c | 358 +++++--------------- db/hash/FreeBSD/hash_page.c.patch | 40 +-- db/mpool/FreeBSD/mpool.c.patch | 23 -- gdtoa/FreeBSD/_hdtoa.c | 145 +++++++- gdtoa/FreeBSD/_hdtoa.c.patch | 97 +++++- gdtoa/FreeBSD/_ldtoa.c.patch | 12 +- gdtoa/FreeBSD/gdtoa-strtod.c | 18 +- gdtoa/FreeBSD/gdtoa-strtod.c.patch | 26 +- gdtoa/FreeBSD/gdtoa-strtodg.c | 22 +- gdtoa/FreeBSD/gdtoa-strtodg.c.patch | 6 +- gdtoa/FreeBSD/gdtoa_strtopx.c.patch | 12 +- gen/FreeBSD/popen.c.patch | 13 +- gen/FreeBSD/setprogname.c.patch | 30 +- gen/filesec.c | 9 +- gen/stack_logging.c | 16 +- i386/pthreads/pthread_set_self.s | 4 +- i386/string/Makefile.inc | 9 +- i386/string/bcmp.s | 29 ++ i386/string/memcmp.s | 185 ++++++++++ i386/string/memset.s | 241 +++++++++++++ i386/string/strcmp.s | 194 +++++++---- i386/string/strcpy.s | 151 +++++++++ i386/string/strlen.s | 78 +++++ i386/string/strncmp.s | 191 +++++++++++ i386/string/strncpy.s | 190 +++++++++++ i386/sys/Makefile.inc | 5 + i386/sys/OSAtomic.s | 10 + i386/sys/SYS.h | 30 +- i386/sys/__sysenter_trap.s | 30 ++ i386/sys/_setjmp.s | 3 +- i386/sys/cerror.s | 8 +- i386/sys/commpage.c | 23 ++ i386/sys/fork.s | 74 ++-- i386/sys/getpid.s | 1 + i386/sys/i386_get_ldt.s | 35 ++ i386/sys/i386_gettimeofday.s | 44 +++ i386/sys/i386_set_ldt.s | 35 ++ i386/sys/lseek.s | 2 +- i386/sys/pipe.s | 2 +- i386/sys/setjmp.s | 92 ++--- i386/sys/sigaltstack.s | 2 +- i386/sys/sigreturn.s | 2 +- include/sys/acl.h | 4 +- mach/panic.c | 3 + posix1e/acl_translate.c | 87 ++++- ppc/sys/ppc_gettimeofday.s | 7 +- pthreads/lock.s | 2 - stdio/FreeBSD/printf.3.patch | 22 +- stdio/FreeBSD/vfprintf.c.patch | 484 ++++++++++++++------------ stdio/FreeBSD/vfwprintf.c.patch | 505 ++++++++++++++++------------ stdio/Makefile.inc | 6 +- stdlib/FreeBSD/grantpt.c.patch | 96 +----- sys/Makefile.inc | 4 + sys/OpenBSD/stack_protector.c | 75 +++++ sys/gettimeofday.c | 30 +- sys/sigtramp.c | 15 +- 57 files changed, 2640 insertions(+), 1200 deletions(-) delete mode 100644 db/mpool/FreeBSD/mpool.c.patch create mode 100644 i386/string/bcmp.s create mode 100644 i386/string/memcmp.s create mode 100644 i386/string/memset.s create mode 100644 i386/string/strcpy.s create mode 100644 i386/string/strlen.s create mode 100644 i386/string/strncmp.s create mode 100644 i386/string/strncpy.s create mode 100644 i386/sys/__sysenter_trap.s create mode 100644 i386/sys/commpage.c create mode 100644 i386/sys/i386_get_ldt.s create mode 100644 i386/sys/i386_gettimeofday.s create mode 100644 i386/sys/i386_set_ldt.s create mode 100644 sys/OpenBSD/stack_protector.c diff --git a/Makefile b/Makefile index f861530..b190072 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,9 @@ CFLAGS += -D__DARWIN_UNIX03=1 .else CFLAGS += -D__DARWIN_UNIX03=0 .endif +.if (${MACHINE_ARCH} == i386) +CFLAGS += -march=prescott -msse3 +.endif CFLAGS += -D__LIBC__ -DNOID -I${.CURDIR}/include .ifdef ALTLIBCHEADERS INCLUDEDIR = ${ALTLIBCHEADERS} diff --git a/darwin/copyfile.c b/darwin/copyfile.c index ac3a3b7..3e5fc22 100644 --- a/darwin/copyfile.c +++ b/darwin/copyfile.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -124,6 +123,7 @@ int copyfile(const char *src, const char *dst, copyfile_state_t state, copyfile_ } copyfile_debug(1, "debug value set to: %d\n", s->debug); } + if (COPYFILE_CHECK & flags) return copyfile_check(s); @@ -143,9 +143,6 @@ int 
copyfile(const char *src, const char *dst, copyfile_state_t state, copyfile_ } } else if (COPYFILE_UNPACK & flags) { - if (!(COPYFILE_STAT & flags || COPYFILE_ACL & flags)) - fix_perms = !copyfile_fix_perms(s, &original_fsec, 1); - if (copyfile_unpack(s) < 0) ret = -1; } else @@ -199,12 +196,8 @@ exit: filesec_free(original_fsec); - if (state == NULL) { - if (copyfile_free(s) < 0) { - ret = -1; - } - } - + if (state == NULL) + ret -= copyfile_free(s); return ret; } @@ -245,10 +238,10 @@ int copyfile_free(copyfile_state_t s) static int copyfile_close(copyfile_state_t s) { - if (s->src_fd >= 0) + if (s->src_fd != -2) close(s->src_fd); - if (s->dst_fd >= 0 && close(s->dst_fd)) + if (s->dst_fd != -2 && close(s->dst_fd)) { copyfile_warn("close on %s", s->dst); return -1; @@ -265,7 +258,7 @@ static int copyfile_fix_perms(copyfile_state_t s, filesec_t *fsec, int on) if (on) { - if(fstatx_np(s->dst_fd, &sb, *fsec)) + if(statx_np(s->dst, &sb, *fsec)) goto error; tmp_fsec = filesec_dup(*fsec); @@ -395,33 +388,12 @@ static int copyfile_open(copyfile_state_t s) return -1; } - while((s->dst_fd = open(s->dst, oflags, s->sb.st_mode | S_IWUSR)) < 0) + while((s->dst_fd = openx_np(s->dst, oflags, s->fsec)) < 0) { - /* - * We set S_IWUSR because fsetxattr does not -- at the time this comment - * was written -- allow one to set an extended attribute on a file descriptor - * for a read-only file, even if the file descriptor is opened for writing. - * This will only matter if the file does not already exist. - */ - switch(errno) + if (EEXIST == errno) { - case EEXIST: - copyfile_debug(3, "open failed, retrying (%s)", s->dst); - if (s->flags & COPYFILE_EXCL) - break; - oflags = oflags & ~O_CREAT; - if (s->flags & (COPYFILE_PACK | COPYFILE_DATA)) - { - copyfile_debug(4, "truncating existing file (%s)", s->dst); - oflags |= O_TRUNC; - } - continue; - case EACCES: - if(chmod(s->dst, (s->sb.st_mode | S_IWUSR) & ~S_IFMT) == 0) - continue; - else { - break; - } + oflags = oflags & ~O_CREAT; + continue; } copyfile_warn("open on %s", s->dst); return -1; @@ -558,17 +530,14 @@ static int copyfile_security(copyfile_state_t s) } } - if (!filesec_set_property(fsec_dst, FILESEC_ACL, &acl_dst)) + if (!filesec_set_property(s->fsec, FILESEC_ACL, &acl_dst)) { copyfile_debug(1, "altered acl"); } } no_acl: - if (fchmodx_np(s->dst_fd, fsec_dst) < 0 && errno != ENOTSUP) - { + if (fchmodx_np(s->dst_fd, s->fsec) < 0 && errno != ENOTSUP) copyfile_warn("setting security information: %s", s->dst); - ret = -1; - } cleanup: filesec_free(fsec_dst); @@ -866,7 +835,7 @@ typedef struct apple_double_entry u_int32_t type; /* entry type: see list, 0 invalid */ u_int32_t offset; /* entry data offset from the beginning of the file. */ u_int32_t length; /* entry data length in bytes. 
*/ -} apple_double_entry_t; +} __attribute__((packed)) apple_double_entry_t; typedef struct apple_double_header @@ -878,7 +847,7 @@ typedef struct apple_double_header apple_double_entry_t entries[2]; /* 'finfo' & 'rsrc' always exist */ u_int8_t finfo[FINDERINFOSIZE]; /* Must start with Finder Info (32 bytes) */ u_int8_t pad[2]; /* get better alignment inside attr_header */ -} apple_double_header_t; +} __attribute__((packed)) apple_double_header_t; /* Entries are aligned on 4 byte boundaries */ @@ -889,7 +858,7 @@ typedef struct attr_entry u_int16_t flags; u_int8_t namelen; /* length of name including NULL termination char */ u_int8_t name[1]; /* NULL-terminated UTF-8 name (up to 128 bytes max) */ -} attr_entry_t; +} __attribute__((packed)) attr_entry_t; /* Header + entries must fit into 64K */ @@ -904,7 +873,7 @@ typedef struct attr_header u_int32_t reserved[3]; u_int16_t flags; u_int16_t num_attrs; -} attr_header_t; +} __attribute__((packed)) attr_header_t; #pragma options align=reset @@ -979,12 +948,12 @@ swap_attrhdr(attr_header_t *ah) #endif } -static const u_int32_t emptyfinfo[8] = {0}; +static u_int32_t emptyfinfo[8] = {0}; static int copyfile_unpack(copyfile_state_t s) { - ssize_t bytes; - void * buffer, * endptr; + int bytes; + void * buffer; apple_double_header_t *adhdr; size_t hdrsize; int error = 0; @@ -995,13 +964,6 @@ static int copyfile_unpack(copyfile_state_t s) hdrsize = ATTR_MAX_HDR_SIZE; buffer = calloc(1, hdrsize); - if (buffer == NULL) { - copyfile_debug(1, "copyfile_unpack: calloc(1, %u) returned NULL", hdrsize); - error = -1; - goto exit; - } else - endptr = (char*)buffer + hdrsize; - bytes = pread(s->src_fd, buffer, hdrsize, 0); if (bytes < 0) @@ -1051,12 +1013,6 @@ static int copyfile_unpack(copyfile_state_t s) int count; int i; - if (hdrsize < sizeof(attr_header_t)) { - copyfile_warn("bad attribute header: %u < %u", hdrsize, sizeof(attr_header_t)); - error = -1; - goto exit; - } - attrhdr = (attr_header_t *)buffer; swap_attrhdr(attrhdr); if (attrhdr->magic != ATTR_HDR_MAGIC) @@ -1068,104 +1024,10 @@ static int copyfile_unpack(copyfile_state_t s) } count = attrhdr->num_attrs; entry = (attr_entry_t *)&attrhdr[1]; - for (i = 0; i < count; i++) { void * dataptr; - /* - * First we do some simple sanity checking. - * +) See if entry is within the buffer's range; - * - * +) Check the attribute name length; if it's longer than the - * maximum, we truncate it down. (We could error out as well; - * I'm not sure which is the better way to go here.) - * - * +) If, given the name length, it goes beyond the end of - * the buffer, error out. - * - * +) If the last byte isn't a NUL, make it a NUL. (Since we - * truncated the name length above, we truncate the name here.) - * - * +) If entry->offset is so large that it causes dataptr to - * go beyond the end of the buffer -- or, worse, so large that - * it wraps around! -- we error out. - * - * +) If entry->length would cause the entry to go beyond the - * end of the buffer (or, worse, wrap around to before it), - * *or* if the length is larger than the hdrsize, we error out. - * (An explanation of that: what we're checking for there is - * the small range of values such that offset+length would cause - * it to go beyond endptr, and then wrap around past buffer. We - * care about this because we are passing entry->length down to - * fgetxattr() below, and an erroneously large value could cause - * problems there. By making sure that it's less than hdrsize, - * which has already been sanity-checked above, we're safe. 
- * That may mean that the check against < buffer is unnecessary.) - */ - if ((void*)entry >= endptr || (void*)entry < buffer) { - if (COPYFILE_VERBOSE & s->flags) - copyfile_warn("Incomplete or corrupt attribute entry"); - error = -1; - goto exit; - } - - if (((void*)entry + sizeof(*entry)) > endptr) { - if (COPYFILE_VERBOSE & s->flags) - copyfile_warn("Incomplete or corrupt attribute entry"); - error = -1; - goto exit; - } - - if (entry->namelen < 2) { - if (COPYFILE_VERBOSE & s->flags) - copyfile_warn("Corrupt attribute entry (only %d bytes)", entry->namelen); - error = -1; - goto exit; - } - if (entry->namelen > ATTR_MAX_NAME_LEN + 1) { - if (COPYFILE_VERBOSE & s->flags) - copyfile_warn("Corrupt attribute entry (name length is %d bytes)", entry->namelen); - error = -1; - goto exit; - } - if ((void*)(entry->name + entry->namelen) >= endptr) { - if (COPYFILE_VERBOSE & s->flags) - copyfile_warn("Incomplete or corrupt attribute entry"); - error = -1; - goto exit; - } - - /* Because namelen includes the NUL, we check one byte back */ - if (entry->name[entry->namelen-1] != 0) { - if (COPYFILE_VERBOSE & s->flags) - copyfile_warn("Corrupt attribute entry (name is not NUL-terminated)"); - error = -1; - goto exit; - } - - copyfile_debug(3, "extracting \"%s\" (%d bytes) at offset %u", - entry->name, entry->length, entry->offset); - - dataptr = (char *)attrhdr + entry->offset; - - if (dataptr >= endptr || dataptr < buffer) { - copyfile_debug(1, "Entry %d overflows: offset = %u", entry->offset); - error = -1; - goto exit; - } - if ((dataptr + entry->length) > endptr || - ((dataptr + entry->length) < buffer) || - (entry->length > hdrsize)) { - if (COPYFILE_VERBOSE & s->flags) - copyfile_warn("Incomplete or corrupt attribute entry"); - copyfile_debug(1, "Entry %d length overflows: dataptr = %u, offset = %u, length = %u, buffer = %u, endptr = %u", - i, dataptr, entry->offset, entry->length, buffer, endptr); - error = -1; - goto exit; - } - - if (COPYFILE_ACL & s->flags && strcmp((char*)entry->name, XATTR_SECURITY_NAME) == 0) copyfile_debug(2, "extracting \"%s\" (%d bytes)", entry->name, entry->length); dataptr = (char *)attrhdr + entry->offset; @@ -1173,33 +1035,31 @@ static int copyfile_unpack(copyfile_state_t s) if (COPYFILE_ACL & s->flags && strncmp(entry->name, XATTR_SECURITY_NAME, strlen(XATTR_SECURITY_NAME)) == 0) { acl_t acl; - char *tacl = strdup(dataptr); - if (tacl) + if ((acl = acl_from_text(dataptr)) != NULL) { - tacl[entry->length] = 0; /* Ensure it is NUL-terminated */ - if (acl = acl_from_text(tacl)) + if (filesec_set_property(s->fsec, FILESEC_ACL, &acl) < 0) { - filesec_t tfsec = filesec_init(); - if (tfsec) - { - if (filesec_set_property(tfsec, FILESEC_ACL, &acl) < 0) + acl_t acl; + if ((acl = acl_from_text(dataptr)) != NULL) { - copyfile_debug(1, "setting acl"); - error = -1; + if (filesec_set_property(s->fsec, FILESEC_ACL, &acl) < 0) + { + copyfile_debug(1, "setting acl"); + } + else if (fchmodx_np(s->dst_fd, s->fsec) < 0 && errno != ENOTSUP) + copyfile_warn("setting security information"); + acl_free(acl); } - else if (fchmodx_np(s->dst_fd, tfsec) < 0 && errno != ENOTSUP) - { - error = -1; - copyfile_debug(1, "applying acl to file"); - } - filesec_free(tfsec); - } - acl_free(acl); + } else + if (COPYFILE_XATTR & s->flags && (fsetxattr(s->dst_fd, entry->name, dataptr, entry->length, 0, 0))) { + if (COPYFILE_VERBOSE & s->flags) + copyfile_warn("error %d setting attribute %s", error, entry->name); + goto exit; } - free(tacl); + else if (fchmodx_np(s->dst_fd, s->fsec) < 0 && errno != 
ENOTSUP) + copyfile_warn("setting security information"); + acl_free(acl); } - if (error) - goto exit; } else if (COPYFILE_XATTR & s->flags && (fsetxattr(s->dst_fd, entry->name, dataptr, entry->length, 0, 0))) { if (COPYFILE_VERBOSE & s->flags) @@ -1213,11 +1073,6 @@ static int copyfile_unpack(copyfile_state_t s) /* * Extract the Finder Info. */ - if (adhdr->entries[0].offset > (hdrsize - sizeof(emptyfinfo))) { - error = -1; - goto exit; - } - if (bcmp((u_int8_t*)buffer + adhdr->entries[0].offset, emptyfinfo, sizeof(emptyfinfo)) != 0) { copyfile_debug(1, " extracting \"%s\" (32 bytes)", XATTR_FINDERINFO_NAME); @@ -1232,30 +1087,14 @@ static int copyfile_unpack(copyfile_state_t s) if (adhdr->entries[1].type == AD_RESOURCE && adhdr->entries[1].length > 0) { - void * rsrcforkdata = NULL; + void * rsrcforkdata; size_t length; off_t offset; - struct stat sb; - struct timeval tval[2]; length = adhdr->entries[1].length; offset = adhdr->entries[1].offset; rsrcforkdata = malloc(length); - if (rsrcforkdata == NULL) { - copyfile_debug(1, "could not allocate %u bytes for rsrcforkdata", - length); - error = -1; - goto bad; - } - - if (fstat(s->dst_fd, &sb) < 0) - { - copyfile_debug(1, "couldn't stat destination file"); - error = -1; - goto exit; - } - bytes = pread(s->src_fd, rsrcforkdata, length, offset); if (bytes < length) { @@ -1270,33 +1109,21 @@ static int copyfile_unpack(copyfile_state_t s) (int)bytes, (int)length); } error = -1; - goto bad; + goto exit; } error = fsetxattr(s->dst_fd, XATTR_RESOURCEFORK_NAME, rsrcforkdata, bytes, 0, 0); if (error) { copyfile_debug(1, "error %d setting resource fork attribute", error); error = -1; - goto bad; + goto exit; } - copyfile_debug(1, "extracting \"%s\" (%d bytes)", + copyfile_debug(1, "extracting \"%s\" (%d bytes)", XATTR_RESOURCEFORK_NAME, (int)length); - - tval[0].tv_sec = sb.st_atime; - tval[1].tv_sec = sb.st_mtime; - tval[0].tv_usec = tval[1].tv_usec = 0; - - if (futimes(s->dst_fd, tval)) - { - copyfile_warn("cannot set time on destination file %s", s->dst ? s->dst : ""); - } - -bad: - if (rsrcforkdata) - free(rsrcforkdata); + free(rsrcforkdata); } exit: - if (buffer) free(buffer); + free(buffer); return error; } @@ -1371,11 +1198,11 @@ static int copyfile_pack_rsrcfork(copyfile_state_t s, attr_header_t *filehdr) static int copyfile_pack(copyfile_state_t s) { - char *attrnamebuf = NULL, *endnamebuf; - void *databuf = NULL; - attr_header_t *filehdr, *endfilehdr; + char *attrnamebuf; + void *databuf; + attr_header_t *filehdr; attr_entry_t *entry; - ssize_t listsize = 0; + ssize_t listsize; char *nameptr; int namelen; int entrylen; @@ -1385,41 +1212,28 @@ static int copyfile_pack(copyfile_state_t s) int error = 0; filehdr = (attr_header_t *) calloc(1, ATTR_MAX_SIZE); - if (filehdr == NULL) { - error = -1; - goto exit; - } else { - endfilehdr = ((void*)filehdr) + ATTR_MAX_SIZE; - } - attrnamebuf = calloc(1, ATTR_MAX_HDR_SIZE); - if (attrnamebuf == NULL) { - error = -1; - goto exit; - } else { - endnamebuf = ((char*)attrnamebuf) + ATTR_MAX_HDR_SIZE; - } /* * Fill in the Apple Double Header defaults. 
*/ - filehdr->appledouble.magic = SWAP32 (ADH_MAGIC); - filehdr->appledouble.version = SWAP32 (ADH_VERSION); - filehdr->appledouble.numEntries = SWAP16 (2); - filehdr->appledouble.entries[0].type = SWAP32 (AD_FINDERINFO); - filehdr->appledouble.entries[0].offset = SWAP32 (offsetof(apple_double_header_t, finfo)); - filehdr->appledouble.entries[0].length = SWAP32 (FINDERINFOSIZE); - filehdr->appledouble.entries[1].type = SWAP32 (AD_RESOURCE); - filehdr->appledouble.entries[1].offset = SWAP32 (offsetof(apple_double_header_t, pad)); + filehdr->appledouble.magic = ADH_MAGIC; + filehdr->appledouble.version = ADH_VERSION; + filehdr->appledouble.numEntries = 2; + filehdr->appledouble.entries[0].type = AD_FINDERINFO; + filehdr->appledouble.entries[0].offset = offsetof(apple_double_header_t, finfo); + filehdr->appledouble.entries[0].length = FINDERINFOSIZE; + filehdr->appledouble.entries[1].type = AD_RESOURCE; + filehdr->appledouble.entries[1].offset = offsetof(apple_double_header_t, pad); filehdr->appledouble.entries[1].length = 0; bcopy(ADH_MACOSX, filehdr->appledouble.filler, sizeof(filehdr->appledouble.filler)); /* * Fill in the initial Attribute Header. */ - filehdr->magic = SWAP32 (ATTR_HDR_MAGIC); - filehdr->debug_tag = SWAP32 (s->sb.st_ino); - filehdr->data_start = SWAP32 (sizeof(attr_header_t)); + filehdr->magic = ATTR_HDR_MAGIC; + filehdr->debug_tag = s->sb.st_ino; + filehdr->data_start = sizeof(attr_header_t); /* * Collect the attribute names. @@ -1443,36 +1257,26 @@ static int copyfile_pack(copyfile_state_t s) if (COPYFILE_XATTR & s->flags) { - ssize_t left = ATTR_MAX_HDR_SIZE - offset; - if ((listsize = flistxattr(s->src_fd, attrnamebuf + offset, left, 0)) <= 0) + if ((listsize = flistxattr(s->src_fd, attrnamebuf + offset, ATTR_MAX_HDR_SIZE, 0)) <= 0) { copyfile_debug(1, "no extended attributes found (%d)", errno); } - if (listsize > left) + if (listsize > ATTR_MAX_HDR_SIZE) { copyfile_debug(1, "extended attribute list too long"); listsize = ATTR_MAX_HDR_SIZE; } listsize += offset; - endnamebuf = attrnamebuf + listsize; - if (endnamebuf > (attrnamebuf + ATTR_MAX_HDR_SIZE)) { - error = -1; - goto exit; - } - for (nameptr = attrnamebuf; nameptr XATTR_MAXNAMELEN + 1) { - namelen = XATTR_MAXNAMELEN + 1; - } entry->namelen = namelen; entry->flags = 0; bcopy(nameptr, &entry->name[0], namelen); @@ -1480,12 +1284,7 @@ static int copyfile_pack(copyfile_state_t s) entrylen = ATTR_ENTRY_LENGTH(namelen); entry = (attr_entry_t *)(((char *)entry) + entrylen); - - if ((void*)entry > (void*)endfilehdr) { - error = -1; - goto exit; - } - + /* Update the attributes header. */ filehdr->num_attrs++; filehdr->data_start += entrylen; @@ -1499,13 +1298,14 @@ static int copyfile_pack(copyfile_state_t s) for (nameptr = attrnamebuf; nameptr < attrnamebuf + listsize; nameptr += namelen + 1) { + nameptr = nameptr; namelen = strlen(nameptr); - if (strcmp(nameptr, XATTR_SECURITY_NAME) == 0) + if (strncmp(nameptr, XATTR_SECURITY_NAME, strlen(XATTR_SECURITY_NAME)) == 0) copyfile_pack_acl(s, &databuf, &datasize); else /* Check for Finder Info. */ - if (strcmp(nameptr, XATTR_FINDERINFO_NAME) == 0) + if (strncmp(nameptr, XATTR_FINDERINFO_NAME, strlen(XATTR_FINDERINFO_NAME)) == 0) { datasize = fgetxattr(s->src_fd, nameptr, (u_int8_t*)filehdr + filehdr->appledouble.entries[0].offset, 32, 0, 0); if (datasize < 0) @@ -1525,7 +1325,7 @@ static int copyfile_pack(copyfile_state_t s) continue; /* finder info doesn't have an attribute entry */ } else /* Check for Resource Fork. 
*/ - if (strcmp(nameptr, XATTR_RESOURCEFORK_NAME) == 0) + if (strncmp(nameptr, XATTR_RESOURCEFORK_NAME, strlen(XATTR_RESOURCEFORK_NAME)) == 0) { hasrsrcfork = 1; continue; @@ -1548,10 +1348,6 @@ static int copyfile_pack(copyfile_state_t s) goto next; } databuf = malloc(datasize); - if (databuf == NULL) { - error = -1; - continue; - } datasize = fgetxattr(s->src_fd, nameptr, databuf, datasize, 0, 0); } @@ -1565,12 +1361,7 @@ static int copyfile_pack(copyfile_state_t s) * the case when there are lots of attributes or one of * the attributes is very large. */ - if (entry->offset > ATTR_MAX_SIZE || - (entry->offset + datasize > ATTR_MAX_SIZE)) { - error = -1; - } else { - bcopy(databuf, (char*)filehdr + entry->offset, datasize); - } + bcopy(databuf, (char*)filehdr + entry->offset, datasize); free(databuf); copyfile_debug(1, "copied %ld bytes of \"%s\" data @ offset 0x%08x", datasize, nameptr, entry->offset); @@ -1589,7 +1380,7 @@ next: filehdr->appledouble.entries[0].length = filehdr->appledouble.entries[1].offset - filehdr->appledouble.entries[0].offset; - filehdr->total_size = SWAP32 (filehdr->appledouble.entries[1].offset); + filehdr->total_size = filehdr->appledouble.entries[1].offset; } /* Copy Resource Fork. */ @@ -1599,6 +1390,9 @@ next: /* Write the header to disk. */ datasize = filehdr->appledouble.entries[1].offset; + swap_adhdr(&filehdr->appledouble); + swap_attrhdr(filehdr); + if (pwrite(s->dst_fd, filehdr, datasize, 0) != datasize) { if (COPYFILE_VERBOSE & s->flags) @@ -1607,8 +1401,8 @@ next: goto exit; } exit: - if (filehdr) free(filehdr); - if (attrnamebuf) free(attrnamebuf); + free(filehdr); + free(attrnamebuf); if (error) return error; diff --git a/db/hash/FreeBSD/hash_page.c.patch b/db/hash/FreeBSD/hash_page.c.patch index 64d1390..807837c 100644 --- a/db/hash/FreeBSD/hash_page.c.patch +++ b/db/hash/FreeBSD/hash_page.c.patch @@ -1,5 +1,5 @@ ---- hash_page.c.orig 2006-04-22 23:04:55.000000000 -0700 -+++ hash_page.c 2006-04-23 00:23:46.000000000 -0700 +--- hash_page.c.orig Thu Mar 21 14:46:26 2002 ++++ hash_page.c Sat Oct 18 18:31:10 2003 @@ -74,7 +74,7 @@ #include #include "hash.h" @@ -9,39 +9,3 @@ static u_int32_t *fetch_bitmap(HTAB *, int); static u_int32_t first_free(u_int32_t); -@@ -586,7 +586,7 @@ - int is_bucket, is_bitmap; - { - int fd, page, size; -- int wsize; -+ int wsize, max; - - size = hashp->BSIZE; - if ((hashp->fp == -1) && open_temp(hashp)) -@@ -595,7 +595,6 @@ - - if (hashp->LORDER != BYTE_ORDER) { - int i; -- int max; - - if (is_bitmap) { - max = hashp->BSIZE >> 2; /* divide by 4 */ -@@ -619,6 +618,18 @@ - errno = EFTYPE; - return (-1); - } -+ /* 4485533 - reswap the in-memory copy */ -+ if (hashp->LORDER != BYTE_ORDER) { -+ int i; -+ -+ if (is_bitmap) { -+ for (i = 0; i < max; i++) -+ M_32_SWAP(((int *)p)[i]); -+ } else { -+ for (i = 0; i <= max; i++) -+ M_16_SWAP(((u_int16_t *)p)[i]); -+ } -+ } - return (0); - } - diff --git a/db/mpool/FreeBSD/mpool.c.patch b/db/mpool/FreeBSD/mpool.c.patch deleted file mode 100644 index d1de995..0000000 --- a/db/mpool/FreeBSD/mpool.c.patch +++ /dev/null @@ -1,23 +0,0 @@ ---- mpool.c.orig 2006-12-13 22:19:43.000000000 -0800 -+++ mpool.c 2006-12-13 22:27:26.000000000 -0800 -@@ -294,10 +294,16 @@ - BKT *bp; - - /* Walk the lru chain, flushing any dirty pages to disk. 
*/ -- TAILQ_FOREACH(bp, &mp->lqh, q) -- if (bp->flags & MPOOL_DIRTY && -- mpool_write(mp, bp) == RET_ERROR) -- return (RET_ERROR); -+ TAILQ_FOREACH(bp, &mp->lqh, q) { -+ if (bp->flags & MPOOL_DIRTY) -+ if (mpool_write(mp, bp) == RET_ERROR) { -+ return (RET_ERROR); -+ } else { -+ /* 4892134: Re-run through the user's pgin filter. */ -+ if (mp->pgin != NULL) -+ (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); -+ } -+ } - - /* Sync the file descriptor. */ - return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); diff --git a/gdtoa/FreeBSD/_hdtoa.c b/gdtoa/FreeBSD/_hdtoa.c index 77e99ac..1a85986 100644 --- a/gdtoa/FreeBSD/_hdtoa.c +++ b/gdtoa/FreeBSD/_hdtoa.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2004, 2005 David Schultz + * Copyright (c) 2004 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,11 +25,13 @@ */ #include -__FBSDID("$FreeBSD: src/lib/libc/gdtoa/_hdtoa.c,v 1.3 2005/01/18 18:44:07 das Exp $"); +__FBSDID("$FreeBSD: src/lib/libc/gdtoa/_hdtoa.c,v 1.2 2004/01/21 04:51:50 grehan Exp $"); #include +#include #include #include +#include #include "fpmath.h" #include "gdtoaimp.h" @@ -37,8 +39,51 @@ __FBSDID("$FreeBSD: src/lib/libc/gdtoa/_hdtoa.c,v 1.3 2005/01/18 18:44:07 das Ex #define INFSTR "Infinity" #define NANSTR "NaN" -#define DBL_ADJ (DBL_MAX_EXP - 2 + ((DBL_MANT_DIG - 1) % 4)) -#define LDBL_ADJ (LDBL_MAX_EXP - 2 + ((LDBL_MANT_DIG - 1) % 4)) +#define DBL_BIAS (DBL_MAX_EXP - 1) +#define LDBL_BIAS (LDBL_MAX_EXP - 1) + +#ifdef LDBL_IMPLICIT_NBIT +#define LDBL_NBIT_ADJ 0 +#else +#define LDBL_NBIT_ADJ 1 +#endif + +/* + * Efficiently compute the log2 of an integer. Uses a combination of + * arcane tricks found in fortune and arcane tricks not (yet) in + * fortune. This routine behaves similarly to fls(9). + */ +static int +log2_32(uint32_t n) +{ + + n |= (n >> 1); + n |= (n >> 2); + n |= (n >> 4); + n |= (n >> 8); + n |= (n >> 16); + + n = (n & 0x55555555) + ((n & 0xaaaaaaaa) >> 1); + n = (n & 0x33333333) + ((n & 0xcccccccc) >> 2); + n = (n & 0x0f0f0f0f) + ((n & 0xf0f0f0f0) >> 4); + n = (n & 0x00ff00ff) + ((n & 0xff00ff00) >> 8); + n = (n & 0x0000ffff) + ((n & 0xffff0000) >> 16); + return (n - 1); +} + +#if (LDBL_MANH_SIZE > 32 || LDBL_MANL_SIZE > 32) + +static int +log2_64(uint64_t n) +{ + + if (n >> 32 != 0) + return (log2_32((uint32_t)(n >> 32)) + 32); + else + return (log2_32((uint32_t)n)); +} + +#endif /* (LDBL_MANH_SIZE > 32 || LDBL_MANL_SIZE > 32) */ /* * Round up the given digit string. If the digit string is fff...f, @@ -123,24 +168,46 @@ char * __hdtoa(double d, const char *xdigs, int ndigits, int *decpt, int *sign, char **rve) { - static const int sigfigs = (DBL_MANT_DIG + 3) / 4; union IEEEd2bits u; char *s, *s0; int bufsize; + int impnbit; /* implicit normalization bit */ + int pos; + int shift; /* for subnormals, # of shifts required to normalize */ + int sigfigs; /* number of significant hex figures in result */ u.d = d; *sign = u.bits.sign; switch (fpclassify(d)) { case FP_NORMAL: - *decpt = u.bits.exp - DBL_ADJ; + sigfigs = (DBL_MANT_DIG + 3) / 4; + impnbit = 1 << ((DBL_MANT_DIG - 1) % 4); + *decpt = u.bits.exp - DBL_BIAS + 1 - + ((DBL_MANT_DIG - 1) % 4); break; case FP_ZERO: *decpt = 1; return (nrv_alloc("0", rve, 1)); case FP_SUBNORMAL: - u.d *= 0x1p514; - *decpt = u.bits.exp - (514 + DBL_ADJ); + /* + * The position of the highest-order bit tells us by + * how much to adjust the exponent (decpt). 
The + * adjustment is raised to the next nibble boundary + * since we will later choose the leftmost hexadecimal + * digit so that all subsequent digits align on nibble + * boundaries. + */ + if (u.bits.manh != 0) { + pos = log2_32(u.bits.manh); + shift = DBL_MANH_SIZE - pos; + } else { + pos = log2_32(u.bits.manl); + shift = DBL_MANH_SIZE + DBL_MANL_SIZE - pos; + } + sigfigs = (3 + DBL_MANT_DIG - shift) / 4; + impnbit = 0; + *decpt = DBL_MIN_EXP - ((shift + 3) & ~(4 - 1)); break; case FP_INFINITE: *decpt = INT_MAX; @@ -187,9 +254,11 @@ __hdtoa(double d, const char *xdigs, int ndigits, int *decpt, int *sign, * At this point, we have snarfed all the bits in the * mantissa, with the possible exception of the highest-order * (partial) nibble, which is dealt with by the next - * statement. We also tack on the implicit normalization bit. + * statement. That nibble is usually in manh, but it could be + * in manl instead for small subnormals. We also tack on the + * implicit normalization bit if appropriate. */ - *s = u.bits.manh | (1U << ((DBL_MANT_DIG - 1) % 4)); + *s = u.bits.manh | u.bits.manl | impnbit; /* If ndigits < 0, we are expected to auto-size the precision. */ if (ndigits < 0) { @@ -214,29 +283,71 @@ __hdtoa(double d, const char *xdigs, int ndigits, int *decpt, int *sign, /* * This is the long double version of __hdtoa(). + * + * On architectures that have an explicit integer bit, unnormals and + * pseudo-denormals cause problems in the conversion routine, so they + * are ``fixed'' by effectively toggling the integer bit. Although + * this is not correct behavior, the hardware will not produce these + * formats externally. */ char * __hldtoa(long double e, const char *xdigs, int ndigits, int *decpt, int *sign, char **rve) { - static const int sigfigs = (LDBL_MANT_DIG + 3) / 4; union IEEEl2bits u; char *s, *s0; int bufsize; + int impnbit; /* implicit normalization bit */ + int pos; + int shift; /* for subnormals, # of shifts required to normalize */ + int sigfigs; /* number of significant hex figures in result */ u.e = e; *sign = u.bits.sign; switch (fpclassify(e)) { case FP_NORMAL: - *decpt = u.bits.exp - LDBL_ADJ; + sigfigs = (LDBL_MANT_DIG + 3) / 4; + impnbit = 1 << ((LDBL_MANT_DIG - 1) % 4); + *decpt = u.bits.exp - LDBL_BIAS + 1 - + ((LDBL_MANT_DIG - 1) % 4); break; case FP_ZERO: *decpt = 1; return (nrv_alloc("0", rve, 1)); case FP_SUBNORMAL: - u.e *= 0x1p514L; - *decpt = u.bits.exp - (514 + LDBL_ADJ); + /* + * The position of the highest-order bit tells us by + * how much to adjust the exponent (decpt). The + * adjustment is raised to the next nibble boundary + * since we will later choose the leftmost hexadecimal + * digit so that all subsequent digits align on nibble + * boundaries. + */ +#ifdef LDBL_IMPLICIT_NBIT + /* Don't trust the normalization bit to be off. 
*/ + u.bits.manh &= ~(~0ULL << (LDBL_MANH_SIZE - 1)); +#endif + if (u.bits.manh != 0) { +#if LDBL_MANH_SIZE > 32 + pos = log2_64(u.bits.manh); +#else + pos = log2_32(u.bits.manh); +#endif + shift = LDBL_MANH_SIZE - LDBL_NBIT_ADJ - pos; + } else { +#if LDBL_MANL_SIZE > 32 + pos = log2_64(u.bits.manl); +#else + pos = log2_32(u.bits.manl); +#endif + shift = LDBL_MANH_SIZE + LDBL_MANL_SIZE - + LDBL_NBIT_ADJ - pos; + } + sigfigs = (3 + LDBL_MANT_DIG - LDBL_NBIT_ADJ - shift) / 4; + *decpt = LDBL_MIN_EXP + LDBL_NBIT_ADJ - + ((shift + 3) & ~(4 - 1)); + impnbit = 0; break; case FP_INFINITE: *decpt = INT_MAX; @@ -283,9 +394,11 @@ __hldtoa(long double e, const char *xdigs, int ndigits, int *decpt, int *sign, * At this point, we have snarfed all the bits in the * mantissa, with the possible exception of the highest-order * (partial) nibble, which is dealt with by the next - * statement. We also tack on the implicit normalization bit. + * statement. That nibble is usually in manh, but it could be + * in manl instead for small subnormals. We also tack on the + * implicit normalization bit if appropriate. */ - *s = u.bits.manh | (1U << ((LDBL_MANT_DIG - 1) % 4)); + *s = u.bits.manh | u.bits.manl | impnbit; /* If ndigits < 0, we are expected to auto-size the precision. */ if (ndigits < 0) { diff --git a/gdtoa/FreeBSD/_hdtoa.c.patch b/gdtoa/FreeBSD/_hdtoa.c.patch index de6da95..5b4d7cf 100644 --- a/gdtoa/FreeBSD/_hdtoa.c.patch +++ b/gdtoa/FreeBSD/_hdtoa.c.patch @@ -1,17 +1,86 @@ ---- _hdtoa.c.orig 2006-01-31 15:21:41.000000000 -0800 -+++ _hdtoa.c 2006-01-31 23:37:12.000000000 -0800 -@@ -223,6 +223,10 @@ - union IEEEl2bits u; - char *s, *s0; - int bufsize; +--- _hdtoa.c.orig 2004-06-03 15:22:08.000000000 -0700 ++++ _hdtoa.c 2004-08-28 17:10:21.000000000 -0700 +@@ -32,6 +32,9 @@ + #include + #include + #include ++#ifdef LDBL_HEAD_TAIL_PAIR ++#include ++#endif /* LDBL_HEAD_TAIL_PAIR */ + #include "fpmath.h" + #include "gdtoaimp.h" + +@@ -301,16 +304,31 @@ + int pos; + int shift; /* for subnormals, # of shifts required to normalize */ + int sigfigs; /* number of significant hex figures in result */ +#ifdef LDBL_HEAD_TAIL_PAIR + uint32_t bits[4]; -+ int i, pos; ++ int i; +#endif /* LDBL_HEAD_TAIL_PAIR */ u.e = e; *sign = u.bits.sign; -@@ -270,6 +274,19 @@ + ++#ifdef LDBL_HEAD_TAIL_PAIR ++ switch (__fpclassifyd(u.d[0])) { ++#else /* LDBL_HEAD_TAIL_PAIR */ + switch (fpclassify(e)) { ++#endif /* LDBL_HEAD_TAIL_PAIR */ + case FP_NORMAL: ++#ifdef LDBL_HEAD_TAIL_PAIR + sigfigs = (LDBL_MANT_DIG + 3) / 4; + impnbit = 1 << ((LDBL_MANT_DIG - 1) % 4); + *decpt = u.bits.exp - LDBL_BIAS + 1 - + ((LDBL_MANT_DIG - 1) % 4); ++#else /* LDBL_HEAD_TAIL_PAIR */ ++ sigfigs = (LDBL_MANT_DIG + 3) / 4; ++ impnbit = 1 << ((LDBL_MANT_DIG - 1) % 4); ++ *decpt = u.bits.exp - LDBL_BIAS + 1 - ++ ((LDBL_MANT_DIG - 1) % 4); ++#endif /* LDBL_HEAD_TAIL_PAIR */ + break; + case FP_ZERO: + *decpt = 1; +@@ -328,13 +346,26 @@ + /* Don't trust the normalization bit to be off. 
*/ + u.bits.manh &= ~(~0ULL << (LDBL_MANH_SIZE - 1)); + #endif ++#ifndef LDBL_HEAD_TAIL_PAIR + if (u.bits.manh != 0) { ++#endif /* LDBL_HEAD_TAIL_PAIR */ + #if LDBL_MANH_SIZE > 32 + pos = log2_64(u.bits.manh); + #else + pos = log2_32(u.bits.manh); + #endif + shift = LDBL_MANH_SIZE - LDBL_NBIT_ADJ - pos; ++#ifdef LDBL_HEAD_TAIL_PAIR ++ sigfigs = (3 + LDBL_MANT_DIG - LDBL_NBIT_ADJ - shift) / 4; ++ // we use DBL_MIN_EXP below because the head double is ++ // subnormal (and the tail double is zero) ++ *decpt = DBL_MIN_EXP + LDBL_NBIT_ADJ; ++ pos = (LDBL_MANT_DIG + 3) % 4; ++ if (pos < shift) ++ *decpt -= pos + ((shift - pos + 3) & ~(4 - 1)); ++ else ++ *decpt -= shift; ++#else /* LDBL_HEAD_TAIL_PAIR */ + } else { + #if LDBL_MANL_SIZE > 32 + pos = log2_64(u.bits.manl); +@@ -345,8 +376,9 @@ + LDBL_NBIT_ADJ - pos; + } + sigfigs = (3 + LDBL_MANT_DIG - LDBL_NBIT_ADJ - shift) / 4; +- *decpt = LDBL_MIN_EXP + LDBL_NBIT_ADJ - ++ *decpt = DBL_MIN_EXP + LDBL_NBIT_ADJ - + ((shift + 3) & ~(4 - 1)); ++#endif /* LDBL_HEAD_TAIL_PAIR */ + impnbit = 0; + break; + case FP_INFINITE: +@@ -381,6 +413,19 @@ */ for (s = s0 + bufsize - 1; s > s0 + sigfigs - 1; s--) *s = 0; @@ -31,7 +100,7 @@ for (; s > s0 + sigfigs - (LDBL_MANL_SIZE / 4) - 1 && s > s0; s--) { *s = u.bits.manl & 0xf; u.bits.manl >>= 4; -@@ -278,6 +295,7 @@ +@@ -389,6 +434,7 @@ *s = u.bits.manh & 0xf; u.bits.manh >>= 4; } @@ -39,14 +108,14 @@ /* * At this point, we have snarfed all the bits in the -@@ -285,7 +303,11 @@ - * (partial) nibble, which is dealt with by the next - * statement. We also tack on the implicit normalization bit. +@@ -398,7 +444,11 @@ + * in manl instead for small subnormals. We also tack on the + * implicit normalization bit if appropriate. */ +#ifdef LDBL_HEAD_TAIL_PAIR -+ *s = bits[i]; ++ *s = bits[i] | impnbit; +#else /* LDBL_HEAD_TAIL_PAIR */ - *s = u.bits.manh | (1U << ((LDBL_MANT_DIG - 1) % 4)); + *s = u.bits.manh | u.bits.manl | impnbit; +#endif /* LDBL_HEAD_TAIL_PAIR */ /* If ndigits < 0, we are expected to auto-size the precision. */ diff --git a/gdtoa/FreeBSD/_ldtoa.c.patch b/gdtoa/FreeBSD/_ldtoa.c.patch index c4a62ed..e7a5319 100644 --- a/gdtoa/FreeBSD/_ldtoa.c.patch +++ b/gdtoa/FreeBSD/_ldtoa.c.patch @@ -1,5 +1,5 @@ ---- _ldtoa.c.orig 2004-12-08 22:50:28.000000000 -0800 -+++ _ldtoa.c 2004-12-08 22:52:58.000000000 -0800 +--- _ldtoa.c.orig 2004-06-03 15:17:18.000000000 -0700 ++++ _ldtoa.c 2005-10-08 22:43:25.000000000 -0700 @@ -61,14 +61,34 @@ char *ret; union IEEEl2bits u; @@ -35,22 +35,22 @@ kind = STRTOG_Normal; #ifdef LDBL_IMPLICIT_NBIT bits[LDBL_MANT_DIG / 32] |= 1 << ((LDBL_MANT_DIG - 1) % 32); -@@ -77,12 +97,14 @@ +@@ -77,12 +97,12 @@ case FP_ZERO: kind = STRTOG_Zero; break; +#if !defined(__ppc__) && !defined(__ppc64__) case FP_SUBNORMAL: kind = STRTOG_Denormal; - #ifdef LDBL_IMPLICIT_NBIT +-#ifdef LDBL_IMPLICIT_NBIT be++; - #endif +-#endif break; +#endif /* !defined(__ppc__) && !defined(__ppc64__) */ case FP_INFINITE: kind = STRTOG_Infinite; break; -@@ -96,5 +118,9 @@ +@@ -96,5 +116,9 @@ ret = gdtoa(&fpi, be, (ULong *)bits, &kind, mode, ndigits, decpt, rve); if (*decpt == -32768) *decpt = INT_MAX; diff --git a/gdtoa/FreeBSD/gdtoa-strtod.c b/gdtoa/FreeBSD/gdtoa-strtod.c index bc06bfe..66a3baa 100644 --- a/gdtoa/FreeBSD/gdtoa-strtod.c +++ b/gdtoa/FreeBSD/gdtoa-strtod.c @@ -30,6 +30,9 @@ THIS SOFTWARE. * with " at " changed at "@" and " dot " changed to "."). 
*/ #include "gdtoaimp.h" +#ifndef NO_FENV_H +#include +#endif #ifdef USE_LOCALE #include "locale.h" @@ -112,7 +115,18 @@ strtod switch(s[1]) { case 'x': case 'X': - switch((i = gethex(&s, &fpi, &exp, &bb, sign)) & STRTOG_Retmask) { + { +#if defined(FE_DOWNWARD) && defined(FE_TONEAREST) && defined(FE_TOWARDZERO) && defined(FE_UPWARD) + FPI fpi1 = fpi; + switch(fegetround()) { + case FE_TOWARDZERO: fpi1.rounding = 0; break; + case FE_UPWARD: fpi1.rounding = 2; break; + case FE_DOWNWARD: fpi1.rounding = 3; + } +#else +#define fpi1 fpi +#endif + switch((i = gethex(&s, &fpi1, &exp, &bb, sign)) & STRTOG_Retmask) { case STRTOG_NoNumber: s = s00; sign = 0; @@ -124,7 +138,7 @@ strtod Bfree(bb); } ULtod(((U*)&rv)->L, bits, exp, i); - } + }} goto ret; } } diff --git a/gdtoa/FreeBSD/gdtoa-strtod.c.patch b/gdtoa/FreeBSD/gdtoa-strtod.c.patch index 4a87763..99db605 100644 --- a/gdtoa/FreeBSD/gdtoa-strtod.c.patch +++ b/gdtoa/FreeBSD/gdtoa-strtod.c.patch @@ -1,5 +1,5 @@ ---- gdtoa-strtod.c.orig 2005-01-20 20:12:37.000000000 -0800 -+++ gdtoa-strtod.c 2005-02-17 01:31:26.000000000 -0800 +--- gdtoa-strtod.c.orig 2005-10-08 11:32:33.000000000 -0700 ++++ gdtoa-strtod.c 2005-10-08 11:38:17.000000000 -0700 @@ -29,6 +29,8 @@ /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ @@ -7,9 +7,9 @@ +#include "xlocale_private.h" + #include "gdtoaimp.h" - - #ifdef USE_LOCALE -@@ -56,11 +58,11 @@ + #ifndef NO_FENV_H + #include +@@ -59,11 +61,11 @@ #endif double @@ -24,16 +24,16 @@ #endif { #ifdef Avoid_Underflow -@@ -112,7 +114,7 @@ - switch(s[1]) { - case 'x': - case 'X': -- switch((i = gethex(&s, &fpi, &exp, &bb, sign)) & STRTOG_Retmask) { -+ switch((i = gethex(&s, &fpi, &exp, &bb, sign, loc)) & STRTOG_Retmask) { +@@ -126,7 +128,7 @@ + #else + #define fpi1 fpi + #endif +- switch((i = gethex(&s, &fpi1, &exp, &bb, sign)) & STRTOG_Retmask) { ++ switch((i = gethex(&s, &fpi1, &exp, &bb, sign, loc)) & STRTOG_Retmask) { case STRTOG_NoNumber: s = s00; sign = 0; -@@ -142,8 +144,9 @@ +@@ -156,8 +158,9 @@ else if (nd < 16) z = 10*z + c - '0'; nd0 = nd; @@ -44,7 +44,7 @@ #else if (c == '.') #endif -@@ -966,3 +969,13 @@ +@@ -980,3 +983,13 @@ return sign ? 
-dval(rv) : dval(rv); } diff --git a/gdtoa/FreeBSD/gdtoa-strtodg.c b/gdtoa/FreeBSD/gdtoa-strtodg.c index a7d25e9..cbdf4aa 100644 --- a/gdtoa/FreeBSD/gdtoa-strtodg.c +++ b/gdtoa/FreeBSD/gdtoa-strtodg.c @@ -649,16 +649,8 @@ strtodg } bb0 = 0; /* trailing zero bits in rvb */ e2 = rve + rvbits - nbits; - if (e2 > fpi->emax) { - rvb->wds = 0; - irv = STRTOG_Infinite | STRTOG_Overflow | STRTOG_Inexhi; -#ifndef NO_ERRNO - errno = ERANGE; -#endif - infnanexp: - *exp = fpi->emax + 1; - goto ret; - } + if (e2 > fpi->emax + 1) + goto huge; rve1 = rve + rvbits - nbits; if (e2 < (emin = fpi->emin)) { denorm = 1; @@ -985,6 +977,16 @@ strtodg Bfree(bs); Bfree(bd0); Bfree(delta); + if (rve > fpi->emax) { + huge: + rvb->wds = 0; + irv = STRTOG_Infinite | STRTOG_Overflow | STRTOG_Inexhi; +#ifndef NO_ERRNO + errno = ERANGE; +#endif + infnanexp: + *exp = fpi->emax + 1; + } ret: if (denorm) { if (sudden_underflow) { diff --git a/gdtoa/FreeBSD/gdtoa-strtodg.c.patch b/gdtoa/FreeBSD/gdtoa-strtodg.c.patch index 977a233..d60d887 100644 --- a/gdtoa/FreeBSD/gdtoa-strtodg.c.patch +++ b/gdtoa/FreeBSD/gdtoa-strtodg.c.patch @@ -1,5 +1,5 @@ ---- gdtoa-strtodg.c.orig 2005-01-20 20:12:37.000000000 -0800 -+++ gdtoa-strtodg.c 2005-02-17 01:32:24.000000000 -0800 +--- gdtoa-strtodg.c.orig 2005-10-08 11:33:23.000000000 -0700 ++++ gdtoa-strtodg.c 2005-10-08 11:40:57.000000000 -0700 @@ -29,6 +29,8 @@ /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ @@ -43,7 +43,7 @@ #else if (c == '.') #endif -@@ -676,6 +679,9 @@ +@@ -668,6 +671,9 @@ rvb->x[0] = 0; *exp = emin; irv = STRTOG_Underflow | STRTOG_Inexlo; diff --git a/gdtoa/FreeBSD/gdtoa_strtopx.c.patch b/gdtoa/FreeBSD/gdtoa_strtopx.c.patch index 2bd3ec1..f810e57 100644 --- a/gdtoa/FreeBSD/gdtoa_strtopx.c.patch +++ b/gdtoa/FreeBSD/gdtoa_strtopx.c.patch @@ -1,5 +1,5 @@ --- gdtoa_strtopx.c.orig 2005-01-20 20:12:37.000000000 -0800 -+++ gdtoa_strtopx.c 2005-02-17 01:54:02.000000000 -0800 ++++ gdtoa_strtopx.c 2005-10-08 17:10:15.000000000 -0700 @@ -29,6 +29,8 @@ /* Please send bug reports to David M. Gay (dmg at acm dot org, * with " at " changed at "@" and " dot " changed to "."). */ @@ -30,3 +30,13 @@ switch(k & STRTOG_Retmask) { case STRTOG_NoNumber: case STRTOG_Zero: +@@ -87,7 +89,8 @@ + + case STRTOG_Infinite: + L[_0] = 0x7fff; +- L[_1] = L[_2] = L[_3] = L[_4] = 0; ++ L[_1] = 0x8000; ++ L[_2] = L[_3] = L[_4] = 0; + break; + + case STRTOG_NaN: diff --git a/gen/FreeBSD/popen.c.patch b/gen/FreeBSD/popen.c.patch index e0ce5b8..9a2785c 100644 --- a/gen/FreeBSD/popen.c.patch +++ b/gen/FreeBSD/popen.c.patch @@ -1,5 +1,5 @@ ---- popen.c.orig Mon May 24 23:50:41 2004 -+++ popen.c Tue May 25 00:09:39 2004 +--- popen.c.orig 2003-05-20 15:21:02.000000000 -0700 ++++ popen.c 2005-09-17 16:08:55.000000000 -0700 @@ -43,6 +43,7 @@ #include "namespace.h" #include @@ -49,6 +49,15 @@ if ((cur = malloc(sizeof(struct pid))) == NULL) { (void)_close(pdes[0]); +@@ -104,7 +106,7 @@ + argv[3] = NULL; + + THREAD_LOCK(); +- switch (pid = vfork()) { ++ switch (pid = fork()) { + case -1: /* Error. 
*/ + THREAD_UNLOCK(); + (void)_close(pdes[0]); @@ -138,7 +140,7 @@ (void)_close(pdes[1]); } diff --git a/gen/FreeBSD/setprogname.c.patch b/gen/FreeBSD/setprogname.c.patch index 6ca8162..19cc554 100644 --- a/gen/FreeBSD/setprogname.c.patch +++ b/gen/FreeBSD/setprogname.c.patch @@ -1,21 +1,37 @@ ---- setprogname.c.orig Mon Apr 28 15:05:02 2003 -+++ setprogname.c Fri May 16 14:13:59 2003 -@@ -3,6 +3,8 @@ +--- setprogname.c.orig 2003-05-20 15:21:02.000000000 -0700 ++++ setprogname.c 2005-10-26 00:58:44.000000000 -0700 +@@ -3,6 +3,10 @@ #include #include ++#include ++#include +#include +#define __progname (*_NSGetProgname()) #include "libc_private.h" -@@ -13,7 +15,7 @@ - +@@ -10,10 +14,20 @@ + setprogname(const char *progname) + { + const char *p; +- ++ char buf[2*MAXCOMLEN+1]; ++ int mib[2]; ++ p = strrchr(progname, '/'); if (p != NULL) - __progname = p + 1; -+ __progname = (char *)(p + 1); ++ __progname = (char *)(++p); else - __progname = progname; -+ __progname = (char *)progname; ++ __progname = (char *)(p = progname); ++ ++ strlcpy(&buf[0], p, sizeof(buf)); ++ ++ mib[0] = CTL_KERN; ++ mib[1] = KERN_PROCNAME; ++ ++ /* ignore errors as this is not a hard error */ ++ sysctl(mib, 2, NULL, NULL, &buf[0], 2*MAXCOMLEN); } diff --git a/gen/filesec.c b/gen/filesec.c index 55d29d5..343839f 100644 --- a/gen/filesec.c +++ b/gen/filesec.c @@ -29,6 +29,11 @@ #include #include +/* + * Versions of copy_int/copy_ext that retain native endianity. + */ +extern ssize_t acl_copy_ext_native(void *buf_p, acl_t acl, ssize_t size); +extern acl_t acl_copy_int_native(const void *buf_p); struct _filesec { int fs_valid; @@ -149,7 +154,7 @@ filesec_get_property(filesec_t fsec, filesec_property_t property, void *propptr) if (fsec->fs_aclbuf == _FILESEC_REMOVE_ACL) { *(acl_t *)propptr = _FILESEC_REMOVE_ACL; } else { - *(acl_t *)propptr = acl_copy_int(fsec->fs_aclbuf); + *(acl_t *)propptr = acl_copy_int_native(fsec->fs_aclbuf); if (*(acl_t *)propptr == NULL) error = errno; } @@ -252,7 +257,7 @@ filesec_set_property(filesec_t fsec, filesec_property_t property, const void *pr error = errno; break; } - copysize = acl_copy_ext(aclbuf, acl, aclsize); + copysize = acl_copy_ext_native(aclbuf, acl, aclsize); if (copysize < 0) { free(aclbuf); error = EINVAL; diff --git a/gen/stack_logging.c b/gen/stack_logging.c index cd96a7c..ff59ae4 100644 --- a/gen/stack_logging.c +++ b/gen/stack_logging.c @@ -57,9 +57,17 @@ static inline void copy_pages(const void *source, void *dest, unsigned bytes) { /*************** Recording stack ***********/ -static void *first_frame_address(void) { +// The three functions below are marked as noinline to ensure consistent inlining across +// all versions of GCC and all compiler flags. The malloc stack logging code expects +// these functions to not be inlined. +// For details, see . +// +// The performance cost of not inlining these functions is negligible, and they're only +// called when MallocStackLogging is set anyway, so they won't affect normal usage. 
+ +static __attribute__((noinline)) void *first_frame_address(void) { #if defined(__i386__) - return __builtin_frame_address(1); + return __builtin_frame_address(0); #elif defined(__ppc__) || defined(__ppc64__) void *addr; #warning __builtin_frame_address IS BROKEN IN BEAKER: RADAR #2340421 @@ -71,7 +79,7 @@ static void *first_frame_address(void) { #endif } -static void *next_frame_address(void *addr) { +static __attribute__((noinline)) void *next_frame_address(void *addr) { void *ret; #if defined(__MACH__) && defined(__i386__) __asm__ volatile("movl (%1),%0" : "=r" (ret) : "r" (addr)); @@ -100,7 +108,7 @@ static void *next_frame_address(void *addr) { #error ********** Unimplemented architecture #endif -void thread_stack_pcs(vm_address_t *buffer, unsigned max, unsigned *nb) { +__attribute__((noinline)) void thread_stack_pcs(vm_address_t *buffer, unsigned max, unsigned *nb) { void *addr; addr = first_frame_address(); *nb = 0; diff --git a/i386/pthreads/pthread_set_self.s b/i386/pthreads/pthread_set_self.s index 3e3b519..82aab3a 100644 --- a/i386/pthreads/pthread_set_self.s +++ b/i386/pthreads/pthread_set_self.s @@ -21,6 +21,8 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include + .text .align 2, 0x90 .globl ___pthread_set_self @@ -28,7 +30,7 @@ ___pthread_set_self: pushl 4(%esp) pushl $0 movl $3,%eax - lcall $0x3b,$0 + MACHDEP_SYSCALL_TRAP addl $8,%esp movw %ax,%gs ret diff --git a/i386/string/Makefile.inc b/i386/string/Makefile.inc index 4c6264d..bfccaae 100644 --- a/i386/string/Makefile.inc +++ b/i386/string/Makefile.inc @@ -9,4 +9,11 @@ MDSRCS += bcopy.s \ bzero.s \ memcpy.s \ memmove.s \ - strcmp.s + strlen.s \ + strcpy.s \ + strcmp.s \ + strncpy.s \ + strncmp.s \ + memcmp.s \ + bcmp.s \ + memset.s diff --git a/i386/string/bcmp.s b/i386/string/bcmp.s new file mode 100644 index 0000000..8f2cd80 --- /dev/null +++ b/i386/string/bcmp.s @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * bcmp() is implemented in memcmp.s, as it is equivalent to memcmp() in OSX. + * (The two symbols, bcmp and memcmp, have the same value.) + * This empty file is here to prevent the Free BSD machine independent version + * from building. + */ diff --git a/i386/string/memcmp.s b/i386/string/memcmp.s new file mode 100644 index 0000000..a69e3ea --- /dev/null +++ b/i386/string/memcmp.s @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +// *************** *********** +// * M E M C M P * and * B C M P * +// *************** *********** +// +// int memcmp(const char *s1, const char *s2, size_t len); +// int bcmp(const char *s1, const char *s2, size_t len); +// +// Bcmp returns (+,0,-), whereas memcmp returns the true difference +// between the first differing bytes, but we treat them identically. +// +// We optimize the compare by doing it with SSE. This introduces +// a complication: if we blindly did vector loads from both sides until +// finding a difference, we might get a spurious page fault by +// reading bytes past the difference. To avoid this, we never do a load +// that crosses a page boundary. + +#define kShort 18 // too short for vectors (must be >16) + + .text + .align 4 + + .globl _memcmp + .globl _bcmp + +_memcmp: // int memcmp(const char *s1,const char *s2,size_t len); +_bcmp: // int bcmp(const char *s1,const char *s2,size_t len); + pushl %esi + pushl %edi + movl 20(%esp),%ecx // get length + movl 12(%esp),%esi // get LHS ptr + movl 16(%esp),%edi // get RHS ptr + cmpl $(kShort),%ecx // worth accelerating? + ja LNotShort // yes + + +// Too short to bother with parallel compares. Loop over bytes. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length (<= kShort) + +LShort: + testl %ecx,%ecx // 0-length? + jnz LShortLoop // no + xorl %eax,%eax // return 0 + jmp LExit + .align 4,0x90 // align inner loops to optimize I-fetch +LShortLoop: // loop over bytes + movzb (%esi),%eax // get LHS byte + movzb (%edi),%edx // get RHS byte + incl %esi + incl %edi + subl %edx,%eax // compare them + jnz LExit // done if not equal + decl %ecx // decrement length + jnz LShortLoop +LExit: // return value is in %eax + popl %edi + popl %esi + ret + +LNotEqual: // here from LLoopOverBytes with LHS in eax + movzb (%edi),%edx // get RHS byte + subl %edx,%eax // generate return value (nonzero) + popl %edi + popl %esi + ret + + +// Loop over bytes until we reach end of a page. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length remaining after end of loop (ie, already adjusted) +// %edx = #bytes until next page (1..15) + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%esi),%eax // get LHS byte + inc %esi + cmpb (%edi),%al // compare to RHS byte + jnz LNotEqual // done if not equal + inc %edi + dec %edx // more to go? + jnz LLoopOverBytes + + +// Long enough to justify overhead of setting up vector compares. In order to +// avoid spurious page faults, we loop over: +// +// min( length, bytes_in_LHS_page, bytes_in_RHS_page) >> 4 +// +// 16-byte chunks. 
When we near a page end, we have to revert to a byte-by-byte +// comparison until reaching the next page, then resume the vector comparison. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length (> kShort) + +LNotShort: + movl %esi,%eax // copy ptrs + movl %edi,%edx + andl $4095,%eax // mask down to page offsets + andl $4095,%edx + cmpl %eax,%edx // which is bigger? + cmova %edx,%eax // %eax = max(LHS offset, RHS offset); + movl $4096,%edx + subl %eax,%edx // get #bytes to next page crossing + cmpl %ecx,%edx // will operand run out first? + cmova %ecx,%edx // get min(length remaining, bytes to page end) + movl %edx,%eax + shrl $4,%edx // get #chunks till end of operand or page + jnz LLoopOverChunks // enter vector loop + +// Too near page end for vectors. + + subl %eax,%ecx // adjust length remaining + movl %eax,%edx // %edx <- #bytes to page end + cmpl $(kShort),%ecx // will there be enough after we cross page for vectors? + ja LLoopOverBytes // yes + addl %eax,%ecx // no, restore total length remaining + jmp LShortLoop // compare rest byte-by-byte (%ecx != 0) + + +// Loop over 16-byte chunks. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length remaining +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%esi),%xmm0 // get LHS + movdqu (%edi),%xmm1 // get RHS + addl $16,%esi + pcmpeqb %xmm1,%xmm0 // compare LHS to RHS + addl $16,%edi + pmovmskb %xmm0,%eax // collect comparison result bits (1 if equal) + subl $16,%ecx // adjust length remaining + xorl $0xFFFF,%eax // all equal? + jne LDifferent // no, we found differing bytes + dec %edx // more to go? + jnz LLoopOverChunks + + cmpl $(kShort),%ecx // a lot more to compare? + jbe LShort // no + jmp LNotShort // compute distance to next page crossing etc + + +// Found a difference. +// %esi = LHS ptr, already advanced by 16 +// %edi = RHS ptr, already advanced by 16 +// %eax = complemented compare vector (ie, 0 == equal) + +LDifferent: + bsf %eax,%edx // which byte differed? + subl $16,%esi // point to byte 0 while we wait for bit scan + subl $16,%edi + movzb (%esi,%edx),%eax // get LHS byte + movzb (%edi,%edx),%ecx // get RHS byte + subl %ecx,%eax // compute difference (ie, return value) + popl %edi + popl %esi + ret diff --git a/i386/string/memset.s b/i386/string/memset.s new file mode 100644 index 0000000..0e0ae2a --- /dev/null +++ b/i386/string/memset.s @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + + +/* This file contains the following functions: + * + * void *memset(void *b, int c, size_t len); + * void memset_pattern4(void *b, const void *c4, size_t len); + * void memset_pattern8(void *b, const void *c8, size_t len); + * void memset_pattern16(void *b, const void *c16, size_t len); + * + * Calls of memset() with c==0 are routed to the bzero() routine. Most of the + * others go to _COMM_PAGE_MEMSET_PATTERN, which is entered as follows: + * %edi = ptr to memory to set (aligned) + * %edx = length (which can be short, though we bias in favor of long operands) + * %xmm0 = the pattern to store + * Return conditions: + * %eax, %edi, %esi, %ecx, and %edx all trashed + * + * NB: we avoid "stos" family of instructions (stosl, stosb), as they are very slow + * on P4s and probably other processors. + */ + + #define kShort 255 // for nonzero memset(), too short for commpage + + + .text + .globl _memset + .align 2 +_memset: // void *memset(void *b, int c, size_t len); + movl 8(%esp),%eax // get 1-byte pattern + movl 12(%esp),%edx // get length + andl $0xFF,%eax // (c==0) ? + jnz LNonzero // not a bzero + + movl $(_COMM_PAGE_BZERO),%eax// map memset(p,0,n) into bzero(p,n) + movl %edx,8(%esp) // put count where bzero() expects it + jmp %eax // enter commpage + + + // Handle memset of a nonzero value. + +LNonzero: + pushl %edi // save a few nonvolatiles + pushl %esi + movl %eax,%esi // replicate byte in %al into all four bytes + movl 12(%esp),%edi // point to operand + shll $8,%esi + orl %esi,%eax + movl %eax,%esi + shll $16,%esi + orl %esi,%eax // now %eax has "c" in all 4 bytes + cmpl $(kShort),%edx // is operand too short for SSE? + ja LCallCommpage // no + +// Nonzero memset() too short to call commpage. +// %eax = replicated 4-byte pattern +// %edi = ptr +// %edx = length (<= kShort) + + cmpl $16,%edx // long enough to word align? + jge 3f // yes + test %edx,%edx // length==0? + jz 6f +1: + movb %al,(%edi) // pack in a byte + inc %edi + dec %edx + jnz 1b + jmp 6f +2: + movb %al,(%edi) // pack in a byte + inc %edi + dec %edx +3: + test $3,%edi // is ptr doubleword aligned? + jnz 2b // no + movl %edx,%ecx // copy length + shrl $2,%edx // #doublewords to store +4: + movl %eax,(%edi) // store aligned doubleword + addl $4,%edi + dec %edx + jnz 4b + andl $3,%ecx // any leftover bytes? + jz 6f // no +5: + movb %al,(%edi) // pack in a byte + inc %edi + dec %ecx + jnz 5b +6: + movl 12(%esp),%eax // get return value (ie, original ptr) + popl %esi + popl %edi + ret + +// Nonzero memset() is long enough to call commpage. +// %eax = replicated 4-byte pattern +// %edi = ptr +// %edx = length (> kShort) + +LCallCommpage: + movd %eax,%xmm0 // move %eax to low 4 bytes of %xmm0 + pshufd $(0x00),%xmm0,%xmm0 // replicate across the vector + movl %edi,%ecx // copy dest ptr + negl %ecx + andl $15,%ecx // get #bytes to align ptr + jz 2f // skip if already aligned + subl %ecx,%edx // decrement length +1: + movb %al,(%edi) // pack in a byte + inc %edi + dec %ecx + jnz 1b +2: // ptr aligned, length long enough to justify + movl $(_COMM_PAGE_MEMSET_PATTERN),%eax + call %eax // call commpage to do the heavy lifting + movl 12(%esp),%eax // get return value (ie, original ptr) + popl %esi + popl %edi + ret + + +// Handle memset of a 16-byte pattern. 
+ + .globl _memset_pattern16 + .align 2, 0x90 +_memset_pattern16: // void memset_pattern16(void *b, const void *c16, size_t len); + pushl %edi + pushl %esi + movl 20(%esp),%edx // get length + movl 16(%esp),%esi // get ptr to 16-byte pattern + movl 12(%esp),%edi // point to operand + movdqu (%esi),%xmm0 // load the pattern + jmp LAlignPtr + + +// Handle memset of an 8-byte pattern. + + .globl _memset_pattern8 + .align 2, 0x90 +_memset_pattern8: // void memset_pattern8(void *b, const void *c8, size_t len); + pushl %edi + pushl %esi + movl 20(%esp),%edx // get length + movl 16(%esp),%esi // get ptr to 8-byte pattern + movl 12(%esp),%edi // point to operand + movq (%esi),%xmm0 // load pattern into low 8 bytes + punpcklqdq %xmm0,%xmm0 // replicate into all 16 + jmp LAlignPtr + +// Handle memset of a 4-byte pattern. + + .globl _memset_pattern4 + .align 2, 0x90 +_memset_pattern4: // void memset_pattern4(void *b, const void *c4, size_t len); + pushl %edi + pushl %esi + movl 20(%esp),%edx // get length + movl 16(%esp),%esi // get ptr to 4-byte pattern + movl 12(%esp),%edi // point to operand + movd (%esi),%xmm0 // load pattern into low 4 bytes + pshufd $(0x00),%xmm0,%xmm0 // replicate the 4 bytes across the vector + + +// Align ptr if necessary. We must rotate the pattern right for each byte we +// store while aligning the ptr. Since there is no rotate instruction in SSE3, +// we have to synthesize the rotates. +// %edi = ptr +// %edx = length +// %xmm0 = pattern + +LAlignPtr: // NB: can drop down to here! + cmpl $100,%edx // long enough to bother aligning ptr? + movl %edi,%ecx // copy ptr + jb LReady // not long enough + negl %ecx + andl $15,%ecx // get #bytes to align ptr + jz LReady // already aligned + subl %ecx,%edx // adjust length + + test $1,%cl // 1-byte store required? + movd %xmm0,%eax // get 4 low bytes in %eax + jz 2f // no + movdqa %xmm0,%xmm1 // copy pattern so we can shift in both directions + movb %al,(%edi) // pack in the low-order byte + psrldq $1,%xmm0 // shift pattern right 1 byte + inc %edi + pslldq $15,%xmm1 // shift pattern left 15 bytes + shrl $8,%eax // in case 2-byte store is required + por %xmm1,%xmm0 // complete right rotate of pattern by 1 byte +2: + test $2,%cl // 2-byte store required? + jz 4f // no + psrldq $2,%xmm0 // shift pattern down 2 bytes + movw %ax,(%edi) // pack in next two bytes + pinsrw $7,%eax,%xmm0 // insert low word of %eax into high word of %xmm0 + addl $2,%edi // adjust ptr +4: + test $4,%cl // 4-byte store required? + jz 8f // no + movd %xmm0,(%edi) // store low 4 bytes of %xmm0 + pshufd $(0x39),%xmm0,%xmm0 // rotate %xmm0 right 4 bytes (mask == 00 11 10 01) + addl $4,%edi // adjust ptr +8: + test $8,%cl // 8-byte store required? + jz LReady // no + movq %xmm0,(%edi) // store low 8 bytes of %xmm0 + pshufd $(0x4e),%xmm0,%xmm0 // rotate %xmm0 right 8 bytes (mask == 01 00 11 10) + addl $8,%edi // adjust ptr + +// Ptr is aligned if practical, we're ready to call commpage to do the heavy lifting. + +LReady: + movl $(_COMM_PAGE_MEMSET_PATTERN),%eax + call %eax // call commpage to do the heavy lifting + popl %esi + popl %edi + ret diff --git a/i386/string/strcmp.s b/i386/string/strcmp.s index a21cea6..fb1047f 100644 --- a/i386/string/strcmp.s +++ b/i386/string/strcmp.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -20,72 +20,126 @@ * * @APPLE_LICENSE_HEADER_END@ */ -.text -.globl _strcmp -_strcmp: - movl 0x04(%esp),%eax - movl 0x08(%esp),%edx - jmp L2 /* Jump into the loop! */ - - .align 2,0x90 -L1: incl %eax - incl %edx -L2: movb (%eax),%cl - testb %cl,%cl /* null terminator??? */ - jz L3 - cmpb %cl,(%edx) /* chars match??? */ - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - jne L3 - incl %eax - incl %edx - movb (%eax),%cl - testb %cl,%cl - jz L3 - cmpb %cl,(%edx) - je L1 - .align 2, 0x90 -L3: movzbl (%eax),%eax /* unsigned comparison */ - movzbl (%edx),%edx - subl %edx,%eax - ret + + +// *************** +// * S T R C M P * +// *************** +// +// int strcmp(const char *s1, const char *s2); +// +// We optimize the compare by doing it in parallel, using SSE. This introduces +// a complication: if we blindly did vector loads from both sides until +// finding a difference (or 0), we might get a spurious page fault by +// reading bytes past the difference. To avoid this, we never do a load +// that crosses a page boundary. + + .text + .globl _strcmp + + .align 4 +_strcmp: // int strcmp(const char *s1,const char *s2); + pushl %esi + pushl %edi + movl 12(%esp),%esi // get LHS ptr + movl 16(%esp),%edi // get RHS ptr + + +// In order to avoid spurious page faults, we loop over: +// +// min( bytes_in_LHS_page, bytes_in_RHS_page) >> 4 +// +// 16-byte chunks. When we near a page end, we have to revert to a byte-by-byte +// comparison until reaching the next page, then resume the vector comparison. +// %esi = LHS ptr +// %edi = RHS ptr + +LNextChunk: + movl %esi,%eax // copy ptrs + movl %edi,%edx + andl $4095,%eax // mask down to page offsets + andl $4095,%edx + cmpl %eax,%edx // which is bigger? + cmova %edx,%eax // %eax = max(LHS offset, RHS offset); + movl $4096,%edx + subl %eax,%edx // get #bytes to next page crossing + movl %edx,%eax + shrl $4,%edx // get #chunks till end of operand or page + jnz LLoopOverChunks // enter vector loop + movl %eax,%edx // no chunks... + jmp LLoopOverBytes // ...so loop over bytes until page end + + +// Loop over bytes. +// %esi = LHS ptr +// %edi = RHS ptr +// %edx = byte count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%esi),%eax // get LHS byte + movzb (%edi),%ecx // get RHS byte + inc %esi + inc %edi + testl %eax,%eax // 0? + jz LExit0 // yes, we're done + subl %ecx,%eax // compare them + jnz LExit // done if not equal + dec %edx // more to go? + jnz LLoopOverBytes + + jmp LNextChunk // we've come to end of page + + +// Loop over 16-byte chunks. 
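[Editorial aside on the LNextChunk computation above (the same idea reappears in strncmp's LNotShort); a hedged C sketch, not code from the patch. Each pointer's offset within its 4 KB page is taken, the larger offset is chosen (cmova), and its distance from 4096 is the number of bytes that can be compared before either load could touch a new page; shifting that right by 4 yields the chunk count for the vector loop.]

#include <stdint.h>

static unsigned bytes_before_page_cross(const char *s1, const char *s2)
{
    unsigned off1 = (unsigned)((uintptr_t)s1 & 4095);
    unsigned off2 = (unsigned)((uintptr_t)s2 & 4095);
    unsigned max  = (off1 > off2) ? off1 : off2;   /* cmova picks the larger offset */
    return 4096u - max;             /* safe byte count; >>4 gives the chunk count */
}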
+// %esi = LHS ptr +// %edi = RHS ptr +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%esi),%xmm1 // get LHS + movdqu (%edi),%xmm2 // get RHS + pxor %xmm0,%xmm0 // get some 0s in the shadow of the loads + addl $16,%esi + pcmpeqb %xmm1,%xmm2 // compare LHS to RHS + pcmpeqb %xmm1,%xmm0 // compare LHS to 0s + addl $16,%edi + pmovmskb %xmm2,%eax // get result mask for comparison of LHS and RHS + pmovmskb %xmm0,%ecx // get result mask for 0 check + xorl $0xFFFF,%eax // complement compare mask so 1 means "not equal" + orl %ecx,%eax // combine the masks and check for 1-bits + jnz LFoundDiffOr0 // we found differing bytes or a 0-byte + dec %edx // more to go? + jnz LLoopOverChunks + + jmp LNextChunk // compare up to next page boundary + + +// Found a zero and/or a difference in vector compare. +// %esi = LHS ptr, already advanced by 16 +// %edi = RHS ptr, already advanced by 16 +// %eax = bit n set if bytes n differed or were 0 + +LFoundDiffOr0: + bsf %eax,%edx // which byte differed or was 0? + subl $16,%esi // point to start of vectors while we wait for bit scan + subl $16,%edi + movzb (%esi,%edx),%eax // get LHS byte + movzb (%edi,%edx),%ecx // get RHS byte + subl %ecx,%eax // compute difference (ie, return value) + popl %edi + popl %esi + ret + + +// Found a zero and/or difference in byte loop. +// %eax = LHS byte +// %ecx = RHS byte + +LExit0: + subl %ecx,%eax // compute difference (ie, return value) +LExit: // here with difference already in %eax + popl %edi + popl %esi + ret diff --git a/i386/string/strcpy.s b/i386/string/strcpy.s new file mode 100644 index 0000000..771eec4 --- /dev/null +++ b/i386/string/strcpy.s @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +// *************** +// * S T R C P Y * +// *************** +// +// char *strcpy(const char *dst, const char *src); +// +// We optimize the move by doing it vector parallel. This introduces +// a complication: if we blindly did vector load/stores until finding +// a 0, we might get a spurious page fault by touching bytes past it. +// To avoid this, we never do a load that crosses a page boundary, +// and never store a byte we don't have to. +// +// We align the destination, because unaligned vector stores are slow. + + .text + .globl _strcpy + + .align 4 +_strcpy: // char *strcpy(const char *dst, const char *src); + pushl %edi + movl 8(%esp),%edi // get dest ptr + movl 12(%esp),%ecx // get source ptr + movl %edi,%edx // copy dest ptr + negl %edx + andl $15,%edx // how many bytes to align dest ptr? 
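// (Editorial note, not part of the original patch: the negl/andl $15 pair above
// leaves (-(uintptr_t)dst) & 15 in %edx, i.e. how many single-byte stores are
// needed before the destination becomes 16-byte aligned; when dst is already
// aligned the result is 0 and the jnz below falls through to the vector setup.)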
+ jnz LLoopOverBytes // not aligned, so go do so + + +// In order to avoid spurious page faults, we loop until nearing the source page +// end. Then we revert to a byte-by-byte loop for 16 bytes until the page is crossed, +// then resume the vector loop. +// %ecx = source ptr (unaligned) +// %edi = dest ptr (aligned) + +LNextChunk: + movl %ecx,%eax // copy source ptr + movl $4096,%edx + andl $4095,%eax // get offset into source page + subl %eax,%edx // get #bytes remaining in source page + shrl $4,%edx // get #chunks till end of page + jnz LLoopOverChunks // enter vector loop + movl $16,%edx // move 16 bytes to cross page but keep dest aligned + jmp LLoopOverBytes + + +// Loop over bytes. +// %ecx = source ptr +// %edi = dest ptr +// %edx = byte count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%ecx),%eax // get source byte + inc %ecx + movb %al,(%edi) // pack into dest + inc %edi + testl %eax,%eax // 0? + jz LDone // yes, we're done + dec %edx // more to go? + jnz LLoopOverBytes + + jmp LNextChunk // we've come to end of page + + +// Loop over 16-byte chunks. +// %ecx = source ptr (unaligned) +// %edi = dest ptr (aligned) +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%ecx),%xmm1 // get source + pxor %xmm0,%xmm0 // get some 0s + addl $16,%ecx + pcmpeqb %xmm1,%xmm0 // compare source to 0s + pmovmskb %xmm0,%eax // get result mask for 0 check + testl %eax,%eax // any 0s? + jnz LFound0 // yes, exit loop + movdqa %xmm1,(%edi) // no 0s so do aligned store into destination + addl $16,%edi + dec %edx // more to go? + jnz LLoopOverChunks + + movl $16,%edx // move 16 bytes + jmp LLoopOverBytes // cross page but keep dest aligned + + +// Found a zero in the vector. Figure out where it is, and store the bytes +// up to it. +// %edi = dest ptr (aligned) +// %eax = result mask +// %xmm1 = source vector + +LFound0: + bsf %eax,%edx // find first 0 + inc %edx // we need to store the 0 too + test $16,%dl // was 0 last byte? + jz 8f // no + movdqa %xmm1,(%edi) // yes, store entire vector + jmp LDone +8: + test $8,%dl // 8-byte store required? + jz 4f // no + movq %xmm1,(%edi) // pack in 8 low bytes + psrldq $8,%xmm1 // then shift vector down 8 bytes + addl $8,%edi +4: + test $4,%dl // 4-byte store required? + jz 3f // no + movd %xmm1,(%edi) // pack in 4 low bytes + psrldq $4,%xmm1 // then shift vector down 4 bytes + addl $4,%edi +3: + andl $3,%edx // more to go? + jz LDone // no + movd %xmm1,%eax // move remainders out of vector into %eax +1: // loop on up to three bytes + movb %al,(%edi) // pack in next byte + shrl $8,%eax // shift next byte into position + inc %edi + dec %edx + jnz 1b + +LDone: + movl 8(%esp),%eax // original dest ptr is return value + popl %edi + ret diff --git a/i386/string/strlen.s b/i386/string/strlen.s new file mode 100644 index 0000000..b21bee5 --- /dev/null +++ b/i386/string/strlen.s @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * Strlen, for processors with SSE3. + * + * Note that all memory references must be aligned, in order to avoid spurious + * page faults. Thus we have to load the aligned 16-byte chunk containing the + * first byte of the operand, then mask out false 0s that may occur before the + * first byte. + * + * We favor the fall-through (ie, short operand) path. + */ + + .text + .globl _strlen + .align 4, 0x90 +_strlen: // size_t strlen(char *b); + movl 4(%esp),%edx // get ptr to string + pxor %xmm0,%xmm0 // zero %xmm0 + movl %edx,%ecx // copy ptr + andl $(-16),%edx // 16-byte align ptr + orl $(-1),%eax + pcmpeqb (%edx),%xmm0 // check whole qw for 0s + andl $15,%ecx // get #bytes in aligned dq before operand + shl %cl,%eax // create mask for the bytes of aligned dq in operand + pmovmskb %xmm0,%ecx // collect mask of 0-bytes + andl %eax,%ecx // mask out any 0s that occur before 1st byte + jz LEnterLoop // no 0-bytes (ie, 1-bits), so enter by-16 loop + +// We've found a 0-byte. +// %edx = aligned address of 16-byte block containing the terminating 0-byte +// %ecx = compare bit vector + +LFoundIt: + bsf %ecx,%eax // find first 1-bit (ie, first 0-byte) + movl 4(%esp),%ecx // recover ptr to 1st byte in string + addl %edx,%eax // get address of the 0-byte + subl %ecx,%eax // subtract address of 1st byte to get string length + ret + +// Loop over aligned 16-byte blocks: +// %edx = address of previous block + +LEnterLoop: + pxor %xmm0,%xmm0 // get some 0-bytes + addl $16,%edx // advance ptr +LLoop: + movdqa (%edx),%xmm1 // get next chunk + addl $16,%edx + pcmpeqb %xmm0,%xmm1 // check for 0s + pmovmskb %xmm1,%ecx // collect mask of 0-bytes + test %ecx,%ecx // any 0-bytes? + jz LLoop // no 0-bytes, so get next dq + + subl $16,%edx // back up ptr + jmp LFoundIt + + \ No newline at end of file diff --git a/i386/string/strncmp.s b/i386/string/strncmp.s new file mode 100644 index 0000000..a833be7 --- /dev/null +++ b/i386/string/strncmp.s @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
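[Editorial aside on the strlen start-up step above; a hedged C/SSE2 sketch, not code from the patch. The pointer is rounded down to a 16-byte boundary so the load cannot fault, the aligned block is compared against zero, and a mask shifted left by (ptr & 15) discards any 0-bytes that fall before the first byte of the string.]

#include <stdint.h>
#include <emmintrin.h>

static unsigned first_block_nul_mask(const char *s)
{
    const __m128i *aligned = (const __m128i *)((uintptr_t)s & ~(uintptr_t)15);
    __m128i block = _mm_load_si128(aligned);                 /* aligned load, no fault */
    unsigned zeros = (unsigned)_mm_movemask_epi8(
                         _mm_cmpeq_epi8(block, _mm_setzero_si128()));
    return zeros & (0xFFFFu << ((uintptr_t)s & 15));         /* ignore bytes before s */
}
/* A nonzero result means the terminator lies in this first block;
 * otherwise the by-16 loop (LEnterLoop) takes over. */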
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + + +// ***************** +// * S T R N C M P * +// ***************** +// +// int strncmp(const char *s1, const char *s2, size_t len); +// +// We optimize the compare by doing it vector parallel. This introduces +// a complication: if we blindly did vector loads from both sides until +// finding a difference (or 0), we might get a spurious page fault by +// reading bytes past the difference. To avoid this, we never do a load +// that crosses a page boundary. + +#define kShort 20 // too short for vectors (must be >16) + + .text + .globl _strncmp + + .align 4 +_strncmp: // int strncmp(const char *s1, const char *s2, size_t len); + pushl %esi + pushl %edi + movl 20(%esp),%ecx // get length + movl 12(%esp),%esi // get LHS ptr + movl 16(%esp),%edi // get RHS ptr + push %ebx + cmpl $(kShort),%ecx // worth accelerating? + ja LNotShort // yes + + +// Too short to bother with parallel compares. Loop over bytes. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length (<= kShort) + +LShort: + testl %ecx,%ecx // 0-length? + jnz LShortLoop // no + jmp LReturn0 // yes, return 0 + .align 4,0x90 // align inner loops to optimize I-fetch +LShortLoop: // loop over bytes + movzb (%esi),%eax // get LHS byte + movzb (%edi),%ebx // get RHS byte + incl %esi + incl %edi + testl %eax,%eax // LHS==0 ? + jz LNotEqual // yes, this terminates comparison + subl %ebx,%eax // compare them + jnz LExit // done if not equal + decl %ecx // decrement length + jnz LShortLoop +LReturn0: + xorl %eax,%eax // all bytes equal, so return 0 +LExit: // return value is in %eax + popl %ebx + popl %edi + popl %esi + ret + +LNotEqual: // LHS in eax, RHS in ebx + subl %ebx,%eax // generate return value (nonzero) + popl %ebx + popl %edi + popl %esi + ret + + +// Loop over bytes until we reach end of a page. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length remaining after end of loop (ie, already adjusted) +// %edx = #bytes until next page (1..15) + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%esi),%eax // get LHS byte + movzb (%edi),%ebx // get RHS byte + inc %esi + inc %edi + testl %eax,%eax // LHS==0 ? + jz LNotEqual // yes, this terminates comparison + subl %ebx,%eax // compare them + jnz LExit // done if not equal + dec %edx // more to go? + jnz LLoopOverBytes + + +// Long enough to justify overhead of setting up vector compares. In order to +// avoid spurious page faults, we loop over: +// +// min( length, bytes_in_LHS_page, bytes_in_RHS_page) >> 4 +// +// 16-byte chunks. When we near a page end, we have to revert to a byte-by-byte +// comparison until reaching the next page, then resume the vector comparison. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length (> kShort) + +LNotShort: + movl %esi,%eax // copy ptrs + movl %edi,%edx + andl $4095,%eax // mask down to page offsets + andl $4095,%edx + cmpl %eax,%edx // which is bigger? + cmova %edx,%eax // %eax = max(LHS offset, RHS offset); + movl $4096,%edx + subl %eax,%edx // get #bytes to next page crossing + cmpl %ecx,%edx // will operand run out first? + cmova %ecx,%edx // get min(length remaining, bytes to page end) + movl %edx,%eax + shrl $4,%edx // get #chunks till end of operand or page + jnz LLoopOverChunks // enter vector loop + +// Too near page end for vectors. + + subl %eax,%ecx // adjust length remaining + movl %eax,%edx // %edx <- #bytes to page end + cmpl $(kShort),%ecx // will there be enough after we cross page for vectors? 
+ ja LLoopOverBytes // yes + addl %eax,%ecx // no, restore total length remaining + jmp LShortLoop // compare rest byte-by-byte (%ecx != 0) + + +// Loop over 16-byte chunks. +// %esi = LHS ptr +// %edi = RHS ptr +// %ecx = length remaining +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%esi),%xmm1 // get LHS + movdqu (%edi),%xmm2 // get RHS + pxor %xmm0,%xmm0 // get some 0s in the shadow of the loads + addl $16,%esi + pcmpeqb %xmm1,%xmm2 // compare LHS to RHS + pcmpeqb %xmm1,%xmm0 // compare LHS to 0s + addl $16,%edi + pmovmskb %xmm2,%eax // get result mask for comparison of LHS and RHS + pmovmskb %xmm0,%ebx // get result mask for 0 check + subl $16,%ecx // decrement length remaining + xorl $0xFFFF,%eax // complement compare mask so 1 means "not equal" + orl %ebx,%eax // combine the masks and check for 1-bits + jnz LFoundDiffOr0 // we found differing bytes or a 0-byte + dec %edx // more to go? + jnz LLoopOverChunks // yes + + cmpl $(kShort),%ecx // a lot more to compare? + jbe LShort // no + jmp LNotShort // compute distance to next page crossing etc + + +// Found a zero and/or a difference in vector compare. +// %esi = LHS ptr, already advanced by 16 +// %edi = RHS ptr, already advanced by 16 +// %eax = bit n set if bytes n differed or were 0 + +LFoundDiffOr0: + bsf %eax,%edx // which byte differed or was 0? + subl $16,%esi // point to start of vectors while we wait for bit scan + subl $16,%edi + movzb (%esi,%edx),%eax // get LHS byte + movzb (%edi,%edx),%ecx // get RHS byte + popl %ebx + popl %edi + subl %ecx,%eax // compute difference (ie, return value) + popl %esi + ret diff --git a/i386/string/strncpy.s b/i386/string/strncpy.s new file mode 100644 index 0000000..dced178 --- /dev/null +++ b/i386/string/strncpy.s @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + + +// ***************** +// * S T R N C P Y * +// ***************** +// +// char *strncpy(const char *dst, const char *src, size_t n); +// +// We optimize the move by doing it vector parallel. This introduces +// a complication: if we blindly did vector load/stores until finding +// a 0, we might get a spurious page fault by touching bytes past it. +// To avoid this, we never do a load that crosses a page boundary, +// and never store a byte we don't have to. +// +// We align the destination, because unaligned vector stores are slow. +// +// Recall that strncpy() zero fills the remainder of the dest buffer, +// and does not terminate the string if it's length is greater than or +// equal to n. 
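[Editorial aside: a plain reference model of the strncpy() semantics restated in the comment above, offered as a hedged sketch for clarity rather than as the optimized routine that follows. It copies at most n bytes, stops copying at the NUL, zero-fills the rest of the buffer, and adds no terminator when the source is at least n bytes long.]

#include <stddef.h>

char *strncpy_reference(char *dst, const char *src, size_t n)
{
    size_t i = 0;
    for (; i < n && src[i] != '\0'; i++)
        dst[i] = src[i];
    for (; i < n; i++)            /* zero fill, done below via the commpage bzero() */
        dst[i] = '\0';
    return dst;                   /* the original dst is the return value */
}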
+ +#define kShort 31 // too short to bother with vector loop + + .text + .globl _strncpy + + .align 4 +_strncpy: // char *strncpy(const char *dst, const char *src, size_t n); + pushl %edi + pushl %esi + movl 12(%esp),%edi // get dest ptr + movl 16(%esp),%esi // get source ptr + movl 20(%esp),%ecx // get length + movl %edi,%edx // copy dest ptr + negl %edx + andl $15,%edx // how many bytes to align dest ptr? + jnz LCheckShortCopy // align destination first + + +// In order to avoid spurious page faults, we loop until nearing the source page +// end. Then we revert to a byte-by-byte loop for 16 bytes until the page is crossed, +// then resume the vector loop. +// %esi = source ptr (unaligned) +// %edi = dest ptr (aligned) +// %ecx = buffer length remaining + +LNextChunk: // NB: can drop down to here + movl %esi,%eax // copy source ptr + movl $4096,%edx + andl $4095,%eax // get offset into source page + subl %eax,%edx // get #bytes remaining in source page + cmpl %ecx,%edx // will buffer run out before the page end? + cmova %ecx,%edx // get min(length remaining, bytes to page end) + shrl $4,%edx // get #chunks till end of page + jnz LLoopOverChunks // enter vector loop + +// We can't use the chunk loop yet. Check for short and empty buffers, then use byte loop. + +LCrossPage: // if buffer is large enough, cross source page + movl $16,%edx // move 16 bytes to cross page but keep dest aligned +LCheckShortCopy: // we propose to copy %edx bytes in byte loop + cmpl $(kShort),%ecx // much left? + ja LLoopOverBytes // yes, loop over bytes then more chunks + movl %ecx,%edx // no, use the byte loop for everything + testl %ecx,%ecx // have we filled buffer? + jnz LLoopOverBytes // no + jmp LDone + + +// Loop over bytes. +// %esi = source ptr +// %edi = dest ptr +// %ecx = buffer length remaining +// %edx = count of bytes to loop over (<= buffer length) + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverBytes: + movzb (%esi),%eax // get source byte + inc %esi + dec %ecx // decrement length + movb %al,(%edi) // pack into dest + inc %edi + testl %eax,%eax // 0? + jz LZeroBuffer // yes, we're done copying string + dec %edx // more to go? + jnz LLoopOverBytes + + testl %ecx,%ecx // at end of buffer? + jnz LNextChunk // no, xfer chunks + jmp LDone // yes + + +// Loop over 16-byte chunks. +// %esi = source ptr (unaligned) +// %edi = dest ptr (aligned) +// %ecx = buffer length remaining +// %edx = chunk count + + .align 4,0x90 // align inner loops to optimize I-fetch +LLoopOverChunks: + movdqu (%esi),%xmm1 // get source + pxor %xmm0,%xmm0 // get some 0s + addl $16,%esi + pcmpeqb %xmm1,%xmm0 // compare source to 0s + pmovmskb %xmm0,%eax // get result mask for 0 check + testl %eax,%eax // any 0s? + jnz LFound0 // yes, exit loop + movdqa %xmm1,(%edi) // no 0s so do aligned store into destination + addl $16,%edi + subl $16,%ecx // decrement length remaining + dec %edx // more to go? + jnz LLoopOverChunks + + jmp LCrossPage // cross page but keep dest aligned + + +// Found a zero in the vector. Figure out where it is, and store the bytes +// up to it. It is possible that we should check to be sure (%ecx >= 16), and +// just do an aligned store of %xmm1 if so. But if we did, we'd be doing byte +// stores into the same double quadword in bzero(), which might hit a hazard. +// Experimentation needed. 
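// (Editorial note, not part of the original patch: unlike strcpy's LFound0,
// this path does not store the terminating 0 explicitly -- bsf leaves the
// NUL's index in %edx, only %edx bytes are stored, and the zero fill done by
// LZeroBuffer below covers the terminator along with the rest of the buffer.)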
+// %edi = dest ptr (aligned) +// %eax = result mask +// %ecx = buffer length remaining +// %xmm1 = source vector + +LFound0: + bsf %eax,%edx // find first 0 + subl %edx,%ecx // decrement remaining buffer length + test $8,%dl // 8-byte store required? + jz 4f // no + movq %xmm1,(%edi) // pack in 8 low bytes + psrldq $8,%xmm1 // then shift vector down 8 bytes + addl $8,%edi +4: + test $4,%dl // 4-byte store required? + jz 3f // no + movd %xmm1,(%edi) // pack in 4 low bytes + psrldq $4,%xmm1 // then shift vector down 4 bytes + addl $4,%edi +3: + andl $3,%edx // more to go? + jz LZeroBuffer // no + movd %xmm1,%eax // move remainders out of vector into %eax +1: // loop on up to three bytes + movb %al,(%edi) // pack in next byte + shrl $8,%eax // shift next byte into position + inc %edi + dec %edx + jnz 1b + +// We've copied the string. Now zero the rest of the buffer, using commpage bzero(). +// %edi = dest ptr +// %ecx = buffer length remaining + +LZeroBuffer: + pushl %ecx // remaining buffer size + pushl %edi // ptr to 1st unstored byte + movl $(_COMM_PAGE_BZERO),%eax + call %eax + addl $8,%esp // pop off the arguments + +LDone: + movl 12(%esp),%eax // original dest ptr is return value + popl %esi + popl %edi + ret diff --git a/i386/sys/Makefile.inc b/i386/sys/Makefile.inc index 9535aa1..eee1da7 100644 --- a/i386/sys/Makefile.inc +++ b/i386/sys/Makefile.inc @@ -13,6 +13,7 @@ MDSRCS+= ATPgetreq.s \ __pthread_canceled.s \ __pthread_markcancel.s \ __semwait_signal.s \ + __sysenter_trap.s \ _setjmp.s \ _setlogin.s \ _sysctl.s \ @@ -38,6 +39,7 @@ MDSRCS+= ATPgetreq.s \ chflags.s \ chmod.s \ chown.s \ + commpage.c \ chroot.s \ close.s \ connect.s \ @@ -92,6 +94,9 @@ MDSRCS+= ATPgetreq.s \ getsockopt.s \ getuid.s \ getxattr.s \ + i386_gettimeofday.s \ + i386_get_ldt.s \ + i386_set_ldt.s \ ioctl.s \ issetugid.s \ kevent.s \ diff --git a/i386/sys/OSAtomic.s b/i386/sys/OSAtomic.s index f43b40b..c6dd151 100644 --- a/i386/sys/OSAtomic.s +++ b/i386/sys/OSAtomic.s @@ -117,6 +117,11 @@ DECLARE(_OSAtomicAdd64) DECLARE(_OSAtomicTestAndSet) movl 4(%esp), %eax movl 8(%esp), %edx + movl %eax, %ecx + andl $-8, %ecx + notl %eax + andl $7, %eax + orl %ecx, %eax call *_COMM_PAGE_BTS setc %al ret @@ -124,6 +129,11 @@ DECLARE(_OSAtomicTestAndSet) DECLARE(_OSAtomicTestAndClear) movl 4(%esp), %eax movl 8(%esp), %edx + movl %eax, %ecx + andl $-8, %ecx + notl %eax + andl $7, %eax + orl %ecx, %eax call *_COMM_PAGE_BTC setc %al ret diff --git a/i386/sys/SYS.h b/i386/sys/SYS.h index 83ac264..12de2b4 100644 --- a/i386/sys/SYS.h +++ b/i386/sys/SYS.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -40,10 +40,13 @@ #include #include +/* + * We have two entry points. int's is used for syscalls which need to preserve + * %ecx across the call, or return a 64-bit value in %eax:%edx. sysenter is used + * for the majority of syscalls which just return a value in %eax. 
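 * (Editorial addition, hedged: judging from the __sysenter_trap stub added
 * later in this patch -- popl %edx ; movl %esp, %ecx ; sysenter -- the
 * sysenter path hands the user return address to the kernel in %edx and the
 * user stack pointer in %ecx, which is why %ecx cannot be preserved and why
 * %ecx-preserving or 64-bit-returning calls keep using the old trap gate.)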
+ */ -#define UNIX_SYSCALL_TRAP lcall $0x2b, $0 -#define MACHDEP_SYSCALL_TRAP lcall $0x7, $0 - +#define UNIX_SYSCALL_SYSENTER SYSENTER_PAD call __sysenter_trap /* * This is the same as UNIX_SYSCALL, but it can call an alternate error @@ -53,13 +56,22 @@ .globl error_ret ;\ LEAF(_##name, 0) ;\ movl $ SYS_##name, %eax ;\ - UNIX_SYSCALL_TRAP ;\ + UNIX_SYSCALL_SYSENTER ;\ jnb 2f ;\ BRANCH_EXTERN(error_ret) ;\ 2: #define UNIX_SYSCALL(name, nargs) \ .globl cerror ;\ +LEAF(_##name, 0) ;\ + movl $ SYS_##name, %eax ;\ + UNIX_SYSCALL_SYSENTER ;\ + jnb 2f ;\ + BRANCH_EXTERN(cerror) ;\ +2: + +#define UNIX_SYSCALL_INT(name, nargs) \ + .globl cerror ;\ LEAF(_##name, 0) ;\ movl $ SYS_##name, %eax ;\ UNIX_SYSCALL_TRAP ;\ @@ -68,6 +80,14 @@ LEAF(_##name, 0) ;\ 2: #define UNIX_SYSCALL_NONAME(name, nargs) \ + .globl cerror ;\ + movl $ SYS_##name, %eax ;\ + UNIX_SYSCALL_SYSENTER ;\ + jnb 2f ;\ + BRANCH_EXTERN(cerror) ;\ +2: + +#define UNIX_SYSCALL_INT_NONAME(name, nargs) \ .globl cerror ;\ movl $ SYS_##name, %eax ;\ UNIX_SYSCALL_TRAP ;\ diff --git a/i386/sys/__sysenter_trap.s b/i386/sys/__sysenter_trap.s new file mode 100644 index 0000000..53c854e --- /dev/null +++ b/i386/sys/__sysenter_trap.s @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +.text +.align 2,0x90 +.private_extern __sysenter_trap +__sysenter_trap: + popl %edx + movl %esp, %ecx + sysenter diff --git a/i386/sys/_setjmp.s b/i386/sys/_setjmp.s index 8513cad..0a09c63 100644 --- a/i386/sys/_setjmp.s +++ b/i386/sys/_setjmp.s @@ -61,6 +61,7 @@ #define JB_FS 64 #define JB_GS 68 +#define SAVE_SEG_REGS 1 LEAF(__setjmp, 0) movl 4(%esp), %ecx // jmp_buf (struct sigcontext *) @@ -122,7 +123,7 @@ LEAF(__longjmp, 0) #if SAVE_SEG_REGS // segment registers mov JB_SS(%ecx), %ss - mov JB_CS(%ecx), %cs + // mov JB_CS(%ecx), %cs // can't set cs? mov JB_DS(%ecx), %ds mov JB_ES(%ecx), %es mov JB_FS(%ecx), %fs diff --git a/i386/sys/cerror.s b/i386/sys/cerror.s index dee3c4a..b071f0a 100644 --- a/i386/sys/cerror.s +++ b/i386/sys/cerror.s @@ -33,9 +33,13 @@ LABEL(cerror_cvt) movl $45, %eax /* Yes; make ENOTSUP for compatibility */ LABEL(cerror) REG_TO_EXTERN(%eax, _errno) - pushl %eax + mov %esp,%edx + andl $0xfffffff0,%esp + subl $16,%esp + movl %edx,4(%esp) + movl %eax,(%esp) CALL_EXTERN(_cthread_set_errno_self) - addl $4,%esp + movl 4(%esp),%esp movl $-1,%eax movl $-1,%edx /* in case a 64-bit value is returned */ ret diff --git a/i386/sys/commpage.c b/i386/sys/commpage.c new file mode 100644 index 0000000..fa1d6fe --- /dev/null +++ b/i386/sys/commpage.c @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. 
All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + diff --git a/i386/sys/fork.s b/i386/sys/fork.s index ca4506e..8dc35d2 100644 --- a/i386/sys/fork.s +++ b/i386/sys/fork.s @@ -25,7 +25,8 @@ */ #include "SYS.h" -LEAF(_fork, 0) +LEAF(_fork, 0) + subl $28, %esp // Align the stack, with 16 bytes of extra padding that we'll need CALL_EXTERN(__cthread_fork_prepare) #if defined(__DYNAMIC__) // Just like __cthread_fork_prepare we need to prevent threads on the child's @@ -36,22 +37,20 @@ LEAF(_fork, 0) LC1: .ascii "__dyld_fork_prepare\0" .text - subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + // Put a pointer to 8(%esp) in 4(%esp) for _dyld_func_lookup to fill in. + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the pointer call 1f 1: popl %eax leal LC1-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_prepare indirectly #endif movl $ SYS_fork,%eax; // code for fork -> eax - UNIX_SYSCALL_TRAP; // do the system call + UNIX_SYSCALL_TRAP // do the system call jnc L1 // jump if CF==0 #if defined(__DYNAMIC__) @@ -63,24 +62,22 @@ LC1: LC2: .ascii "__dyld_fork_parent\0" .text - pushl %eax // save the return value (errno) - subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + movl %eax, 0xc(%esp) // save the return value (errno) + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the pointer call 1f 1: popl %eax leal LC2-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_parent indirectly - popl %eax // restore the return value (errno) + movl 
0xc(%esp), %eax // restore the return value (errno) #endif CALL_EXTERN(cerror) CALL_EXTERN(__cthread_fork_parent) movl $-1,%eax + addl $28, %esp // restore the stack ret L1: @@ -91,7 +88,7 @@ L1: #if defined(__DYNAMIC__) // Here on the child side of the fork we need to tell the dynamic linker that // we have forked. To do this we call __dyld_fork_child in the dyanmic -// linker. But since we can't dynamicly bind anything until this is done we +// linker. But since we can't dynamically bind anything until this is done we // do this by using the private extern __dyld_func_lookup() function to get the // address of __dyld_fork_child (the 'C' code equivlent): // @@ -103,17 +100,14 @@ LC0: .ascii "__dyld_fork_child\0" .text - subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the pointer call 1f 1: popl %eax leal LC0-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_child indirectly #endif xorl %eax, %eax @@ -125,42 +119,38 @@ LC10: .ascii "__dyld_fork_child_final\0" .text - subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the pointer call 1f 1: popl %eax leal LC10-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_child_final indirectly #endif xorl %eax,%eax // zero eax + addl $28, %esp // restore the stack ret //parent here... L2: - push %eax // save pid + movl %eax, 0xc(%esp) // save pid #if defined(__DYNAMIC__) // __dyld_fork_parent() is called by the parent process after a fork syscall. // This releases the dyld lock acquired by __dyld_fork_prepare(). 
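[Editorial aside: a hedged C model of the lookup-and-call pattern this file uses around fork(), shown only for clarity. The prototype below mirrors the historical private _dyld_func_lookup() interface and is an assumption, not part of the patch; the point is that the dyld hook is found by name and invoked through the returned pointer, so nothing has to be bound lazily while dyld's lock is held.]

extern int _dyld_func_lookup(const char *name, void **address);   /* assumed prototype */

static void call_dyld_hook(const char *name)      /* e.g. "__dyld_fork_parent" */
{
    void (*hook)(void) = 0;
    if (_dyld_func_lookup(name, (void **)&hook) && hook != 0)
        hook();                                   /* the "call *%eax" in the assembly */
}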
- subl $4,%esp // allocate space for the address parameter - leal 0(%esp),%eax // get the address of the allocated space - pushl %eax // push the address of the allocated space + leal 0x8(%esp),%eax // get the address where we're going to store the pointer + movl %eax, 0x4(%esp) // copy the address of the allocated space call 1f 1: popl %eax leal LC2-1b(%eax),%eax - pushl %eax // push the name of the function to look up + movl %eax, 0x0(%esp) // copy the name of the function to look up call __dyld_func_lookup - addl $8,%esp // remove parameters to __dyld_func_lookup - movl 0(%esp),%eax // move the value returned in address parameter - addl $4,%esp // deallocate the space for the address param + movl 0x8(%esp),%eax // move the value returned in address parameter call *%eax // call __dyld_fork_parent indirectly #endif CALL_EXTERN_AGAIN(__cthread_fork_parent) - pop %eax + movl 0xc(%esp), %eax // return pid + addl $28, %esp // restore the stack ret diff --git a/i386/sys/getpid.s b/i386/sys/getpid.s index 4a4e4d1..d08bd95 100644 --- a/i386/sys/getpid.s +++ b/i386/sys/getpid.s @@ -59,6 +59,7 @@ LEAF(_getpid, 0) UNIX_SYSCALL_NONAME(getpid, 0) movl %eax, %edx xorl %eax, %eax + GET_CURRENT_PID lock cmpxchgl %edx, __current_pid movl %edx, %eax diff --git a/i386/sys/i386_get_ldt.s b/i386/sys/i386_get_ldt.s new file mode 100644 index 0000000..4a6611f --- /dev/null +++ b/i386/sys/i386_get_ldt.s @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +.text +.globl cerror +LEAF(_i386_get_ldt, 0) + movl $6,%eax + MACHDEP_SYSCALL_TRAP + jnb 2f + BRANCH_EXTERN(cerror) +2: ret diff --git a/i386/sys/i386_gettimeofday.s b/i386/sys/i386_gettimeofday.s new file mode 100644 index 0000000..5de6ca7 --- /dev/null +++ b/i386/sys/i386_gettimeofday.s @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright 1998 Apple Computer, Inc. */ + +#include "SYS.h" + +#define __APPLE_API_PRIVATE +#include +#undef __APPLE_API_PRIVATE + +LABEL(___commpage_gettimeofday) + mov $ _COMM_PAGE_GETTIMEOFDAY,%eax + jmp %eax + +/* + * This syscall is special cased: the timeval is returned in eax/edx. + */ +LABEL(___gettimeofday) + UNIX_SYSCALL_INT_NONAME(gettimeofday,0) + mov 4(%esp),%ecx + mov %eax,(%ecx) + mov %edx,4(%ecx) + xor %eax,%eax + ret diff --git a/i386/sys/i386_set_ldt.s b/i386/sys/i386_set_ldt.s new file mode 100644 index 0000000..3eeb2a5 --- /dev/null +++ b/i386/sys/i386_set_ldt.s @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +.text +.globl cerror +LEAF(_i386_set_ldt, 0) + movl $5,%eax + MACHDEP_SYSCALL_TRAP + jnb 2f + BRANCH_EXTERN(cerror) +2: ret diff --git a/i386/sys/lseek.s b/i386/sys/lseek.s index dfa0b22..8dfc808 100644 --- a/i386/sys/lseek.s +++ b/i386/sys/lseek.s @@ -25,5 +25,5 @@ */ #include "SYS.h" -UNIX_SYSCALL(lseek, 3) +UNIX_SYSCALL_INT(lseek, 3) ret diff --git a/i386/sys/pipe.s b/i386/sys/pipe.s index bcb5883..e681240 100644 --- a/i386/sys/pipe.s +++ b/i386/sys/pipe.s @@ -25,7 +25,7 @@ */ #include "SYS.h" -UNIX_SYSCALL(pipe, 0) +UNIX_SYSCALL_INT(pipe, 0) movl 4(%esp),%ecx movl %eax,(%ecx) movl %edx,4(%ecx) diff --git a/i386/sys/setjmp.s b/i386/sys/setjmp.s index 2727458..ac7b28b 100644 --- a/i386/sys/setjmp.s +++ b/i386/sys/setjmp.s @@ -71,85 +71,37 @@ LEAF(_sigsetjmp, 0) movl %ecx, JB_SAVEMASK(%eax) // jmpbuf[_JBLEN] = savemask; cmpl $0, %ecx // if savemask != 0 jne _setjmp // setjmp(jmpbuf); - BRANCH_EXTERN(__setjmp) // else - // _setjmp(jmpbuf); + jmp L_do__setjmp // else _setjmp(jmpbuf); LEAF(_setjmp, 0) + subl $4, %esp // make space for return from sigprocmask + pushl %esp // oset + pushl $0 // set = NULL + pushl $1 // how = SIG_BLOCK + CALL_EXTERN(_sigprocmask) + movl 12(%esp),%eax // save the mask + addl $16, %esp // restore original esp movl 4(%esp), %ecx // jmp_buf (struct sigcontext *) - pushl %ecx // save ecx - - // call sigstack to get the current signal stack - subl $12, %esp // space for return structure - pushl %esp - pushl $0 - CALL_EXTERN(_sigaltstack) - movl 12(%esp), %eax // save stack pointer - movl %eax, JB_ONSTACK(%ecx) - addl $20, %esp - - // call sigblock to get signal mask - pushl $0 - CALL_EXTERN(_sigblock) - addl $4, %esp - popl %ecx // restore ecx movl %eax, JB_MASK(%ecx) - - // now build sigcontext - movl %ebx, JB_EBX(%ecx) - movl %edi, JB_EDI(%ecx) - movl %esi, JB_ESI(%ecx) - movl %ebp, JB_EBP(%ecx) - - // EIP is set to the frame return address value - movl (%esp), %eax - movl %eax, JB_EIP(%ecx) - // ESP is set to the frame return address plus 4 - movl %esp, %eax - addl $4, %eax - movl %eax, JB_ESP(%ecx) - - // segment registers - movl $0, JB_SS(%ecx) - mov %ss, JB_SS(%ecx) - movl $0, JB_CS(%ecx) - mov %cs, JB_CS(%ecx) - movl $0, JB_DS(%ecx) - mov %ds, JB_DS(%ecx) - movl $0, JB_ES(%ecx) - mov %es, JB_ES(%ecx) - movl $0, JB_FS(%ecx) - mov %fs, JB_FS(%ecx) - movl $0, JB_GS(%ecx) - mov %gs, JB_GS(%ecx) - - // save eflags - you can't use movl - pushf - popl %eax - movl %eax, JB_EFLAGS(%ecx) - - // return 0 - xorl %eax, %eax - ret +L_do__setjmp: + BRANCH_EXTERN(__setjmp) LEAF(_siglongjmp, 0) movl 4(%esp), %eax // sigjmp_buf * jmpbuf; cmpl $0, JB_SAVEMASK(%eax) // if jmpbuf[_JBLEN] != 0 jne _longjmp // longjmp(jmpbuf, var); - BRANCH_EXTERN(__longjmp) // else - // _longjmp(jmpbuf, var); + jmp L_do__longjmp // else _longjmp(jmpbuf, var); LEAF(_longjmp, 0) - subl $2,%esp - fnstcw (%esp) // save FP control word - fninit // reset FP coprocessor - fldcw (%esp) // restore FP control word - addl $2,%esp - movl 4(%esp), %eax // address of jmp_buf (saved context) - movl 8(%esp), %edx // return value - movl %edx, JB_EAX(%eax) // return value into saved context - movl $ SYS_sigreturn, %eax // sigreturn system call - UNIX_SYSCALL_TRAP - addl $8, %esp - CALL_EXTERN(_longjmperror) - CALL_EXTERN(_abort) + movl 4(%esp), %ecx // address of jmp_buf (saved context) + movl JB_MASK(%ecx),%eax // get the mask + pushl %eax // store the mask + movl %esp, %edx // save the address where we stored the mask + pushl $0 // oset = 
NULL + pushl %edx // set + pushl $3 // how = SIG_SETMASK + CALL_EXTERN_AGAIN(_sigprocmask) + addl $16, %esp // restore original esp +L_do__longjmp: + BRANCH_EXTERN(__longjmp) // else END(_longjmp) diff --git a/i386/sys/sigaltstack.s b/i386/sys/sigaltstack.s index baa6979..724180a 100644 --- a/i386/sys/sigaltstack.s +++ b/i386/sys/sigaltstack.s @@ -25,5 +25,5 @@ */ #include "SYS.h" -UNIX_SYSCALL(sigaltstack, 3) +UNIX_SYSCALL_INT(sigaltstack, 3) ret diff --git a/i386/sys/sigreturn.s b/i386/sys/sigreturn.s index 9453116..76560ee 100644 --- a/i386/sys/sigreturn.s +++ b/i386/sys/sigreturn.s @@ -25,5 +25,5 @@ */ #include "SYS.h" -UNIX_SYSCALL(sigreturn, 1) +UNIX_SYSCALL_INT(sigreturn, 2) ret diff --git a/include/sys/acl.h b/include/sys/acl.h index 55e82d0..15ef22b 100644 --- a/include/sys/acl.h +++ b/include/sys/acl.h @@ -161,10 +161,12 @@ extern int acl_set_link(const char *path_p, acl_type_t type, acl_t acl); /* 23.1.6.4 ACL Format translation */ extern ssize_t acl_copy_ext(void *buf_p, acl_t acl, ssize_t size); +extern ssize_t acl_copy_ext_native(void *buf_p, acl_t acl, ssize_t size); extern acl_t acl_copy_int(const void *buf_p); +extern acl_t acl_copy_int_native(const void *buf_p); extern acl_t acl_from_text(const char *buf_p); extern ssize_t acl_size(acl_t acl); extern char *acl_to_text(acl_t acl, ssize_t *len_p); __END_DECLS -#endif _SYS_ACL_H +#endif /* _SYS_ACL_H */ diff --git a/mach/panic.c b/mach/panic.c index b6e91a2..363739f 100644 --- a/mach/panic.c +++ b/mach/panic.c @@ -77,4 +77,7 @@ panic(const char *s, ...) #define RB_DEBUGGER 0x1000 /* enter debugger NOW */ (void) host_reboot(master_host_port, RB_DEBUGGER); + + /* 4279008 - don't return */ + abort(); } diff --git a/posix1e/acl_translate.c b/posix1e/acl_translate.c index 25f4c8b..960a6d1 100644 --- a/posix1e/acl_translate.c +++ b/posix1e/acl_translate.c @@ -33,8 +33,19 @@ #include #include +#include + #include "aclvar.h" +/* + * NOTE: the copy_int/copy_ext functions are duplicated here, one version of each for + * each of native and portable endianity. A more elegant solution might be called for + * if the functions become much more complicated. 
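 * (Editorial addition, hedged: the portable forms below write the filesec
 * header and ACE fields in big-endian order via OSSwapHostToBigInt32(), so a
 * blob produced on a little-endian machine is byte-swapped on output and
 * swapped back by acl_copy_int(); the *_native variants keep host byte order.
 * For example, on a little-endian host the portable form stores the value
 * 0x11223344 as the bytes 11 22 33 44, whereas the native form stores
 * 44 33 22 11.)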
+ */ + +/* + * acl_t -> external representation, portable endianity + */ ssize_t acl_copy_ext(void *buf, acl_t acl, ssize_t size) { @@ -51,6 +62,42 @@ acl_copy_ext(void *buf, acl_t acl, ssize_t size) return(-1); } + /* export the header */ + ext->fsec_magic = OSSwapHostToBigInt32(KAUTH_FILESEC_MAGIC); + ext->fsec_entrycount = OSSwapHostToBigInt32(acl->a_entries); + ext->fsec_flags = OSSwapHostToBigInt32(acl->a_flags); + + /* copy ACEs */ + for (i = 0; i < acl->a_entries; i++) { + /* ACE contents are almost identical */ + ext->fsec_ace[i].ace_applicable = acl->a_ace[i].ae_applicable; + ext->fsec_ace[i].ace_flags = + OSSwapHostToBigInt32((acl->a_ace[i].ae_tag & KAUTH_ACE_KINDMASK) | (acl->a_ace[i].ae_flags & ~KAUTH_ACE_KINDMASK)); + ext->fsec_ace[i].ace_rights = OSSwapHostToBigInt32(acl->a_ace[i].ae_perms); + } + + return(reqsize); +} + +/* + * acl_t -> external representation, native system endianity + */ +ssize_t +acl_copy_ext_native(void *buf, acl_t acl, ssize_t size) +{ + struct kauth_filesec *ext = (struct kauth_filesec *)buf; + ssize_t reqsize; + int i; + + /* validate arguments, compute required size */ + reqsize = acl_size(acl); + if (reqsize < 0) + return(-1); + if (reqsize > size) { + errno = ERANGE; + return(-1); + } + /* export the header */ ext->fsec_magic = KAUTH_FILESEC_MAGIC; ext->fsec_entrycount = acl->a_entries; @@ -70,6 +117,11 @@ acl_copy_ext(void *buf, acl_t acl, ssize_t size) return(reqsize); } +/* + * external representation, portable system endianity -> acl_t + * + * Unlike acl_copy_ext, we can't mung the buffer as it doesn't belong to us. + */ acl_t acl_copy_int(const void *buf) { @@ -77,6 +129,38 @@ acl_copy_int(const void *buf) acl_t ap; int i; + if (ext->fsec_magic != OSSwapHostToBigInt32(KAUTH_FILESEC_MAGIC)) { + errno = EINVAL; + return(NULL); + } + + if ((ap = acl_init(OSSwapBigToHostInt32(ext->fsec_entrycount))) != NULL) { + /* copy useful header fields */ + ap->a_flags = OSSwapBigToHostInt32(ext->fsec_flags); + ap->a_entries = OSSwapBigToHostInt32(ext->fsec_entrycount); + /* copy ACEs */ + for (i = 0; i < ap->a_entries; i++) { + /* ACE contents are literally identical */ + ap->a_ace[i].ae_magic = _ACL_ENTRY_MAGIC; + ap->a_ace[i].ae_applicable = ext->fsec_ace[i].ace_applicable; + ap->a_ace[i].ae_flags = OSSwapBigToHostInt32(ext->fsec_ace[i].ace_flags) & ~KAUTH_ACE_KINDMASK; + ap->a_ace[i].ae_tag = OSSwapBigToHostInt32(ext->fsec_ace[i].ace_flags) & KAUTH_ACE_KINDMASK; + ap->a_ace[i].ae_perms = OSSwapBigToHostInt32(ext->fsec_ace[i].ace_rights); + } + } + return(ap); +} + +/* + * external representation, native system endianity -> acl_t + */ +acl_t +acl_copy_int_native(const void *buf) +{ + struct kauth_filesec *ext = (struct kauth_filesec *)buf; + acl_t ap; + int i; + if (ext->fsec_magic != KAUTH_FILESEC_MAGIC) { errno = EINVAL; return(NULL); @@ -89,9 +173,6 @@ acl_copy_int(const void *buf) /* copy ACEs */ for (i = 0; i < ap->a_entries; i++) { /* ACE contents are literally identical */ -/* XXX Consider writing the magic out to the persistent store - * to detect corruption - */ ap->a_ace[i].ae_magic = _ACL_ENTRY_MAGIC; ap->a_ace[i].ae_applicable = ext->fsec_ace[i].ace_applicable; ap->a_ace[i].ae_flags = ext->fsec_ace[i].ace_flags & ~KAUTH_ACE_KINDMASK; diff --git a/ppc/sys/ppc_gettimeofday.s b/ppc/sys/ppc_gettimeofday.s index e1dab52..9d84417 100644 --- a/ppc/sys/ppc_gettimeofday.s +++ b/ppc/sys/ppc_gettimeofday.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2005 Apple Computer, Inc. 
All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -36,14 +36,11 @@ MI_ENTRY_POINT(___commpage_gettimeofday) * Note also that the "seconds" field of the timeval is a long, so * it's size is mode dependent. */ -MI_ENTRY_POINT(___ppc_gettimeofday) +MI_ENTRY_POINT(___gettimeofday) mr r12,r3 // save ptr to timeval SYSCALL_NONAME(gettimeofday,0) - mr. r12,r12 // was timeval ptr null? - beq 3f stg r3,0(r12) // "stw" in 32-bit mode, "std" in 64-bit mode stw r4,GPR_BYTES(r12) li r3,0 -3: blr diff --git a/pthreads/lock.s b/pthreads/lock.s index 8658051..6ff476f 100644 --- a/pthreads/lock.s +++ b/pthreads/lock.s @@ -95,9 +95,7 @@ END(__spin_unlock) TEXT ALIGN -.globl _spin_lock_try LEAF(__spin_lock_try, 0) -_spin_lock_try: movl $(_COMM_PAGE_SPINLOCK_TRY), %eax jmpl %eax diff --git a/stdio/FreeBSD/printf.3.patch b/stdio/FreeBSD/printf.3.patch index a8da314..ca5226e 100644 --- a/stdio/FreeBSD/printf.3.patch +++ b/stdio/FreeBSD/printf.3.patch @@ -1,5 +1,5 @@ ---- printf.3.orig Fri Mar 11 17:08:43 2005 -+++ printf.3 Fri Mar 11 17:04:50 2005 +--- printf.3.orig 2004-11-25 11:38:35.000000000 -0800 ++++ printf.3 2005-08-09 22:37:08.000000000 -0700 @@ -101,6 +101,12 @@ dynamically allocate a new string with .Xr malloc 3 . @@ -19,10 +19,10 @@ .It +An optional separator character ( +.Cm \ , | \; | \ : | _ -+) used for separating multiple values when printing an AltiVec vector, ++) used for separating multiple values when printing an AltiVec or SSE vector, +or other multi-value unit. +.Pp -+NOTE: This is an AltiVec only extension onto the ++NOTE: This is an extension to the +.Fn printf +specification. +Behaviour of these values for @@ -34,13 +34,13 @@ An optional decimal digit string specifying a minimum field width. If the converted value has fewer characters than the field width, it will be padded with spaces on the left (or right, if the left-adjustment -@@ -379,6 +399,28 @@ +@@ -379,6 +399,34 @@ .It Sy Modifier Ta Cm c Ta Cm s .It Cm l No (ell) Ta Vt wint_t Ta Vt "wchar_t *" .El +.Pp +The AltiVec Technology Programming Interface Manual also defines five additional length modifiers -+which can be used (in place of the conventional length modifiers) for the printing of AltiVec vectors: ++which can be used (in place of the conventional length modifiers) for the printing of AltiVec or SSE vectors: +.Bl -tag -compact +.It Cm v +Treat the argument as a vector value, unit length will be determined by the conversion @@ -52,7 +52,7 @@ +Treat the argument as a vector of 4 32-bit units. +.El +.Pp -+NOTE: The vector length specifiers are AltiVec only extensions onto the ++NOTE: The vector length specifiers are extensions to the +.Fn printf +specification. +Behaviour of these values for @@ -60,10 +60,16 @@ +is only defined for operating systems conforming to the +AltiVec Technology Programming Interface Manual. +(At time of writing this includes only Mac OS X 10.2 and later.) ++.Pp ++As a further extension, for SSE2 64-bit units: ++.Bl -tag -compact ++.It Cm vll, llv ++Treat the argument as a vector of 2 64-bit units. ++.El .It A character that specifies the type of conversion to be applied. 
.El -@@ -792,12 +834,8 @@ +@@ -792,12 +840,8 @@ .Xr fmtcheck 3 , .Xr scanf 3 , .Xr setlocale 3 , diff --git a/stdio/FreeBSD/vfprintf.c.patch b/stdio/FreeBSD/vfprintf.c.patch index 82cc3c6..88df771 100644 --- a/stdio/FreeBSD/vfprintf.c.patch +++ b/stdio/FreeBSD/vfprintf.c.patch @@ -1,5 +1,5 @@ --- vfprintf.c.orig 2004-11-25 11:38:35.000000000 -0800 -+++ vfprintf.c 2005-02-24 15:16:20.000000000 -0800 ++++ vfprintf.c 2005-11-08 22:43:11.000000000 -0800 @@ -40,6 +40,8 @@ #include __FBSDID("$FreeBSD: src/lib/libc/stdio/vfprintf.c,v 1.68 2004/08/26 06:25:28 des Exp $"); @@ -17,24 +17,25 @@ #include #include "un-namespace.h" -@@ -66,6 +69,12 @@ +@@ -66,6 +69,13 @@ #include "local.h" #include "fvwrite.h" -+#ifdef ALTIVEC -+#include -+ -+#define VECTORTYPE vector unsigned char -+#endif /* ALTIVEC */ ++#ifdef VECTORS ++typedef __attribute__ ((vector_size(16))) unsigned char VECTORTYPE; ++#ifdef __SSE2__ ++#define V64TYPE ++#endif /* __SSE2__ */ ++#endif /* VECTORS */ + union arg { int intarg; u_int uintarg; -@@ -93,6 +102,16 @@ +@@ -93,6 +103,21 @@ #endif wint_t wintarg; wchar_t *pwchararg; -+#ifdef ALTIVEC ++#ifdef VECTORS + VECTORTYPE vectorarg; + unsigned char vuchararg[16]; + signed char vchararg[16]; @@ -43,19 +44,24 @@ + unsigned int vuintarg[4]; + signed int vintarg[4]; + float vfloatarg[4]; -+#endif /* ALTIVEC */ ++#ifdef V64TYPE ++ double vdoublearg[2]; ++ unsigned long long vulonglongarg[2]; ++ long long vlonglongarg[2]; ++#endif /* V64TYPE */ ++#endif /* VECTORS */ }; /* -@@ -103,19 +122,56 @@ +@@ -103,16 +128,20 @@ T_LONG, T_U_LONG, TP_LONG, T_LLONG, T_U_LLONG, TP_LLONG, T_PTRDIFFT, TP_PTRDIFFT, T_SIZET, TP_SIZET, T_INTMAXT, T_UINTMAXT, TP_INTMAXT, TP_VOID, TP_CHAR, TP_SCHAR, -+#ifdef ALTIVEC ++#ifdef VECTORS + T_DOUBLE, T_LONG_DOUBLE, T_WINT, TP_WCHAR, T_VECTOR -+#else /* ! ALTIVEC */ ++#else /* ! VECTORS */ T_DOUBLE, T_LONG_DOUBLE, T_WINT, TP_WCHAR -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ }; static int __sprint(FILE *, struct __suio *); @@ -70,43 +76,7 @@ static void __find_arguments(const char *, va_list, union arg **); static void __grow_type_table(int, enum typeid **, int *); -+ /* -+ * Get the argument indexed by nextarg. If the argument table is -+ * built, use it to get the argument. If its not, get the next -+ * argument (and arguments must be gotten sequentially). -+ */ -+#define GETARG(type) \ -+ ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ -+ (nextarg++, va_arg(ap, type))) -+ -+#ifdef ALTIVEC -+#define hasAltivec (_cpu_capabilities & kHasAltivec) -+/*----------------------------------------------------------------------- -+ * getvec() must be a real subroutine. If it is a #define, then __vfprintf() -+ * would have its calling sequence changed by Altivec so that a non-Altivec -+ * processor would crash on illegal instruction. By isolating the calling -+ * sequence in getvec(), __vprintf() is callable by a non-Altivec processor. -+ *-----------------------------------------------------------------------*/ -+static va_list getvec(union arg *, const union arg *, int, va_list) __attribute__((noinline)); -+ -+static va_list -+getvec(union arg *dst, const union arg *argtable, int nextarg, va_list ap) -+{ -+ dst->vectorarg = GETARG(VECTORTYPE); -+ return ap; -+} -+ -+#define SETVEC(dst) \ -+{ \ -+ ap = getvec(&dst, argtable, nextarg, ap); \ -+ nextarg++; \ -+} -+#endif /* ALTIVEC */ -+ - /* - * Flush out all the vectors defined by the given uio, - * then reset it so that it can be reused. 
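/*
 * Editor's note -- illustrative sketch, not part of the patch. The hunks
 * above (and the printf.3 changes before them) replace the PPC-only
 * ALTIVEC/getvec() machinery with a generic VECTORS path: a 16-byte
 * VECTORTYPE built with GCC's vector_size attribute, overlaid in union arg
 * with per-element views (16 chars, 8 shorts, 4 ints, 4 floats, plus two
 * 64-bit lanes under __SSE2__). The standalone program below demonstrates
 * only that overlay idea; the names demo_vec/demo_arg are hypothetical and
 * nothing here relies on the %v printf extension itself.
 */
#include <stdio.h>

/* 16-byte vector type, same construction as the patch's VECTORTYPE */
typedef __attribute__((vector_size(16))) unsigned char demo_vec;

/* Mirrors the vector members added to union arg */
union demo_arg {
	demo_vec	vectorarg;
	unsigned char	vuchararg[16];
	unsigned short	vushortarg[8];
	unsigned int	vuintarg[4];
	float		vfloatarg[4];
};

int
main(void)
{
	union demo_arg v;
	int i;

	for (i = 0; i < 16; i++)	/* fill the 16 byte lanes */
		v.vuchararg[i] = (unsigned char)i;

	/*
	 * Print the four 32-bit lanes separated by spaces, roughly what a
	 * "%vlu"-style conversion with the default separator would produce.
	 */
	for (i = 0; i < 4; i++)
		printf("%u%c", v.vuintarg[i], i == 3 ? '\n' : ' ');
	return (0);
}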
-@@ -141,7 +197,7 @@ +@@ -141,7 +170,7 @@ * worries about ungetc buffers and so forth. */ static int @@ -115,7 +85,7 @@ { int ret; FILE fake; -@@ -160,7 +216,7 @@ +@@ -160,7 +189,7 @@ fake._lbfsize = 0; /* not actually used, but Just In Case */ /* do the work, then copy any error status */ @@ -124,7 +94,7 @@ if (ret >= 0 && __fflush(&fake)) ret = EOF; if (fake._flags & __SERR) -@@ -336,7 +392,7 @@ +@@ -336,7 +365,7 @@ * that the wide char. string ends in a null character. */ static char * @@ -133,7 +103,7 @@ { static const mbstate_t initial; mbstate_t mbs; -@@ -354,7 +410,7 @@ +@@ -354,7 +383,7 @@ p = wcsarg; mbs = initial; for (;;) { @@ -142,7 +112,7 @@ if (clen == 0 || clen == (size_t)-1 || nbytes + clen > prec) break; -@@ -363,7 +419,7 @@ +@@ -363,7 +392,7 @@ } else { p = wcsarg; mbs = initial; @@ -151,7 +121,7 @@ if (nbytes == (size_t)-1) return (NULL); } -@@ -378,7 +434,7 @@ +@@ -378,7 +407,7 @@ p = wcsarg; mbs = initial; while (mbp - convbuf < nbytes) { @@ -160,7 +130,7 @@ if (clen == 0 || clen == (size_t)-1) break; mbp += clen; -@@ -402,7 +458,21 @@ +@@ -402,7 +431,21 @@ int ret; FLOCKFILE(fp); @@ -183,13 +153,13 @@ FUNLOCKFILE(fp); return (ret); } -@@ -451,12 +521,15 @@ +@@ -451,12 +494,15 @@ #define PTRDIFFT 0x800 /* ptrdiff_t */ #define INTMAXT 0x1000 /* intmax_t */ #define CHARINT 0x2000 /* print char using int format */ -+#ifdef ALTIVEC -+#define VECTOR 0x4000 /* Altivec vector */ -+#endif /* ALTIVEC */ ++#ifdef VECTORS ++#define VECTOR 0x4000 /* Altivec or SSE vector */ ++#endif /* VECTORS */ /* * Non-MT-safe version @@ -201,11 +171,11 @@ { char *fmt; /* format string */ int ch; /* character from fmt */ -@@ -502,6 +575,11 @@ +@@ -502,6 +548,11 @@ int nseps; /* number of group separators with ' */ int nrepeats; /* number of repeats of the last group */ #endif -+#ifdef ALTIVEC ++#ifdef VECTORS + union arg vval; /* Vector argument. */ + char *pct; /* Pointer to '%' at beginning of specifier. */ + char vsep; /* Vector separator character. */ @@ -213,23 +183,7 @@ u_long ulval; /* integer arguments %[diouxX] */ uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */ int base; /* base for [diouxX] conversion */ -@@ -574,15 +652,6 @@ - } - - /* -- * Get the argument indexed by nextarg. If the argument table is -- * built, use it to get the argument. If its not, get the next -- * argument (and arguments must be gotten sequentially). -- */ --#define GETARG(type) \ -- ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ -- (nextarg++, va_arg(ap, type))) -- -- /* - * To extend shorts properly, we need both signed and unsigned - * argument extraction methods. - */ -@@ -633,22 +702,23 @@ +@@ -633,22 +684,23 @@ val = GETARG (int); \ } @@ -257,39 +211,39 @@ fmt = (char *)fmt0; argtable = NULL; -@@ -675,6 +745,9 @@ +@@ -675,6 +727,9 @@ } if (ch == '\0') goto done; -+#ifdef ALTIVEC ++#ifdef VECTORS + pct = fmt; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ fmt++; /* skip over '%' */ flags = 0; -@@ -683,6 +756,9 @@ +@@ -683,6 +738,9 @@ prec = -1; sign = '\0'; ox[1] = '\0'; -+#ifdef ALTIVEC ++#ifdef VECTORS + vsep = 'X'; /* Illegal value, changed to defaults later. 
*/ -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ rflag: ch = *fmt++; reswitch: switch (ch) { -@@ -698,6 +774,11 @@ +@@ -698,6 +756,11 @@ case '#': flags |= ALT; goto rflag; -+#ifdef ALTIVEC ++#ifdef VECTORS + case ',': case ';': case ':': case '_': + vsep = ch; + goto rflag; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ case '*': /*- * ``A negative field width argument is taken as a -@@ -718,8 +799,8 @@ +@@ -718,8 +781,8 @@ goto rflag; case '\'': flags |= GROUPING; @@ -300,16 +254,14 @@ goto rflag; case '.': if ((ch = *fmt++) == '*') { -@@ -793,14 +874,20 @@ +@@ -793,14 +856,18 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'c': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & LONGINT) { static const mbstate_t initial; mbstate_t mbs; @@ -323,34 +275,31 @@ if (mbseqlen == (size_t)-1) { fp->_flags |= __SERR; goto error; -@@ -817,6 +904,12 @@ +@@ -817,6 +884,10 @@ /*FALLTHROUGH*/ case 'd': case 'i': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) { ujval = SJARG(); if ((intmax_t)ujval < 0) { -@@ -835,6 +928,13 @@ +@@ -835,6 +906,12 @@ #ifndef NO_FLOATING_POINT case 'a': case 'A': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (ch == 'a') { ox[1] = 'x'; xdigs = xdigs_lower; -@@ -848,6 +948,12 @@ +@@ -848,6 +925,12 @@ prec++; if (dtoaresult != NULL) freedtoa(dtoaresult); @@ -363,7 +312,7 @@ if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = cp = -@@ -859,6 +965,7 @@ +@@ -859,6 +942,7 @@ __hdtoa(fparg.dbl, xdigs, prec, &expt, &signflag, &dtoaend); } @@ -371,46 +320,43 @@ if (prec < 0) prec = dtoaend - cp; if (expt == INT_MAX) -@@ -866,6 +973,13 @@ +@@ -866,6 +950,12 @@ goto fp_common; case 'e': case 'E': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = ch; if (prec < 0) /* account for digit before decpt */ prec = DEFPREC + 1; -@@ -874,10 +988,24 @@ +@@ -874,10 +964,22 @@ goto fp_begin; case 'f': case 'F': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = '\0'; goto fp_begin; case 'g': case 'G': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = ch - ('g' - 'e'); if (prec == 0) prec = 1; -@@ -886,6 +1014,14 @@ +@@ -886,6 +988,14 @@ prec = DEFPREC; if (dtoaresult != NULL) freedtoa(dtoaresult); @@ -425,7 +371,7 @@ if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = cp = -@@ -899,6 +1035,7 @@ +@@ -899,6 +1009,7 @@ if (expt == 9999) expt = INT_MAX; } @@ -433,33 +379,29 @@ fp_common: if (signflag) sign = '-'; -@@ -993,6 +1130,12 @@ +@@ -993,6 +1104,10 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'o': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1007,6 +1150,12 @@ +@@ -1007,6 +1122,10 @@ * defined manner.'' * -- ANSI X3J11 */ -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + 
break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ujval = (uintmax_t)(uintptr_t)GETARG(void *); base = 16; xdigs = xdigs_lower; -@@ -1025,7 +1174,7 @@ +@@ -1025,7 +1144,7 @@ if ((wcp = GETARG(wchar_t *)) == NULL) cp = "(null)"; else { @@ -468,52 +410,45 @@ if (convbuf == NULL) { fp->_flags |= __SERR; goto error; -@@ -1056,6 +1205,12 @@ +@@ -1056,6 +1175,10 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'u': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1068,6 +1223,12 @@ +@@ -1068,6 +1191,10 @@ case 'x': xdigs = xdigs_lower; hex: -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1112,6 +1273,14 @@ +@@ -1112,6 +1239,11 @@ if (size > BUF) /* should never happen */ abort(); break; -+#ifdef ALTIVEC ++#ifdef VECTORS + case 'v': -+ if (hasAltivec) { -+ flags |= VECTOR; -+ goto rflag; -+ } -+ /* drap through */ -+#endif /* ALTIVEC */ ++ flags |= VECTOR; ++ goto rflag; ++#endif /* VECTORS */ default: /* "%?" prints ?, unless ? is NUL */ if (ch == '\0') goto done; -@@ -1123,6 +1292,184 @@ +@@ -1123,6 +1255,290 @@ break; } -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + /* + * Do the minimum amount of work necessary to construct @@ -523,24 +458,32 @@ + int i, j; /* Counter. */ + int vcnt; /* Number of elements in vector. */ + char *vfmt; /* Pointer to format specifier. */ -+ char vfmt_buf[32]; /* Static buffer for format spec. */ ++#define EXTRAHH 2 ++ char vfmt_buf[32 + EXTRAHH]; /* Static buffer for format spec. */ + int vwidth = 0; /* Width specified via '*'. */ + int vprec = 0; /* Precision specified via '*'. */ -+ union { /* Element. */ -+ int i; -+ float f; -+ } velm; + char *vstr; /* Used for asprintf(). */ + int vlen; /* Length returned by asprintf(). */ ++ enum { ++ V_CHAR, V_SHORT, V_INT, ++ V_PCHAR, V_PSHORT, V_PINT, ++ V_FLOAT, ++#ifdef V64TYPE ++ V_LONGLONG, V_PLONGLONG, ++ V_DOUBLE, ++#endif /* V64TYPE */ ++ } vtype; + ++ vval.vectorarg = GETARG(VECTORTYPE); + /* + * Set vfmt. If vfmt_buf may not be big enough, + * malloc() space, taking care to free it later. ++ * (EXTRAHH is for possible extra "hh") + */ -+ if (&fmt[-1] - pct < sizeof(vfmt_buf)) ++ if (&fmt[-1] - pct + EXTRAHH < sizeof(vfmt_buf)) + vfmt = vfmt_buf; + else -+ vfmt = (char *)malloc(&fmt[-1] - pct + 1); ++ vfmt = (char *)malloc(&fmt[-1] - pct + EXTRAHH + 1); + + /* Set the separator character, if not specified. */ + if (vsep == 'X') { @@ -573,13 +516,57 @@ + * finish up the format specifier. + */ + if (flags & SHORTINT) { -+ if (ch != 'c') ++ switch (ch) { ++ case 'c': ++ vtype = V_SHORT; ++ break; ++ case 'p': ++ vtype = V_PSHORT; ++ break; ++ default: + vfmt[j++] = 'h'; ++ vtype = V_SHORT; ++ break; ++ } + vcnt = 8; + } else if (flags & LONGINT) { -+ if (ch != 'c') -+ vfmt[j++] = 'l'; + vcnt = 4; ++ vtype = (ch == 'p') ? V_PINT : V_INT; ++#ifdef V64TYPE ++ } else if (flags & LLONGINT) { ++ switch (ch) { ++ case 'a': ++ case 'A': ++ case 'e': ++ case 'E': ++ case 'f': ++ case 'g': ++ case 'G': ++ vcnt = 2; ++ vtype = V_DOUBLE; ++ break; ++ case 'd': ++ case 'i': ++ case 'u': ++ case 'o': ++ case 'p': ++ case 'x': ++ case 'X': ++ vfmt[j++] = 'l'; ++ vfmt[j++] = 'l'; ++ vcnt = 2; ++ vtype = (ch == 'p') ? 
V_PLONGLONG : V_LONGLONG; ++ break; ++ default: ++ /* ++ * The default case should never ++ * happen. ++ */ ++ case 'c': ++ vcnt = 16; ++ vtype = V_CHAR; ++ } ++#endif /* V64TYPE */ + } else { + switch (ch) { + case 'a': @@ -590,96 +577,150 @@ + case 'g': + case 'G': + vcnt = 4; ++ vtype = V_FLOAT; + break; + default: + /* + * The default case should never + * happen. + */ -+ case 'c': + case 'd': + case 'i': + case 'u': + case 'o': -+ case 'p': + case 'x': + case 'X': ++ vfmt[j++] = 'h'; ++ vfmt[j++] = 'h'; ++ /* drop through */ ++ case 'p': ++ case 'c': + vcnt = 16; ++ vtype = (ch == 'p') ? V_PCHAR : V_CHAR; + } + } + vfmt[j++] = ch; + vfmt[j++] = '\0'; + +/* Get a vector element. */ -+#define VPRINT(cnt, ind, args...) do { \ -+ if (flags & FPT) { \ -+ velm.f = vval.vfloatarg[ind]; \ -+ vlen = asprintf_l(&vstr, loc, vfmt , ## args, velm.f); \ -+ } else { \ -+ switch (cnt) { \ -+ default: \ -+ /* The default case should never happen. */ \ -+ case 4: \ -+ velm.i = (unsigned)vval.vintarg[ind]; \ -+ break; \ -+ case 8: \ -+ velm.i = (unsigned short)vval.vshortarg[ind]; \ -+ break; \ -+ case 16: \ -+ velm.i = (unsigned char)vval.vchararg[ind]; \ -+ break; \ -+ } \ -+ vlen = asprintf_l(&vstr, loc, vfmt , ## args, velm.i); \ ++#ifdef V64TYPE ++#define VPRINT(type, ind, args...) do { \ ++ switch (type) { \ ++ case V_CHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuchararg[ind]); \ ++ break; \ ++ case V_PCHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuchararg[ind]); \ ++ break; \ ++ case V_SHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vushortarg[ind]); \ ++ break; \ ++ case V_PSHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vushortarg[ind]); \ ++ break; \ ++ case V_INT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuintarg[ind]); \ ++ break; \ ++ case V_PINT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuintarg[ind]); \ ++ break; \ ++ case V_LONGLONG: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vulonglongarg[ind]); \ ++ break; \ ++ case V_PLONGLONG: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vulonglongarg[ind]); \ ++ break; \ ++ case V_FLOAT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vfloatarg[ind]); \ ++ break; \ ++ case V_DOUBLE: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vdoublearg[ind]); \ ++ break; \ ++ } \ ++ ret += vlen; \ ++ PRINT(vstr, vlen); \ ++ FLUSH(); \ ++ free(vstr); \ ++} while (0) ++#else /* !V64TYPE */ ++#define VPRINT(type, ind, args...) do { \ ++ switch (type) { \ ++ case V_CHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuchararg[ind]); \ ++ break; \ ++ case V_PCHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuchararg[ind]); \ ++ break; \ ++ case V_SHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vushortarg[ind]); \ ++ break; \ ++ case V_PSHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vushortarg[ind]); \ ++ break; \ ++ case V_INT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuintarg[ind]); \ ++ break; \ ++ case V_PINT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuintarg[ind]); \ ++ break; \ ++ case V_FLOAT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vfloatarg[ind]); \ ++ break; \ + } \ + ret += vlen; \ + PRINT(vstr, vlen); \ + FLUSH(); \ + free(vstr); \ +} while (0) ++#endif /* V64TYPE */ + + /* Actually print. 
*/ + if (vwidth == 0) { + if (vprec == 0) { + /* First element. */ -+ VPRINT(vcnt, 0); ++ VPRINT(vtype, 0); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i); ++ VPRINT(vtype, i); + } + } else { + /* First element. */ -+ VPRINT(vcnt, 0, prec); ++ VPRINT(vtype, 0, prec); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, prec); ++ VPRINT(vtype, i, prec); + } + } + } else { + if (vprec == 0) { + /* First element. */ -+ VPRINT(vcnt, 0, width); ++ VPRINT(vtype, 0, width); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, width); ++ VPRINT(vtype, i, width); + } + } else { + /* First element. */ -+ VPRINT(vcnt, 0, width, prec); ++ VPRINT(vtype, 0, width, prec); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, width, prec); ++ VPRINT(vtype, i, width, prec); + } + } + } @@ -690,27 +731,27 @@ + + continue; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ /* * All reasonable formats wind up here. At this point, `cp' * points to a string which (if not flags&LADJUST) should be -@@ -1406,6 +1753,11 @@ +@@ -1406,6 +1822,11 @@ if (flags & LONGINT) ADDTYPE(T_WINT); else -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDTYPE(T_INT); break; case 'D': -@@ -1413,6 +1765,11 @@ +@@ -1413,6 +1834,11 @@ /*FALLTHROUGH*/ case 'd': case 'i': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else @@ -718,51 +759,51 @@ ADDSARG(); break; #ifndef NO_FLOATING_POINT -@@ -1423,6 +1780,11 @@ +@@ -1423,6 +1849,11 @@ case 'f': case 'g': case 'G': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & LONGDBL) ADDTYPE(T_LONG_DOUBLE); else -@@ -1451,9 +1813,19 @@ +@@ -1451,9 +1882,19 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'o': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDUARG(); break; case 'p': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDTYPE(TP_VOID); break; case 'S': -@@ -1471,6 +1843,11 @@ +@@ -1471,6 +1912,11 @@ case 'u': case 'X': case 'x': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDUARG(); break; default: /* "%?" prints ?, unless ? 
is NUL */ -@@ -1537,7 +1914,7 @@ +@@ -1537,7 +1983,7 @@ (*argtable) [n].sizearg = va_arg (ap, size_t); break; case TP_SIZET: @@ -771,16 +812,15 @@ break; case T_INTMAXT: (*argtable) [n].intmaxarg = va_arg (ap, intmax_t); -@@ -1556,6 +1933,12 @@ +@@ -1556,6 +2002,11 @@ (*argtable) [n].longdoublearg = va_arg (ap, long double); break; #endif -+#ifdef ALTIVEC ++#ifdef VECTORS + case T_VECTOR: -+ if (hasAltivec) -+ ap = getvec( &((*argtable) [n]), NULL, 0, ap ); ++ (*argtable) [n].vectorarg = va_arg (ap, VECTORTYPE); + break; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ case TP_CHAR: (*argtable) [n].pchararg = va_arg (ap, char *); break; diff --git a/stdio/FreeBSD/vfwprintf.c.patch b/stdio/FreeBSD/vfwprintf.c.patch index 8327adc..a645ab8 100644 --- a/stdio/FreeBSD/vfwprintf.c.patch +++ b/stdio/FreeBSD/vfwprintf.c.patch @@ -1,5 +1,5 @@ --- vfwprintf.c.orig 2004-11-25 11:38:36.000000000 -0800 -+++ vfwprintf.c 2005-02-24 15:17:14.000000000 -0800 ++++ vfwprintf.c 2005-11-08 22:46:07.000000000 -0800 @@ -42,6 +42,8 @@ #include __FBSDID("$FreeBSD: src/lib/libc/stdio/vfwprintf.c,v 1.23 2004/08/26 06:25:28 des Exp $"); @@ -9,7 +9,7 @@ /* * Actual wprintf innards. * -@@ -63,12 +65,19 @@ +@@ -63,12 +65,20 @@ #include #include #include @@ -20,20 +20,21 @@ #include "local.h" #include "fvwrite.h" -+#ifdef ALTIVEC -+#include -+ -+#define VECTORTYPE vector unsigned char -+#endif /* ALTIVEC */ ++#ifdef VECTORS ++typedef __attribute__ ((vector_size(16))) unsigned char VECTORTYPE; ++#ifdef __SSE2__ ++#define V64TYPE ++#endif /* __SSE2__ */ ++#endif /* VECTORS */ + union arg { int intarg; u_int uintarg; -@@ -96,6 +105,16 @@ +@@ -96,6 +106,21 @@ #endif wint_t wintarg; wchar_t *pwchararg; -+#ifdef ALTIVEC ++#ifdef VECTORS + VECTORTYPE vectorarg; + unsigned char vuchararg[16]; + signed char vchararg[16]; @@ -42,19 +43,24 @@ + unsigned int vuintarg[4]; + signed int vintarg[4]; + float vfloatarg[4]; -+#endif /* ALTIVEC */ ++#ifdef V64TYPE ++ double vdoublearg[2]; ++ unsigned long long vulonglongarg[2]; ++ long long vlonglongarg[2]; ++#endif /* V64TYPE */ ++#endif /* VECTORS */ }; /* -@@ -106,26 +125,63 @@ +@@ -106,16 +131,20 @@ T_LONG, T_U_LONG, TP_LONG, T_LLONG, T_U_LLONG, TP_LLONG, T_PTRDIFFT, TP_PTRDIFFT, T_SIZET, TP_SIZET, T_INTMAXT, T_UINTMAXT, TP_INTMAXT, TP_VOID, TP_CHAR, TP_SCHAR, -+#ifdef ALTIVEC ++#ifdef VECTORS + T_DOUBLE, T_LONG_DOUBLE, T_WINT, TP_WCHAR, T_VECTOR -+#else /* ! ALTIVEC */ ++#else /* ! VECTORS */ T_DOUBLE, T_LONG_DOUBLE, T_WINT, TP_WCHAR -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ }; -static int __sbprintf(FILE *, const wchar_t *, va_list); @@ -70,42 +76,7 @@ static void __find_arguments(const wchar_t *, va_list, union arg **); static void __grow_type_table(int, enum typeid **, int *); -+ /* -+ * Get the argument indexed by nextarg. If the argument table is -+ * built, use it to get the argument. If its not, get the next -+ * argument (and arguments must be gotten sequentially). -+ */ -+#define GETARG(type) \ -+ ((argtable != NULL) ? *((type*)(&argtable[nextarg++])) : \ -+ (nextarg++, va_arg(ap, type))) -+ -+#ifdef ALTIVEC -+#define hasAltivec (_cpu_capabilities & kHasAltivec) -+/*----------------------------------------------------------------------- -+ * getvec() must be a real subroutine. If it is a #define, then __vfprintf() -+ * would have its calling sequence changed by Altivec so that a non-Altivec -+ * processor would crash on illegal instruction. By isolating the calling -+ * sequence in getvec(), __vprintf() is callable by a non-Altivec processor. 
-+ *-----------------------------------------------------------------------*/ -+static va_list getvec(union arg *, const union arg *, int, va_list) __attribute__((noinline)); -+ -+static va_list -+getvec(union arg *dst, const union arg *argtable, int nextarg, va_list ap) -+{ -+ dst->vectorarg = GETARG(VECTORTYPE); -+ return ap; -+} -+ -+#define SETVEC(dst) \ -+{ \ -+ ap = getvec(&dst, argtable, nextarg, ap); \ -+ nextarg++; \ -+} -+#endif /* ALTIVEC */ -+ - /* - * Helper function for `fprintf to unbuffered unix file': creates a - * temporary buffer. We only work on write-only files; this avoids +@@ -125,7 +154,7 @@ * worries about ungetc buffers and so forth. */ static int @@ -114,7 +85,7 @@ { int ret; FILE fake; -@@ -144,7 +200,7 @@ +@@ -144,7 +173,7 @@ fake._lbfsize = 0; /* not actually used, but Just In Case */ /* do the work, then copy any error status */ @@ -123,7 +94,7 @@ if (ret >= 0 && __fflush(&fake)) ret = WEOF; if (fake._flags & __SERR) -@@ -157,7 +213,7 @@ +@@ -157,7 +186,7 @@ * File must already be locked. */ static wint_t @@ -132,7 +103,7 @@ { static const mbstate_t initial; mbstate_t mbs; -@@ -167,10 +223,10 @@ +@@ -167,10 +196,10 @@ size_t len; if ((fp->_flags & __SSTR) == 0) @@ -145,7 +116,7 @@ fp->_flags |= __SERR; return (WEOF); } -@@ -350,13 +406,14 @@ +@@ -350,13 +379,14 @@ * that the multibyte char. string ends in a null character. */ static wchar_t * @@ -161,7 +132,7 @@ if (mbsarg == NULL) return (NULL); -@@ -374,7 +431,7 @@ +@@ -374,7 +404,7 @@ insize = nchars = 0; mbs = initial; while (nchars != (size_t)prec) { @@ -170,7 +141,7 @@ if (nconv == 0 || nconv == (size_t)-1 || nconv == (size_t)-2) break; -@@ -399,7 +456,7 @@ +@@ -399,7 +429,7 @@ p = mbsarg; mbs = initial; while (insize != 0) { @@ -179,7 +150,7 @@ if (nconv == 0 || nconv == (size_t)-1 || nconv == (size_t)-2) break; wcp++; -@@ -425,7 +482,21 @@ +@@ -425,7 +455,21 @@ int ret; FLOCKFILE(fp); @@ -202,13 +173,13 @@ FUNLOCKFILE(fp); return (ret); } -@@ -474,12 +545,15 @@ +@@ -474,12 +518,15 @@ #define PTRDIFFT 0x800 /* ptrdiff_t */ #define INTMAXT 0x1000 /* intmax_t */ #define CHARINT 0x2000 /* print char using int format */ -+#ifdef ALTIVEC -+#define VECTOR 0x4000 /* Altivec vector */ -+#endif /* ALTIVEC */ ++#ifdef VECTORS ++#define VECTOR 0x4000 /* Altivec or SSE vector */ ++#endif /* VECTORS */ /* * Non-MT-safe version @@ -220,11 +191,11 @@ { wchar_t *fmt; /* format string */ wchar_t ch; /* character from fmt */ -@@ -524,6 +598,11 @@ +@@ -524,6 +571,11 @@ int nseps; /* number of group separators with ' */ int nrepeats; /* number of repeats of the last group */ #endif -+#ifdef ALTIVEC ++#ifdef VECTORS + union arg vval; /* Vector argument. */ + wchar_t *pct; /* Pointer to '%' at beginning of specifier. */ + wchar_t vsep; /* Vector separator character. */ @@ -232,7 +203,7 @@ u_long ulval; /* integer arguments %[diouxX] */ uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */ int base; /* base for [diouxX] conversion */ -@@ -560,7 +639,7 @@ +@@ -560,7 +612,7 @@ */ #define PRINT(ptr, len) do { \ for (n3 = 0; n3 < (len); n3++) \ @@ -241,23 +212,7 @@ } while (0) #define PAD(howmany, with) do { \ if ((n = (howmany)) > 0) { \ -@@ -581,15 +660,6 @@ - } while(0) - - /* -- * Get the argument indexed by nextarg. If the argument table is -- * built, use it to get the argument. If its not, get the next -- * argument (and arguments must be gotten sequentially). -- */ --#define GETARG(type) \ -- ((argtable != NULL) ? 
*((type*)(&argtable[nextarg++])) : \ -- (nextarg++, va_arg(ap, type))) -- -- /* - * To extend shorts properly, we need both signed and unsigned - * argument extraction methods. - */ -@@ -640,21 +710,22 @@ +@@ -640,21 +692,22 @@ val = GETARG (int); \ } @@ -284,39 +239,39 @@ fmt = (wchar_t *)fmt0; argtable = NULL; -@@ -678,6 +749,9 @@ +@@ -678,6 +731,9 @@ } if (ch == '\0') goto done; -+#ifdef ALTIVEC ++#ifdef VECTORS + pct = fmt; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ fmt++; /* skip over '%' */ flags = 0; -@@ -686,6 +760,9 @@ +@@ -686,6 +742,9 @@ prec = -1; sign = '\0'; ox[1] = '\0'; -+#ifdef ALTIVEC ++#ifdef VECTORS + vsep = 'X'; /* Illegal value, changed to defaults later. */ -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ rflag: ch = *fmt++; reswitch: switch (ch) { -@@ -701,6 +778,11 @@ +@@ -701,6 +760,11 @@ case '#': flags |= ALT; goto rflag; -+#ifdef ALTIVEC ++#ifdef VECTORS + case ',': case ';': case ':': case '_': + vsep = ch; + goto rflag; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ case '*': /*- * ``A negative field width argument is taken as a -@@ -721,8 +803,8 @@ +@@ -721,8 +785,8 @@ goto rflag; case '\'': flags |= GROUPING; @@ -327,16 +282,14 @@ goto rflag; case '.': if ((ch = *fmt++) == '*') { -@@ -796,10 +878,16 @@ +@@ -796,10 +860,14 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'c': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & LONGINT) *(cp = buf) = (wchar_t)GETARG(wint_t); else @@ -345,20 +298,31 @@ size = 1; sign = '\0'; break; -@@ -808,6 +896,12 @@ +@@ -808,6 +876,10 @@ /*FALLTHROUGH*/ case 'd': case 'i': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) { ujval = SJARG(); if ((intmax_t)ujval < 0) { -@@ -837,6 +931,12 @@ +@@ -826,6 +898,12 @@ + #ifndef NO_FLOATING_POINT + case 'a': + case 'A': ++#ifdef VECTORS ++ if (flags & VECTOR) { ++ flags |= FPT; ++ break; ++ } ++#endif /* VECTORS */ + if (ch == 'a') { + ox[1] = 'x'; + xdigs = xdigs_lower; +@@ -837,6 +915,12 @@ } if (prec >= 0) prec++; @@ -371,7 +335,7 @@ if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = -@@ -848,6 +948,7 @@ +@@ -848,6 +932,7 @@ __hdtoa(fparg.dbl, xdigs, prec, &expt, &signflag, &dtoaend); } @@ -379,7 +343,7 @@ if (prec < 0) prec = dtoaend - dtoaresult; if (expt == INT_MAX) -@@ -855,7 +956,7 @@ +@@ -855,11 +940,17 @@ if (convbuf != NULL) free(convbuf); ndig = dtoaend - dtoaresult; @@ -388,32 +352,40 @@ freedtoa(dtoaresult); goto fp_common; case 'e': -@@ -868,10 +969,24 @@ + case 'E': ++#ifdef VECTORS ++ if (flags & VECTOR) { ++ flags |= FPT; ++ break; ++ } ++#endif /* VECTORS */ + expchar = ch; + if (prec < 0) /* account for digit before decpt */ + prec = DEFPREC + 1; +@@ -868,10 +959,22 @@ goto fp_begin; case 'f': case 'F': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = '\0'; goto fp_begin; case 'g': case 'G': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + flags |= FPT; -+ SETVEC(vval); + break; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ expchar = ch - ('g' - 'e'); if (prec == 0) prec = 1; -@@ -880,6 +995,14 @@ +@@ -880,6 +983,14 @@ prec = DEFPREC; if (convbuf != NULL) free(convbuf); @@ -428,7 +400,7 @@ if (flags & LONGDBL) { fparg.ldbl = GETARG(long double); dtoaresult = -@@ -893,8 +1016,9 @@ 
+@@ -893,8 +1004,9 @@ if (expt == 9999) expt = INT_MAX; } @@ -439,33 +411,29 @@ freedtoa(dtoaresult); fp_common: if (signflag) -@@ -989,6 +1113,12 @@ +@@ -989,6 +1101,10 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'o': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1003,6 +1133,12 @@ +@@ -1003,6 +1119,10 @@ * defined manner.'' * -- ANSI X3J11 */ -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ujval = (uintmax_t)(uintptr_t)GETARG(void *); base = 16; xdigs = xdigs_lower; -@@ -1024,7 +1160,7 @@ +@@ -1024,7 +1144,7 @@ if ((mbp = GETARG(char *)) == NULL) cp = L"(null)"; else { @@ -474,52 +442,45 @@ if (convbuf == NULL) { fp->_flags |= __SERR; goto error; -@@ -1055,6 +1191,12 @@ +@@ -1055,6 +1175,10 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'u': -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1067,6 +1209,12 @@ +@@ -1067,6 +1191,10 @@ case 'x': xdigs = xdigs_lower; hex: -+#ifdef ALTIVEC -+ if (flags & VECTOR) { -+ SETVEC(vval); ++#ifdef VECTORS ++ if (flags & VECTOR) + break; -+ } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & INTMAX_SIZE) ujval = UJARG(); else -@@ -1111,6 +1259,14 @@ +@@ -1111,6 +1239,11 @@ if (size > BUF) /* should never happen */ abort(); break; -+#ifdef ALTIVEC ++#ifdef VECTORS + case 'v': -+ if (hasAltivec) { -+ flags |= VECTOR; -+ goto rflag; -+ } -+ /* drop through */ -+#endif /* ALTIVEC */ ++ flags |= VECTOR; ++ goto rflag; ++#endif /* VECTORS */ default: /* "%?" prints ?, unless ? is NUL */ if (ch == '\0') goto done; -@@ -1122,6 +1278,183 @@ +@@ -1122,6 +1255,288 @@ break; } -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) { + /* + * Do the minimum amount of work necessary to construct @@ -529,24 +490,32 @@ + int i, j; /* Counter. */ + int vcnt; /* Number of elements in vector. */ + char *vfmt; /* Pointer to format specifier. */ -+ char vfmt_buf[32]; /* Static buffer for format spec. */ ++#define EXTRAHH 2 ++ char vfmt_buf[32 + EXTRAHH]; /* Static buffer for format spec. */ + int vwidth = 0; /* Width specified via '*'. */ + int vprec = 0; /* Precision specified via '*'. */ -+ union { /* Element. */ -+ int i; -+ float f; -+ } velm; + char *vstr; /* Used for asprintf(). */ + int vlen; /* Length returned by asprintf(). */ ++ enum { ++ V_CHAR, V_SHORT, V_INT, ++ V_PCHAR, V_PSHORT, V_PINT, ++ V_FLOAT, ++#ifdef V64TYPE ++ V_LONGLONG, V_PLONGLONG, ++ V_DOUBLE, ++#endif /* V64TYPE */ ++ } vtype; + ++ vval.vectorarg = GETARG(VECTORTYPE); + /* + * Set vfmt. If vfmt_buf may not be big enough, + * malloc() space, taking care to free it later. ++ * (EXTRAHH is for possible extra "hh") + */ -+ if (&fmt[-1] - pct < sizeof(vfmt_buf)) ++ if (&fmt[-1] - pct + EXTRAHH < sizeof(vfmt_buf)) + vfmt = vfmt_buf; + else -+ vfmt = (char *)malloc(&fmt[-1] - pct + 1); ++ vfmt = (char *)malloc(&fmt[-1] - pct + EXTRAHH + 1); + + /* Set the separator character, if not specified. */ + if (vsep == 'X') { @@ -579,13 +548,57 @@ + * finish up the format specifier. 
+ */ + if (flags & SHORTINT) { -+ if (ch != 'c') ++ switch (ch) { ++ case 'c': ++ vtype = V_SHORT; ++ break; ++ case 'p': ++ vtype = V_PSHORT; ++ break; ++ default: + vfmt[j++] = 'h'; ++ vtype = V_SHORT; ++ break; ++ } + vcnt = 8; + } else if (flags & LONGINT) { -+ if (ch != 'c') -+ vfmt[j++] = 'l'; + vcnt = 4; ++ vtype = (ch == 'p') ? V_PINT : V_INT; ++#ifdef V64TYPE ++ } else if (flags & LLONGINT) { ++ switch (ch) { ++ case 'a': ++ case 'A': ++ case 'e': ++ case 'E': ++ case 'f': ++ case 'g': ++ case 'G': ++ vcnt = 2; ++ vtype = V_DOUBLE; ++ break; ++ case 'd': ++ case 'i': ++ case 'u': ++ case 'o': ++ case 'p': ++ case 'x': ++ case 'X': ++ vfmt[j++] = 'l'; ++ vfmt[j++] = 'l'; ++ vcnt = 2; ++ vtype = (ch == 'p') ? V_PLONGLONG : V_LONGLONG; ++ break; ++ default: ++ /* ++ * The default case should never ++ * happen. ++ */ ++ case 'c': ++ vcnt = 16; ++ vtype = V_CHAR; ++ } ++#endif /* V64TYPE */ + } else { + switch (ch) { + case 'a': @@ -596,95 +609,148 @@ + case 'g': + case 'G': + vcnt = 4; ++ vtype = V_FLOAT; + break; + default: + /* + * The default case should never + * happen. + */ -+ case 'c': + case 'd': + case 'i': + case 'u': + case 'o': -+ case 'p': + case 'x': + case 'X': ++ vfmt[j++] = 'h'; ++ vfmt[j++] = 'h'; ++ /* drop through */ ++ case 'p': ++ case 'c': + vcnt = 16; ++ vtype = (ch == 'p') ? V_PCHAR : V_CHAR; + } + } + vfmt[j++] = ch; + vfmt[j++] = '\0'; + +/* Get a vector element. */ -+#define VPRINT(cnt, ind, args...) do { \ -+ if (flags & FPT) { \ -+ velm.f = vval.vfloatarg[ind]; \ -+ vlen = asprintf_l(&vstr, loc, vfmt , ## args, velm.f); \ -+ } else { \ -+ switch (cnt) { \ -+ default: \ -+ /* The default case should never happen. */ \ -+ case 4: \ -+ velm.i = (unsigned)vval.vintarg[ind]; \ -+ break; \ -+ case 8: \ -+ velm.i = (unsigned short)vval.vshortarg[ind]; \ -+ break; \ -+ case 16: \ -+ velm.i = (unsigned char)vval.vchararg[ind]; \ -+ break; \ -+ } \ -+ vlen = asprintf_l(&vstr, loc, vfmt , ## args, velm.i); \ ++#ifdef V64TYPE ++#define VPRINT(type, ind, args...) do { \ ++ switch (type) { \ ++ case V_CHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuchararg[ind]); \ ++ break; \ ++ case V_PCHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuchararg[ind]); \ ++ break; \ ++ case V_SHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vushortarg[ind]); \ ++ break; \ ++ case V_PSHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vushortarg[ind]); \ ++ break; \ ++ case V_INT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuintarg[ind]); \ ++ break; \ ++ case V_PINT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuintarg[ind]); \ ++ break; \ ++ case V_LONGLONG: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vulonglongarg[ind]); \ ++ break; \ ++ case V_PLONGLONG: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vulonglongarg[ind]); \ ++ break; \ ++ case V_FLOAT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vfloatarg[ind]); \ ++ break; \ ++ case V_DOUBLE: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vdoublearg[ind]); \ ++ break; \ ++ } \ ++ ret += vlen; \ ++ PRINT(vstr, vlen); \ ++ free(vstr); \ ++} while (0) ++#else /* !V64TYPE */ ++#define VPRINT(type, ind, args...) 
do { \ ++ switch (type) { \ ++ case V_CHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuchararg[ind]); \ ++ break; \ ++ case V_PCHAR: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuchararg[ind]); \ ++ break; \ ++ case V_SHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vushortarg[ind]); \ ++ break; \ ++ case V_PSHORT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vushortarg[ind]); \ ++ break; \ ++ case V_INT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vuintarg[ind]); \ ++ break; \ ++ case V_PINT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, (void *)(long)vval.vuintarg[ind]); \ ++ break; \ ++ case V_FLOAT: \ ++ vlen = asprintf_l(&vstr, loc, vfmt , ## args, vval.vfloatarg[ind]); \ ++ break; \ + } \ + ret += vlen; \ + PRINT(vstr, vlen); \ + free(vstr); \ +} while (0) ++#endif /* V64TYPE */ + + /* Actually print. */ + if (vwidth == 0) { + if (vprec == 0) { + /* First element. */ -+ VPRINT(vcnt, 0); ++ VPRINT(vtype, 0); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i); ++ VPRINT(vtype, i); + } + } else { + /* First element. */ -+ VPRINT(vcnt, 0, prec); ++ VPRINT(vtype, 0, prec); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, prec); ++ VPRINT(vtype, i, prec); + } + } + } else { + if (vprec == 0) { + /* First element. */ -+ VPRINT(vcnt, 0, width); ++ VPRINT(vtype, 0, width); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, width); ++ VPRINT(vtype, i, width); + } + } else { + /* First element. */ -+ VPRINT(vcnt, 0, width, prec); ++ VPRINT(vtype, 0, width, prec); + for (i = 1; i < vcnt; i++) { + /* Separator. */ -+ PRINT(&vsep, 1); ++ if(vsep) ++ PRINT(&vsep, 1); + + /* Element. */ -+ VPRINT(vcnt, i, width, prec); ++ VPRINT(vtype, i, width, prec); + } + } + } @@ -695,67 +761,79 @@ + + continue; + } -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ /* * All reasonable formats wind up here. 
At this point, `cp' * points to a string which (if not flags&LADJUST) should be -@@ -1401,6 +1734,11 @@ +@@ -1401,6 +1816,11 @@ if (flags & LONGINT) ADDTYPE(T_WINT); else -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDTYPE(T_INT); break; case 'D': -@@ -1418,6 +1756,11 @@ +@@ -1408,6 +1828,11 @@ + /*FALLTHROUGH*/ + case 'd': + case 'i': ++#ifdef VECTORS ++ if (flags & VECTOR) ++ ADDTYPE(T_VECTOR); ++ else ++#endif /* VECTORS */ + ADDSARG(); + break; + #ifndef NO_FLOATING_POINT +@@ -1418,6 +1843,11 @@ case 'f': case 'g': case 'G': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ if (flags & LONGDBL) ADDTYPE(T_LONG_DOUBLE); else -@@ -1446,9 +1789,19 @@ +@@ -1446,9 +1876,19 @@ flags |= LONGINT; /*FALLTHROUGH*/ case 'o': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDUARG(); break; case 'p': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDTYPE(TP_VOID); break; case 'S': -@@ -1466,6 +1819,11 @@ +@@ -1466,6 +1906,11 @@ case 'u': case 'X': case 'x': -+#ifdef ALTIVEC ++#ifdef VECTORS + if (flags & VECTOR) + ADDTYPE(T_VECTOR); + else -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ ADDUARG(); break; default: /* "%?" prints ?, unless ? is NUL */ -@@ -1532,7 +1890,7 @@ +@@ -1532,7 +1977,7 @@ (*argtable) [n].sizearg = va_arg (ap, size_t); break; case TP_SIZET: @@ -764,16 +842,15 @@ break; case T_INTMAXT: (*argtable) [n].intmaxarg = va_arg (ap, intmax_t); -@@ -1551,6 +1909,12 @@ +@@ -1551,6 +1996,11 @@ (*argtable) [n].longdoublearg = va_arg (ap, long double); break; #endif -+#ifdef ALTIVEC ++#ifdef VECTORS + case T_VECTOR: -+ if (hasAltivec) -+ ap = getvec( &((*argtable) [n]), NULL, 0, ap ); ++ (*argtable) [n].vectorarg = va_arg (ap, VECTORTYPE); + break; -+#endif /* ALTIVEC */ ++#endif /* VECTORS */ case TP_CHAR: (*argtable) [n].pchararg = va_arg (ap, char *); break; diff --git a/stdio/Makefile.inc b/stdio/Makefile.inc index 548c169..0e6892a 100644 --- a/stdio/Makefile.inc +++ b/stdio/Makefile.inc @@ -29,11 +29,7 @@ LDBLSRCS += asprintf.c fprintf.c fscanf.c fwprintf.c fwscanf.c printf.c \ vswscanf.c vwprintf.c vwscanf.c wprintf.c wscanf.c .for _src in vfprintf-fbsd.c vfwprintf-fbsd.c -CFLAGS-${_src} += -fshort-enums -# add altivec options on per file basis, since it now disables inlining -.if (${MACHINE_ARCH} == ppc) || (${MACHINE_ARCH} == ppc64) -CFLAGS-${_src} += -faltivec -DALTIVEC -.endif +CFLAGS-${_src} += -fshort-enums -DVECTORS .endfor UNIX03SRCS+= freopen.c fwrite.c diff --git a/stdlib/FreeBSD/grantpt.c.patch b/stdlib/FreeBSD/grantpt.c.patch index 2e73b70..a41355b 100644 --- a/stdlib/FreeBSD/grantpt.c.patch +++ b/stdlib/FreeBSD/grantpt.c.patch @@ -1,5 +1,5 @@ ---- grantpt.c.orig 2006-04-21 22:41:31.000000000 -0700 -+++ grantpt.c 2006-04-21 22:43:03.000000000 -0700 +--- grantpt.c.orig 2004-09-14 19:06:46.000000000 -0700 ++++ grantpt.c 2004-09-14 19:11:31.000000000 -0700 @@ -54,18 +54,16 @@ #include #include "un-namespace.h" @@ -58,97 +58,7 @@ minor((x).st_rdev) >= 0 && \ minor((x).st_rdev) < PT_MAX) -@@ -100,50 +119,53 @@ - serrno = errno; - - if ((slave = ptsname(fildes)) != NULL) { -- /* -- * Block SIGCHLD. 
-- */ -- (void)sigemptyset(&nblock); -- (void)sigaddset(&nblock, SIGCHLD); -- (void)_sigprocmask(SIG_BLOCK, &nblock, &oblock); -- -- switch (pid = fork()) { -- case -1: -- break; -- case 0: /* child */ -+ /* 4430299: if we are root, we don't need to fork/exec */ -+ if (geteuid() != 0) { - /* -- * pt_chown expects the master pseudo TTY to be its -- * standard input. -+ * Block SIGCHLD. - */ -- (void)_dup2(fildes, STDIN_FILENO); -- (void)_sigprocmask(SIG_SETMASK, &oblock, NULL); -- execl(_PATH_PTCHOWN, _PATH_PTCHOWN, (char *)NULL); -- _exit(EX_UNAVAILABLE); -- /* NOTREACHED */ -- default: /* parent */ -+ (void)sigemptyset(&nblock); -+ (void)sigaddset(&nblock, SIGCHLD); -+ (void)_sigprocmask(SIG_BLOCK, &nblock, &oblock); -+ -+ switch (pid = fork()) { -+ case -1: -+ break; -+ case 0: /* child */ -+ /* -+ * pt_chown expects the master pseudo TTY to be its -+ * standard input. -+ */ -+ (void)_dup2(fildes, STDIN_FILENO); -+ (void)_sigprocmask(SIG_SETMASK, &oblock, NULL); -+ execl(_PATH_PTCHOWN, _PATH_PTCHOWN, (char *)NULL); -+ _exit(EX_UNAVAILABLE); -+ /* NOTREACHED */ -+ default: /* parent */ -+ /* -+ * Just wait for the process. Error checking is -+ * done below. -+ */ -+ while ((spid = _waitpid(pid, &status, 0)) == -1 && -+ (errno == EINTR)) -+ ; -+ if (spid != -1 && WIFEXITED(status) && -+ WEXITSTATUS(status) == EX_OK) -+ retval = 0; -+ else -+ errno = EACCES; -+ break; -+ } -+ - /* -- * Just wait for the process. Error checking is -- * done below. -+ * Restore process's signal mask. - */ -- while ((spid = _waitpid(pid, &status, 0)) == -1 && -- (errno == EINTR)) -- ; -- if (spid != -1 && WIFEXITED(status) && -- WEXITSTATUS(status) == EX_OK) -- retval = 0; -- else -- errno = EACCES; -- break; -+ (void)_sigprocmask(SIG_SETMASK, &oblock, NULL); - } - -- /* -- * Restore process's signal mask. -- */ -- (void)_sigprocmask(SIG_SETMASK, &oblock, NULL); -- - if (retval) { - /* -- * pt_chown failed. Try to manually change the -+ * pt_chown failed (or we're root). Try to manually change the - * permissions for the slave. - */ - gid = (grp = getgrnam("tty")) ? grp->gr_gid : -1; -@@ -227,8 +249,8 @@ +@@ -227,8 +246,8 @@ errno = EINVAL; else { (void)sprintf(slave, _PATH_DEV PTS_PREFIX "%c%c", diff --git a/sys/Makefile.inc b/sys/Makefile.inc index b4aa2ae..9e6510b 100644 --- a/sys/Makefile.inc +++ b/sys/Makefile.inc @@ -34,6 +34,10 @@ MISRCS+= errno.c gettimeofday.c sigcatch.c sigsuspend.c \ CFLAGS-${_src} += -D__APPLE_PR3375657_HACK__ .endfor +.include "Makefile.obsd_begin" +OBSDMISRCS= stack_protector.c +.include "Makefile.obsd_end" + UNIX03SRCS += mmap.c mprotect.c msgctl.c msync.c munmap.c semctl.c shmctl.c # Add machine dependent asm sources: diff --git a/sys/OpenBSD/stack_protector.c b/sys/OpenBSD/stack_protector.c new file mode 100644 index 0000000..368e365 --- /dev/null +++ b/sys/OpenBSD/stack_protector.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2002 Hiroaki Etoh, Federico G. Schwindt, and Miodrag Vallat. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#if defined(LIBC_SCCS) && !defined(list) +static char rcsid[] = "$OpenBSD: stack_protector.c,v 1.3 2002/12/10 08:53:42 etoh Exp $"; +#endif + +#include +#include +#include + +long __guard[8] = {0, 0, 0, 0, 0, 0, 0, 0}; +static void __guard_setup(void) __attribute__ ((constructor)); +void __stack_smash_handler(char func[], int damaged __attribute__((unused))); + +static void +__guard_setup(void) +{ + int fd; + if (__guard[0]!=0) return; + fd = open ("/dev/urandom", 0); + if (fd != -1) { + ssize_t size = read (fd, (char*)&__guard, sizeof(__guard)); + close (fd) ; + if (size == sizeof(__guard)) return; + } + /* If a random generator can't be used, the protector switches the guard + to the "terminator canary" */ + ((char*)__guard)[0] = 0; ((char*)__guard)[1] = 0; + ((char*)__guard)[2] = '\n'; ((char*)__guard)[3] = 255; +} + +void +__stack_smash_handler(char func[], int damaged) +{ + const char message[] = "stack overflow in function %s"; + struct sigaction sa; + + /* this may fail on a chroot jail, though luck */ + syslog(LOG_CRIT, message, func); + + bzero(&sa, sizeof(struct sigaction)); + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = SIG_DFL; + sigaction(SIGABRT, &sa, NULL); + + kill(getpid(), SIGABRT); + + _exit(127); +} diff --git a/sys/gettimeofday.c b/sys/gettimeofday.c index c328715..959bbf5 100644 --- a/sys/gettimeofday.c +++ b/sys/gettimeofday.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -40,32 +40,24 @@ int gettimeofday (struct timeval *tp, struct timezone *tzp) { + extern int __gettimeofday(struct timeval *, struct timezone *); + extern int __commpage_gettimeofday(struct timeval *); static int validtz = 0; static struct timezone cached_tz = {0}; - struct timeval localtv; + struct timeval atv; if (tp == NULL) { if (tzp == NULL) return (0); - tp = &localtv; + tp = &atv; } -#if defined(__ppc__) || defined(__ppc64__) - { - extern int __ppc_gettimeofday(struct timeval *, struct timezone *); - extern int __commpage_gettimeofday(struct timeval *); - - if (__commpage_gettimeofday(tp)) { /* first try commpage */ - if (__ppc_gettimeofday(tp,tzp)) { /* if it fails, use syscall */ - return (-1); - } - } - } -#else - if (syscall (SYS_gettimeofday, tp, tzp) < 0) { - return (-1); - } -#endif + if (__commpage_gettimeofday(tp)) { /* first try commpage */ + if (__gettimeofday(tp, tzp) < 0) { /* if it fails, use syscall */ + return (-1); + } + } + if (tzp) { if (validtz == 0) { struct tm *localtm = localtime ((time_t *)&tp->tv_sec); diff --git a/sys/sigtramp.c b/sys/sigtramp.c index b387329..e960491 100644 --- a/sys/sigtramp.c +++ b/sys/sigtramp.c @@ -42,10 +42,10 @@ int __in_sigtramp = 0; /* These defn should match the kernel one */ #define UC_TRAD 1 +#define UC_FLAVOR 30 #if defined(__ppc__) || defined(__ppc64__) #define UC_TRAD64 20 #define UC_TRAD64_VEC 25 -#define UC_FLAVOR 30 #define UC_FLAVOR_VEC 35 #define UC_FLAVOR64 40 #define UC_FLAVOR64_VEC 45 @@ -164,11 +164,11 @@ _sigtramp( siginfo_t *sinfo, ucontext_t *uctx ) { -#if defined(__ppc__) || defined(__ppc64__) int ctxstyle = UC_FLAVOR; -#endif +#if defined(__ppc__) || defined(__ppc64__) mcontext_t mctx; mcontext64_t mctx64; +#endif #if defined(__DYNAMIC__) __in_sigtramp++; @@ -176,6 +176,9 @@ _sigtramp( #ifdef __i386__ if (sigstyle == UC_TRAD) sa_handler(sig); + else { + sa_sigaction(sig, sinfo, uctx); + } #elif defined(__ppc__) || defined(__ppc64__) if ((sigstyle == UC_TRAD) || (sigstyle == UC_TRAD64) || (sigstyle == UC_TRAD64_VEC)) sa_handler(sig); @@ -209,14 +212,8 @@ _sigtramp( #if defined(__DYNAMIC__) __in_sigtramp--; #endif -#if defined(__ppc__) || defined(__ppc64__) - { /* sigreturn(uctx, ctxstyle); */ /* syscall (SYS_SIGRETURN, uctx, ctxstyle); */ syscall (184, uctx, ctxstyle); - } -#else - sigreturn(uctx); -#endif /* __ppc__ || __ppc64__ */ } -- 2.45.2
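Editor's note -- the sys/gettimeofday.c hunk above drops the PPC-only
__ppc_gettimeofday() arrangement in favor of an architecture-neutral
sequence: try __commpage_gettimeofday() first, and fall back to the
__gettimeofday() syscall stub only when the commpage declines. The short C
sketch below restates that control flow with hypothetical stand-ins
(commpage_try()/syscall_fallback()) so it is self-contained; it illustrates
the pattern only and is not Libc's actual code.

#include <stdio.h>
#include <sys/time.h>

/*
 * Hypothetical stand-ins: a non-zero return from commpage_try() means
 * "not handled here, use the syscall", mirroring __commpage_gettimeofday().
 */
static int
commpage_try(struct timeval *tp)
{
	(void)tp;
	return (1);			/* pretend the fast path declined */
}

static int
syscall_fallback(struct timeval *tp)
{
	return (gettimeofday(tp, NULL));	/* stands in for __gettimeofday() */
}

static int
demo_gettimeofday(struct timeval *tp)
{
	if (commpage_try(tp)) {			/* first try the commpage */
		if (syscall_fallback(tp) < 0)	/* if it fails, use the syscall */
			return (-1);
	}
	return (0);
}

int
main(void)
{
	struct timeval tv;

	if (demo_gettimeofday(&tv) == 0)
		printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return (0);
}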