Libc-1158.20.4.tar.gz

[apple/libc.git] / stdlib / FreeBSD / psort_b.c
diff --git a/stdlib/FreeBSD/psort_b.c b/stdlib/FreeBSD/psort_b.c

deleted file mode 120000 (symlink)

index 945c9b46d684f08ec84cb316e1dc0061e361f794..0000000000000000000000000000000000000000
--- a/stdlib/FreeBSD/psort_b.c
+++ /dev/null
@@ -1 +0,0 @@
-.
-\ No newline at end of file
diff --git a/stdlib/FreeBSD/psort_b.c b/stdlib/FreeBSD/psort_b.c

new file mode 100644 (file)

index 0000000000000000000000000000000000000000..cb6f707aa4cea70963e60edc0b7a4dadf1090757
--- /dev/null
+++ b/stdlib/FreeBSD/psort_b.c
@@ -0,0 +1,430 @@
+/****************************************************************************/
+/*-
+ * Copyright (c) 1992, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)qsort.c    8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <dispatch/dispatch.h>
+#include <stddef.h>
+#include <string.h>
+#include <libkern/OSAtomic.h>
+#include <sys/mman.h>
+#include <errno.h>
+#define __APPLE_API_PRIVATE
+#include <machine/cpu_capabilities.h>
+
+/*
+ * Comparator signature.  The psort_r flavor (I_AM_PSORT_R) passes an extra
+ * leading thunk argument; the other flavors take only the two elements.
+ */
+#ifdef I_AM_PSORT_R
+typedef int             cmp_t(void *, const void *, const void *);
+#else
+typedef int             cmp_t(const void *, const void *);
+#endif
+/*
+ * Forward declarations of the always-inlined helpers.  The psort_b flavor
+ * (I_AM_PSORT_B) receives the comparator as a block (cmp_t ^) instead of a
+ * function pointer (cmp_t *).
+ */
+#ifdef I_AM_PSORT_B
+static inline char     *med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
+#else
+static inline char     *med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
+#endif
+static inline void      swapfunc(char *, char *, int, int) __attribute__((always_inline));
+
+/*
+ * Smaller of a and b.  The expansion and both result operands are
+ * parenthesized so the macro can be embedded in a larger expression.
+ * Each argument may be evaluated twice; do not pass expressions with
+ * side effects.
+ */
+#define min(a, b)      ((a) < (b) ? (a) : (b))
+
+/* Number of union args records that fit in one page after the struct page header */
+#define NARGS                  ((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
+/* Size in bytes of each region mmap()ed to hold argument records */
+#define PAGESIZE               4096
+/* Minimum element count for which the parallel path is attempted */
+#define PARALLEL_MIN_SIZE      2000    /* determine heuristically */
+
+struct shared; /* forward reference */
+/*
+ * One work record.  While the record sits on a free list only `next' is
+ * meaningful; once handed out by getargs() the anonymous struct describes
+ * the sub-array that _psort_parallel() passes on to _psort().
+ */
+union args {
+    union args *next;          /* free-list link */
+    struct {
+       struct shared *shared;  /* state common to the whole sort */
+       void *a;                /* base of the sub-array */
+       size_t n;               /* number of elements in the sub-array */
+       int depth_limit;        /* depth budget handed to _psort() */
+    } /* anonymous */;
+};
+
+/*
+ * Header of one PAGESIZE-byte mmap()ed region.  Pages are chained through
+ * `next' so they can all be unmapped when the sort finishes; the rest of
+ * the region is carved into NARGS work records.
+ */
+struct page {
+    struct page *next;
+    /* C99 flexible array member (was the GNU zero-length form args[0]) */
+    union args args[];
+};
+
+/*
+ * State shared by every job belonging to one psort call.
+ */
+struct shared {
+    char *who;                 /* entry point name, used in the abort message */
+    union args *freelist;      /* unused work records; guarded by sharedlock */
+    struct page *pagelist;     /* every page mmap()ed so far; guarded by sharedlock */
+#ifdef I_AM_PSORT_R
+    void *thunk;               /* caller context forwarded to the comparator */
+#endif
+#ifdef I_AM_PSORT_B
+    cmp_t ^cmp;                        /* comparator block */
+#else
+    cmp_t *cmp;                        /* comparator function */
+#endif
+    size_t es;                 /* element size in bytes */
+    size_t turnoff;            /* partitions of at most this many elements are not dispatched */
+    dispatch_queue_t queue;    /* queue that receives the asynchronous jobs */
+    dispatch_group_t group;    /* group the caller waits on for completion */
+    OSSpinLock sharedlock;     /* protects freelist and pagelist */
+};
+
+/*
+ * Pop one work record off shared->freelist, refilling the list from a
+ * freshly mmap()ed page when it is empty.
+ *
+ * A new page is pushed onto shared->pagelist and its NARGS records are
+ * threaded onto the free list.  All list manipulation is done with
+ * shared->sharedlock held, and the lock is released on every return path.
+ *
+ * Returns the record, or NULL when mmap() fails; errno then holds the
+ * value set by mmap().
+ */
+static union args *
+getargs(struct shared *shared)
+{
+    union args *args;
+
+    OSSpinLockLock(&shared->sharedlock);
+    if(!shared->freelist) {
+       struct page *page;
+       union args *prev;
+       int i;
+       /* mmap() signals failure with MAP_FAILED, never with NULL */
+       if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+           /* keep mmap()'s errno for the caller and drop the lock */
+           int saved_errno = errno;
+           OSSpinLockUnlock(&shared->sharedlock);
+           errno = saved_errno;
+           return NULL;
+       }
+       page->next = shared->pagelist;
+       shared->pagelist = page;
+       /* chain every record of the page; the last one becomes the list head */
+       prev = NULL;
+       for(args = page->args, i = NARGS; i > 0; args++, i--) {
+           args->next = prev;
+           prev = args;
+       }
+       shared->freelist = prev;
+    }
+    args = shared->freelist;
+    shared->freelist = args->next;
+    OSSpinLockUnlock(&shared->sharedlock);
+    return args;
+}
+
+/*
+ * Put a work record back on the head of shared->freelist while holding
+ * shared->sharedlock.
+ */
+static void
+returnargs(struct shared *shared, union args *args)
+{
+    OSSpinLock *lock = &shared->sharedlock;
+    union args **head = &shared->freelist;
+
+    OSSpinLockLock(lock);
+    args->next = *head;
+    *head = args;
+    OSSpinLockUnlock(lock);
+}
+
+/*
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
+ */
+/*
+ * Exchange n bytes between parmi and parmj in units of TYPE.
+ * Expands to a bare { } block, so call sites write no trailing semicolon.
+ * The do/while body always runs once, so at least one TYPE is exchanged
+ * even when n is smaller than sizeof(TYPE).
+ */
+#define swapcode(TYPE, parmi, parmj, n) {              \
+       long i = (n) / sizeof (TYPE);                   \
+       TYPE *pi = (TYPE *) (parmi);            \
+       TYPE *pj = (TYPE *) (parmj);            \
+       do {                                            \
+               TYPE    t = *pi;                \
+               *pi++ = *pj;                            \
+               *pj++ = t;                              \
+        } while (--i > 0);                             \
+}
+
+/*
+ * Pick the swap strategy for the array at `a' with element size `es' and
+ * store it in the caller's local variable `swaptype':
+ *   2 - base address or es is not a multiple of sizeof(long): swap bytes
+ *   0 - es == sizeof(long): swap one long
+ *   1 - otherwise (es a larger multiple of sizeof(long)): swap long by long
+ * The expansion already ends in a semicolon.
+ */
+#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
+       es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
+
+/*
+ * Exchange the n bytes at a with the n bytes at b.
+ *
+ * swaptype is the value computed by SWAPINIT: values <= 1 copy in units
+ * of long, anything else copies byte by byte.
+ *
+ * Written as a prototype-style definition (the old K&R parameter list is
+ * obsolescent); the parameter types match the forward declaration.
+ */
+static inline void
+swapfunc(char *a, char *b, int n, int swaptype)
+{
+       /* swapcode() expands to a complete { } block, hence no semicolons */
+       if (swaptype <= 1) {
+               swapcode(long, a, b, n)
+       } else {
+               swapcode(char, a, b, n)
+       }
+}
+
+/*
+ * Exchange the elements at a and b.  Relies on the locals `swaptype' and
+ * `es' of the enclosing function.  When swaptype is 0 a single long is
+ * exchanged inline; otherwise the work is delegated to swapfunc().
+ * Wrapped in do { } while (0) so the macro behaves as one statement and
+ * cannot capture a following `else'.
+ */
+#define swap(a, b)                                     \
+       do {                                            \
+               if (swaptype == 0) {                    \
+                       long t = *(long *)(a);          \
+                       *(long *)(a) = *(long *)(b);    \
+                       *(long *)(b) = t;               \
+               } else                                  \
+                       swapfunc(a, b, es, swaptype);   \
+       } while (0)
+
+/*
+ * Exchange n bytes starting at a with n bytes starting at b; does nothing
+ * when n is not positive.  Relies on the local `swaptype'.
+ */
+#define vecswap(a, b, n)                               \
+       do {                                            \
+               if ((n) > 0)                            \
+                       swapfunc(a, b, n, swaptype);    \
+       } while (0)
+
+/*
+ * Invoke the comparator named `cmp' in the enclosing scope.  Only the
+ * psort_r flavor forwards the thunk t; the other flavors drop it.
+ */
+#ifdef I_AM_PSORT_R
+#define        CMP(t, x, y) (cmp((t), (x), (y)))
+#else
+#define        CMP(t, x, y) (cmp((x), (y)))
+#endif
+
+/*
+ * Return whichever of the pointers a, b, c refers to the median of the
+ * three elements according to cmp.  thunk is forwarded to the comparator
+ * only in the psort_r flavor.
+ */
+static inline char *
+med3(char *a, char *b, char *c,
+#ifdef I_AM_PSORT_B
+cmp_t ^cmp,
+#else
+cmp_t *cmp,
+#endif
+void *thunk
+#ifndef I_AM_PSORT_R
+__unused
+#endif
+)
+{
+       if (CMP(thunk, a, b) < 0) {
+               /* a sorts before b */
+               if (CMP(thunk, b, c) < 0)
+                       return b;
+               return CMP(thunk, a, c) < 0 ? c : a;
+       }
+       /* a does not sort before b */
+       if (CMP(thunk, b, c) > 0)
+               return b;
+       return CMP(thunk, a, c) < 0 ? a : c;
+}
+
+/*
+ * Initial depth budget for sorting x elements: twice (index of the highest
+ * set bit of x, minus one).  _psort() switches to heapsort once the budget
+ * is used up.  The LP64 build uses flsl() on a long, the other build uses
+ * fls() on an int.
+ */
+#ifdef __LP64__
+#define DEPTH(x)       (2 * (flsl((long)(x)) - 1))
+#else /* !__LP64__ */
+#define DEPTH(x)       (2 * (fls((int)(x)) - 1))
+#endif /* __LP64__ */
+
+/* Thunk-taking heapsort used as the depth-limit fallback of the psort_r flavor */
+#ifdef I_AM_PSORT_R
+int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
+#endif
+
+/* Forward declaration: _psort() hands this to dispatch_group_async_f() */
+static void _psort_parallel(void *x);
+
+/*
+ * Sort the n elements of size es that start at a.
+ *
+ * This is a quicksort with a depth budget: every pass through `loop'
+ * consumes one unit of depth_limit and, once the budget is exhausted, the
+ * remaining range is handed to the flavor's heapsort.
+ *
+ * cmp (and thunk in the psort_r flavor) is the comparator.  shared may be
+ * NULL.  When it is not, a left partition with more than shared->turnoff
+ * elements is queued on shared->queue as a separate _psort_parallel() job
+ * instead of being sorted by a recursive call; LIBC_ABORT is invoked if no
+ * work record can be obtained for that job.  Recursive calls always pass
+ * shared == NULL.
+ */
+static void
+_psort(void *a, size_t n, size_t es,
+#ifdef I_AM_PSORT_R
+void *thunk,
+#else
+/* no thunk parameter in this flavor; lets the CMP()/med3() call sites compile unchanged */
+#define thunk  NULL
+#endif
+#ifdef I_AM_PSORT_B
+cmp_t ^cmp,
+#else
+cmp_t *cmp,
+#endif
+int depth_limit, struct shared *shared)
+{
+       char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+       size_t d, r;
+       int cmp_result;
+       int swaptype, swap_cnt;
+
+loop:
+       /* depth budget used up: finish this range with heapsort */
+       if (depth_limit-- <= 0) {
+#ifdef I_AM_PSORT_B
+               heapsort_b(a, n, es, cmp);
+#elif defined(I_AM_PSORT_R)
+               __heapsort_r(a, n, es, thunk, cmp);
+#else
+               heapsort(a, n, es, cmp);
+#endif
+               return;
+       }
+       SWAPINIT(a, es);
+       swap_cnt = 0;
+       /* fewer than 7 elements: plain insertion sort */
+       if (n < 7) {
+               for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+                       for (pl = pm; 
+                            pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+                            pl -= es)
+                               swap(pl, pl - es);
+               return;
+       }
+       /*
+        * Pivot selection: the middle element for n == 7, the median of
+        * first/middle/last for larger n, and for n > 40 the median of
+        * three such medians taken from samples d bytes apart.
+        */
+       pm = (char *)a + (n / 2) * es;
+       if (n > 7) {
+               pl = a;
+               pn = (char *)a + (n - 1) * es;
+               if (n > 40) {
+                       d = (n / 8) * es;
+                       pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
+                       pm = med3(pm - d, pm, pm + d, cmp, thunk);
+                       pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
+               }
+               pm = med3(pl, pm, pn, cmp, thunk);
+       }
+       /* move the pivot to the front; a now points at it */
+       swap(a, pm);
+       pa = pb = (char *)a + es;
+
+       pc = pd = (char *)a + (n - 1) * es;
+       /*
+        * Partition.  pb scans upward and pc scans downward; elements that
+        * compare equal to the pivot are parked at the left end (below pa)
+        * and at the right end (above pd).
+        */
+       for (;;) {
+               while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
+                       if (cmp_result == 0) {
+                               swap_cnt = 1;
+                               swap(pa, pb);
+                               pa += es;
+                       }
+                       pb += es;
+               }
+               while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
+                       if (cmp_result == 0) {
+                               swap_cnt = 1;
+                               swap(pc, pd);
+                               pd -= es;
+                       }
+                       pc -= es;
+               }
+               if (pb > pc)
+                       break;
+               swap(pb, pc);
+               swap_cnt = 1;
+               pb += es;
+               pc -= es;
+       }
+
+       /* swap the parked equal elements from both ends into the middle */
+       pn = (char *)a + n * es;
+       r = min(pa - (char *)a, pb - pa);
+       vecswap(a, pb - r, r);
+       r = min(pd - pc, pn - pd - es);
+       vecswap(pb, pn - r, r);
+
+       /*
+        * Partitioning performed no swap at all: try an insertion sort, but
+        * give up and fall through to the normal path after more than r
+        * swaps.
+        */
+       if (swap_cnt == 0) {  /* Switch to insertion sort */
+               r = 1 + n / 4; /* n >= 7, so r >= 2 */
+               for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+                       for (pl = pm; 
+                            pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+                            pl -= es) {
+                               swap(pl, pl - es);
+                               if (++swap_cnt > r) goto nevermind;
+                       }
+               return;
+       }
+
+nevermind:
+       /* left partition: pb - pa bytes starting at a */
+       if ((r = pb - pa) > es) {
+               r /= es;
+               if (shared && r > shared->turnoff) {
+                       /* large enough: queue it as an independent job */
+                       union args *args = getargs(shared);
+
+                       if (args == NULL)
+                               LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
+                       args->shared = shared;
+                       args->a = a;
+                       args->n = r;
+                       args->depth_limit = depth_limit;
+                       dispatch_group_async_f(shared->group, shared->queue, args,
+                                       _psort_parallel);
+               } else {
+                       /* sort it here; shared == NULL means the callee never dispatches */
+#ifdef I_AM_PSORT_R
+                       _psort(a, r, es, thunk, cmp, depth_limit, NULL);
+#else
+                       _psort(a, r, es, cmp, depth_limit, NULL);
+#endif
+               }
+       }
+       /* right partition: the last pd - pc bytes of the range */
+       if ((r = pd - pc) > es) {
+               /* Iterate rather than recurse to save stack space */
+               a = pn - r;
+               n = r / es;
+               goto loop;
+       }
+/*             psort(pn - r, r / es, es, cmp);*/
+}
+
+/*
+ * Job entry point with the signature expected by dispatch_group_async_f():
+ * x is a union args record.  Sorts the range it describes with the
+ * comparator and element size stored in the shared state, then returns the
+ * record to the free list.
+ */
+static void
+_psort_parallel(void *x)
+{
+       union args *work = x;
+       struct shared *common = work->shared;
+
+       _psort(work->a, work->n, common->es,
+#ifdef I_AM_PSORT_R
+               common->thunk,
+#endif
+               common->cmp, work->depth_limit, common);
+
+       /* the record is recycled only after its range has been processed */
+       returnargs(common, work);
+}
+
+/*
+ * Cheap approximation of the integer square root of x: start from a power
+ * of two with half as many bits as x and average it once with x / guess.
+ */
+static size_t
+isqrt(size_t x)
+{
+    int half_bits = flsl(x) / 2;
+    size_t guess = 1L << half_bits;
+
+    return (guess + x / guess) / 2;
+}
+
+/*
+ * Public entry point (psort, psort_r or psort_b depending on the flavor
+ * being compiled).  Sorts the n elements of size es at a using cmp.
+ *
+ * The parallel path is taken only when n is at least PARALLEL_MIN_SIZE,
+ * _NumCPUs() reports more than one CPU and a first work record can be
+ * obtained; in every other case the matching qsort variant is called.
+ */
+void
+#ifdef I_AM_PSORT_R
+psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
+#elif defined(I_AM_PSORT_B)
+psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
+#else
+psort(void *a, size_t n, size_t es, cmp_t *cmp)
+#endif
+{
+       if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
+               struct shared shared;
+               union args *first;
+
+               bzero(&shared, sizeof(shared));
+               shared.sharedlock = OS_SPINLOCK_INIT;
+               first = getargs(&shared);
+               if (first != NULL) {
+                       struct page *page;
+
+#ifdef I_AM_PSORT_R
+                       shared.who = "psort_r";
+                       shared.thunk = thunk;
+#elif defined(I_AM_PSORT_B)
+                       shared.who = "psort_b";
+#else
+                       shared.who = "psort";
+#endif
+                       shared.cmp = cmp;
+                       shared.es = es;
+                       shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
+                       shared.group = dispatch_group_create();
+                       /*
+                        * Partitions with at most `turnoff' elements are
+                        * sorted in the current thread instead of being
+                        * dispatched.  sqrt(n) was chosen heuristically;
+                        * the comparison cost matters somewhat and the CPU
+                        * count may too, but no formula has been worked
+                        * out.  Sensitivity to the exact value is low, so
+                        * an approximate integer square root is sufficient.
+                        */
+                       shared.turnoff = isqrt(n);
+
+                       /* the first job covers the whole array and runs in this thread */
+                       first->shared = &shared;
+                       first->a = a;
+                       first->n = n;
+                       first->depth_limit = DEPTH(n);
+                       _psort_parallel(first);
+
+                       /* wait for queue to drain */
+                       dispatch_group_wait(shared.group, DISPATCH_TIME_FOREVER);
+                       dispatch_release(shared.group);
+
+                       /* unmap every page of work records */
+                       page = shared.pagelist;
+                       while (page != NULL) {
+                               struct page *following = page->next;
+
+                               munmap(page, PAGESIZE);
+                               page = following;
+                       }
+                       return;
+               }
+       }
+       /* Just call qsort */
+#ifdef I_AM_PSORT_R
+       qsort_r(a, n, es, thunk, cmp);
+#elif defined(I_AM_PSORT_B)
+       qsort_b(a, n, es, cmp);
+#else
+       qsort(a, n, es, cmp);
+#endif
+}