]> git.saurik.com Git - apple/libc.git/blame - stdlib/psort-fbsd.c
Libc-594.1.4.tar.gz
[apple/libc.git] / stdlib / psort-fbsd.c
CommitLineData
34e8f829
A
1/****************************************************************************/
2/*-
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#if defined(LIBC_SCCS) && !defined(lint)
32static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
33#endif /* LIBC_SCCS and not lint */
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
36
37#include <stdlib.h>
38#include <pthread.h>
39#include <dispatch/dispatch.h>
40#include <stddef.h>
41#include <string.h>
42#include <libkern/OSAtomic.h>
43#include <sys/mman.h>
44#include <errno.h>
45#define __APPLE_API_PRIVATE
46#include <machine/cpu_capabilities.h>
47
/*
 * Comparator signature.  The psort_r() flavor hands the caller's opaque
 * pointer to the comparator as its first argument; the other flavors pass
 * only the two elements being compared.
 */
#ifdef I_AM_PSORT_R
typedef int cmp_t(void *thunk, const void *lhs, const void *rhs);
#else
typedef int cmp_t(const void *lhs, const void *rhs);
#endif

/*
 * Forward declarations of the always-inlined helpers.  The psort_b() flavor
 * takes its comparator as a block (^) instead of a function pointer.
 */
#ifdef I_AM_PSORT_B
static inline char *med3(char *a, char *b, char *c, cmp_t ^cmp, void *thunk) __attribute__((always_inline));
#else
static inline char *med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk) __attribute__((always_inline));
#endif
static inline void swapfunc(char *a, char *b, int n, int swaptype) __attribute__((always_inline));
59
/*
 * Smaller of two values.  The arguments and the whole expansion are
 * parenthesized so the macro can be used inside a larger expression; each
 * argument may still be evaluated more than once.
 */
#define	min(a, b)	((a) < (b) ? (a) : (b))
61
/* Size in bytes of each anonymous mapping that holds work descriptors. */
#define	PAGESIZE		4096
/* Number of union args slots that fit in one mapping after the page header. */
#define	NARGS			((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
/* Inputs with fewer elements than this go straight to the serial sort. */
#define	PARALLEL_MIN_SIZE	2000	/* determine heuristically */
65
struct shared;		/* forward reference */

/*
 * One work descriptor.  While a slot sits on the free list only `next' is
 * meaningful; once handed out, the anonymous struct describes the sub-array
 * to sort.  Both views share the same storage.
 */
union args {
	union args *next;		/* free-list link */
	struct {
		struct shared *shared;	/* state common to the whole sort */
		void *a;		/* base of the sub-array */
		size_t n;		/* number of elements in the sub-array */
		int depth_limit;	/* remaining quicksort depth budget */
	} /* anonymous */;
};

/*
 * Header placed at the start of each descriptor mapping; the remainder of
 * the mapping is used as an array of work descriptors.
 */
struct page {
	struct page *next;	/* next mapping owned by the same sort */
	union args args[];	/* flexible array member (was a zero-length array) */
};
81
82struct shared {
83 char *who;
84 union args *freelist;
85 struct page *pagelist;
86#ifdef I_AM_PSORT_R
87 void *thunk;
88#endif
89#ifdef I_AM_PSORT_B
90 cmp_t ^cmp;
91#else
92 cmp_t *cmp;
93#endif
94 size_t es;
95 size_t turnoff;
96 dispatch_queue_t queue;
97 pthread_cond_t cond;
98 pthread_mutex_t mutex;
99 OSSpinLock sharedlock;
100 int count;
101};
102
103static union args *
104getargs(struct shared *shared)
105{
106 union args *args;
107
108 OSSpinLockLock(&shared->sharedlock);
109 if(!shared->freelist) {
110 struct page *page;
111 union args *prev;
112 int i;
113 if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == NULL)
114 return NULL;
115 page->next = shared->pagelist;
116 shared->pagelist = page;
117 prev = NULL;
118 for(args = page->args, i = NARGS; i > 0; args++, i--) {
119 args->next = prev;
120 prev = args;
121 }
122 shared->freelist = prev;
123 }
124 args = shared->freelist;
125 shared->freelist = args->next;
126 OSSpinLockUnlock(&shared->sharedlock);
127 return args;
128}
129
130static void
131returnargs(struct shared *shared, union args *args)
132{
133 OSSpinLockLock(&shared->sharedlock);
134 args->next = shared->freelist;
135 shared->freelist = args;
136 OSSpinLockUnlock(&shared->sharedlock);
137}
138
139/*
140 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
141 */
/*
 * Exchange n bytes between parmi and parmj, one TYPE-sized unit at a time.
 * The loop body runs before its test, so one unit is exchanged even when n
 * is smaller than sizeof(TYPE); callers guard against n == 0 themselves.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define	swapcode(TYPE, parmi, parmj, n) do {		\
	long i = (n) / sizeof (TYPE);			\
	TYPE *pi = (TYPE *) (parmi);			\
	TYPE *pj = (TYPE *) (parmj);			\
	do {						\
		TYPE t = *pi;				\
		*pi++ = *pj;				\
		*pj++ = t;				\
	} while (--i > 0);				\
} while (0)

/*
 * Choose how elements are exchanged and store the choice in the caller's
 * `swaptype' variable:
 *   0 - base is long-aligned and es == sizeof(long): swap a single long
 *   1 - base is long-aligned and es is a multiple of sizeof(long)
 *   2 - anything else: swap byte by byte
 */
#define	SWAPINIT(a, es) swaptype =					\
	(((char *)(a) - (char *)0) % sizeof(long) ||			\
	(es) % sizeof(long)) ? 2 : ((es) == sizeof(long) ? 0 : 1)

/*
 * Exchange n bytes between a and b, by longs when swaptype is 0 or 1 and by
 * chars otherwise.
 */
static inline void
swapfunc(char *a, char *b, int n, int swaptype)
{
	if (swaptype <= 1)
		swapcode(long, a, b, n);
	else
		swapcode(char, a, b, n);
}

/*
 * Exchange the elements at a and b.  Uses the caller's `swaptype' and `es'
 * variables; the arguments may be evaluated more than once.
 */
#define	swap(a, b) do {					\
	if (swaptype == 0) {				\
		long t = *(long *)(a);			\
		*(long *)(a) = *(long *)(b);		\
		*(long *)(b) = t;			\
	} else						\
		swapfunc(a, b, es, swaptype);		\
} while (0)

/* Exchange two runs of n bytes; does nothing when n is zero. */
#define	vecswap(a, b, n) do {				\
	if ((n) > 0)					\
		swapfunc(a, b, n, swaptype);		\
} while (0)
176
177#ifdef I_AM_PSORT_R
178#define CMP(t, x, y) (cmp((t), (x), (y)))
179#else
180#define CMP(t, x, y) (cmp((x), (y)))
181#endif
182
183static inline char *
184med3(char *a, char *b, char *c,
185#ifdef I_AM_PSORT_B
186cmp_t ^cmp,
187#else
188cmp_t *cmp,
189#endif
190void *thunk
191#ifndef I_AM_PSORT_R
192__unused
193#endif
194)
195{
196 return CMP(thunk, a, b) < 0 ?
197 (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
198 :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
199}
200
201#ifdef __LP64__
202#define DEPTH(x) (2 * (flsl((long)(x)) - 1))
203#else /* !__LP64__ */
204#define DEPTH(x) (2 * (fls((int)(x)) - 1))
205#endif /* __LP64__ */
206
#ifdef I_AM_PSORT_R
/* Heapsort taking a thunk; declared here because it is not defined in this file. */
int __heapsort_r(void *base, size_t nel, size_t width, void *thunk,
    int (*compar)(void *, const void *, const void *));
#endif

/* Work function handed to dispatch_async_f(); x points to a union args. */
static void _psort_parallel(void *x);
212
213static void
214_psort(void *a, size_t n, size_t es,
215#ifdef I_AM_PSORT_R
216void *thunk,
217#else
218#define thunk NULL
219#endif
220#ifdef I_AM_PSORT_B
221cmp_t ^cmp,
222#else
223cmp_t *cmp,
224#endif
225int depth_limit, struct shared *shared)
226{
227 char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
228 size_t d, r;
229 int cmp_result;
230 int swaptype, swap_cnt;
231
232loop:
233 if (depth_limit-- <= 0) {
234#ifdef I_AM_PSORT_B
235 heapsort_b(a, n, es, cmp);
236#elif defined(I_AM_PSORT_R)
237 __heapsort_r(a, n, es, thunk, cmp);
238#else
239 heapsort(a, n, es, cmp);
240#endif
241 return;
242 }
243 SWAPINIT(a, es);
244 swap_cnt = 0;
245 if (n < 7) {
246 for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
247 for (pl = pm;
248 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
249 pl -= es)
250 swap(pl, pl - es);
251 return;
252 }
253 pm = (char *)a + (n / 2) * es;
254 if (n > 7) {
255 pl = a;
256 pn = (char *)a + (n - 1) * es;
257 if (n > 40) {
258 d = (n / 8) * es;
259 pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
260 pm = med3(pm - d, pm, pm + d, cmp, thunk);
261 pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
262 }
263 pm = med3(pl, pm, pn, cmp, thunk);
264 }
265 swap(a, pm);
266 pa = pb = (char *)a + es;
267
268 pc = pd = (char *)a + (n - 1) * es;
269 for (;;) {
270 while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
271 if (cmp_result == 0) {
272 swap_cnt = 1;
273 swap(pa, pb);
274 pa += es;
275 }
276 pb += es;
277 }
278 while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
279 if (cmp_result == 0) {
280 swap_cnt = 1;
281 swap(pc, pd);
282 pd -= es;
283 }
284 pc -= es;
285 }
286 if (pb > pc)
287 break;
288 swap(pb, pc);
289 swap_cnt = 1;
290 pb += es;
291 pc -= es;
292 }
293
294 pn = (char *)a + n * es;
295 r = min(pa - (char *)a, pb - pa);
296 vecswap(a, pb - r, r);
297 r = min(pd - pc, pn - pd - es);
298 vecswap(pb, pn - r, r);
299
300 if (swap_cnt == 0) { /* Switch to insertion sort */
301 r = 1 + n / 4; /* n >= 7, so r >= 2 */
302 for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
303 for (pl = pm;
304 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
305 pl -= es) {
306 swap(pl, pl - es);
307 if (++swap_cnt > r) goto nevermind;
308 }
309 return;
310 }
311
312nevermind:
313 if ((r = pb - pa) > es) {
314 r /= es;
315 if (shared && r > shared->turnoff) {
316 union args *args = getargs(shared);
317
318 if (args == NULL)
319 LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
320 args->shared = shared;
321 args->a = a;
322 args->n = r;
323 args->depth_limit = depth_limit;
324 OSAtomicIncrement32(&shared->count);
325 dispatch_async_f(shared->queue, args, _psort_parallel);
326 } else {
327#ifdef I_AM_PSORT_R
328 _psort(a, r, es, thunk, cmp, depth_limit, NULL);
329#else
330 _psort(a, r, es, cmp, depth_limit, NULL);
331#endif
332 }
333 }
334 if ((r = pd - pc) > es) {
335 /* Iterate rather than recurse to save stack space */
336 a = pn - r;
337 n = r / es;
338 goto loop;
339 }
340/* psort(pn - r, r / es, es, cmp);*/
341}
342
343static void
344_psort_parallel(void *x)
345{
346 union args *args = (union args *)x;
347 struct shared *shared = args->shared;
348
349 _psort(args->a, args->n, shared->es,
350#ifdef I_AM_PSORT_R
351 shared->thunk,
352#endif
353 shared->cmp, args->depth_limit, shared);
354 returnargs(shared, args);
355 if(OSAtomicDecrement32(&shared->count) <= 0) {
356 pthread_mutex_lock(&shared->mutex);
357 pthread_cond_signal(&shared->cond);
358 pthread_mutex_unlock(&shared->mutex);
359 }
360}
361
362/* fast, approximate integer square root */
363static size_t
364isqrt(size_t x)
365{
366 size_t s = 1L << (flsl(x) / 2);
367 return (s + x / s) / 2;
368}
369
370void
371#ifdef I_AM_PSORT_R
372psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
373#elif defined(I_AM_PSORT_B)
374psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
375#else
376psort(void *a, size_t n, size_t es, cmp_t *cmp)
377#endif
378{
379 if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
380 struct shared shared;
381 union args *args;
382
383 bzero(&shared, sizeof(shared));
384 shared.sharedlock = OS_SPINLOCK_INIT;
385 if ((args = getargs(&shared)) != NULL) {
386 struct page *p, *pp;
387#ifdef I_AM_PSORT_R
388 shared.who = "psort_r";
389 shared.thunk = thunk;
390#elif defined(I_AM_PSORT_B)
391 shared.who = "psort_b";
392#else
393 shared.who = "psort";
394#endif
395 shared.cmp = cmp;
396 shared.es = es;
397 shared.queue = dispatch_get_concurrent_queue(0);
398 shared.cond = (pthread_cond_t)PTHREAD_COND_INITIALIZER;
399 shared.mutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
400 args->a = a;
401 args->n = n;
402 args->depth_limit = DEPTH(n);
403 args->shared = &shared;
404 /*
405 * The turnoff value is the size of a partition that,
406 * below which, we stop doing in parallel, and just do
407 * in the current thread. The value of sqrt(n) was
408 * determined heuristically. There is a smaller
409 * dependence on the slowness of the comparison
410 * function, and there might be a dependence on the
411 * number of processors, but the algorithm has not been
412 * determined. Because the sensitivity to the turnoff
413 * value is relatively low, we use a fast, approximate
414 * integer square root routine that is good enough for
415 * this purpose.
416 */
417 shared.turnoff = isqrt(n);
418 OSAtomicIncrement32(&shared.count);
419 _psort_parallel(args);
420
421 /* wait for queue to drain */
422 pthread_mutex_lock(&shared.mutex);
423 while(shared.count > 0)
424 pthread_cond_wait(&shared.cond, &shared.mutex);
425
426 pthread_mutex_unlock(&shared.mutex);
427 pthread_mutex_destroy(&shared.mutex);
428 pthread_cond_destroy(&shared.cond);
429 for(p = shared.pagelist; p; p = pp) {
430 pp = p->next;
431 munmap(p, PAGESIZE);
432 }
433 return;
434 }
435 }
436 /* Just call qsort */
437#ifdef I_AM_PSORT_R
438 qsort_r(a, n, es, thunk, cmp);
439#elif defined(I_AM_PSORT_B)
440 qsort_b(a, n, es, cmp);
441#else
442 qsort(a, n, es, cmp);
443#endif
444}