/* stdlib/FreeBSD/psort_b.c — Apple Libc-1158.50.2 (gitweb blame export, git.saurik.com mirror) */
1/****************************************************************************/
2/*-
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#if defined(LIBC_SCCS) && !defined(lint)
32static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
33#endif /* LIBC_SCCS and not lint */
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
36
37#include <stdlib.h>
38#include <pthread.h>
39#include <dispatch/dispatch.h>
40#include <stddef.h>
41#include <string.h>
42#include <libkern/OSAtomic.h>
43#include <sys/mman.h>
44#include <errno.h>
45#define __APPLE_API_PRIVATE
46#include <machine/cpu_capabilities.h>
47
/*
 * Comparator type: the psort_r build takes a user "thunk" as the first
 * argument; the plain and block (psort_b) builds take just the two
 * elements being compared.
 */
#ifdef I_AM_PSORT_R
typedef int cmp_t(void *, const void *, const void *);
#else
typedef int cmp_t(const void *, const void *);
#endif
/* In the psort_b build the comparator is a clang block, not a pointer. */
#ifdef I_AM_PSORT_B
static inline char *med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
#else
static inline char *med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
#endif
static inline void swapfunc(char *, char *, int, int) __attribute__((always_inline));
59
/*
 * min() of two comparable values.  Fully parenthesized so the expansion
 * is safe inside a larger expression (the previous definition expanded
 * to an unparenthesized conditional and relied on call-site precedence).
 * NOTE: arguments are still evaluated twice — no side effects.
 */
#define min(a, b)	(((a) < (b)) ? (a) : (b))

/* Number of union args blocks that fit in one mmap'ed page after the header. */
#define NARGS			((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
#define PAGESIZE		4096
#define PARALLEL_MIN_SIZE	2000 /* determine heuristically */
65
struct shared;	/* forward reference */

/*
 * Per-task argument block.  Blocks are kept on a freelist threaded
 * through 'next' while idle; once handed to a worker, the anonymous
 * struct members describe the partition to sort.
 */
union args {
	union args *next;		/* freelist link while unallocated */
	struct {
		struct shared *shared;	/* state shared by the whole sort */
		void *a;		/* base address of the partition */
		size_t n;		/* number of elements in the partition */
		int depth_limit;	/* remaining depth before heapsort fallback */
	} /* anonymous */;
};

/*
 * One mmap'ed page of argument blocks.  Pages are chained so they can
 * all be munmap'ed when the sort completes.
 */
struct page {
	struct page *next;
	union args args[0];		/* NARGS blocks fill out the page */
};

/*
 * State shared by every worker invocation belonging to one psort call.
 * Lives on the caller's stack for the duration of the sort.
 */
struct shared {
	char *who;			/* entry-point name, for abort messages */
	union args *freelist;		/* available argument blocks */
	struct page *pagelist;		/* every page ever allocated */
#ifdef I_AM_PSORT_R
	void *thunk;			/* user context forwarded to cmp */
#endif
#ifdef I_AM_PSORT_B
	cmp_t ^cmp;			/* comparison block (psort_b build) */
#else
	cmp_t *cmp;			/* comparison function */
#endif
	size_t es;			/* element size in bytes */
	size_t turnoff;			/* partitions <= this are sorted in-line */
	dispatch_queue_t queue;		/* global concurrent queue for workers */
	dispatch_group_t group;		/* tracks outstanding workers */
	OSSpinLock sharedlock;		/* protects freelist and pagelist */
};
100
101static union args *
102getargs(struct shared *shared)
103{
104 union args *args;
105
106 OSSpinLockLock(&shared->sharedlock);
107 if(!shared->freelist) {
108 struct page *page;
109 union args *prev;
110 int i;
111 if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == NULL)
112 return NULL;
113 page->next = shared->pagelist;
114 shared->pagelist = page;
115 prev = NULL;
116 for(args = page->args, i = NARGS; i > 0; args++, i--) {
117 args->next = prev;
118 prev = args;
119 }
120 shared->freelist = prev;
121 }
122 args = shared->freelist;
123 shared->freelist = args->next;
124 OSSpinLockUnlock(&shared->sharedlock);
125 return args;
126}
127
128static void
129returnargs(struct shared *shared, union args *args)
130{
131 OSSpinLockLock(&shared->sharedlock);
132 args->next = shared->freelist;
133 shared->freelist = args;
134 OSSpinLockUnlock(&shared->sharedlock);
135}
136
/*
 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 */
/* Exchange n bytes between parmi and parmj, one TYPE unit at a time. */
#define swapcode(TYPE, parmi, parmj, n) { 	\
	long i = (n) / sizeof (TYPE); 		\
	TYPE *pi = (TYPE *) (parmi); 		\
	TYPE *pj = (TYPE *) (parmj); 		\
	do { 					\
		TYPE t = *pi;			\
		*pi++ = *pj;			\
		*pj++ = t;			\
	} while (--i > 0);			\
}

/*
 * Classify (a, es) for swapping: 0 = element is exactly one long,
 * 1 = element is a multiple of longs (long-aligned base), 2 = swap
 * byte-at-a-time (misaligned base or odd element size).
 */
#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
153
/*
 * Swap the n bytes at a with the n bytes at b.  swaptype (from
 * SWAPINIT) selects the unit: <= 1 swaps sizeof(long)-byte words
 * (base and size are long-compatible), 2 swaps byte-at-a-time.
 * Modernized from the obsolescent K&R-style definition; the former
 * swapcode() macro bodies are written out explicitly and unchanged.
 * Callers guarantee n > 0 (see vecswap).
 */
static inline void
swapfunc(char *a, char *b, int n, int swaptype)
{
	if (swaptype <= 1) {
		/* long-aligned: swap word by word */
		long i = (n) / sizeof(long);
		long *pi = (long *)a;
		long *pj = (long *)b;
		do {
			long t = *pi;
			*pi++ = *pj;
			*pj++ = t;
		} while (--i > 0);
	} else {
		/* arbitrary alignment: swap byte by byte */
		long i = n;
		char *pi = a;
		char *pj = b;
		do {
			char t = *pi;
			*pi++ = *pj;
			*pj++ = t;
		} while (--i > 0);
	}
}
164
/* Swap one element; inline fast path when the element is a single long. */
#define swap(a, b)					\
	if (swaptype == 0) {				\
		long t = *(long *)(a);			\
		*(long *)(a) = *(long *)(b);		\
		*(long *)(b) = t;			\
	} else						\
		swapfunc(a, b, es, swaptype)

/* Swap n bytes between a and b; no-op when n == 0 (avoids swapfunc's do-while). */
#define vecswap(a, b, n) 	if ((n) > 0) swapfunc(a, b, n, swaptype)

/* Invoke the comparator; only the psort_r build forwards the user thunk. */
#ifdef I_AM_PSORT_R
#define	CMP(t, x, y) (cmp((t), (x), (y)))
#else
#define	CMP(t, x, y) (cmp((x), (y)))
#endif
180
/*
 * Median-of-three: return whichever of a, b, c points at the middle
 * value under cmp — used to choose good partition pivots.  thunk is
 * only consulted in the psort_r build (via the CMP() macro) and is
 * marked __unused otherwise.
 */
static inline char *
med3(char *a, char *b, char *c,
#ifdef I_AM_PSORT_B
cmp_t ^cmp,	/* comparator is a block in the psort_b build */
#else
cmp_t *cmp,	/* plain function pointer otherwise */
#endif
void *thunk
#ifndef I_AM_PSORT_R
__unused
#endif
)
{
	return CMP(thunk, a, b) < 0 ?
	       (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
	      :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
}
198
/* Recursion-depth budget before degrading to heapsort: 2*(floor(lg(x))). */
#ifdef __LP64__
#define DEPTH(x)	(2 * (flsl((long)(x)) - 1))
#else /* !__LP64__ */
#define DEPTH(x)	(2 * (fls((int)(x)) - 1))
#endif /* __LP64__ */

#ifdef I_AM_PSORT_R
/* Reentrant heapsort fallback, defined elsewhere in Libc. */
int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
#endif

/* dispatch worker entry; definition follows _psort. */
static void _psort_parallel(void *x);
210
/*
 * Core Bentley–McIlroy quicksort body, extended for parallelism.
 * Sorts n elements of size es starting at a.  When 'shared' is
 * non-NULL and the left partition exceeds shared->turnoff elements,
 * that partition is handed to a libdispatch worker; the right
 * partition is always handled by iteration (goto loop) to bound stack
 * growth.  depth_limit counts down on every partition pass and
 * triggers a heapsort fallback to avoid quadratic behavior on
 * adversarial inputs (introsort-style).
 */
static void
_psort(void *a, size_t n, size_t es,
#ifdef I_AM_PSORT_R
void *thunk,
#else
#define thunk NULL
#endif
#ifdef I_AM_PSORT_B
cmp_t ^cmp,
#else
cmp_t *cmp,
#endif
int depth_limit, struct shared *shared)
{
	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
	size_t d, r;
	int cmp_result;
	int swaptype, swap_cnt;

loop:
	/* Depth budget exhausted: fall back to O(n log n)-guaranteed heapsort. */
	if (depth_limit-- <= 0) {
#ifdef I_AM_PSORT_B
		heapsort_b(a, n, es, cmp);
#elif defined(I_AM_PSORT_R)
		__heapsort_r(a, n, es, thunk, cmp);
#else
		heapsort(a, n, es, cmp);
#endif
		return;
	}
	SWAPINIT(a, es);
	swap_cnt = 0;
	/* Tiny partitions: plain insertion sort is fastest. */
	if (n < 7) {
		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
			for (pl = pm;
			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
			     pl -= es)
				swap(pl, pl - es);
		return;
	}
	/* Pivot selection: middle element; median-of-3 for n > 7; for
	 * n > 40, pseudo-median of 9 samples spread across the array. */
	pm = (char *)a + (n / 2) * es;
	if (n > 7) {
		pl = a;
		pn = (char *)a + (n - 1) * es;
		if (n > 40) {
			d = (n / 8) * es;
			pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
			pm = med3(pm - d, pm, pm + d, cmp, thunk);
			pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
		}
		pm = med3(pl, pm, pn, cmp, thunk);
	}
	swap(a, pm);	/* pivot parked at a[0] */
	pa = pb = (char *)a + es;

	pc = pd = (char *)a + (n - 1) * es;
	/* Three-way partition: equal-to-pivot elements collect at both
	 * ends (a..pa and pd..end) and are swapped to the middle after. */
	for (;;) {
		while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pa, pb);
				pa += es;
			}
			pb += es;
		}
		while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pc, pd);
				pd -= es;
			}
			pc -= es;
		}
		if (pb > pc)
			break;
		swap(pb, pc);
		swap_cnt = 1;
		pb += es;
		pc -= es;
	}

	/* Move the pivot-equal runs from the ends into the middle. */
	pn = (char *)a + n * es;
	r = min(pa - (char *)a, pb - pa);
	vecswap(a, pb - r, r);
	r = min(pd - pc, pn - pd - es);
	vecswap(pb, pn - r, r);

	/* No swaps during partitioning suggests nearly-sorted input: try
	 * insertion sort, but bail out (Apple addition) after 1 + n/4
	 * swaps rather than risk its quadratic worst case. */
	if (swap_cnt == 0) {  /* Switch to insertion sort */
		r = 1 + n / 4; /* n >= 7, so r >= 2 */
		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
			for (pl = pm;
			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
			     pl -= es) {
				swap(pl, pl - es);
				if (++swap_cnt > r) goto nevermind;
			}
		return;
	}

nevermind:
	/* Left (< pivot) partition: dispatch to a worker when parallel
	 * sorting is on and the partition is big enough; else recurse. */
	if ((r = pb - pa) > es) {
		r /= es;
		if (shared && r > shared->turnoff) {
			union args *args = getargs(shared);

			if (args == NULL)
				LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
			args->shared = shared;
			args->a = a;
			args->n = r;
			args->depth_limit = depth_limit;
			dispatch_group_async_f(shared->group, shared->queue, args,
			    _psort_parallel);
		} else {
#ifdef I_AM_PSORT_R
			_psort(a, r, es, thunk, cmp, depth_limit, NULL);
#else
			_psort(a, r, es, cmp, depth_limit, NULL);
#endif
		}
	}
	/* Right (> pivot) partition: tail-call eliminated by hand. */
	if ((r = pd - pc) > es) {
		/* Iterate rather than recurse to save stack space */
		a = pn - r;
		n = r / es;
		goto loop;
	}
/*		psort(pn - r, r / es, es, cmp);*/
}
340
341static void
342_psort_parallel(void *x)
343{
344 union args *args = (union args *)x;
345 struct shared *shared = args->shared;
346
347 _psort(args->a, args->n, shared->es,
348#ifdef I_AM_PSORT_R
349 shared->thunk,
350#endif
351 shared->cmp, args->depth_limit, shared);
352 returnargs(shared, args);
353}
354
355/* fast, approximate integer square root */
356static size_t
357isqrt(size_t x)
358{
359 size_t s = 1L << (flsl(x) / 2);
360 return (s + x / s) / 2;
361}
362
/*
 * Public entry point (psort / psort_r / psort_b depending on build).
 * For arrays of at least PARALLEL_MIN_SIZE elements on a multi-CPU
 * machine, partitions are sorted concurrently on the default global
 * dispatch queue; otherwise — or if the first argument-block
 * allocation fails — this degrades to the matching plain qsort.
 */
void
#ifdef I_AM_PSORT_R
psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
#elif defined(I_AM_PSORT_B)
psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
#else
psort(void *a, size_t n, size_t es, cmp_t *cmp)
#endif
{
	if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
		struct shared shared;	/* lives here for the whole sort */
		union args *args;

		bzero(&shared, sizeof(shared));
		shared.sharedlock = OS_SPINLOCK_INIT;
		if ((args = getargs(&shared)) != NULL) {
			struct page *p, *pp;
#ifdef I_AM_PSORT_R
			shared.who = "psort_r";
			shared.thunk = thunk;
#elif defined(I_AM_PSORT_B)
			shared.who = "psort_b";
#else
			shared.who = "psort";
#endif
			shared.cmp = cmp;
			shared.es = es;
			/* global queue: not owned, so never released */
			shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
			shared.group = dispatch_group_create();
			args->a = a;
			args->n = n;
			args->depth_limit = DEPTH(n);
			args->shared = &shared;
			/*
			 * The turnoff value is the size of a partition that,
			 * below which, we stop doing in parallel, and just do
			 * in the current thread.  The value of sqrt(n) was
			 * determined heuristically.  There is a smaller
			 * dependence on the slowness of the comparison
			 * function, and there might be a dependence on the
			 * number of processors, but the algorithm has not been
			 * determined.  Because the sensitivity to the turnoff
			 * value is relatively low, we use a fast, approximate
			 * integer square root routine that is good enough for
			 * this purpose.
			 */
			shared.turnoff = isqrt(n);
			/* run the top-level partition on this thread */
			_psort_parallel(args);

			/* wait for queue to drain */
			dispatch_group_wait(shared.group, DISPATCH_TIME_FOREVER);
			dispatch_release(shared.group);
			/* unmap every page the argument pool allocated */
			for(p = shared.pagelist; p; p = pp) {
				pp = p->next;
				munmap(p, PAGESIZE);
			}
			return;
		}
	}
	/* Just call qsort */
#ifdef I_AM_PSORT_R
	qsort_r(a, n, es, thunk, cmp);
#elif defined(I_AM_PSORT_B)
	qsort_b(a, n, es, cmp);
#else
	qsort(a, n, es, cmp);
#endif
}