]> git.saurik.com Git - apple/libc.git/blame - stdlib/FreeBSD/psort_r.c
Libc-1244.50.9.tar.gz
[apple/libc.git] / stdlib / FreeBSD / psort_r.c
CommitLineData
fc5ea90f
A
1/****************************************************************************/
2/*-
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#if defined(LIBC_SCCS) && !defined(lint)
32static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
33#endif /* LIBC_SCCS and not lint */
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
36
#include <stdlib.h>
#include <pthread.h>
#include <dispatch/dispatch.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <libkern/OSAtomic.h>
#include <sys/mman.h>
#include <errno.h>
#include <os/lock.h>
#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>
48
49#ifdef I_AM_PSORT_R
50typedef int cmp_t(void *, const void *, const void *);
51#else
52typedef int cmp_t(const void *, const void *);
53#endif
54#ifdef I_AM_PSORT_B
55static inline char *med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
56#else
57static inline char *med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
58#endif
59static inline void swapfunc(char *, char *, int, int) __attribute__((always_inline));
60
/*
 * Smaller of two values.  The expansion and both arguments are fully
 * parenthesized so the macro is safe inside a larger expression; each
 * argument may still be evaluated twice, so avoid side effects.
 */
#define min(a, b)	((a) < (b) ? (a) : (b))

/* Size in bytes of every page mapped by getargs(). */
#define PAGESIZE	4096
/* Number of union args slots that fit in a page after the struct page header. */
#define NARGS		((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
/* Arrays shorter than this are never sorted in parallel. */
#define PARALLEL_MIN_SIZE	2000 /* determine heuristically */
66
struct shared; /* forward reference */

/*
 * One unit of work for _psort_parallel().  While a slot sits on the free
 * list only `next' is meaningful; once handed out by getargs() the
 * anonymous struct describes the sub-array to sort.
 */
union args {
	union args *next;		/* link to the next free slot */
	struct {
		struct shared *shared;	/* state common to the whole sort */
		void *a;		/* first element of the sub-array */
		size_t n;		/* number of elements in the sub-array */
		int depth_limit;	/* quicksort levels left before heapsort */
	} /* anonymous */;
};

/*
 * Header at the start of each PAGESIZE mapping; the remainder of the
 * mapping is used as an array of argument slots.
 */
struct page {
	struct page *next;	/* next mapping on the owner's page list */
	union args args[];	/* C99 flexible array member: the slots */
};
82
83struct shared {
84 char *who;
85 union args *freelist;
86 struct page *pagelist;
87#ifdef I_AM_PSORT_R
88 void *thunk;
89#endif
90#ifdef I_AM_PSORT_B
91 cmp_t ^cmp;
92#else
93 cmp_t *cmp;
94#endif
95 size_t es;
96 size_t turnoff;
97 dispatch_queue_t queue;
98 dispatch_group_t group;
b061a43b 99 os_unfair_lock sharedlock;
fc5ea90f
A
100};
101
102static union args *
103getargs(struct shared *shared)
104{
105 union args *args;
106
b061a43b 107 os_unfair_lock_lock(&shared->sharedlock);
fc5ea90f
A
108 if(!shared->freelist) {
109 struct page *page;
110 union args *prev;
111 int i;
112 if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == NULL)
113 return NULL;
114 page->next = shared->pagelist;
115 shared->pagelist = page;
116 prev = NULL;
117 for(args = page->args, i = NARGS; i > 0; args++, i--) {
118 args->next = prev;
119 prev = args;
120 }
121 shared->freelist = prev;
122 }
123 args = shared->freelist;
124 shared->freelist = args->next;
b061a43b 125 os_unfair_lock_unlock(&shared->sharedlock);
fc5ea90f
A
126 return args;
127}
128
129static void
130returnargs(struct shared *shared, union args *args)
131{
b061a43b 132 os_unfair_lock_lock(&shared->sharedlock);
fc5ea90f
A
133 args->next = shared->freelist;
134 shared->freelist = args;
b061a43b 135 os_unfair_lock_unlock(&shared->sharedlock);
fc5ea90f
A
136}
137
138/*
139 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
140 */
/*
 * Exchange n bytes between parmi and parmj, one TYPE at a time.  The loop
 * body runs at least once, so a count below sizeof(TYPE) still swaps one
 * element.  Wrapped in do/while(0) so it behaves as a single statement.
 */
#define swapcode(TYPE, parmi, parmj, n) do {		\
	long i = (n) / sizeof (TYPE);			\
	TYPE *pi = (TYPE *) (parmi);			\
	TYPE *pj = (TYPE *) (parmj);			\
	do {						\
		TYPE t = *pi;				\
		*pi++ = *pj;				\
		*pj++ = t;				\
	} while (--i > 0);				\
} while (0)

/*
 * Set the enclosing function's `swaptype' from the base address and
 * element size:
 *   0 - a is long-aligned and es == sizeof(long)
 *   1 - a is long-aligned and es is a larger multiple of sizeof(long)
 *   2 - anything else (swap byte by byte)
 * The address is tested through uintptr_t instead of subtracting a null
 * pointer, which is not defined pointer arithmetic.
 */
#define SWAPINIT(a, es) swaptype =					\
	((uintptr_t)(a) % sizeof(long) || (es) % sizeof(long)) ? 2 :	\
	((es) == sizeof(long) ? 0 : 1)

/*
 * Exchange the n-byte objects at a and b: by longs when swaptype <= 1,
 * by single bytes otherwise.
 */
static inline void
swapfunc(char *a, char *b, int n, int swaptype)
{
	if (swaptype <= 1) {
		swapcode(long, a, b, n);
	} else {
		swapcode(char, a, b, n);
	}
}

/*
 * Exchange two elements.  Uses `swaptype' and `es' from the enclosing
 * scope; a single long is swapped inline when swaptype is 0.
 */
#define swap(a, b) do {					\
	if (swaptype == 0) {				\
		long t = *(long *)(a);			\
		*(long *)(a) = *(long *)(b);		\
		*(long *)(b) = t;			\
	} else						\
		swapfunc(a, b, es, swaptype);		\
} while (0)

/* Exchange n bytes between a and b; does nothing when n is not positive. */
#define vecswap(a, b, n) do {				\
	if ((n) > 0)					\
		swapfunc(a, b, n, swaptype);		\
} while (0)

/* Call the comparator; the thunk is forwarded only by the psort_r variant. */
#ifdef I_AM_PSORT_R
#define CMP(t, x, y) (cmp((t), (x), (y)))
#else
#define CMP(t, x, y) (cmp((x), (y)))
#endif
181
182static inline char *
183med3(char *a, char *b, char *c,
184#ifdef I_AM_PSORT_B
185cmp_t ^cmp,
186#else
187cmp_t *cmp,
188#endif
189void *thunk
190#ifndef I_AM_PSORT_R
191__unused
192#endif
193)
194{
195 return CMP(thunk, a, b) < 0 ?
196 (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
197 :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
198}
199
200#ifdef __LP64__
201#define DEPTH(x) (2 * (flsl((long)(x)) - 1))
202#else /* !__LP64__ */
203#define DEPTH(x) (2 * (fls((int)(x)) - 1))
204#endif /* __LP64__ */
205
206#ifdef I_AM_PSORT_R
207int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
208#endif
209
210static void _psort_parallel(void *x);
211
/*
 * Sort the n elements of size es starting at a, using cmp (and thunk in
 * the psort_r variant).
 *
 * depth_limit is decremented on every pass through the loop; once it is
 * exhausted the range is handed to heapsort.  Ranges of fewer than 7
 * elements are insertion sorted.  Otherwise the range is partitioned
 * around a pivot and the left part is either dispatched to shared->queue
 * (when shared is non-NULL and the part has more than shared->turnoff
 * elements) or sorted by a recursive call with shared == NULL; the right
 * part is handled by looping.
 *
 * NOTE(review): the return value of the heapsort routines is ignored.
 */
212static void
213_psort(void *a, size_t n, size_t es,
214#ifdef I_AM_PSORT_R
215void *thunk,
216#else
217#define thunk NULL
218#endif
219#ifdef I_AM_PSORT_B
220cmp_t ^cmp,
221#else
222cmp_t *cmp,
223#endif
224int depth_limit, struct shared *shared)
225{
226 char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
227 size_t d, r;
228 int cmp_result;
229 int swaptype, swap_cnt;
230
231loop:
/* Depth budget used up: finish this range with heapsort. */
232 if (depth_limit-- <= 0) {
233#ifdef I_AM_PSORT_B
234 heapsort_b(a, n, es, cmp);
235#elif defined(I_AM_PSORT_R)
236 __heapsort_r(a, n, es, thunk, cmp);
237#else
238 heapsort(a, n, es, cmp);
239#endif
240 return;
241 }
242 SWAPINIT(a, es);
243 swap_cnt = 0;
/* Fewer than 7 elements: plain insertion sort. */
244 if (n < 7) {
245 for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
246 for (pl = pm;
247 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
248 pl -= es)
249 swap(pl, pl - es);
250 return;
251 }
/*
 * Pivot choice: the middle element when n == 7, the median of first,
 * middle and last when n > 7, and the median of three such medians
 * when n > 40.
 */
252 pm = (char *)a + (n / 2) * es;
253 if (n > 7) {
254 pl = a;
255 pn = (char *)a + (n - 1) * es;
256 if (n > 40) {
257 d = (n / 8) * es;
258 pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
259 pm = med3(pm - d, pm, pm + d, cmp, thunk);
260 pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
261 }
262 pm = med3(pl, pm, pn, cmp, thunk);
263 }
/*
 * The pivot is moved to a[0].  During the scan, elements equal to the
 * pivot are collected at the left end (below pa) and the right end
 * (above pd); swap_cnt records whether any exchange happened.
 */
264 swap(a, pm);
265 pa = pb = (char *)a + es;
266
267 pc = pd = (char *)a + (n - 1) * es;
268 for (;;) {
269 while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
270 if (cmp_result == 0) {
271 swap_cnt = 1;
272 swap(pa, pb);
273 pa += es;
274 }
275 pb += es;
276 }
277 while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
278 if (cmp_result == 0) {
279 swap_cnt = 1;
280 swap(pc, pd);
281 pd -= es;
282 }
283 pc -= es;
284 }
285 if (pb > pc)
286 break;
287 swap(pb, pc);
288 swap_cnt = 1;
289 pb += es;
290 pc -= es;
291 }
292
/* Move the pivot-equal runs from both ends into the middle. */
293 pn = (char *)a + n * es;
294 r = min(pa - (char *)a, pb - pa);
295 vecswap(a, pb - r, r);
296 r = min(pd - pc, pn - pd - es);
297 vecswap(pb, pn - r, r);
298
/*
 * No exchange during partitioning: try an insertion sort, but give up
 * and continue with the partitions once more than r swaps were needed.
 */
299 if (swap_cnt == 0) { /* Switch to insertion sort */
300 r = 1 + n / 4; /* n >= 7, so r >= 2 */
301 for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
302 for (pl = pm;
303 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
304 pl -= es) {
305 swap(pl, pl - es);
306 if (++swap_cnt > r) goto nevermind;
307 }
308 return;
309 }
310
311nevermind:
/*
 * Left partition (elements less than the pivot): submit it to the
 * dispatch group when it is large enough, otherwise sort it here with
 * further parallelism disabled.
 */
312 if ((r = pb - pa) > es) {
313 r /= es;
314 if (shared && r > shared->turnoff) {
315 union args *args = getargs(shared);
316
317 if (args == NULL)
318 LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
319 args->shared = shared;
320 args->a = a;
321 args->n = r;
322 args->depth_limit = depth_limit;
323 dispatch_group_async_f(shared->group, shared->queue, args,
324 _psort_parallel);
325 } else {
326#ifdef I_AM_PSORT_R
327 _psort(a, r, es, thunk, cmp, depth_limit, NULL);
328#else
329 _psort(a, r, es, cmp, depth_limit, NULL);
330#endif
331 }
332 }
/* Right partition (elements greater than the pivot). */
333 if ((r = pd - pc) > es) {
334 /* Iterate rather than recurse to save stack space */
335 a = pn - r;
336 n = r / es;
337 goto loop;
338 }
339/* psort(pn - r, r / es, es, cmp);*/
340}
341
342static void
343_psort_parallel(void *x)
344{
345 union args *args = (union args *)x;
346 struct shared *shared = args->shared;
347
348 _psort(args->a, args->n, shared->es,
349#ifdef I_AM_PSORT_R
350 shared->thunk,
351#endif
352 shared->cmp, args->depth_limit, shared);
353 returnargs(shared, args);
354}
355
356/* fast, approximate integer square root */
357static size_t
358isqrt(size_t x)
359{
360 size_t s = 1L << (flsl(x) / 2);
361 return (s + x / s) / 2;
362}
363
364void
365#ifdef I_AM_PSORT_R
366psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
367#elif defined(I_AM_PSORT_B)
368psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
369#else
370psort(void *a, size_t n, size_t es, cmp_t *cmp)
371#endif
372{
373 if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
374 struct shared shared;
375 union args *args;
376
377 bzero(&shared, sizeof(shared));
b061a43b 378 shared.sharedlock = OS_UNFAIR_LOCK_INIT;
fc5ea90f
A
379 if ((args = getargs(&shared)) != NULL) {
380 struct page *p, *pp;
381#ifdef I_AM_PSORT_R
382 shared.who = "psort_r";
383 shared.thunk = thunk;
384#elif defined(I_AM_PSORT_B)
385 shared.who = "psort_b";
386#else
387 shared.who = "psort";
388#endif
389 shared.cmp = cmp;
390 shared.es = es;
391 shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
392 shared.group = dispatch_group_create();
393 args->a = a;
394 args->n = n;
395 args->depth_limit = DEPTH(n);
396 args->shared = &shared;
397 /*
398 * The turnoff value is the size of a partition that,
399 * below which, we stop doing in parallel, and just do
400 * in the current thread. The value of sqrt(n) was
401 * determined heuristically. There is a smaller
402 * dependence on the slowness of the comparison
403 * function, and there might be a dependence on the
404 * number of processors, but the algorithm has not been
405 * determined. Because the sensitivity to the turnoff
406 * value is relatively low, we use a fast, approximate
407 * integer square root routine that is good enough for
408 * this purpose.
409 */
410 shared.turnoff = isqrt(n);
411 _psort_parallel(args);
412
413 /* wait for queue to drain */
414 dispatch_group_wait(shared.group, DISPATCH_TIME_FOREVER);
415 dispatch_release(shared.group);
416 for(p = shared.pagelist; p; p = pp) {
417 pp = p->next;
418 munmap(p, PAGESIZE);
419 }
420 return;
421 }
422 }
423 /* Just call qsort */
424#ifdef I_AM_PSORT_R
425 qsort_r(a, n, es, thunk, cmp);
426#elif defined(I_AM_PSORT_B)
427 qsort_b(a, n, es, cmp);
428#else
429 qsort(a, n, es, cmp);
430#endif
431}