/*
 * Retrieved from git.saurik.com (gitweb): apple/libc.git, blob
 * e9251ab945e16bea668babe5d6821f04db9ac8e2
 * Path: stdlib/FreeBSD/psort_b.c
 */
1 /****************************************************************************/
2 /*-
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #if defined(LIBC_SCCS) && !defined(lint)
32 static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
33 #endif /* LIBC_SCCS and not lint */
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
36
37 #include <stdlib.h>
38 #include <pthread.h>
39 #include <dispatch/dispatch.h>
40 #include <stddef.h>
41 #include <string.h>
42 #include <libkern/OSAtomic.h>
43 #include <sys/mman.h>
44 #include <errno.h>
45 #include <os/lock.h>
46 #define __APPLE_API_PRIVATE
47 #include <machine/cpu_capabilities.h>
48
49 #ifdef I_AM_PSORT_R
50 typedef int cmp_t(void *, const void *, const void *);
51 #else
52 typedef int cmp_t(const void *, const void *);
53 #endif
54 #ifdef I_AM_PSORT_B
55 static inline char *med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
56 #else
57 static inline char *med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
58 #endif
59 static inline void swapfunc(char *, char *, int, int) __attribute__((always_inline));
60
/*
 * Smaller of two values.  The arguments and the whole expansion are
 * parenthesised so the macro behaves as a single expression in any context
 * (e.g. `min(x, y) * 2`).  Each argument may still be evaluated twice, so do
 * not pass expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
62
/*
 * Number of union args slots that fit in one PAGESIZE-byte page after the
 * struct page header.  PAGESIZE is defined on the next line; that is fine
 * because a macro body is only expanded where the macro is used.
 */
#define NARGS ((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
/* Size in bytes of each block of argument slots obtained from mmap(). */
#define PAGESIZE 4096
/* Arrays with fewer elements than this are passed straight to qsort. */
#define PARALLEL_MIN_SIZE 2000 /* determine heuristically */
66
struct shared; /* forward reference */
/*
 * One slot of work for _psort_parallel().  While a slot sits on the free
 * list only `next` is meaningful; once getargs() hands it out, the same
 * storage carries the parameters of one sub-sort.
 */
union args {
	union args *next;		/* free-list link */
	struct {
		struct shared *shared;	/* state common to the whole sort */
		void *a;		/* first element of the partition */
		size_t n;		/* number of elements in the partition */
		int depth_limit;	/* passed through to _psort() */
	} /* anonymous */;
};
77
78 struct page {
79 struct page *next;
80 union args args[0];
81 };
82
/*
 * State shared by every task of one parallel sort.  It lives on the stack of
 * the public entry point, which waits for all tasks before returning.
 */
struct shared {
	char *who;			/* entry-point name, used in the abort message */
	union args *freelist;		/* unused argument slots */
	struct page *pagelist;		/* every page mapped by getargs() */
#ifdef I_AM_PSORT_R
	void *thunk;			/* caller context passed to the comparator */
#endif
#ifdef I_AM_PSORT_B
	cmp_t ^cmp;			/* comparator block */
#else
	cmp_t *cmp;			/* comparator function */
#endif
	size_t es;			/* element size in bytes */
	size_t turnoff;			/* partitions of at most this many elements are sorted inline */
	dispatch_queue_t queue;		/* queue the sub-sorts are submitted to */
	dispatch_group_t group;		/* group used to wait for all sub-sorts */
	os_unfair_lock sharedlock;	/* held while freelist/pagelist are updated */
};
101
102 static union args *
103 getargs(struct shared *shared)
104 {
105 union args *args;
106
107 os_unfair_lock_lock(&shared->sharedlock);
108 if(!shared->freelist) {
109 struct page *page;
110 union args *prev;
111 int i;
112 if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == NULL)
113 return NULL;
114 page->next = shared->pagelist;
115 shared->pagelist = page;
116 prev = NULL;
117 for(args = page->args, i = NARGS; i > 0; args++, i--) {
118 args->next = prev;
119 prev = args;
120 }
121 shared->freelist = prev;
122 }
123 args = shared->freelist;
124 shared->freelist = args->next;
125 os_unfair_lock_unlock(&shared->sharedlock);
126 return args;
127 }
128
129 static void
130 returnargs(struct shared *shared, union args *args)
131 {
132 os_unfair_lock_lock(&shared->sharedlock);
133 args->next = shared->freelist;
134 shared->freelist = args;
135 os_unfair_lock_unlock(&shared->sharedlock);
136 }
137
138 /*
139 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
140 */
/*
 * Exchange n bytes between parmi and parmj, moving one TYPE at a time.
 * Expands to a brace-enclosed block, so it is written without a trailing
 * semicolon at the point of use.  The do/while body runs at least once, even
 * if n / sizeof(TYPE) is zero.
 */
#define swapcode(TYPE, parmi, parmj, n) { \
	long i = (n) / sizeof (TYPE); \
	TYPE *pi = (TYPE *) (parmi); \
	TYPE *pj = (TYPE *) (parmj); \
	do { \
		TYPE t = *pi; \
		*pi++ = *pj; \
		*pj++ = t; \
	} while (--i > 0); \
}
151
/*
 * Choose how elements are exchanged and store the choice in the enclosing
 * function's local `swaptype`:
 *   0 - a is long-aligned and es == sizeof(long): swap one long directly
 *   1 - a is long-aligned and es is a multiple of sizeof(long): swap by longs
 *   2 - anything else: swap byte by byte
 * Both arguments are parenthesised so expressions such as `w + w` can be
 * passed, and the expansion carries no semicolon of its own; write
 * `SWAPINIT(a, es);` at the point of use.
 */
#define SWAPINIT(a, es) swaptype = \
	((((char *)(a) - (char *)0) % sizeof(long)) || ((es) % sizeof(long))) ? 2 : \
	(((es) == sizeof(long)) ? 0 : 1)
154
155 static inline void
156 swapfunc(a, b, n, swaptype)
157 char *a, *b;
158 int n, swaptype;
159 {
160 if(swaptype <= 1)
161 swapcode(long, a, b, n)
162 else
163 swapcode(char, a, b, n)
164 }
165
/*
 * Exchange the elements at a and b.  Uses the enclosing function's locals
 * `swaptype` and `es`: when swaptype is 0 a single long is swapped inline,
 * otherwise the work is delegated to swapfunc().
 *
 * Wrapped in do { } while (0) so the macro is exactly one statement and must
 * be followed by a semicolon.
 */
#define swap(a, b) do { \
	if (swaptype == 0) { \
		long t = *(long *)(a); \
		*(long *)(a) = *(long *)(b); \
		*(long *)(b) = t; \
	} else \
		swapfunc(a, b, es, swaptype); \
} while (0)

/*
 * Exchange the n-byte regions at a and b; does nothing when n is not
 * positive.  The do { } while (0) wrapper keeps a following `else` from
 * binding to the macro's internal `if`.
 */
#define vecswap(a, b, n) do { \
	if ((n) > 0) \
		swapfunc(a, b, n, swaptype); \
} while (0)
175
/*
 * Invoke the comparator named `cmp` in the enclosing scope.  Only the
 * psort_r build forwards the context argument t; the other builds drop it.
 */
#ifdef I_AM_PSORT_R
#define CMP(t, x, y) (cmp((t), (x), (y)))
#else
#define CMP(t, x, y) (cmp((x), (y)))
#endif
181
182 static inline char *
183 med3(char *a, char *b, char *c,
184 #ifdef I_AM_PSORT_B
185 cmp_t ^cmp,
186 #else
187 cmp_t *cmp,
188 #endif
189 void *thunk
190 #ifndef I_AM_PSORT_R
191 __unused
192 #endif
193 )
194 {
195 return CMP(thunk, a, b) < 0 ?
196 (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
197 :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
198 }
199
/*
 * Initial depth budget for sorting x elements: twice (position of the
 * highest set bit of x, minus one).  _psort() switches to heapsort once the
 * budget is used up.  flsl() is used where long is 64 bits, fls() otherwise.
 */
#ifdef __LP64__
#define DEPTH(x) (2 * (flsl((long)(x)) - 1))
#else /* !__LP64__ */
#define DEPTH(x) (2 * (fls((int)(x)) - 1))
#endif /* __LP64__ */
205
/* Heapsort variant that takes a context pointer; used as the psort_r fallback. */
#ifdef I_AM_PSORT_R
int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
#endif

/* Defined after _psort(), which submits it as the dispatch work function. */
static void _psort_parallel(void *x);
211
/*
 * Sort the n elements of es bytes each that start at a.
 *
 * This is a quicksort that
 *   - hands the array to heapsort once depth_limit is used up,
 *   - uses insertion sort for fewer than 7 elements,
 *   - picks the pivot as the middle element, a median of three, or (for
 *     n > 40) a median of three medians,
 *   - parks elements equal to the pivot at both ends while partitioning and
 *     then swaps them into the middle.
 *
 * When shared is non-NULL and the left partition has more than
 * shared->turnoff elements, that partition is submitted to the dispatch
 * group as a separate task; otherwise it is sorted by a recursive call that
 * passes shared == NULL.  The right partition is always handled by looping
 * in the current call.
 *
 * The macros SWAPINIT, swap, vecswap and CMP refer to the locals swaptype,
 * es, cmp (and thunk) by name.
 */
static void
_psort(void *a, size_t n, size_t es,
#ifdef I_AM_PSORT_R
    void *thunk,
#else
/* No context parameter in this build: every use of `thunk` below becomes NULL. */
#define thunk NULL
#endif
#ifdef I_AM_PSORT_B
    cmp_t ^cmp,
#else
    cmp_t *cmp,
#endif
    int depth_limit, struct shared *shared)
{
	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
	size_t d, r;
	int cmp_result;
	int swaptype, swap_cnt;

loop:
	/* Depth budget exhausted: let heapsort finish this range. */
	if (depth_limit-- <= 0) {
#ifdef I_AM_PSORT_B
		heapsort_b(a, n, es, cmp);
#elif defined(I_AM_PSORT_R)
		__heapsort_r(a, n, es, thunk, cmp);
#else
		heapsort(a, n, es, cmp);
#endif
		return;
	}
	SWAPINIT(a, es);
	swap_cnt = 0;
	/* Small range: plain insertion sort. */
	if (n < 7) {
		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
			for (pl = pm;
			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
			     pl -= es)
				swap(pl, pl - es);
		return;
	}
	/* Pivot selection: middle element, refined by med3() for larger n. */
	pm = (char *)a + (n / 2) * es;
	if (n > 7) {
		pl = a;
		pn = (char *)a + (n - 1) * es;
		if (n > 40) {
			d = (n / 8) * es;
			pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
			pm = med3(pm - d, pm, pm + d, cmp, thunk);
			pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
		}
		pm = med3(pl, pm, pn, cmp, thunk);
	}
	/* Move the pivot to the front; it is compared against as *a below. */
	swap(a, pm);
	pa = pb = (char *)a + es;

	pc = pd = (char *)a + (n - 1) * es;
	/*
	 * Partition.  pb scans up and pc scans down; elements equal to the
	 * pivot are swapped out to the low end (tracked by pa) or the high end
	 * (tracked by pd).
	 */
	for (;;) {
		while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pa, pb);
				pa += es;
			}
			pb += es;
		}
		while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pc, pd);
				pd -= es;
			}
			pc -= es;
		}
		if (pb > pc)
			break;
		swap(pb, pc);
		swap_cnt = 1;
		pb += es;
		pc -= es;
	}

	/* Swap the parked equal-to-pivot runs from both ends into the middle. */
	pn = (char *)a + n * es;
	r = min(pa - (char *)a, pb - pa);
	vecswap(a, pb - r, r);
	r = min(pd - pc, pn - pd - es);
	vecswap(pb, pn - r, r);

	/*
	 * The partition pass performed no swaps, so try insertion sort, but
	 * give up (and fall through to the normal recursion) once more than
	 * r swaps have been needed.
	 */
	if (swap_cnt == 0) {  /* Switch to insertion sort */
		r = 1 + n / 4; /* n >= 7, so r >= 2 */
		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
			for (pl = pm;
			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
			     pl -= es) {
				swap(pl, pl - es);
				if (++swap_cnt > r) goto nevermind;
			}
		return;
	}

nevermind:
	/* Left partition: the first pb - pa bytes starting at a. */
	if ((r = pb - pa) > es) {
		r /= es;
		if (shared && r > shared->turnoff) {
			/* Large enough to be worth a separate task. */
			union args *args = getargs(shared);

			if (args == NULL)
				LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
			args->shared = shared;
			args->a = a;
			args->n = r;
			args->depth_limit = depth_limit;
			dispatch_group_async_f(shared->group, shared->queue, args,
			    _psort_parallel);
		} else {
			/* Sort inline; shared is NULL here, so this call never dispatches. */
#ifdef I_AM_PSORT_R
			_psort(a, r, es, thunk, cmp, depth_limit, NULL);
#else
			_psort(a, r, es, cmp, depth_limit, NULL);
#endif
		}
	}
	/* Right partition: the last pd - pc bytes before pn. */
	if ((r = pd - pc) > es) {
		/* Iterate rather than recurse to save stack space */
		a = pn - r;
		n = r / es;
		goto loop;
	}
	/* Recursive form replaced by the loop above: psort(pn - r, r / es, es, cmp); */
}
341
342 static void
343 _psort_parallel(void *x)
344 {
345 union args *args = (union args *)x;
346 struct shared *shared = args->shared;
347
348 _psort(args->a, args->n, shared->es,
349 #ifdef I_AM_PSORT_R
350 shared->thunk,
351 #endif
352 shared->cmp, args->depth_limit, shared);
353 returnargs(shared, args);
354 }
355
356 /* fast, approximate integer square root */
357 static size_t
358 isqrt(size_t x)
359 {
360 size_t s = 1L << (flsl(x) / 2);
361 return (s + x / s) / 2;
362 }
363
364 void
365 #ifdef I_AM_PSORT_R
366 psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
367 #elif defined(I_AM_PSORT_B)
368 psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
369 #else
370 psort(void *a, size_t n, size_t es, cmp_t *cmp)
371 #endif
372 {
373 if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
374 struct shared shared;
375 union args *args;
376
377 bzero(&shared, sizeof(shared));
378 shared.sharedlock = OS_UNFAIR_LOCK_INIT;
379 if ((args = getargs(&shared)) != NULL) {
380 struct page *p, *pp;
381 #ifdef I_AM_PSORT_R
382 shared.who = "psort_r";
383 shared.thunk = thunk;
384 #elif defined(I_AM_PSORT_B)
385 shared.who = "psort_b";
386 #else
387 shared.who = "psort";
388 #endif
389 shared.cmp = cmp;
390 shared.es = es;
391 shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
392 shared.group = dispatch_group_create();
393 args->a = a;
394 args->n = n;
395 args->depth_limit = DEPTH(n);
396 args->shared = &shared;
397 /*
398 * The turnoff value is the size of a partition that,
399 * below which, we stop doing in parallel, and just do
400 * in the current thread. The value of sqrt(n) was
401 * determined heuristically. There is a smaller
402 * dependence on the slowness of the comparison
403 * function, and there might be a dependence on the
404 * number of processors, but the algorithm has not been
405 * determined. Because the sensitivity to the turnoff
406 * value is relatively low, we use a fast, approximate
407 * integer square root routine that is good enough for
408 * this purpose.
409 */
410 shared.turnoff = isqrt(n);
411 _psort_parallel(args);
412
413 /* wait for queue to drain */
414 dispatch_group_wait(shared.group, DISPATCH_TIME_FOREVER);
415 dispatch_release(shared.group);
416 for(p = shared.pagelist; p; p = pp) {
417 pp = p->next;
418 munmap(p, PAGESIZE);
419 }
420 return;
421 }
422 }
423 /* Just call qsort */
424 #ifdef I_AM_PSORT_R
425 qsort_r(a, n, es, thunk, cmp);
426 #elif defined(I_AM_PSORT_B)
427 qsort_b(a, n, es, cmp);
428 #else
429 qsort(a, n, es, cmp);
430 #endif
431 }