]> git.saurik.com Git - apple/libc.git/blob - stdlib/FreeBSD/psort.c
Libc-1044.40.1.tar.gz
[apple/libc.git] / stdlib / FreeBSD / psort.c
1 /****************************************************************************/
2 /*-
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #if defined(LIBC_SCCS) && !defined(lint)
32 static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
33 #endif /* LIBC_SCCS and not lint */
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
36
#include <stdlib.h>
#include <pthread.h>
#include <dispatch/dispatch.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <libkern/OSAtomic.h>
#include <sys/mman.h>
#include <errno.h>
#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>
47
/*
 * Comparison callback type.  The psort_r flavour receives the caller's
 * thunk as an extra leading argument; the other flavours take only the
 * two elements being compared.
 */
#if defined(I_AM_PSORT_R)
typedef int cmp_t(void *, const void *, const void *);
#else
typedef int cmp_t(const void *, const void *);
#endif

/*
 * Forward declarations of the always-inlined helpers.  The psort_b
 * flavour passes the comparator as a block rather than a function
 * pointer.
 */
#if defined(I_AM_PSORT_B)
static inline char *med3(char *a, char *b, char *c, cmp_t ^cmp, void *thunk)
    __attribute__((always_inline));
#else
static inline char *med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk)
    __attribute__((always_inline));
#endif
static inline void swapfunc(char *a, char *b, int n, int swaptype)
    __attribute__((always_inline));
59
/*
 * Smaller of two values.  The expansion and both arguments are fully
 * parenthesized so the macro can be embedded in a larger expression;
 * note that each argument is still evaluated more than once.
 */
#define min(a, b)		((a) < (b) ? (a) : (b))

/* Size in bytes of each page mmap()ed to hold argument records. */
#define PAGESIZE		4096
/* Number of union args records that fit in a page after its header. */
#define NARGS			((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
/* Arrays with fewer elements than this are sorted without parallelism. */
#define PARALLEL_MIN_SIZE	2000	/* determine heuristically */
65
struct shared;	/* forward reference */

/*
 * One record describing a unit of sorting work.
 *
 * A record is in one of two states: while it sits on the free list only
 * `next` is meaningful; once handed out, the anonymous struct (which
 * overlays `next`) describes the sub-array to sort.
 */
union args {
	/* Link to the next unused record on the free list. */
	union args *next;

	/* Description of a pending sort job. */
	struct {
		struct shared	*shared;	/* state common to all jobs */
		void		*a;		/* first element to sort */
		size_t		 n;		/* number of elements */
		int		 depth_limit;	/* remaining recursion budget */
	} /* anonymous */;
};
76
77 struct page {
78 struct page *next;
79 union args args[0];
80 };
81
82 struct shared {
83 char *who;
84 union args *freelist;
85 struct page *pagelist;
86 #ifdef I_AM_PSORT_R
87 void *thunk;
88 #endif
89 #ifdef I_AM_PSORT_B
90 cmp_t ^cmp;
91 #else
92 cmp_t *cmp;
93 #endif
94 size_t es;
95 size_t turnoff;
96 dispatch_queue_t queue;
97 dispatch_group_t group;
98 OSSpinLock sharedlock;
99 };
100
101 static union args *
102 getargs(struct shared *shared)
103 {
104 union args *args;
105
106 OSSpinLockLock(&shared->sharedlock);
107 if(!shared->freelist) {
108 struct page *page;
109 union args *prev;
110 int i;
111 if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == NULL)
112 return NULL;
113 page->next = shared->pagelist;
114 shared->pagelist = page;
115 prev = NULL;
116 for(args = page->args, i = NARGS; i > 0; args++, i--) {
117 args->next = prev;
118 prev = args;
119 }
120 shared->freelist = prev;
121 }
122 args = shared->freelist;
123 shared->freelist = args->next;
124 OSSpinLockUnlock(&shared->sharedlock);
125 return args;
126 }
127
128 static void
129 returnargs(struct shared *shared, union args *args)
130 {
131 OSSpinLockLock(&shared->sharedlock);
132 args->next = shared->freelist;
133 shared->freelist = args;
134 OSSpinLockUnlock(&shared->sharedlock);
135 }
136
137 /*
138 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
139 */
/*
 * swapcode --
 *	Exchange the n bytes at parmi with the n bytes at parmj, moving
 *	one TYPE-sized unit at a time.  The do/while always performs at
 *	least one exchange.
 *
 *	The expansion is a brace-enclosed block, not a do { } while (0)
 *	statement, so it can be followed directly by `else` without a
 *	semicolon in between.
 */
#define swapcode(TYPE, parmi, parmj, n) {			\
	long units_left = (n) / sizeof (TYPE);			\
	TYPE *lhs = (TYPE *) (parmi);				\
	TYPE *rhs = (TYPE *) (parmj);				\
	do {							\
		TYPE held = *lhs;				\
		*lhs++ = *rhs;					\
		*rhs++ = held;					\
	} while (--units_left > 0);				\
}
150
/*
 * SWAPINIT --
 *	Decide how elements will be exchanged and store the decision in
 *	the caller's local variable `swaptype`:
 *	  0  a is long-aligned and es == sizeof(long): swap one long;
 *	  1  a is long-aligned and es is a multiple of sizeof(long):
 *	     swap long by long;
 *	  2  otherwise: swap byte by byte.
 *
 *	The address is inspected through uintptr_t; subtracting a null
 *	pointer from `a` is undefined behaviour.  The expansion is a plain
 *	assignment expression, so the caller supplies the semicolon.
 */
#define SWAPINIT(a, es) swaptype =					\
	(((uintptr_t)(a) % sizeof(long) != 0 ||				\
	  (es) % sizeof(long) != 0) ? 2 :				\
	 ((es) == sizeof(long) ? 0 : 1))
153
154 static inline void
155 swapfunc(a, b, n, swaptype)
156 char *a, *b;
157 int n, swaptype;
158 {
159 if(swaptype <= 1)
160 swapcode(long, a, b, n)
161 else
162 swapcode(char, a, b, n)
163 }
164
/*
 * swap --
 *	Exchange the single elements at a and b.  Reads the caller's
 *	locals `swaptype` and `es`: when swaptype is 0 the element is
 *	moved as one long, otherwise swapfunc() handles it.
 *
 *	Wrapped in do { } while (0) so the expansion is one statement and
 *	cannot capture a following `else`; invoke it with a trailing
 *	semicolon.
 */
#define swap(a, b) do {						\
	if (swaptype == 0) {					\
		long swap_tmp_ = *(long *)(a);			\
		*(long *)(a) = *(long *)(b);			\
		*(long *)(b) = swap_tmp_;			\
	} else							\
		swapfunc((a), (b), es, swaptype);		\
} while (0)

/*
 * vecswap --
 *	Exchange n bytes between a and b via swapfunc(); does nothing
 *	when n is zero.  Reads the caller's local `swaptype`.  Invoke it
 *	with a trailing semicolon.
 */
#define vecswap(a, b, n) do {					\
	if ((n) > 0)						\
		swapfunc((a), (b), (n), swaptype);		\
} while (0)
174
/*
 * CMP --
 *	Call the comparator named `cmp` in the enclosing scope.  Only the
 *	psort_r flavour forwards the thunk `t`; elsewhere it is dropped.
 */
#ifndef I_AM_PSORT_R
#define CMP(t, x, y)	(cmp((x), (y)))
#else
#define CMP(t, x, y)	(cmp((t), (x), (y)))
#endif
180
181 static inline char *
182 med3(char *a, char *b, char *c,
183 #ifdef I_AM_PSORT_B
184 cmp_t ^cmp,
185 #else
186 cmp_t *cmp,
187 #endif
188 void *thunk
189 #ifndef I_AM_PSORT_R
190 __unused
191 #endif
192 )
193 {
194 return CMP(thunk, a, b) < 0 ?
195 (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
196 :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
197 }
198
199 #ifdef __LP64__
200 #define DEPTH(x) (2 * (flsl((long)(x)) - 1))
201 #else /* !__LP64__ */
202 #define DEPTH(x) (2 * (fls((int)(x)) - 1))
203 #endif /* __LP64__ */
204
205 #ifdef I_AM_PSORT_R
206 int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
207 #endif
208
209 static void _psort_parallel(void *x);
210
211 static void
212 _psort(void *a, size_t n, size_t es,
213 #ifdef I_AM_PSORT_R
214 void *thunk,
215 #else
216 #define thunk NULL
217 #endif
218 #ifdef I_AM_PSORT_B
219 cmp_t ^cmp,
220 #else
221 cmp_t *cmp,
222 #endif
223 int depth_limit, struct shared *shared)
224 {
225 char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
226 size_t d, r;
227 int cmp_result;
228 int swaptype, swap_cnt;
229
230 loop:
231 if (depth_limit-- <= 0) {
232 #ifdef I_AM_PSORT_B
233 heapsort_b(a, n, es, cmp);
234 #elif defined(I_AM_PSORT_R)
235 __heapsort_r(a, n, es, thunk, cmp);
236 #else
237 heapsort(a, n, es, cmp);
238 #endif
239 return;
240 }
241 SWAPINIT(a, es);
242 swap_cnt = 0;
243 if (n < 7) {
244 for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
245 for (pl = pm;
246 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
247 pl -= es)
248 swap(pl, pl - es);
249 return;
250 }
251 pm = (char *)a + (n / 2) * es;
252 if (n > 7) {
253 pl = a;
254 pn = (char *)a + (n - 1) * es;
255 if (n > 40) {
256 d = (n / 8) * es;
257 pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
258 pm = med3(pm - d, pm, pm + d, cmp, thunk);
259 pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
260 }
261 pm = med3(pl, pm, pn, cmp, thunk);
262 }
263 swap(a, pm);
264 pa = pb = (char *)a + es;
265
266 pc = pd = (char *)a + (n - 1) * es;
267 for (;;) {
268 while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
269 if (cmp_result == 0) {
270 swap_cnt = 1;
271 swap(pa, pb);
272 pa += es;
273 }
274 pb += es;
275 }
276 while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
277 if (cmp_result == 0) {
278 swap_cnt = 1;
279 swap(pc, pd);
280 pd -= es;
281 }
282 pc -= es;
283 }
284 if (pb > pc)
285 break;
286 swap(pb, pc);
287 swap_cnt = 1;
288 pb += es;
289 pc -= es;
290 }
291
292 pn = (char *)a + n * es;
293 r = min(pa - (char *)a, pb - pa);
294 vecswap(a, pb - r, r);
295 r = min(pd - pc, pn - pd - es);
296 vecswap(pb, pn - r, r);
297
298 if (swap_cnt == 0) { /* Switch to insertion sort */
299 r = 1 + n / 4; /* n >= 7, so r >= 2 */
300 for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
301 for (pl = pm;
302 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
303 pl -= es) {
304 swap(pl, pl - es);
305 if (++swap_cnt > r) goto nevermind;
306 }
307 return;
308 }
309
310 nevermind:
311 if ((r = pb - pa) > es) {
312 r /= es;
313 if (shared && r > shared->turnoff) {
314 union args *args = getargs(shared);
315
316 if (args == NULL)
317 LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
318 args->shared = shared;
319 args->a = a;
320 args->n = r;
321 args->depth_limit = depth_limit;
322 dispatch_group_async_f(shared->group, shared->queue, args,
323 _psort_parallel);
324 } else {
325 #ifdef I_AM_PSORT_R
326 _psort(a, r, es, thunk, cmp, depth_limit, NULL);
327 #else
328 _psort(a, r, es, cmp, depth_limit, NULL);
329 #endif
330 }
331 }
332 if ((r = pd - pc) > es) {
333 /* Iterate rather than recurse to save stack space */
334 a = pn - r;
335 n = r / es;
336 goto loop;
337 }
338 /* psort(pn - r, r / es, es, cmp);*/
339 }
340
341 static void
342 _psort_parallel(void *x)
343 {
344 union args *args = (union args *)x;
345 struct shared *shared = args->shared;
346
347 _psort(args->a, args->n, shared->es,
348 #ifdef I_AM_PSORT_R
349 shared->thunk,
350 #endif
351 shared->cmp, args->depth_limit, shared);
352 returnargs(shared, args);
353 }
354
355 /* fast, approximate integer square root */
356 static size_t
357 isqrt(size_t x)
358 {
359 size_t s = 1L << (flsl(x) / 2);
360 return (s + x / s) / 2;
361 }
362
363 void
364 #ifdef I_AM_PSORT_R
365 psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
366 #elif defined(I_AM_PSORT_B)
367 psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
368 #else
369 psort(void *a, size_t n, size_t es, cmp_t *cmp)
370 #endif
371 {
372 if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
373 struct shared shared;
374 union args *args;
375
376 bzero(&shared, sizeof(shared));
377 shared.sharedlock = OS_SPINLOCK_INIT;
378 if ((args = getargs(&shared)) != NULL) {
379 struct page *p, *pp;
380 #ifdef I_AM_PSORT_R
381 shared.who = "psort_r";
382 shared.thunk = thunk;
383 #elif defined(I_AM_PSORT_B)
384 shared.who = "psort_b";
385 #else
386 shared.who = "psort";
387 #endif
388 shared.cmp = cmp;
389 shared.es = es;
390 shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
391 shared.group = dispatch_group_create();
392 args->a = a;
393 args->n = n;
394 args->depth_limit = DEPTH(n);
395 args->shared = &shared;
396 /*
397 * The turnoff value is the size of a partition that,
398 * below which, we stop doing in parallel, and just do
399 * in the current thread. The value of sqrt(n) was
400 * determined heuristically. There is a smaller
401 * dependence on the slowness of the comparison
402 * function, and there might be a dependence on the
403 * number of processors, but the algorithm has not been
404 * determined. Because the sensitivity to the turnoff
405 * value is relatively low, we use a fast, approximate
406 * integer square root routine that is good enough for
407 * this purpose.
408 */
409 shared.turnoff = isqrt(n);
410 _psort_parallel(args);
411
412 /* wait for queue to drain */
413 dispatch_group_wait(shared.group, DISPATCH_TIME_FOREVER);
414 dispatch_release(shared.group);
415 for(p = shared.pagelist; p; p = pp) {
416 pp = p->next;
417 munmap(p, PAGESIZE);
418 }
419 return;
420 }
421 }
422 /* Just call qsort */
423 #ifdef I_AM_PSORT_R
424 qsort_r(a, n, es, thunk, cmp);
425 #elif defined(I_AM_PSORT_B)
426 qsort_b(a, n, es, cmp);
427 #else
428 qsort(a, n, es, cmp);
429 #endif
430 }