/*-
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");

#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <sys/mman.h>
#include <dispatch/dispatch.h>
#include <libkern/OSAtomic.h>
#include "libc_private.h"

#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>	/* _NumCPUs() */
#undef __APPLE_API_PRIVATE
#ifdef I_AM_PSORT_R
typedef int cmp_t(void *, const void *, const void *);
#else
typedef int cmp_t(const void *, const void *);
#endif

#ifdef I_AM_PSORT_B
static inline char *med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
#else
static inline char *med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
#endif
static inline void swapfunc(char *, char *, int, int) __attribute__((always_inline));

#define min(a, b)	(a) < (b) ? a : b

#define NARGS			((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))

#define PARALLEL_MIN_SIZE	2000	/* determine heuristically */
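/*
 * Arrays smaller than PARALLEL_MIN_SIZE are not worth parallelizing; the
 * entry points below fall straight through to qsort(3) for them, so the
 * dispatch/mmap setup cost is only paid on large sorts.
 */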
struct shared;		/* forward reference */

/*
 * One argument block per deferred _psort() call; blocks are carved out of
 * mmap'ed pages and recycled through a spinlock-protected freelist.
 */
union args {
        union args *next;               /* freelist linkage */
        struct {
                struct shared *shared;  /* per-sort shared state */
                void *a;                /* base of the partition to sort */
                size_t n;               /* number of elements in it */
                int depth_limit;        /* remaining introsort depth */
        } /* anonymous */;
};

struct page {
        struct page *next;
        union args args[0];
};

struct shared {
        char *who;                      /* name of the entry point, for aborts */
        union args *freelist;
        struct page *pagelist;
#ifdef I_AM_PSORT_R
        void *thunk;
#endif
#ifdef I_AM_PSORT_B
        cmp_t ^cmp;
#else
        cmp_t *cmp;
#endif
        size_t es;                      /* element size */
        size_t turnoff;                 /* partitions this small are sorted serially */
        dispatch_queue_t queue;
        dispatch_group_t group;
        OSSpinLock sharedlock;
};
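/*
 * getargs() hands out one argument block per deferred partition, growing the
 * freelist a page at a time with mmap(2) when it runs dry; returnargs() puts
 * a finished block back.  Both run under shared->sharedlock, and the pages
 * stay on shared->pagelist so the top-level caller can unmap them at the end.
 */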
static union args *
getargs(struct shared *shared)
{
        union args *args;

        OSSpinLockLock(&shared->sharedlock);
        if (!shared->freelist) {
                struct page *page;
                union args *prev;
                int i;

                /* grow the freelist by one page of argument blocks */
                page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE,
                    MAP_ANON|MAP_PRIVATE, -1, 0);
                if (page == MAP_FAILED) {       /* mmap(2) reports failure with MAP_FAILED */
                        OSSpinLockUnlock(&shared->sharedlock);
                        return NULL;
                }
                page->next = shared->pagelist;
                shared->pagelist = page;
                prev = NULL;
                for (args = page->args, i = NARGS; i > 0; args++, i--) {
                        args->next = prev;
                        prev = args;
                }
                shared->freelist = prev;
        }
        args = shared->freelist;
        shared->freelist = args->next;
        OSSpinLockUnlock(&shared->sharedlock);
        return args;
}
static void
returnargs(struct shared *shared, union args *args)
{
        OSSpinLockLock(&shared->sharedlock);
        args->next = shared->freelist;
        shared->freelist = args;
        OSSpinLockUnlock(&shared->sharedlock);
}
/*
 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 */
#define swapcode(TYPE, parmi, parmj, n) {               \
        long i = (n) / sizeof (TYPE);                   \
        TYPE *pi = (TYPE *) (parmi);                    \
        TYPE *pj = (TYPE *) (parmj);                    \
        do {                                            \
                TYPE    t = *pi;                        \
                *pi++ = *pj;                            \
                *pj++ = t;                              \
        } while (--i > 0);                              \
}

#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
        es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
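/*
 * swaptype selected by SWAPINIT: 0 means the element is a single
 * long-aligned long (swapped inline), 1 means a multiple of sizeof(long)
 * (swapped long-by-long), and 2 means it must be swapped byte-by-byte.
 */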
static inline void
swapfunc(char *a, char *b, int n, int swaptype)
{
        if (swaptype <= 1)
                swapcode(long, a, b, n)
        else
                swapcode(char, a, b, n)
}

#define swap(a, b)                                      \
        if (swaptype == 0) {                            \
                long t = *(long *)(a);                  \
                *(long *)(a) = *(long *)(b);            \
                *(long *)(b) = t;                       \
        } else                                          \
                swapfunc(a, b, es, swaptype)

#define vecswap(a, b, n)        if ((n) > 0) swapfunc(a, b, n, swaptype)
#ifdef I_AM_PSORT_R
#define CMP(t, x, y) (cmp((t), (x), (y)))
#else
#define CMP(t, x, y) (cmp((x), (y)))
#endif
static inline char *
med3(char *a, char *b, char *c,
#ifdef I_AM_PSORT_B
cmp_t ^cmp,
#else
cmp_t *cmp,
#endif
void *thunk
#ifndef I_AM_PSORT_R
__unused
#endif
)
{
        return CMP(thunk, a, b) < 0 ?
               (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
              :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
}
#ifdef __LP64__
#define DEPTH(x)	(2 * (flsl((long)(x)) - 1))
#else /* !__LP64__ */
#define DEPTH(x)	(2 * (fls((int)(x)) - 1))
#endif /* __LP64__ */
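/*
 * DEPTH(n) is about 2*log2(n).  _psort() decrements it on every partition
 * and switches to heapsort when it reaches zero, bounding the worst case
 * at O(n log n) in the style of introsort.
 */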
#ifdef I_AM_PSORT_R
int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
#endif
static void _psort_parallel(void *x);
static void
_psort(void *a, size_t n, size_t es,
#ifdef I_AM_PSORT_R
void *thunk,
#else
#define thunk NULL
#endif
#ifdef I_AM_PSORT_B
cmp_t ^cmp,
#else
cmp_t *cmp,
#endif
int depth_limit, struct shared *shared)
{
        char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
        size_t d, r;
        int cmp_result;
        int swaptype, swap_cnt;

loop:
        if (depth_limit-- <= 0) {
#ifdef I_AM_PSORT_B
                heapsort_b(a, n, es, cmp);
#elif defined(I_AM_PSORT_R)
                __heapsort_r(a, n, es, thunk, cmp);
#else
                heapsort(a, n, es, cmp);
#endif
                return;
        }
        SWAPINIT(a, es);
        swap_cnt = 0;
        if (n < 7) {
                for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
                        for (pl = pm;
                             pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
                             pl -= es)
                                swap(pl, pl - es);
                return;
        }
        pm = (char *)a + (n / 2) * es;
        if (n > 7) {
                pl = a;
                pn = (char *)a + (n - 1) * es;
                if (n > 40) {
                        d = (n / 8) * es;
                        pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
                        pm = med3(pm - d, pm, pm + d, cmp, thunk);
                        pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
                }
                pm = med3(pl, pm, pn, cmp, thunk);
        }
        swap(a, pm);
        pa = pb = (char *)a + es;
        pc = pd = (char *)a + (n - 1) * es;
        for (;;) {
                while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
                        if (cmp_result == 0) {
                                swap_cnt = 1;
                                swap(pa, pb);
                                pa += es;
                        }
                        pb += es;
                }
                while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
                        if (cmp_result == 0) {
                                swap_cnt = 1;
                                swap(pc, pd);
                                pd -= es;
                        }
                        pc -= es;
                }
                if (pb > pc)
                        break;
                swap(pb, pc);
                swap_cnt = 1;
                pb += es;
                pc -= es;
        }

        pn = (char *)a + n * es;
        r = min(pa - (char *)a, pb - pa);
        vecswap(a, pb - r, r);
        r = min(pd - pc, pn - pd - es);
        vecswap(pb, pn - r, r);
        if (swap_cnt == 0) {    /* Switch to insertion sort */
                r = 1 + n / 4;  /* n >= 7, so r >= 2 */
                for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
                        for (pl = pm;
                             pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
                             pl -= es) {
                                swap(pl, pl - es);
                                if (++swap_cnt > r) goto nevermind;
                        }
                return;
        }

nevermind:
        if ((r = pb - pa) > es) {
                r /= es;
                if (shared && r > shared->turnoff) {
                        union args *args = getargs(shared);

                        if (args == NULL)
                                LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
                        args->shared = shared;
                        args->a = a;
                        args->n = r;
                        args->depth_limit = depth_limit;
                        dispatch_group_async_f(shared->group, shared->queue, args,
                            _psort_parallel);
                } else {
#ifdef I_AM_PSORT_R
                        _psort(a, r, es, thunk, cmp, depth_limit, NULL);
#else
                        _psort(a, r, es, cmp, depth_limit, NULL);
#endif
                }
        }
        if ((r = pd - pc) > es) {
                /* Iterate rather than recurse to save stack space */
                a = pn - r;
                n = r / es;
                goto loop;
        }
/*              psort(pn - r, r / es, es, cmp);*/
}
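/*
 * Worker run on the dispatch queue for each partition that was handed off
 * in _psort(): unpack the argument block, sort that partition (passing the
 * shared state along so it can spawn further work), then recycle the block.
 */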
static void
_psort_parallel(void *x)
{
        union args *args = (union args *)x;
        struct shared *shared = args->shared;

        _psort(args->a, args->n, shared->es,
#ifdef I_AM_PSORT_R
            shared->thunk,
#endif
            shared->cmp, args->depth_limit, shared);
        returnargs(shared, args);
}
/* fast, approximate integer square root */
static size_t
isqrt(size_t x)
{
        size_t s = 1L << (flsl(x) / 2);
        return (s + x / s) / 2;
}
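/*
 * This is one Newton step from a power-of-two starting guess.  For example,
 * isqrt(1000000): flsl() gives 20, so s = 1 << 10 = 1024 and the result is
 * (1024 + 1000000/1024) / 2 = (1024 + 976) / 2 = 1000.
 */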
void
#ifdef I_AM_PSORT_R
psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
#elif defined(I_AM_PSORT_B)
psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
#else
psort(void *a, size_t n, size_t es, cmp_t *cmp)
#endif
{
        if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
                struct shared shared;
                union args *args;

                bzero(&shared, sizeof(shared));
                shared.sharedlock = OS_SPINLOCK_INIT;

                if ((args = getargs(&shared)) != NULL) {
                        struct page *p, *pp;
#ifdef I_AM_PSORT_R
                        shared.who = "psort_r";
                        shared.thunk = thunk;
#elif defined(I_AM_PSORT_B)
                        shared.who = "psort_b";
#else
                        shared.who = "psort";
#endif
                        shared.cmp = cmp;
                        shared.es = es;
                        shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
                        shared.group = dispatch_group_create();
                        args->a = a;
                        args->n = n;
                        args->depth_limit = DEPTH(n);
                        args->shared = &shared;
                        /*
                         * The turnoff value is the size of a partition below
                         * which we stop sorting in parallel and just sort in
                         * the current thread.  The value of sqrt(n) was
                         * determined heuristically.  There is a smaller
                         * dependence on the slowness of the comparison
                         * function, and there might be a dependence on the
                         * number of processors, but the algorithm has not been
                         * determined.  Because the sensitivity to the turnoff
                         * value is relatively low, we use a fast, approximate
                         * integer square root routine that is good enough for
                         * this purpose.
                         */
                        shared.turnoff = isqrt(n);
                        _psort_parallel(args);

                        /* wait for queue to drain */
                        dispatch_group_wait(shared.group, DISPATCH_TIME_FOREVER);
                        dispatch_release(shared.group);
                        for (p = shared.pagelist; p; p = pp) {
                                pp = p->next;
                                munmap(p, PAGESIZE);
                        }
                        return;
                }
        }
        /* Just call qsort */
#ifdef I_AM_PSORT_R
        qsort_r(a, n, es, thunk, cmp);
#elif defined(I_AM_PSORT_B)
        qsort_b(a, n, es, cmp);
#else
        qsort(a, n, es, cmp);
#endif
}
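/*
 * Usage sketch (illustrative only, not part of this file): the entry points
 * take the same arguments as the corresponding qsort(3) variants, so a
 * hypothetical caller could parallel-sort an array of ints with:
 *
 *      static int
 *      cmp_int(const void *x, const void *y)
 *      {
 *              int l = *(const int *)x, r = *(const int *)y;
 *              return (l > r) - (l < r);
 *      }
 *
 *      psort(values, nvalues, sizeof(int), cmp_int);
 *
 * Sorts below PARALLEL_MIN_SIZE, or on a single-CPU machine, degrade to a
 * plain qsort() call.
 */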