1 /****************************************************************************/
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #if defined(LIBC_SCCS) && !defined(lint)
32 static char sccsid
[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
33 #endif /* LIBC_SCCS and not lint */
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
39 #include <dispatch/dispatch.h>
42 #include <libkern/OSAtomic.h>
46 #define __APPLE_API_PRIVATE
47 #include <machine/cpu_capabilities.h>
50 typedef int cmp_t(void *, const void *, const void *);
52 typedef int cmp_t(const void *, const void *);
55 static inline char *med3(char *, char *, char *, cmp_t
^, void *) __attribute__((always_inline
));
57 static inline char *med3(char *, char *, char *, cmp_t
*, void *) __attribute__((always_inline
));
59 static inline void swapfunc(char *, char *, int, int) __attribute__((always_inline
));
61 #define min(a, b) (a) < (b) ? a : b
63 #define NARGS ((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
65 #define PARALLEL_MIN_SIZE 2000 /* determine heuristically */
67 struct shared
; /* forward reference */
71 struct shared
*shared
;
86 struct page
*pagelist
;
97 dispatch_queue_t queue
;
98 dispatch_group_t group
;
99 os_unfair_lock sharedlock
;
103 getargs(struct shared
*shared
)
107 os_unfair_lock_lock(&shared
->sharedlock
);
108 if(!shared
->freelist
) {
112 if((page
= (struct page
*)mmap(NULL
, PAGESIZE
, PROT_READ
|PROT_WRITE
, MAP_ANON
|MAP_PRIVATE
, -1, 0)) == NULL
)
114 page
->next
= shared
->pagelist
;
115 shared
->pagelist
= page
;
117 for(args
= page
->args
, i
= NARGS
; i
> 0; args
++, i
--) {
121 shared
->freelist
= prev
;
123 args
= shared
->freelist
;
124 shared
->freelist
= args
->next
;
125 os_unfair_lock_unlock(&shared
->sharedlock
);
130 returnargs(struct shared
*shared
, union args
*args
)
132 os_unfair_lock_lock(&shared
->sharedlock
);
133 args
->next
= shared
->freelist
;
134 shared
->freelist
= args
;
135 os_unfair_lock_unlock(&shared
->sharedlock
);
139 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
141 #define swapcode(TYPE, parmi, parmj, n) { \
142 long i = (n) / sizeof (TYPE); \
143 TYPE *pi = (TYPE *) (parmi); \
144 TYPE *pj = (TYPE *) (parmj); \
152 #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
153 es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
156 swapfunc(a
, b
, n
, swaptype
)
161 swapcode(long, a
, b
, n
)
163 swapcode(char, a
, b
, n
)
167 if (swaptype == 0) { \
168 long t = *(long *)(a); \
169 *(long *)(a) = *(long *)(b); \
172 swapfunc(a, b, es, swaptype)
174 #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype)
177 #define CMP(t, x, y) (cmp((t), (x), (y)))
179 #define CMP(t, x, y) (cmp((x), (y)))
183 med3(char *a
, char *b
, char *c
,
195 return CMP(thunk
, a
, b
) < 0 ?
196 (CMP(thunk
, b
, c
) < 0 ? b
: (CMP(thunk
, a
, c
) < 0 ? c
: a
))
197 :(CMP(thunk
, b
, c
) > 0 ? b
: (CMP(thunk
, a
, c
) < 0 ? a
: c
));
201 #define DEPTH(x) (2 * (flsl((long)(x)) - 1))
202 #else /* !__LP64__ */
203 #define DEPTH(x) (2 * (fls((int)(x)) - 1))
204 #endif /* __LP64__ */
207 int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
210 static void _psort_parallel(void *x
);
213 _psort(void *a
, size_t n
, size_t es
,
224 int depth_limit
, struct shared
*shared
)
226 char *pa
, *pb
, *pc
, *pd
, *pl
, *pm
, *pn
;
229 int swaptype
, swap_cnt
;
232 if (depth_limit
-- <= 0) {
234 heapsort_b(a
, n
, es
, cmp
);
235 #elif defined(I_AM_PSORT_R)
236 __heapsort_r(a
, n
, es
, thunk
, cmp
);
238 heapsort(a
, n
, es
, cmp
);
245 for (pm
= (char *)a
+ es
; pm
< (char *)a
+ n
* es
; pm
+= es
)
247 pl
> (char *)a
&& CMP(thunk
, pl
- es
, pl
) > 0;
252 pm
= (char *)a
+ (n
/ 2) * es
;
255 pn
= (char *)a
+ (n
- 1) * es
;
258 pl
= med3(pl
, pl
+ d
, pl
+ 2 * d
, cmp
, thunk
);
259 pm
= med3(pm
- d
, pm
, pm
+ d
, cmp
, thunk
);
260 pn
= med3(pn
- 2 * d
, pn
- d
, pn
, cmp
, thunk
);
262 pm
= med3(pl
, pm
, pn
, cmp
, thunk
);
265 pa
= pb
= (char *)a
+ es
;
267 pc
= pd
= (char *)a
+ (n
- 1) * es
;
269 while (pb
<= pc
&& (cmp_result
= CMP(thunk
, pb
, a
)) <= 0) {
270 if (cmp_result
== 0) {
277 while (pb
<= pc
&& (cmp_result
= CMP(thunk
, pc
, a
)) >= 0) {
278 if (cmp_result
== 0) {
293 pn
= (char *)a
+ n
* es
;
294 r
= min(pa
- (char *)a
, pb
- pa
);
295 vecswap(a
, pb
- r
, r
);
296 r
= min(pd
- pc
, pn
- pd
- es
);
297 vecswap(pb
, pn
- r
, r
);
299 if (swap_cnt
== 0) { /* Switch to insertion sort */
300 r
= 1 + n
/ 4; /* n >= 7, so r >= 2 */
301 for (pm
= (char *)a
+ es
; pm
< (char *)a
+ n
* es
; pm
+= es
)
303 pl
> (char *)a
&& CMP(thunk
, pl
- es
, pl
) > 0;
306 if (++swap_cnt
> r
) goto nevermind
;
312 if ((r
= pb
- pa
) > es
) {
314 if (shared
&& r
> shared
->turnoff
) {
315 union args
*args
= getargs(shared
);
318 LIBC_ABORT("%s: getargs: %s", shared
->who
, strerror(errno
));
319 args
->shared
= shared
;
322 args
->depth_limit
= depth_limit
;
323 dispatch_group_async_f(shared
->group
, shared
->queue
, args
,
327 _psort(a
, r
, es
, thunk
, cmp
, depth_limit
, NULL
);
329 _psort(a
, r
, es
, cmp
, depth_limit
, NULL
);
333 if ((r
= pd
- pc
) > es
) {
334 /* Iterate rather than recurse to save stack space */
339 /* psort(pn - r, r / es, es, cmp);*/
343 _psort_parallel(void *x
)
345 union args
*args
= (union args
*)x
;
346 struct shared
*shared
= args
->shared
;
348 _psort(args
->a
, args
->n
, shared
->es
,
352 shared
->cmp
, args
->depth_limit
, shared
);
353 returnargs(shared
, args
);
356 /* fast, approximate integer square root */
360 size_t s
= 1L << (flsl(x
) / 2);
361 return (s
+ x
/ s
) / 2;
366 psort_r(void *a
, size_t n
, size_t es
, void *thunk
, cmp_t
*cmp
)
367 #elif defined(I_AM_PSORT_B)
368 psort_b(void *a
, size_t n
, size_t es
, cmp_t
^cmp
)
370 psort(void *a
, size_t n
, size_t es
, cmp_t
*cmp
)
373 if (n
>= PARALLEL_MIN_SIZE
&& _NumCPUs() > 1) {
374 struct shared shared
;
377 bzero(&shared
, sizeof(shared
));
378 shared
.sharedlock
= OS_UNFAIR_LOCK_INIT
;
379 if ((args
= getargs(&shared
)) != NULL
) {
382 shared
.who
= "psort_r";
383 shared
.thunk
= thunk
;
384 #elif defined(I_AM_PSORT_B)
385 shared
.who
= "psort_b";
387 shared
.who
= "psort";
391 shared
.queue
= dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT
, 0);
392 shared
.group
= dispatch_group_create();
395 args
->depth_limit
= DEPTH(n
);
396 args
->shared
= &shared
;
398 * The turnoff value is the size of a partition that,
399 * below which, we stop doing in parallel, and just do
400 * in the current thread. The value of sqrt(n) was
401 * determined heuristically. There is a smaller
402 * dependence on the slowness of the comparison
403 * function, and there might be a dependence on the
404 * number of processors, but the algorithm has not been
405 * determined. Because the sensitivity to the turnoff
406 * value is relatively low, we use a fast, approximate
407 * integer square root routine that is good enough for
410 shared
.turnoff
= isqrt(n
);
411 _psort_parallel(args
);
413 /* wait for queue to drain */
414 dispatch_group_wait(shared
.group
, DISPATCH_TIME_FOREVER
);
415 dispatch_release(shared
.group
);
416 for(p
= shared
.pagelist
; p
; p
= pp
) {
423 /* Just call qsort */
425 qsort_r(a
, n
, es
, thunk
, cmp
);
426 #elif defined(I_AM_PSORT_B)
427 qsort_b(a
, n
, es
, cmp
);
429 qsort(a
, n
, es
, cmp
);