1 /****************************************************************************/
3 * Copyright (c) 1992, 1993
4 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #if defined(LIBC_SCCS) && !defined(lint)
32 static char sccsid
[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93";
33 #endif /* LIBC_SCCS and not lint */
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
39 #include <dispatch/dispatch.h>
42 #include <libkern/OSAtomic.h>
45 #define __APPLE_API_PRIVATE
46 #include <machine/cpu_capabilities.h>
49 typedef int cmp_t(void *, const void *, const void *);
51 typedef int cmp_t(const void *, const void *);
54 static inline char *med3(char *, char *, char *, cmp_t
^, void *) __attribute__((always_inline
));
56 static inline char *med3(char *, char *, char *, cmp_t
*, void *) __attribute__((always_inline
));
58 static inline void swapfunc(char *, char *, int, int) __attribute__((always_inline
));
60 #define min(a, b) (a) < (b) ? a : b
62 #define NARGS ((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
64 #define PARALLEL_MIN_SIZE 2000 /* determine heuristically */
66 struct shared
; /* forward reference */
70 struct shared
*shared
;
85 struct page
*pagelist
;
96 dispatch_queue_t queue
;
98 pthread_mutex_t mutex
;
99 OSSpinLock sharedlock
;
104 getargs(struct shared
*shared
)
108 OSSpinLockLock(&shared
->sharedlock
);
109 if(!shared
->freelist
) {
113 if((page
= (struct page
*)mmap(NULL
, PAGESIZE
, PROT_READ
|PROT_WRITE
, MAP_ANON
|MAP_PRIVATE
, -1, 0)) == NULL
)
115 page
->next
= shared
->pagelist
;
116 shared
->pagelist
= page
;
118 for(args
= page
->args
, i
= NARGS
; i
> 0; args
++, i
--) {
122 shared
->freelist
= prev
;
124 args
= shared
->freelist
;
125 shared
->freelist
= args
->next
;
126 OSSpinLockUnlock(&shared
->sharedlock
);
131 returnargs(struct shared
*shared
, union args
*args
)
133 OSSpinLockLock(&shared
->sharedlock
);
134 args
->next
= shared
->freelist
;
135 shared
->freelist
= args
;
136 OSSpinLockUnlock(&shared
->sharedlock
);
140 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
142 #define swapcode(TYPE, parmi, parmj, n) { \
143 long i = (n) / sizeof (TYPE); \
144 TYPE *pi = (TYPE *) (parmi); \
145 TYPE *pj = (TYPE *) (parmj); \
153 #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
154 es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
157 swapfunc(a
, b
, n
, swaptype
)
162 swapcode(long, a
, b
, n
)
164 swapcode(char, a
, b
, n
)
168 if (swaptype == 0) { \
169 long t = *(long *)(a); \
170 *(long *)(a) = *(long *)(b); \
173 swapfunc(a, b, es, swaptype)
175 #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype)
178 #define CMP(t, x, y) (cmp((t), (x), (y)))
180 #define CMP(t, x, y) (cmp((x), (y)))
184 med3(char *a
, char *b
, char *c
,
196 return CMP(thunk
, a
, b
) < 0 ?
197 (CMP(thunk
, b
, c
) < 0 ? b
: (CMP(thunk
, a
, c
) < 0 ? c
: a
))
198 :(CMP(thunk
, b
, c
) > 0 ? b
: (CMP(thunk
, a
, c
) < 0 ? a
: c
));
202 #define DEPTH(x) (2 * (flsl((long)(x)) - 1))
203 #else /* !__LP64__ */
204 #define DEPTH(x) (2 * (fls((int)(x)) - 1))
205 #endif /* __LP64__ */
208 int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
211 static void _psort_parallel(void *x
);
214 _psort(void *a
, size_t n
, size_t es
,
225 int depth_limit
, struct shared
*shared
)
227 char *pa
, *pb
, *pc
, *pd
, *pl
, *pm
, *pn
;
230 int swaptype
, swap_cnt
;
233 if (depth_limit
-- <= 0) {
235 heapsort_b(a
, n
, es
, cmp
);
236 #elif defined(I_AM_PSORT_R)
237 __heapsort_r(a
, n
, es
, thunk
, cmp
);
239 heapsort(a
, n
, es
, cmp
);
246 for (pm
= (char *)a
+ es
; pm
< (char *)a
+ n
* es
; pm
+= es
)
248 pl
> (char *)a
&& CMP(thunk
, pl
- es
, pl
) > 0;
253 pm
= (char *)a
+ (n
/ 2) * es
;
256 pn
= (char *)a
+ (n
- 1) * es
;
259 pl
= med3(pl
, pl
+ d
, pl
+ 2 * d
, cmp
, thunk
);
260 pm
= med3(pm
- d
, pm
, pm
+ d
, cmp
, thunk
);
261 pn
= med3(pn
- 2 * d
, pn
- d
, pn
, cmp
, thunk
);
263 pm
= med3(pl
, pm
, pn
, cmp
, thunk
);
266 pa
= pb
= (char *)a
+ es
;
268 pc
= pd
= (char *)a
+ (n
- 1) * es
;
270 while (pb
<= pc
&& (cmp_result
= CMP(thunk
, pb
, a
)) <= 0) {
271 if (cmp_result
== 0) {
278 while (pb
<= pc
&& (cmp_result
= CMP(thunk
, pc
, a
)) >= 0) {
279 if (cmp_result
== 0) {
294 pn
= (char *)a
+ n
* es
;
295 r
= min(pa
- (char *)a
, pb
- pa
);
296 vecswap(a
, pb
- r
, r
);
297 r
= min(pd
- pc
, pn
- pd
- es
);
298 vecswap(pb
, pn
- r
, r
);
300 if (swap_cnt
== 0) { /* Switch to insertion sort */
301 r
= 1 + n
/ 4; /* n >= 7, so r >= 2 */
302 for (pm
= (char *)a
+ es
; pm
< (char *)a
+ n
* es
; pm
+= es
)
304 pl
> (char *)a
&& CMP(thunk
, pl
- es
, pl
) > 0;
307 if (++swap_cnt
> r
) goto nevermind
;
313 if ((r
= pb
- pa
) > es
) {
315 if (shared
&& r
> shared
->turnoff
) {
316 union args
*args
= getargs(shared
);
319 LIBC_ABORT("%s: getargs: %s", shared
->who
, strerror(errno
));
320 args
->shared
= shared
;
323 args
->depth_limit
= depth_limit
;
324 OSAtomicIncrement32(&shared
->count
);
325 dispatch_async_f(shared
->queue
, args
, _psort_parallel
);
328 _psort(a
, r
, es
, thunk
, cmp
, depth_limit
, NULL
);
330 _psort(a
, r
, es
, cmp
, depth_limit
, NULL
);
334 if ((r
= pd
- pc
) > es
) {
335 /* Iterate rather than recurse to save stack space */
340 /* psort(pn - r, r / es, es, cmp);*/
344 _psort_parallel(void *x
)
346 union args
*args
= (union args
*)x
;
347 struct shared
*shared
= args
->shared
;
349 _psort(args
->a
, args
->n
, shared
->es
,
353 shared
->cmp
, args
->depth_limit
, shared
);
354 returnargs(shared
, args
);
355 if(OSAtomicDecrement32(&shared
->count
) <= 0) {
356 pthread_mutex_lock(&shared
->mutex
);
357 pthread_cond_signal(&shared
->cond
);
358 pthread_mutex_unlock(&shared
->mutex
);
362 /* fast, approximate integer square root */
366 size_t s
= 1L << (flsl(x
) / 2);
367 return (s
+ x
/ s
) / 2;
372 psort_r(void *a
, size_t n
, size_t es
, void *thunk
, cmp_t
*cmp
)
373 #elif defined(I_AM_PSORT_B)
374 psort_b(void *a
, size_t n
, size_t es
, cmp_t
^cmp
)
376 psort(void *a
, size_t n
, size_t es
, cmp_t
*cmp
)
379 if (n
>= PARALLEL_MIN_SIZE
&& _NumCPUs() > 1) {
380 struct shared shared
;
383 bzero(&shared
, sizeof(shared
));
384 shared
.sharedlock
= OS_SPINLOCK_INIT
;
385 if ((args
= getargs(&shared
)) != NULL
) {
388 shared
.who
= "psort_r";
389 shared
.thunk
= thunk
;
390 #elif defined(I_AM_PSORT_B)
391 shared
.who
= "psort_b";
393 shared
.who
= "psort";
397 shared
.queue
= dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT
, 0);
398 shared
.cond
= (pthread_cond_t
)PTHREAD_COND_INITIALIZER
;
399 shared
.mutex
= (pthread_mutex_t
)PTHREAD_MUTEX_INITIALIZER
;
402 args
->depth_limit
= DEPTH(n
);
403 args
->shared
= &shared
;
405 * The turnoff value is the size of a partition that,
406 * below which, we stop doing in parallel, and just do
407 * in the current thread. The value of sqrt(n) was
408 * determined heuristically. There is a smaller
409 * dependence on the slowness of the comparison
410 * function, and there might be a dependence on the
411 * number of processors, but the algorithm has not been
412 * determined. Because the sensitivity to the turnoff
413 * value is relatively low, we use a fast, approximate
414 * integer square root routine that is good enough for
417 shared
.turnoff
= isqrt(n
);
418 OSAtomicIncrement32(&shared
.count
);
419 _psort_parallel(args
);
421 /* wait for queue to drain */
422 pthread_mutex_lock(&shared
.mutex
);
423 while(shared
.count
> 0)
424 pthread_cond_wait(&shared
.cond
, &shared
.mutex
);
426 pthread_mutex_unlock(&shared
.mutex
);
427 pthread_mutex_destroy(&shared
.mutex
);
428 pthread_cond_destroy(&shared
.cond
);
429 for(p
= shared
.pagelist
; p
; p
= pp
) {
436 /* Just call qsort */
438 qsort_r(a
, n
, es
, thunk
, cmp
);
439 #elif defined(I_AM_PSORT_B)
440 qsort_b(a
, n
, es
, cmp
);
442 qsort(a
, n
, es
, cmp
);