]> git.saurik.com Git - apple/libc.git/blob - stdlib/psort.c.patch
b71822661d06b465d7874ac03e85cbe2886e0de6
[apple/libc.git] / stdlib / psort.c.patch
1 --- psort.c.orig 2008-11-24 17:01:07.000000000 -0800
2 +++ psort.c 2008-11-24 22:02:57.000000000 -0800
3 @@ -1,3 +1,4 @@
4 +/****************************************************************************/
5 /*-
6 * Copyright (c) 1992, 1993
7 * The Regents of the University of California. All rights reserved.
8 @@ -34,14 +35,22 @@ static char sccsid[] = "@(#)qsort.c 8.1
9 __FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
10
11 #include <stdlib.h>
12 +#include <pthread.h>
13 +#include <dispatch/dispatch.h>
14 +#include <stddef.h>
15 #include <string.h>
16 +#include <libkern/OSAtomic.h>
17 +#include <sys/mman.h>
18 +#include <errno.h>
19 +#define __APPLE_API_PRIVATE
20 +#include <machine/cpu_capabilities.h>
21
22 -#ifdef I_AM_QSORT_R
23 +#ifdef I_AM_PSORT_R
24 typedef int cmp_t(void *, const void *, const void *);
25 #else
26 typedef int cmp_t(const void *, const void *);
27 #endif
28 -#ifdef I_AM_QSORT_B
29 +#ifdef I_AM_PSORT_B
30 static inline char *med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
31 #else
32 static inline char *med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
33 @@ -50,6 +59,83 @@ static inline void swapfunc(char *, cha
34
35 #define min(a, b) (a) < (b) ? a : b
36
37 +#define NARGS ((PAGESIZE - offsetof(struct page, args)) / sizeof(union args)) /* union args slots that fit in one page after the struct page header */
38 +#define PAGESIZE 4096 /* byte length of every mmap()/munmap() of an argument page */
39 +#define PARALLEL_MIN_SIZE 2000 /* smallest element count for which the parallel path is tried; determine heuristically */
40 +
41 +struct shared; /* forward reference */
42 +union args {
43 + union args *next; /* link used while the slot sits on the free list */
44 + struct {
45 + struct shared *shared; /* state common to the whole sort */
46 + void *a; /* base of the partition handed to _psort */
47 + size_t n; /* element count of that partition */
48 + int depth_limit; /* forwarded unchanged to _psort */
49 + } /* anonymous */;
50 +};
51 +
52 +struct page { /* one PAGESIZE mmap()ed chunk of argument slots, chained through next so it can be munmap()ed later */
53 + struct page *next;
54 + union args args[]; /* C99 flexible array member (was the GNU zero-length form); NARGS slots follow the header */
55 +};
56 +
57 +struct shared {
58 + char *who; /* name of the public entry point, printed in the abort message */
59 + union args *freelist; /* unused argument slots; updated under sharedlock */
60 + struct page *pagelist; /* every page mapped by getargs, unmapped when the sort ends */
61 +#ifdef I_AM_PSORT_R
62 + void *thunk; /* caller context passed as the first argument of cmp */
63 +#endif
64 +#ifdef I_AM_PSORT_B
65 + cmp_t ^cmp;
66 +#else
67 + cmp_t *cmp;
68 +#endif
69 + size_t es; /* element size, as given to the entry point */
70 + size_t turnoff; /* partitions with more elements than this are dispatched; the rest are sorted inline */
71 + dispatch_queue_t queue; /* queue handed to dispatch_async_f */
72 + pthread_cond_t cond; /* signalled when count drops to zero */
73 + pthread_mutex_t mutex; /* held by the entry point while it waits on cond */
74 + OSSpinLock sharedlock; /* serializes freelist and pagelist updates */
75 + int count; /* _psort_parallel invocations started but not yet finished */
76 +};
77 +
78 +static union args *
79 +getargs(struct shared *shared)
80 +{
81 + union args *args;
82 + /* Pop an unused slot; when the free list is empty, map one more page of slots first. Returns NULL (errno set by mmap) on failure. */
83 + OSSpinLockLock(&shared->sharedlock);
84 + if(!shared->freelist) {
85 + struct page *page;
86 + union args *prev = NULL;
87 + if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { /* mmap reports failure with MAP_FAILED, never NULL */
88 + OSSpinLockUnlock(&shared->sharedlock); /* never return with the spinlock held */
89 + return NULL;
90 + }
91 + page->next = shared->pagelist;
92 + shared->pagelist = page;
93 + for(args = page->args; args < page->args + NARGS; args++) { /* thread all NARGS slots onto a list */
94 + args->next = prev;
95 + prev = args;
96 + }
97 + shared->freelist = prev;
98 + }
99 + args = shared->freelist;
100 + shared->freelist = args->next;
101 + OSSpinLockUnlock(&shared->sharedlock);
102 + return args;
103 +}
104 +
105 +static void /* push a slot back onto the shared free list for reuse by getargs */
106 +returnargs(struct shared *shared, union args *args)
107 +{
108 + OSSpinLockLock(&shared->sharedlock); /* serialize free-list updates */
109 + args->next = shared->freelist; /* the union reuses the slot's storage as the list link */
110 + shared->freelist = args;
111 + OSSpinLockUnlock(&shared->sharedlock);
112 +}
113 +
114 /*
115 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
116 */
117 @@ -88,7 +174,7 @@ swapfunc(a, b, n, swaptype)
118
119 #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype)
120
121 -#ifdef I_AM_QSORT_R
122 +#ifdef I_AM_PSORT_R
123 #define CMP(t, x, y) (cmp((t), (x), (y)))
124 #else
125 #define CMP(t, x, y) (cmp((x), (y)))
126 @@ -96,13 +182,13 @@ swapfunc(a, b, n, swaptype)
127
128 static inline char *
129 med3(char *a, char *b, char *c,
130 -#ifdef I_AM_QSORT_B
131 +#ifdef I_AM_PSORT_B
132 cmp_t ^cmp,
133 #else
134 cmp_t *cmp,
135 #endif
136 void *thunk
137 -#ifndef I_AM_QSORT_R
138 +#ifndef I_AM_PSORT_R
139 __unused
140 #endif
141 )
142 @@ -118,23 +204,25 @@ __unused
143 #define DEPTH(x) (2 * (fls((int)(x)) - 1))
144 #endif /* __LP64__ */
145
146 -#ifdef I_AM_QSORT_R
147 +#ifdef I_AM_PSORT_R
148 int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
149 #endif
150
151 +static void _psort_parallel(void *x);
152 +
153 static void
154 -_qsort(void *a, size_t n, size_t es,
155 -#ifdef I_AM_QSORT_R
156 +_psort(void *a, size_t n, size_t es,
157 +#ifdef I_AM_PSORT_R
158 void *thunk,
159 #else
160 #define thunk NULL
161 #endif
162 -#ifdef I_AM_QSORT_B
163 +#ifdef I_AM_PSORT_B
164 cmp_t ^cmp,
165 #else
166 cmp_t *cmp,
167 #endif
168 -int depth_limit)
169 +int depth_limit, struct shared *shared)
170 {
171 char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
172 size_t d, r;
173 @@ -143,9 +231,9 @@ int depth_limit)
174
175 loop:
176 if (depth_limit-- <= 0) {
177 -#ifdef I_AM_QSORT_B
178 +#ifdef I_AM_PSORT_B
179 heapsort_b(a, n, es, cmp);
180 -#elif defined(I_AM_QSORT_R)
181 +#elif defined(I_AM_PSORT_R)
182 __heapsort_r(a, n, es, thunk, cmp);
183 #else
184 heapsort(a, n, es, cmp);
185 @@ -222,33 +310,135 @@ loop:
186 }
187
188 nevermind:
189 - if ((r = pb - pa) > es)
190 -#ifdef I_AM_QSORT_R
191 - _qsort(a, r / es, es, thunk, cmp, depth_limit);
192 + if ((r = pb - pa) > es) {
193 + r /= es;
194 + if (shared && r > shared->turnoff) {
195 + union args *args = getargs(shared);
196 +
197 + if (args == NULL)
198 + LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
199 + args->shared = shared;
200 + args->a = a;
201 + args->n = r;
202 + args->depth_limit = depth_limit;
203 + OSAtomicIncrement32(&shared->count);
204 + dispatch_async_f(shared->queue, args, _psort_parallel);
205 + } else {
206 +#ifdef I_AM_PSORT_R
207 + _psort(a, r, es, thunk, cmp, depth_limit, NULL);
208 #else
209 - _qsort(a, r / es, es, cmp, depth_limit);
210 + _psort(a, r, es, cmp, depth_limit, NULL);
211 #endif
212 + }
213 + }
214 if ((r = pd - pc) > es) {
215 /* Iterate rather than recurse to save stack space */
216 a = pn - r;
217 n = r / es;
218 goto loop;
219 }
220 -/* qsort(pn - r, r / es, es, cmp);*/
221 +/* psort(pn - r, r / es, es, cmp);*/
222 +}
223 +
224 +static void /* work function: sort one partition, recycle its argument slot, wake the waiter if this was the last outstanding call */
225 +_psort_parallel(void *x)
226 +{
227 + union args *args = (union args *)x;
228 + struct shared *shared = args->shared;
229 +
230 + _psort(args->a, args->n, shared->es,
231 +#ifdef I_AM_PSORT_R
232 + shared->thunk,
233 +#endif
234 + shared->cmp, args->depth_limit, shared);
235 + returnargs(shared, args);
236 + /* Decrement under the mutex: *shared lives on the waiter's stack, and the waiter destroys it once it sees count reach 0 while holding this mutex. */
237 + pthread_mutex_lock(&shared->mutex);
238 + if(OSAtomicDecrement32(&shared->count) <= 0)
239 + pthread_cond_signal(&shared->cond);
240 + pthread_mutex_unlock(&shared->mutex);
241 +}
242 +
243 +/* cheap integer square-root estimate: one averaging step from a power-of-two first guess */
244 +static size_t
245 +isqrt(size_t x)
246 +{
247 + const size_t guess = 1L << (flsl(x) / 2);
248 + return (x / guess + guess) >> 1;
249 }
250
251 void
252 -#ifdef I_AM_QSORT_R
253 -qsort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
254 -#elif defined(I_AM_QSORT_B)
255 -qsort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
256 +#ifdef I_AM_PSORT_R
257 +psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
258 +#elif defined(I_AM_PSORT_B)
259 +psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
260 #else
261 -qsort(void *a, size_t n, size_t es, cmp_t *cmp)
262 +psort(void *a, size_t n, size_t es, cmp_t *cmp)
263 #endif
264 {
265 - _qsort(a, n, es,
266 -#ifdef I_AM_QSORT_R
267 - thunk,
268 + if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) { /* parallel path only for large inputs with more than one CPU */
269 + struct shared shared;
270 + union args *args;
271 +
272 + bzero(&shared, sizeof(shared));
273 + shared.sharedlock = OS_SPINLOCK_INIT;
274 + if ((args = getargs(&shared)) != NULL) { /* on failure, fall through to the plain qsort call below */
275 + struct page *p, *pp;
276 +#ifdef I_AM_PSORT_R
277 + shared.who = "psort_r";
278 + shared.thunk = thunk;
279 +#elif defined(I_AM_PSORT_B)
280 + shared.who = "psort_b";
281 +#else
282 + shared.who = "psort";
283 +#endif
284 + shared.cmp = cmp;
285 + shared.es = es;
286 + shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
287 + shared.cond = (pthread_cond_t)PTHREAD_COND_INITIALIZER;
288 + shared.mutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
289 + args->a = a;
290 + args->n = n;
291 + args->depth_limit = DEPTH(n);
292 + args->shared = &shared;
293 + /*
294 + * The turnoff value is the partition size at or below
295 + * which we stop sorting in parallel and just sort in
296 + * the current thread. The value of sqrt(n) was
297 + * determined heuristically. There is a smaller
298 + * dependence on the slowness of the comparison
299 + * function, and there might be a dependence on the
300 + * number of processors, but the algorithm has not been
301 + * determined. Because the sensitivity to the turnoff
302 + * value is relatively low, we use a fast, approximate
303 + * integer square root routine that is good enough for
304 + * this purpose.
305 + */
306 + shared.turnoff = isqrt(n);
307 + OSAtomicIncrement32(&shared.count); /* accounts for the direct call on the next line */
308 + _psort_parallel(args); /* the top-level partition runs in the calling thread */
309 +
310 + /* wait for queue to drain */
311 + pthread_mutex_lock(&shared.mutex);
312 + while(shared.count > 0)
313 + pthread_cond_wait(&shared.cond, &shared.mutex);
314 +
315 + pthread_mutex_unlock(&shared.mutex);
316 + pthread_mutex_destroy(&shared.mutex);
317 + pthread_cond_destroy(&shared.cond);
318 + for(p = shared.pagelist; p; p = pp) { /* unmap every page getargs mapped */
319 + pp = p->next;
320 + munmap(p, PAGESIZE);
321 + }
322 + return;
323 + }
324 + }
325 + /* Just call qsort */
326 +#ifdef I_AM_PSORT_R
327 + qsort_r(a, n, es, thunk, cmp);
328 +#elif defined(I_AM_PSORT_B)
329 + qsort_b(a, n, es, cmp);
330 +#else
331 + qsort(a, n, es, cmp);
332 #endif
333 - cmp, DEPTH(n));
334 }