]>
Commit | Line | Data |
---|---|---|
bd5b749c | 1 | /* |
e588f561 | 2 | * Copyright (c) 2010 Apple Inc. All rights reserved. |
bd5b749c A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
f64f9b69 | 23 | |
bd5b749c | 24 | /* CFSortFunctions.c |
cf7d2af9 | 25 | Copyright (c) 1999-2009, Apple Inc. All rights reserved. |
bd5b749c A |
26 | Responsibility: Christopher Kane |
27 | */ | |
28 | ||
bd5b749c | 29 | #include <CoreFoundation/CFBase.h> |
bd5b749c | 30 | #include "CFInternal.h" |
cf7d2af9 A |
31 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED |
32 | #include <dispatch/dispatch.h> | |
33 | #include <sys/sysctl.h> | |
bd5b749c A |
34 | #endif |
35 | ||
bd5b749c | 36 | |
cf7d2af9 A |
37 | enum { |
38 | kCFSortConcurrent = (1 << 0), | |
39 | kCFSortStable = (1 << 4), | |
40 | }; | |
41 | ||
42 | typedef CFIndex VALUE_TYPE; | |
43 | typedef CFIndex INDEX_TYPE; | |
44 | typedef CFComparisonResult CMP_RESULT_TYPE; | |
45 | typedef CMP_RESULT_TYPE (^COMPARATOR_BLOCK)(VALUE_TYPE, VALUE_TYPE); | |
bd5b749c | 46 | |
bd5b749c | 47 | /* |
cf7d2af9 A |
48 | Number of elements in a list and expected number of compares, |
49 | when the initial short-circuiting compare is not done. | |
50 | 1 0 | |
51 | 2 1 | |
52 | 3 2.667 | |
53 | 4 4.667 | |
54 | 5 7.167 | |
55 | 6 9.833 | |
56 | 7 12.733 | |
57 | 8 15.733 | |
58 | 9 19.167 | |
59 | 10 22.667 | |
60 | 11 26.2857 | |
61 | 12 29.9524 | |
62 | */ | |
bd5b749c | 63 | |
cf7d2af9 A |
64 | static void __CFSimpleMerge(VALUE_TYPE listp[], INDEX_TYPE cnt1, INDEX_TYPE cnt2, VALUE_TYPE tmp[], COMPARATOR_BLOCK cmp) { |
65 | if (cnt1 <= 0 || cnt2 <= 0) return; | |
66 | // if the last element of listp1 <= the first of listp2, lists are already ordered | |
67 | if (16 < cnt1 + cnt2 && cmp(listp[cnt1 - 1], listp[cnt1]) <= 0) return; | |
68 | ||
69 | INDEX_TYPE idx = 0, idx1 = 0, idx2 = cnt1; | |
70 | for (;;) { | |
71 | if (cnt1 <= idx1) { | |
72 | while (idx--) { | |
73 | listp[idx] = tmp[idx]; | |
74 | } | |
75 | return; | |
76 | } | |
77 | if (cnt1 + cnt2 <= idx2) { | |
78 | for (INDEX_TYPE t = cnt1 + cnt2 - 1; idx <= t; t--) { | |
79 | listp[t] = listp[t - cnt2]; | |
80 | } | |
81 | while (idx--) { | |
82 | listp[idx] = tmp[idx]; | |
83 | } | |
84 | return; | |
85 | } | |
86 | VALUE_TYPE v1 = listp[idx1], v2 = listp[idx2]; | |
87 | if (cmp(v1, v2) <= 0) { | |
88 | tmp[idx] = v1; | |
89 | idx1++; | |
90 | } else { | |
91 | tmp[idx] = v2; | |
92 | idx2++; | |
93 | } | |
94 | idx++; | |
95 | } | |
96 | } | |
bd5b749c | 97 | |
cf7d2af9 A |
98 | static void __CFSimpleMergeSort(VALUE_TYPE listp[], INDEX_TYPE cnt, VALUE_TYPE tmp[], COMPARATOR_BLOCK cmp) { |
99 | if (cnt < 2) { | |
100 | /* do nothing */ | |
101 | } else if (2 == cnt) { | |
102 | VALUE_TYPE v0 = listp[0], v1 = listp[1]; | |
103 | if (0 < cmp(v0, v1)) { | |
104 | listp[0] = v1; | |
105 | listp[1] = v0; | |
106 | } | |
107 | } else if (3 == cnt) { | |
108 | VALUE_TYPE v0 = listp[0], v1 = listp[1], v2 = listp[2], vt; | |
109 | if (0 < cmp(v0, v1)) { | |
110 | vt = v0; | |
111 | v0 = v1; | |
112 | v1 = vt; | |
113 | } | |
114 | if (0 < cmp(v1, v2)) { | |
115 | vt = v1; | |
116 | v1 = v2; | |
117 | v2 = vt; | |
118 | if (0 < cmp(v0, v1)) { | |
119 | vt = v0; | |
120 | v0 = v1; | |
121 | v1 = vt; | |
122 | } | |
123 | } | |
124 | listp[0] = v0; | |
125 | listp[1] = v1; | |
126 | listp[2] = v2; | |
127 | } else { | |
128 | INDEX_TYPE half_cnt = cnt / 2; | |
129 | __CFSimpleMergeSort(listp, half_cnt, tmp, cmp); | |
130 | __CFSimpleMergeSort(listp + half_cnt, cnt - half_cnt, tmp, cmp); | |
131 | __CFSimpleMerge(listp, half_cnt, cnt - half_cnt, tmp, cmp); | |
132 | } | |
bd5b749c A |
133 | } |
134 | ||
cf7d2af9 A |
135 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED |
136 | // if !right, put the cnt1 smallest values in tmp, else put the cnt2 largest values in tmp | |
137 | static void __CFSortIndexesNMerge(VALUE_TYPE listp1[], INDEX_TYPE cnt1, VALUE_TYPE listp2[], INDEX_TYPE cnt2, VALUE_TYPE tmp[], size_t right, COMPARATOR_BLOCK cmp) { | |
138 | // if the last element of listp1 <= the first of listp2, lists are already ordered | |
139 | if (16 < cnt1 + cnt2 && cmp(listp1[cnt1 - 1], listp2[0]) <= 0) { | |
140 | memmove(tmp, (right ? listp2 : listp1), (right ? cnt2 : cnt1) * sizeof(VALUE_TYPE)); | |
141 | return; | |
142 | } | |
bd5b749c | 143 | |
cf7d2af9 A |
144 | if (right) { |
145 | VALUE_TYPE *listp1_end = listp1; | |
146 | VALUE_TYPE *listp2_end = listp2; | |
147 | VALUE_TYPE *tmp_end = tmp; | |
148 | listp1 += cnt1 - 1; | |
149 | listp2 += cnt2 - 1; | |
150 | tmp += cnt2; | |
151 | while (tmp_end < tmp) { | |
152 | tmp--; | |
153 | if (listp2 < listp2_end) { | |
154 | listp1--; | |
155 | *tmp = *listp1; | |
156 | } else if (listp1 < listp1_end) { | |
157 | listp2--; | |
158 | *tmp = *listp2; | |
159 | } else { | |
160 | VALUE_TYPE v1 = *listp1, v2 = *listp2; | |
161 | CMP_RESULT_TYPE res = cmp(v1, v2); | |
162 | if (res <= 0) { | |
163 | *tmp = v2; | |
164 | listp2--; | |
165 | } else { | |
166 | *tmp = v1; | |
167 | listp1--; | |
168 | } | |
169 | } | |
170 | } | |
171 | } else { | |
172 | VALUE_TYPE *listp1_end = listp1 + cnt1; | |
173 | VALUE_TYPE *listp2_end = listp2 + cnt2; | |
174 | VALUE_TYPE *tmp_end = tmp + cnt1; | |
175 | while (tmp < tmp_end) { | |
176 | if (listp2_end <= listp2) { | |
177 | *tmp = *listp1; | |
178 | listp1++; | |
179 | } else if (listp1_end <= listp1) { | |
180 | *tmp = *listp2; | |
181 | listp2++; | |
182 | } else { | |
183 | VALUE_TYPE v1 = *listp1, v2 = *listp2; | |
184 | CMP_RESULT_TYPE res = cmp(v1, v2); | |
185 | if (res <= 0) { | |
186 | *tmp = v1; | |
187 | listp1++; | |
188 | } else { | |
189 | *tmp = v2; | |
190 | listp2++; | |
191 | } | |
192 | } | |
193 | tmp++; | |
194 | } | |
195 | } | |
196 | } | |
bd5b749c | 197 | |
cf7d2af9 A |
198 | /* Merging algorithm based on |
199 | "A New Parallel Sorting Algorithm based on Odd-Even Mergesort", Ezequiel Herruzo, et al | |
200 | */ | |
201 | static void __CFSortIndexesN(VALUE_TYPE listp[], INDEX_TYPE count, int32_t ncores, CMP_RESULT_TYPE (^cmp)(INDEX_TYPE, INDEX_TYPE)) { | |
202 | /* Divide the array up into up to ncores, multiple-of-16-sized, chunks */ | |
203 | INDEX_TYPE sz = ((((count + ncores - 1) / ncores) + 15) / 16) * 16; | |
204 | INDEX_TYPE num_sect = (count + sz - 1) / sz; | |
205 | INDEX_TYPE last_sect_len = count + sz - sz * num_sect; | |
206 | ||
207 | STACK_BUFFER_DECL(VALUE_TYPE *, stack_tmps, num_sect); | |
208 | for (INDEX_TYPE idx = 0; idx < num_sect; idx++) { | |
209 | stack_tmps[idx] = malloc(sz * sizeof(VALUE_TYPE)); | |
210 | } | |
211 | VALUE_TYPE **tmps = stack_tmps; | |
212 | ||
213 | dispatch_queue_t q = dispatch_get_concurrent_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT); | |
214 | dispatch_apply(num_sect, q, ^(size_t sect) { | |
215 | INDEX_TYPE sect_len = (sect < num_sect - 1) ? sz : last_sect_len; | |
216 | __CFSimpleMergeSort(listp + sect * sz, sect_len, tmps[sect], cmp); // naturally stable | |
217 | }); | |
218 | ||
219 | INDEX_TYPE even_phase_cnt = ((num_sect / 2) * 2); | |
220 | INDEX_TYPE odd_phase_cnt = (((num_sect - 1) / 2) * 2); | |
221 | for (INDEX_TYPE idx = 0; idx < (num_sect + 1) / 2; idx++) { | |
222 | dispatch_apply(even_phase_cnt, q, ^(size_t sect) { // merge even | |
223 | size_t right = sect & (size_t)0x1; | |
224 | VALUE_TYPE *left_base = listp + sect * sz - (right ? sz : 0); | |
225 | VALUE_TYPE *right_base = listp + sect * sz + (right ? 0 : sz); | |
226 | INDEX_TYPE sect2_len = (sect + 1 + (right ? 0 : 1) == num_sect) ? last_sect_len : sz; | |
227 | __CFSortIndexesNMerge(left_base, sz, right_base, sect2_len, tmps[sect], right, cmp); | |
228 | }); | |
229 | if (num_sect & 0x1) { | |
230 | memmove(tmps[num_sect - 1], listp + (num_sect - 1) * sz, last_sect_len * sizeof(VALUE_TYPE)); | |
231 | } | |
232 | dispatch_apply(odd_phase_cnt, q, ^(size_t sect) { // merge odd | |
233 | size_t right = sect & (size_t)0x1; | |
234 | VALUE_TYPE *left_base = tmps[sect + (right ? 0 : 1)]; | |
235 | VALUE_TYPE *right_base = tmps[sect + (right ? 1 : 2)]; | |
236 | INDEX_TYPE sect2_len = (sect + 1 + (right ? 1 : 2) == num_sect) ? last_sect_len : sz; | |
237 | __CFSortIndexesNMerge(left_base, sz, right_base, sect2_len, listp + sect * sz + sz, right, cmp); | |
238 | }); | |
239 | memmove(listp + 0 * sz, tmps[0], sz * sizeof(VALUE_TYPE)); | |
240 | if (!(num_sect & 0x1)) { | |
241 | memmove(listp + (num_sect - 1) * sz, tmps[num_sect - 1], last_sect_len * sizeof(VALUE_TYPE)); | |
242 | } | |
243 | } | |
bd5b749c | 244 | |
cf7d2af9 A |
245 | for (INDEX_TYPE idx = 0; idx < num_sect; idx++) { |
246 | free(stack_tmps[idx]); | |
247 | } | |
bd5b749c | 248 | } |
bd5b749c | 249 | #endif |
bd5b749c | 250 | |
cf7d2af9 A |
251 | // returns an array of indexes (of length count) giving the indexes 0 - count-1, as sorted by the comparator block |
252 | CFIndex *CFSortIndexes(CFIndex count, CFOptionFlags opts, CFComparisonResult (^cmp)(CFIndex, CFIndex)) { | |
253 | if (count < 1) return NULL; | |
254 | if (INTPTR_MAX / sizeof(CFIndex) < count) return NULL; | |
255 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED | |
256 | int32_t ncores = 0; | |
257 | if (opts & kCFSortConcurrent) { | |
258 | int32_t mib[2] = {CTL_HW, HW_AVAILCPU}; | |
259 | size_t len = sizeof(ncores); | |
260 | sysctl(mib, 2, &ncores, &len, NULL, 0); | |
261 | if (count < 160 || ncores < 2) { | |
262 | opts = (opts & ~kCFSortConcurrent); | |
263 | } else if (count < 640 && 2 < ncores) { | |
264 | ncores = 2; | |
265 | } else if (count < 3200 && 4 < ncores) { | |
266 | ncores = 4; | |
267 | } else if (count < 16000 && 8 < ncores) { | |
268 | ncores = 8; | |
269 | } | |
270 | if (16 < ncores) { | |
271 | ncores = 16; | |
272 | } | |
273 | } | |
274 | CFIndex *idxs = malloc_zone_memalign(malloc_default_zone(), 64, count * sizeof(CFIndex)); | |
275 | if (!idxs) return NULL; | |
276 | if (count <= 65536) { | |
277 | for (CFIndex idx = 0; idx < count; idx++) idxs[idx] = idx; | |
278 | } else { | |
279 | /* Specifically hard-coded to 8; the count has to be very large before more chunks and/or cores is worthwhile. */ | |
280 | CFIndex sz = ((((size_t)count + 15) / 16) * 16) / 8; | |
281 | dispatch_apply(8, dispatch_get_concurrent_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT), ^(size_t n) { | |
282 | CFIndex idx = n * sz, lim = __CFMin(idx + sz, count); | |
283 | for (; idx < lim; idx++) idxs[idx] = idx; | |
284 | }); | |
285 | } | |
286 | if (opts & kCFSortConcurrent) { | |
287 | __CFSortIndexesN(idxs, count, ncores, cmp); // naturally stable | |
288 | return idxs; | |
289 | } | |
bd5b749c | 290 | #else |
cf7d2af9 A |
291 | CFIndex *idxs = (CFIndex *)malloc(count * sizeof(CFIndex)); |
292 | if (!idxs) return NULL; | |
293 | for (CFIndex idx = 0; idx < count; idx++) idxs[idx] = idx; | |
bd5b749c | 294 | #endif |
cf7d2af9 A |
295 | STACK_BUFFER_DECL(VALUE_TYPE, local, count <= 16384 ? count : 1); |
296 | VALUE_TYPE *tmp = (count <= 16384) ? local : (VALUE_TYPE *)malloc(count * sizeof(VALUE_TYPE)); | |
297 | __CFSimpleMergeSort(idxs, count, tmp, cmp); // naturally stable | |
298 | if (local != tmp) free(tmp); | |
299 | return idxs; | |
bd5b749c A |
300 | } |
301 | ||
302 | /* Comparator is passed the address of the values. */ | |
303 | void CFQSortArray(void *list, CFIndex count, CFIndex elementSize, CFComparatorFunction comparator, void *context) { | |
cf7d2af9 A |
304 | if (count < 1 || elementSize < 1) return; |
305 | CFIndex *indexes = CFSortIndexes(count, 0, ^(CFIndex a, CFIndex b) { return comparator((char *)list + a * elementSize, (char *)list + b * elementSize, context); }); // naturally stable | |
306 | void *store = malloc(count * elementSize); | |
307 | for (CFIndex idx = 0; idx < count; idx++) { | |
308 | if (sizeof(uintptr_t) == elementSize) { | |
309 | uintptr_t *a = (uintptr_t *)list + indexes[idx]; | |
310 | uintptr_t *b = (uintptr_t *)store + idx; | |
311 | *b = *a; | |
312 | } else { | |
313 | memmove((char *)store + idx * elementSize, (char *)list + indexes[idx] * elementSize, elementSize); | |
314 | } | |
bd5b749c | 315 | } |
cf7d2af9 A |
316 | // no swapping or modification of the original list has occurred until this point |
317 | objc_memmove_collectable(list, store, count * elementSize); | |
318 | free(store); | |
319 | free(indexes); | |
bd5b749c A |
320 | } |
321 | ||
cf7d2af9 | 322 | /* Comparator is passed the address of the values. */ |
bd5b749c | 323 | void CFMergeSortArray(void *list, CFIndex count, CFIndex elementSize, CFComparatorFunction comparator, void *context) { |
cf7d2af9 A |
324 | if (count < 1 || elementSize < 1) return; |
325 | CFIndex *indexes = CFSortIndexes(count, kCFSortStable, ^(CFIndex a, CFIndex b) { return comparator((char *)list + a * elementSize, (char *)list + b * elementSize, context); }); // naturally stable | |
326 | void *store = malloc(count * elementSize); | |
327 | for (CFIndex idx = 0; idx < count; idx++) { | |
328 | if (sizeof(uintptr_t) == elementSize) { | |
329 | uintptr_t *a = (uintptr_t *)list + indexes[idx]; | |
330 | uintptr_t *b = (uintptr_t *)store + idx; | |
331 | *b = *a; | |
332 | } else { | |
333 | memmove((char *)store + idx * elementSize, (char *)list + indexes[idx] * elementSize, elementSize); | |
334 | } | |
335 | } | |
336 | // no swapping or modification of the original list has occurred until this point | |
337 | objc_memmove_collectable(list, store, count * elementSize); | |
338 | free(store); | |
339 | free(indexes); | |
340 | } | |
bd5b749c | 341 | |
bd5b749c | 342 |