]>
Commit | Line | Data |
---|---|---|
bd5b749c | 1 | /* |
8ca704e1 | 2 | * Copyright (c) 2011 Apple Inc. All rights reserved. |
bd5b749c A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
f64f9b69 | 23 | |
bd5b749c | 24 | /* CFSortFunctions.c |
8ca704e1 | 25 | Copyright (c) 1999-2011, Apple Inc. All rights reserved. |
bd5b749c A |
26 | Responsibility: Christopher Kane |
27 | */ | |
28 | ||
bd5b749c | 29 | #include <CoreFoundation/CFBase.h> |
bd5b749c | 30 | #include "CFInternal.h" |
8ca704e1 | 31 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS |
cf7d2af9 | 32 | #include <dispatch/dispatch.h> |
bd5b749c A |
33 | #endif |
34 | ||
cf7d2af9 A |
35 | enum { |
36 | kCFSortConcurrent = (1 << 0), | |
37 | kCFSortStable = (1 << 4), | |
38 | }; | |
39 | ||
40 | typedef CFIndex VALUE_TYPE; | |
41 | typedef CFIndex INDEX_TYPE; | |
42 | typedef CFComparisonResult CMP_RESULT_TYPE; | |
43 | typedef CMP_RESULT_TYPE (^COMPARATOR_BLOCK)(VALUE_TYPE, VALUE_TYPE); | |
bd5b749c | 44 | |
bd5b749c | 45 | /* |
cf7d2af9 A |
46 | Number of elements in a list and expected number of compares, |
47 | when the initial short-circuiting compare is not done. | |
48 | 1 0 | |
49 | 2 1 | |
50 | 3 2.667 | |
51 | 4 4.667 | |
52 | 5 7.167 | |
53 | 6 9.833 | |
54 | 7 12.733 | |
55 | 8 15.733 | |
56 | 9 19.167 | |
57 | 10 22.667 | |
58 | 11 26.2857 | |
59 | 12 29.9524 | |
60 | */ | |
bd5b749c | 61 | |
cf7d2af9 A |
62 | static void __CFSimpleMerge(VALUE_TYPE listp[], INDEX_TYPE cnt1, INDEX_TYPE cnt2, VALUE_TYPE tmp[], COMPARATOR_BLOCK cmp) { |
63 | if (cnt1 <= 0 || cnt2 <= 0) return; | |
64 | // if the last element of listp1 <= the first of listp2, lists are already ordered | |
65 | if (16 < cnt1 + cnt2 && cmp(listp[cnt1 - 1], listp[cnt1]) <= 0) return; | |
66 | ||
67 | INDEX_TYPE idx = 0, idx1 = 0, idx2 = cnt1; | |
68 | for (;;) { | |
69 | if (cnt1 <= idx1) { | |
70 | while (idx--) { | |
71 | listp[idx] = tmp[idx]; | |
72 | } | |
73 | return; | |
74 | } | |
75 | if (cnt1 + cnt2 <= idx2) { | |
76 | for (INDEX_TYPE t = cnt1 + cnt2 - 1; idx <= t; t--) { | |
77 | listp[t] = listp[t - cnt2]; | |
78 | } | |
79 | while (idx--) { | |
80 | listp[idx] = tmp[idx]; | |
81 | } | |
82 | return; | |
83 | } | |
84 | VALUE_TYPE v1 = listp[idx1], v2 = listp[idx2]; | |
85 | if (cmp(v1, v2) <= 0) { | |
86 | tmp[idx] = v1; | |
87 | idx1++; | |
88 | } else { | |
89 | tmp[idx] = v2; | |
90 | idx2++; | |
91 | } | |
92 | idx++; | |
93 | } | |
94 | } | |
bd5b749c | 95 | |
cf7d2af9 A |
96 | static void __CFSimpleMergeSort(VALUE_TYPE listp[], INDEX_TYPE cnt, VALUE_TYPE tmp[], COMPARATOR_BLOCK cmp) { |
97 | if (cnt < 2) { | |
98 | /* do nothing */ | |
99 | } else if (2 == cnt) { | |
100 | VALUE_TYPE v0 = listp[0], v1 = listp[1]; | |
101 | if (0 < cmp(v0, v1)) { | |
102 | listp[0] = v1; | |
103 | listp[1] = v0; | |
104 | } | |
105 | } else if (3 == cnt) { | |
106 | VALUE_TYPE v0 = listp[0], v1 = listp[1], v2 = listp[2], vt; | |
107 | if (0 < cmp(v0, v1)) { | |
108 | vt = v0; | |
109 | v0 = v1; | |
110 | v1 = vt; | |
111 | } | |
112 | if (0 < cmp(v1, v2)) { | |
113 | vt = v1; | |
114 | v1 = v2; | |
115 | v2 = vt; | |
116 | if (0 < cmp(v0, v1)) { | |
117 | vt = v0; | |
118 | v0 = v1; | |
119 | v1 = vt; | |
120 | } | |
121 | } | |
122 | listp[0] = v0; | |
123 | listp[1] = v1; | |
124 | listp[2] = v2; | |
125 | } else { | |
126 | INDEX_TYPE half_cnt = cnt / 2; | |
127 | __CFSimpleMergeSort(listp, half_cnt, tmp, cmp); | |
128 | __CFSimpleMergeSort(listp + half_cnt, cnt - half_cnt, tmp, cmp); | |
129 | __CFSimpleMerge(listp, half_cnt, cnt - half_cnt, tmp, cmp); | |
130 | } | |
bd5b749c A |
131 | } |
132 | ||
8ca704e1 A |
133 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS |
134 | // Excluded from linux for dispatch dependency | |
135 | ||
cf7d2af9 A |
136 | // if !right, put the cnt1 smallest values in tmp, else put the cnt2 largest values in tmp |
137 | static void __CFSortIndexesNMerge(VALUE_TYPE listp1[], INDEX_TYPE cnt1, VALUE_TYPE listp2[], INDEX_TYPE cnt2, VALUE_TYPE tmp[], size_t right, COMPARATOR_BLOCK cmp) { | |
138 | // if the last element of listp1 <= the first of listp2, lists are already ordered | |
139 | if (16 < cnt1 + cnt2 && cmp(listp1[cnt1 - 1], listp2[0]) <= 0) { | |
140 | memmove(tmp, (right ? listp2 : listp1), (right ? cnt2 : cnt1) * sizeof(VALUE_TYPE)); | |
141 | return; | |
142 | } | |
bd5b749c | 143 | |
cf7d2af9 A |
144 | if (right) { |
145 | VALUE_TYPE *listp1_end = listp1; | |
146 | VALUE_TYPE *listp2_end = listp2; | |
147 | VALUE_TYPE *tmp_end = tmp; | |
148 | listp1 += cnt1 - 1; | |
149 | listp2 += cnt2 - 1; | |
150 | tmp += cnt2; | |
151 | while (tmp_end < tmp) { | |
152 | tmp--; | |
153 | if (listp2 < listp2_end) { | |
154 | listp1--; | |
155 | *tmp = *listp1; | |
156 | } else if (listp1 < listp1_end) { | |
157 | listp2--; | |
158 | *tmp = *listp2; | |
159 | } else { | |
160 | VALUE_TYPE v1 = *listp1, v2 = *listp2; | |
161 | CMP_RESULT_TYPE res = cmp(v1, v2); | |
162 | if (res <= 0) { | |
163 | *tmp = v2; | |
164 | listp2--; | |
165 | } else { | |
166 | *tmp = v1; | |
167 | listp1--; | |
168 | } | |
169 | } | |
170 | } | |
171 | } else { | |
172 | VALUE_TYPE *listp1_end = listp1 + cnt1; | |
173 | VALUE_TYPE *listp2_end = listp2 + cnt2; | |
174 | VALUE_TYPE *tmp_end = tmp + cnt1; | |
175 | while (tmp < tmp_end) { | |
176 | if (listp2_end <= listp2) { | |
177 | *tmp = *listp1; | |
178 | listp1++; | |
179 | } else if (listp1_end <= listp1) { | |
180 | *tmp = *listp2; | |
181 | listp2++; | |
182 | } else { | |
183 | VALUE_TYPE v1 = *listp1, v2 = *listp2; | |
184 | CMP_RESULT_TYPE res = cmp(v1, v2); | |
185 | if (res <= 0) { | |
186 | *tmp = v1; | |
187 | listp1++; | |
188 | } else { | |
189 | *tmp = v2; | |
190 | listp2++; | |
191 | } | |
192 | } | |
193 | tmp++; | |
194 | } | |
195 | } | |
196 | } | |
bd5b749c | 197 | |
cf7d2af9 A |
198 | /* Merging algorithm based on |
199 | "A New Parallel Sorting Algorithm based on Odd-Even Mergesort", Ezequiel Herruzo, et al | |
200 | */ | |
201 | static void __CFSortIndexesN(VALUE_TYPE listp[], INDEX_TYPE count, int32_t ncores, CMP_RESULT_TYPE (^cmp)(INDEX_TYPE, INDEX_TYPE)) { | |
202 | /* Divide the array up into up to ncores, multiple-of-16-sized, chunks */ | |
203 | INDEX_TYPE sz = ((((count + ncores - 1) / ncores) + 15) / 16) * 16; | |
204 | INDEX_TYPE num_sect = (count + sz - 1) / sz; | |
205 | INDEX_TYPE last_sect_len = count + sz - sz * num_sect; | |
206 | ||
207 | STACK_BUFFER_DECL(VALUE_TYPE *, stack_tmps, num_sect); | |
208 | for (INDEX_TYPE idx = 0; idx < num_sect; idx++) { | |
8ca704e1 | 209 | stack_tmps[idx] = (VALUE_TYPE *)malloc(sz * sizeof(VALUE_TYPE)); |
cf7d2af9 A |
210 | } |
211 | VALUE_TYPE **tmps = stack_tmps; | |
212 | ||
8ca704e1 | 213 | dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, DISPATCH_QUEUE_OVERCOMMIT); |
cf7d2af9 A |
214 | dispatch_apply(num_sect, q, ^(size_t sect) { |
215 | INDEX_TYPE sect_len = (sect < num_sect - 1) ? sz : last_sect_len; | |
216 | __CFSimpleMergeSort(listp + sect * sz, sect_len, tmps[sect], cmp); // naturally stable | |
217 | }); | |
218 | ||
219 | INDEX_TYPE even_phase_cnt = ((num_sect / 2) * 2); | |
220 | INDEX_TYPE odd_phase_cnt = (((num_sect - 1) / 2) * 2); | |
221 | for (INDEX_TYPE idx = 0; idx < (num_sect + 1) / 2; idx++) { | |
222 | dispatch_apply(even_phase_cnt, q, ^(size_t sect) { // merge even | |
223 | size_t right = sect & (size_t)0x1; | |
224 | VALUE_TYPE *left_base = listp + sect * sz - (right ? sz : 0); | |
225 | VALUE_TYPE *right_base = listp + sect * sz + (right ? 0 : sz); | |
226 | INDEX_TYPE sect2_len = (sect + 1 + (right ? 0 : 1) == num_sect) ? last_sect_len : sz; | |
227 | __CFSortIndexesNMerge(left_base, sz, right_base, sect2_len, tmps[sect], right, cmp); | |
228 | }); | |
229 | if (num_sect & 0x1) { | |
230 | memmove(tmps[num_sect - 1], listp + (num_sect - 1) * sz, last_sect_len * sizeof(VALUE_TYPE)); | |
231 | } | |
232 | dispatch_apply(odd_phase_cnt, q, ^(size_t sect) { // merge odd | |
233 | size_t right = sect & (size_t)0x1; | |
234 | VALUE_TYPE *left_base = tmps[sect + (right ? 0 : 1)]; | |
235 | VALUE_TYPE *right_base = tmps[sect + (right ? 1 : 2)]; | |
236 | INDEX_TYPE sect2_len = (sect + 1 + (right ? 1 : 2) == num_sect) ? last_sect_len : sz; | |
237 | __CFSortIndexesNMerge(left_base, sz, right_base, sect2_len, listp + sect * sz + sz, right, cmp); | |
238 | }); | |
239 | memmove(listp + 0 * sz, tmps[0], sz * sizeof(VALUE_TYPE)); | |
240 | if (!(num_sect & 0x1)) { | |
241 | memmove(listp + (num_sect - 1) * sz, tmps[num_sect - 1], last_sect_len * sizeof(VALUE_TYPE)); | |
242 | } | |
243 | } | |
bd5b749c | 244 | |
cf7d2af9 A |
245 | for (INDEX_TYPE idx = 0; idx < num_sect; idx++) { |
246 | free(stack_tmps[idx]); | |
247 | } | |
bd5b749c | 248 | } |
bd5b749c | 249 | #endif |
bd5b749c | 250 | |
8ca704e1 A |
251 | // fills an array of indexes (of length count) giving the indexes 0 - count-1, as sorted by the comparator block |
252 | void CFSortIndexes(CFIndex *indexBuffer, CFIndex count, CFOptionFlags opts, CFComparisonResult (^cmp)(CFIndex, CFIndex)) { | |
253 | if (count < 1) return; | |
254 | if (INTPTR_MAX / sizeof(CFIndex) < count) return; | |
cf7d2af9 A |
255 | int32_t ncores = 0; |
256 | if (opts & kCFSortConcurrent) { | |
8ca704e1 | 257 | ncores = __CFActiveProcessorCount(); |
cf7d2af9 A |
258 | if (count < 160 || ncores < 2) { |
259 | opts = (opts & ~kCFSortConcurrent); | |
260 | } else if (count < 640 && 2 < ncores) { | |
261 | ncores = 2; | |
262 | } else if (count < 3200 && 4 < ncores) { | |
263 | ncores = 4; | |
264 | } else if (count < 16000 && 8 < ncores) { | |
265 | ncores = 8; | |
266 | } | |
267 | if (16 < ncores) { | |
268 | ncores = 16; | |
269 | } | |
270 | } | |
8ca704e1 | 271 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS |
cf7d2af9 | 272 | if (count <= 65536) { |
8ca704e1 | 273 | for (CFIndex idx = 0; idx < count; idx++) indexBuffer[idx] = idx; |
cf7d2af9 A |
274 | } else { |
275 | /* Specifically hard-coded to 8; the count has to be very large before more chunks and/or cores is worthwhile. */ | |
276 | CFIndex sz = ((((size_t)count + 15) / 16) * 16) / 8; | |
8ca704e1 | 277 | dispatch_apply(8, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, DISPATCH_QUEUE_OVERCOMMIT), ^(size_t n) { |
cf7d2af9 | 278 | CFIndex idx = n * sz, lim = __CFMin(idx + sz, count); |
8ca704e1 | 279 | for (; idx < lim; idx++) indexBuffer[idx] = idx; |
cf7d2af9 A |
280 | }); |
281 | } | |
8ca704e1 A |
282 | #else |
283 | for (CFIndex idx = 0; idx < count; idx++) indexBuffer[idx] = idx; | |
284 | #endif | |
285 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS | |
cf7d2af9 | 286 | if (opts & kCFSortConcurrent) { |
8ca704e1 A |
287 | __CFSortIndexesN(indexBuffer, count, ncores, cmp); // naturally stable |
288 | return; | |
cf7d2af9 | 289 | } |
bd5b749c | 290 | #endif |
8ca704e1 A |
291 | STACK_BUFFER_DECL(VALUE_TYPE, local, count <= 4096 ? count : 1); |
292 | VALUE_TYPE *tmp = (count <= 4096) ? local : (VALUE_TYPE *)malloc(count * sizeof(VALUE_TYPE)); | |
293 | __CFSimpleMergeSort(indexBuffer, count, tmp, cmp); // naturally stable | |
cf7d2af9 | 294 | if (local != tmp) free(tmp); |
bd5b749c A |
295 | } |
296 | ||
297 | /* Comparator is passed the address of the values. */ | |
298 | void CFQSortArray(void *list, CFIndex count, CFIndex elementSize, CFComparatorFunction comparator, void *context) { | |
8ca704e1 A |
299 | if (count < 2 || elementSize < 1) return; |
300 | STACK_BUFFER_DECL(CFIndex, locali, count <= 4096 ? count : 1); | |
301 | CFIndex *indexes = (count <= 4096) ? locali : (CFIndex *)malloc(count * sizeof(CFIndex)); | |
302 | CFSortIndexes(indexes, count, 0, ^(CFIndex a, CFIndex b) { return comparator((char *)list + a * elementSize, (char *)list + b * elementSize, context); }); | |
303 | STACK_BUFFER_DECL(uint8_t, locals, count <= (16 * 1024 / elementSize) ? count * elementSize : 1); | |
304 | void *store = (count <= (16 * 1024 / elementSize)) ? locals : malloc(count * elementSize); | |
cf7d2af9 A |
305 | for (CFIndex idx = 0; idx < count; idx++) { |
306 | if (sizeof(uintptr_t) == elementSize) { | |
307 | uintptr_t *a = (uintptr_t *)list + indexes[idx]; | |
308 | uintptr_t *b = (uintptr_t *)store + idx; | |
309 | *b = *a; | |
310 | } else { | |
311 | memmove((char *)store + idx * elementSize, (char *)list + indexes[idx] * elementSize, elementSize); | |
312 | } | |
bd5b749c | 313 | } |
cf7d2af9 A |
314 | // no swapping or modification of the original list has occurred until this point |
315 | objc_memmove_collectable(list, store, count * elementSize); | |
8ca704e1 A |
316 | if (locals != store) free(store); |
317 | if (locali != indexes) free(indexes); | |
bd5b749c A |
318 | } |
319 | ||
cf7d2af9 | 320 | /* Comparator is passed the address of the values. */ |
bd5b749c | 321 | void CFMergeSortArray(void *list, CFIndex count, CFIndex elementSize, CFComparatorFunction comparator, void *context) { |
8ca704e1 A |
322 | if (count < 2 || elementSize < 1) return; |
323 | STACK_BUFFER_DECL(CFIndex, locali, count <= 4096 ? count : 1); | |
324 | CFIndex *indexes = (count <= 4096) ? locali : (CFIndex *)malloc(count * sizeof(CFIndex)); | |
325 | CFSortIndexes(indexes, count, kCFSortStable, ^(CFIndex a, CFIndex b) { return comparator((char *)list + a * elementSize, (char *)list + b * elementSize, context); }); | |
326 | STACK_BUFFER_DECL(uint8_t, locals, count <= (16 * 1024 / elementSize) ? count * elementSize : 1); | |
327 | void *store = (count <= (16 * 1024 / elementSize)) ? locals : malloc(count * elementSize); | |
cf7d2af9 A |
328 | for (CFIndex idx = 0; idx < count; idx++) { |
329 | if (sizeof(uintptr_t) == elementSize) { | |
330 | uintptr_t *a = (uintptr_t *)list + indexes[idx]; | |
331 | uintptr_t *b = (uintptr_t *)store + idx; | |
332 | *b = *a; | |
333 | } else { | |
334 | memmove((char *)store + idx * elementSize, (char *)list + indexes[idx] * elementSize, elementSize); | |
335 | } | |
336 | } | |
337 | // no swapping or modification of the original list has occurred until this point | |
338 | objc_memmove_collectable(list, store, count * elementSize); | |
8ca704e1 A |
339 | if (locals != store) free(store); |
340 | if (locali != indexes) free(indexes); | |
cf7d2af9 | 341 | } |
bd5b749c | 342 | |
bd5b749c | 343 |