/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
    Copyright (c) 1999-2011, Apple Inc. All rights reserved.
    Responsibility: Christopher Kane
*/
#include <CoreFoundation/CFBase.h>
#include "CFInternal.h"
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
#include <dispatch/dispatch.h>
#endif

enum {
    kCFSortConcurrent = (1 << 0),
    kCFSortStable = (1 << 4),
};

typedef CFIndex VALUE_TYPE;
typedef CFIndex INDEX_TYPE;
typedef CFComparisonResult CMP_RESULT_TYPE;
typedef CMP_RESULT_TYPE (^COMPARATOR_BLOCK)(VALUE_TYPE, VALUE_TYPE);

/*
Number of elements in a list and expected number of compares,
when the initial short-circuiting compare is not done.
*/

static void __CFSimpleMerge(VALUE_TYPE listp[], INDEX_TYPE cnt1, INDEX_TYPE cnt2, VALUE_TYPE tmp[], COMPARATOR_BLOCK cmp) {
    if (cnt1 <= 0 || cnt2 <= 0) return;
    // if the last element of listp1 <= the first of listp2, lists are already ordered
    if (16 < cnt1 + cnt2 && cmp(listp[cnt1 - 1], listp[cnt1]) <= 0) return;

    INDEX_TYPE idx = 0, idx1 = 0, idx2 = cnt1;
    for (;;) {
        if (cnt1 <= idx1) {
            // first run exhausted; copy the merged prefix back from tmp
            while (idx--) {
                listp[idx] = tmp[idx];
            }
            return;
        }
        if (cnt1 + cnt2 <= idx2) {
            // second run exhausted; slide the remainder of the first run up,
            // then copy the merged prefix back from tmp
            for (INDEX_TYPE t = cnt1 + cnt2 - 1; idx <= t; t--) {
                listp[t] = listp[t - cnt2];
            }
            while (idx--) {
                listp[idx] = tmp[idx];
            }
            return;
        }
        VALUE_TYPE v1 = listp[idx1], v2 = listp[idx2];
        if (cmp(v1, v2) <= 0) {
            tmp[idx] = v1;
            idx1++;
        } else {
            tmp[idx] = v2;
            idx2++;
        }
        idx++;
    }
}
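
/*
 Worked example (illustrative, hypothetical values): merging the adjacent runs
 {1, 4, 9} and {2, 3} held in one buffer, i.e. listp = {1, 4, 9, 2, 3} with
 cnt1 = 3 and cnt2 = 2. The loop writes 1, 2, 3 into tmp, exhausts the second
 run, slides the leftover 4 and 9 up to the end, and copies the merged prefix
 back, leaving listp = {1, 2, 3, 4, 9}. The "already ordered" check above is
 only performed for totals above 16, matching the note near the top of the
 file about when the short-circuiting compare is worthwhile.
*/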
static void __CFSimpleMergeSort(VALUE_TYPE listp[], INDEX_TYPE cnt, VALUE_TYPE tmp[], COMPARATOR_BLOCK cmp) {
    if (cnt < 2) {
        /* nothing to do */
    } else if (2 == cnt) {
        VALUE_TYPE v0 = listp[0], v1 = listp[1];
        if (0 < cmp(v0, v1)) {
            listp[0] = v1;
            listp[1] = v0;
        }
    } else if (3 == cnt) {
        // hand-coded three-element sort; cheaper than a recursive split
        VALUE_TYPE v0 = listp[0], v1 = listp[1], v2 = listp[2], vt;
        if (0 < cmp(v0, v1)) {
            vt = v0;
            v0 = v1;
            v1 = vt;
        }
        if (0 < cmp(v1, v2)) {
            vt = v1;
            v1 = v2;
            v2 = vt;
            if (0 < cmp(v0, v1)) {
                vt = v0;
                v0 = v1;
                v1 = vt;
            }
        }
        listp[0] = v0;
        listp[1] = v1;
        listp[2] = v2;
    } else {
        INDEX_TYPE half_cnt = cnt / 2;
        __CFSimpleMergeSort(listp, half_cnt, tmp, cmp);
        __CFSimpleMergeSort(listp + half_cnt, cnt - half_cnt, tmp, cmp);
        __CFSimpleMerge(listp, half_cnt, cnt - half_cnt, tmp, cmp);
    }
}
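
/*
 Illustrative driver (hypothetical, not part of the original file): sorting a
 small list with the simple mergesort. The tmp scratch buffer must hold at
 least cnt values.

     VALUE_TYPE list[] = {3, 0, 2, 1};
     VALUE_TYPE tmp[4];
     __CFSimpleMergeSort(list, 4, tmp, ^CMP_RESULT_TYPE(VALUE_TYPE a, VALUE_TYPE b) {
         if (a < b) return kCFCompareLessThan;
         if (b < a) return kCFCompareGreaterThan;
         return kCFCompareEqualTo;
     });
     // list is now {0, 1, 2, 3}; equal elements keep their relative order,
     // which is why callers below describe the sort as "naturally stable".
*/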
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
// Excluded from Linux for the dispatch dependency

// if !right, put the cnt1 smallest values in tmp, else put the cnt2 largest values in tmp
static void __CFSortIndexesNMerge(VALUE_TYPE listp1[], INDEX_TYPE cnt1, VALUE_TYPE listp2[], INDEX_TYPE cnt2, VALUE_TYPE tmp[], size_t right, COMPARATOR_BLOCK cmp) {
    // if the last element of listp1 <= the first of listp2, lists are already ordered
    if (16 < cnt1 + cnt2 && cmp(listp1[cnt1 - 1], listp2[0]) <= 0) {
        memmove(tmp, (right ? listp2 : listp1), (right ? cnt2 : cnt1) * sizeof(VALUE_TYPE));
        return;
    }

    if (right) {
        // merge backward from the high ends, keeping the cnt2 largest values
        VALUE_TYPE *listp1_end = listp1;
        VALUE_TYPE *listp2_end = listp2;
        VALUE_TYPE *tmp_end = tmp;
        listp1 += cnt1 - 1;
        listp2 += cnt2 - 1;
        tmp += cnt2;
        while (tmp_end < tmp) {
            tmp--;
            if (listp2 < listp2_end) {
                *tmp = *listp1;
                listp1--;
            } else if (listp1 < listp1_end) {
                *tmp = *listp2;
                listp2--;
            } else {
                VALUE_TYPE v1 = *listp1, v2 = *listp2;
                CMP_RESULT_TYPE res = cmp(v1, v2);
                if (res <= 0) {
                    *tmp = v2;
                    listp2--;
                } else {
                    *tmp = v1;
                    listp1--;
                }
            }
        }
    } else {
        // merge forward from the low ends, keeping the cnt1 smallest values
        VALUE_TYPE *listp1_end = listp1 + cnt1;
        VALUE_TYPE *listp2_end = listp2 + cnt2;
        VALUE_TYPE *tmp_end = tmp + cnt1;
        while (tmp < tmp_end) {
            if (listp2_end <= listp2) {
                *tmp = *listp1;
                listp1++;
            } else if (listp1_end <= listp1) {
                *tmp = *listp2;
                listp2++;
            } else {
                VALUE_TYPE v1 = *listp1, v2 = *listp2;
                CMP_RESULT_TYPE res = cmp(v1, v2);
                if (res <= 0) {
                    *tmp = v1;
                    listp1++;
                } else {
                    *tmp = v2;
                    listp2++;
                }
            }
            tmp++;
        }
    }
}
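
/*
 Worked example (illustrative, hypothetical values): with listp1 = {1, 5},
 listp2 = {2, 6}, and cnt1 = cnt2 = 2, a full merge would produce
 {1, 2, 5, 6}. With right == 0 this routine writes only the cnt1 smallest
 values, {1, 2}, into tmp; with right != 0 only the cnt2 largest, {5, 6}.
 Each phase of the odd-even merge below needs just one half from each
 pairwise merge, so the other half is never computed.
*/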
/* Merging algorithm based on
   "A New Parallel Sorting Algorithm based on Odd-Even Mergesort", Ezequiel Herruzo, et al.
*/
static void __CFSortIndexesN(VALUE_TYPE listp[], INDEX_TYPE count, int32_t ncores, CMP_RESULT_TYPE (^cmp)(INDEX_TYPE, INDEX_TYPE)) {
    /* Divide the array up into up to ncores, multiple-of-16-sized, chunks */
    INDEX_TYPE sz = ((((count + ncores - 1) / ncores) + 15) / 16) * 16;
    INDEX_TYPE num_sect = (count + sz - 1) / sz;
    INDEX_TYPE last_sect_len = count + sz - sz * num_sect;

    STACK_BUFFER_DECL(VALUE_TYPE *, stack_tmps, num_sect);
    for (INDEX_TYPE idx = 0; idx < num_sect; idx++) {
        stack_tmps[idx] = (VALUE_TYPE *)malloc(sz * sizeof(VALUE_TYPE));
    }
    VALUE_TYPE **tmps = stack_tmps;

    dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, DISPATCH_QUEUE_OVERCOMMIT);
    // sort each section independently and concurrently
    dispatch_apply(num_sect, q, ^(size_t sect) {
        INDEX_TYPE sect_len = (sect < num_sect - 1) ? sz : last_sect_len;
        __CFSimpleMergeSort(listp + sect * sz, sect_len, tmps[sect], cmp); // naturally stable
    });

    INDEX_TYPE even_phase_cnt = ((num_sect / 2) * 2);
    INDEX_TYPE odd_phase_cnt = (((num_sect - 1) / 2) * 2);
    for (INDEX_TYPE idx = 0; idx < (num_sect + 1) / 2; idx++) {
        dispatch_apply(even_phase_cnt, q, ^(size_t sect) { // merge even
            size_t right = sect & (size_t)0x1;
            VALUE_TYPE *left_base = listp + sect * sz - (right ? sz : 0);
            VALUE_TYPE *right_base = listp + sect * sz + (right ? 0 : sz);
            INDEX_TYPE sect2_len = (sect + 1 + (right ? 0 : 1) == num_sect) ? last_sect_len : sz;
            __CFSortIndexesNMerge(left_base, sz, right_base, sect2_len, tmps[sect], right, cmp);
        });
        if (num_sect & 0x1) {
            memmove(tmps[num_sect - 1], listp + (num_sect - 1) * sz, last_sect_len * sizeof(VALUE_TYPE));
        }
        dispatch_apply(odd_phase_cnt, q, ^(size_t sect) { // merge odd
            size_t right = sect & (size_t)0x1;
            VALUE_TYPE *left_base = tmps[sect + (right ? 0 : 1)];
            VALUE_TYPE *right_base = tmps[sect + (right ? 1 : 2)];
            INDEX_TYPE sect2_len = (sect + 1 + (right ? 1 : 2) == num_sect) ? last_sect_len : sz;
            __CFSortIndexesNMerge(left_base, sz, right_base, sect2_len, listp + sect * sz + sz, right, cmp);
        });
        memmove(listp + 0 * sz, tmps[0], sz * sizeof(VALUE_TYPE));
        if (!(num_sect & 0x1)) {
            memmove(listp + (num_sect - 1) * sz, tmps[num_sect - 1], last_sect_len * sizeof(VALUE_TYPE));
        }
    }

    for (INDEX_TYPE idx = 0; idx < num_sect; idx++) {
        free(stack_tmps[idx]);
    }
}
#endif
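
/*
 Phase-schedule sketch (illustrative): with num_sect == 4 sorted sections
 A B C D, each round first merges the even pairs (A,B) and (C,D) into the
 per-section tmp buffers, then merges the middle odd pair (B,C) back into
 listp. After (num_sect + 1) / 2 such rounds every value has reached its
 final section, much as in odd-even transposition sort at the granularity
 of whole sections.
*/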
// fills an array of indexes (of length count) giving the indexes 0 .. count-1, as sorted by the comparator block
void CFSortIndexes(CFIndex *indexBuffer, CFIndex count, CFOptionFlags opts, CFComparisonResult (^cmp)(CFIndex, CFIndex)) {
    if (count < 1) return;
    if (INTPTR_MAX / sizeof(CFIndex) < count) return;
    int32_t ncores = 0;
    if (opts & kCFSortConcurrent) {
        // small counts don't benefit from more cores; cap accordingly
        ncores = __CFActiveProcessorCount();
        if (count < 160 || ncores < 2) {
            opts = (opts & ~kCFSortConcurrent);
        } else if (count < 640 && 2 < ncores) {
            ncores = 2;
        } else if (count < 3200 && 4 < ncores) {
            ncores = 4;
        } else if (count < 16000 && 8 < ncores) {
            ncores = 8;
        }
    }
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
    if (count <= 65536) {
        for (CFIndex idx = 0; idx < count; idx++) indexBuffer[idx] = idx;
    } else {
        /* Specifically hard-coded to 8; the count has to be very large before more chunks and/or cores is worthwhile. */
        CFIndex sz = ((((size_t)count + 15) / 16) * 16) / 8;
        dispatch_apply(8, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, DISPATCH_QUEUE_OVERCOMMIT), ^(size_t n) {
            CFIndex idx = n * sz, lim = __CFMin(idx + sz, count);
            for (; idx < lim; idx++) indexBuffer[idx] = idx;
        });
    }
#else
    for (CFIndex idx = 0; idx < count; idx++) indexBuffer[idx] = idx;
#endif
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
    if (opts & kCFSortConcurrent) {
        __CFSortIndexesN(indexBuffer, count, ncores, cmp); // naturally stable
        return;
    }
#endif
    STACK_BUFFER_DECL(VALUE_TYPE, local, count <= 4096 ? count : 1);
    VALUE_TYPE *tmp = (count <= 4096) ? local : (VALUE_TYPE *)malloc(count * sizeof(VALUE_TYPE));
    __CFSimpleMergeSort(indexBuffer, count, tmp, cmp); // naturally stable
    if (local != tmp) free(tmp);
}
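
/*
 Illustrative usage (hypothetical caller, not part of the original file):
 sorting indexes into a C array of doubles without moving the doubles.

     double storage[] = {3.0, 1.0, 2.0};
     double *values = storage;   // blocks cannot capture arrays directly
     CFIndex order[3];
     CFSortIndexes(order, 3, 0, ^CFComparisonResult(CFIndex a, CFIndex b) {
         if (values[a] < values[b]) return kCFCompareLessThan;
         if (values[b] < values[a]) return kCFCompareGreaterThan;
         return kCFCompareEqualTo;
     });
     // order is now {1, 2, 0}: values[1] <= values[2] <= values[0]
*/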
/* Comparator is passed the address of the values. */
void CFQSortArray(void *list, CFIndex count, CFIndex elementSize, CFComparatorFunction comparator, void *context) {
    if (count < 2 || elementSize < 1) return;
    STACK_BUFFER_DECL(CFIndex, locali, count <= 4096 ? count : 1);
    CFIndex *indexes = (count <= 4096) ? locali : (CFIndex *)malloc(count * sizeof(CFIndex));
    CFSortIndexes(indexes, count, 0, ^(CFIndex a, CFIndex b) { return comparator((char *)list + a * elementSize, (char *)list + b * elementSize, context); });
    STACK_BUFFER_DECL(uint8_t, locals, count <= (16 * 1024 / elementSize) ? count * elementSize : 1);
    void *store = (count <= (16 * 1024 / elementSize)) ? locals : malloc(count * elementSize);
    for (CFIndex idx = 0; idx < count; idx++) {
        if (sizeof(uintptr_t) == elementSize) {
            // fast path for pointer-sized elements
            uintptr_t *a = (uintptr_t *)list + indexes[idx];
            uintptr_t *b = (uintptr_t *)store + idx;
            *b = *a;
        } else {
            memmove((char *)store + idx * elementSize, (char *)list + indexes[idx] * elementSize, elementSize);
        }
    }
    // no swapping or modification of the original list has occurred until this point
    objc_memmove_collectable(list, store, count * elementSize);
    if (locals != store) free(store);
    if (locali != indexes) free(indexes);
}
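
/*
 Illustrative usage (hypothetical caller and comparator, not part of the
 original file): sorting a plain C array of ints in place.

     static CFComparisonResult intCompare(const void *a, const void *b, void *ctx) {
         int x = *(const int *)a, y = *(const int *)b;
         if (x < y) return kCFCompareLessThan;
         if (y < x) return kCFCompareGreaterThan;
         return kCFCompareEqualTo;
     }

     int nums[] = {4, 2, 7, 1};
     CFQSortArray(nums, 4, sizeof(int), intCompare, NULL);
     // nums is now {1, 2, 4, 7}

 Despite the name, the function does not quicksort in place: it sorts indexes
 via CFSortIndexes, then permutes the elements through `store` in one pass.
*/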
/* Comparator is passed the address of the values. */
void CFMergeSortArray(void *list, CFIndex count, CFIndex elementSize, CFComparatorFunction comparator, void *context) {
    if (count < 2 || elementSize < 1) return;
    STACK_BUFFER_DECL(CFIndex, locali, count <= 4096 ? count : 1);
    CFIndex *indexes = (count <= 4096) ? locali : (CFIndex *)malloc(count * sizeof(CFIndex));
    CFSortIndexes(indexes, count, kCFSortStable, ^(CFIndex a, CFIndex b) { return comparator((char *)list + a * elementSize, (char *)list + b * elementSize, context); });
    STACK_BUFFER_DECL(uint8_t, locals, count <= (16 * 1024 / elementSize) ? count * elementSize : 1);
    void *store = (count <= (16 * 1024 / elementSize)) ? locals : malloc(count * elementSize);
    for (CFIndex idx = 0; idx < count; idx++) {
        if (sizeof(uintptr_t) == elementSize) {
            // fast path for pointer-sized elements
            uintptr_t *a = (uintptr_t *)list + indexes[idx];
            uintptr_t *b = (uintptr_t *)store + idx;
            *b = *a;
        } else {
            memmove((char *)store + idx * elementSize, (char *)list + indexes[idx] * elementSize, elementSize);
        }
    }
    // no swapping or modification of the original list has occurred until this point
    objc_memmove_collectable(list, store, count * elementSize);
    if (locals != store) free(store);
    if (locali != indexes) free(indexes);
}
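
/*
 Illustrative note: the only difference from CFQSortArray above is the
 kCFSortStable flag, which keeps equal elements in their original relative
 order. For example (hypothetical values), stably sorting the pairs
 (2,a), (1,b), (2,c) by their first field yields (1,b), (2,a), (2,c);
 the two 2-keyed pairs keep their a-before-c order.
*/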
);