]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
57a6839d | 4 | * Copyright (C) 2003-2013, International Business Machines |
374ca955 A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: uarrsort.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003aug04 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Internal function for sorting arrays. | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | #include "cmemory.h" | |
21 | #include "uarrsort.h" | |
22 | ||
23 | enum { | |
57a6839d A |
24 | /** |
25 | * "from Knuth" | |
26 | * | |
27 | * A binary search over 8 items performs 4 comparisons: | |
28 | * log2(8)=3 to subdivide, +1 to check for equality. | |
29 | * A linear search over 8 items on average also performs 4 comparisons. | |
30 | */ | |
31 | MIN_QSORT=9, | |
374ca955 A |
32 | STACK_ITEM_SIZE=200 |
33 | }; | |
34 | ||
35 | /* UComparator convenience implementations ---------------------------------- */ | |
36 | ||
37 | U_CAPI int32_t U_EXPORT2 | |
38 | uprv_uint16Comparator(const void *context, const void *left, const void *right) { | |
39 | return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; | |
40 | } | |
41 | ||
42 | U_CAPI int32_t U_EXPORT2 | |
43 | uprv_int32Comparator(const void *context, const void *left, const void *right) { | |
44 | return *(const int32_t *)left - *(const int32_t *)right; | |
45 | } | |
46 | ||
47 | U_CAPI int32_t U_EXPORT2 | |
48 | uprv_uint32Comparator(const void *context, const void *left, const void *right) { | |
49 | uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; | |
50 | ||
51 | /* compare directly because (l-r) would overflow the int32_t result */ | |
52 | if(l<r) { | |
53 | return -1; | |
54 | } else if(l==r) { | |
55 | return 0; | |
56 | } else /* l>r */ { | |
57 | return 1; | |
58 | } | |
59 | } | |
60 | ||
57a6839d | 61 | /* Insertion sort using binary search --------------------------------------- */ |
374ca955 | 62 | |
57a6839d A |
63 | U_CAPI int32_t U_EXPORT2 |
64 | uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, | |
65 | UComparator *cmp, const void *context) { | |
66 | int32_t start=0; | |
67 | UBool found=FALSE; | |
68 | ||
69 | /* Binary search until we get down to a tiny sub-array. */ | |
70 | while((limit-start)>=MIN_QSORT) { | |
71 | int32_t i=(start+limit)/2; | |
72 | int32_t diff=cmp(context, item, array+i*itemSize); | |
73 | if(diff==0) { | |
74 | /* | |
75 | * Found the item. We look for the *last* occurrence of such | |
76 | * an item, for stable sorting. | |
77 | * If we knew that there will be only few equal items, | |
78 | * we could break now and enter the linear search. | |
79 | * However, if there are many equal items, then it should be | |
80 | * faster to continue with the binary search. | |
81 | * It seems likely that we either have all unique items | |
82 | * (where found will never become TRUE in the insertion sort) | |
83 | * or potentially many duplicates. | |
84 | */ | |
85 | found=TRUE; | |
86 | start=i+1; | |
87 | } else if(diff<0) { | |
88 | limit=i; | |
89 | } else { | |
90 | start=i; | |
91 | } | |
92 | } | |
374ca955 | 93 | |
57a6839d A |
94 | /* Linear search over the remaining tiny sub-array. */ |
95 | while(start<limit) { | |
96 | int32_t diff=cmp(context, item, array+start*itemSize); | |
97 | if(diff==0) { | |
98 | found=TRUE; | |
99 | } else if(diff<0) { | |
100 | break; | |
101 | } | |
102 | ++start; | |
103 | } | |
104 | return found ? (start-1) : ~start; | |
105 | } | |
374ca955 | 106 | |
57a6839d A |
107 | static void |
108 | doInsertionSort(char *array, int32_t length, int32_t itemSize, | |
109 | UComparator *cmp, const void *context, void *pv) { | |
110 | int32_t j; | |
374ca955 | 111 | |
57a6839d A |
112 | for(j=1; j<length; ++j) { |
113 | char *item=array+j*itemSize; | |
114 | int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context); | |
115 | if(insertionPoint<0) { | |
116 | insertionPoint=~insertionPoint; | |
117 | } else { | |
118 | ++insertionPoint; /* one past the last equal item */ | |
374ca955 | 119 | } |
57a6839d A |
120 | if(insertionPoint<j) { |
121 | char *dest=array+insertionPoint*itemSize; | |
122 | uprv_memcpy(pv, item, itemSize); /* v=array[j] */ | |
a62d09fc | 123 | uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*(size_t)itemSize); |
57a6839d | 124 | uprv_memcpy(dest, pv, itemSize); /* array[insertionPoint]=v */ |
374ca955 A |
125 | } |
126 | } | |
127 | } | |
128 | ||
129 | static void | |
130 | insertionSort(char *array, int32_t length, int32_t itemSize, | |
131 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { | |
132 | UAlignedMemory v[STACK_ITEM_SIZE/sizeof(UAlignedMemory)+1]; | |
133 | void *pv; | |
134 | ||
135 | /* allocate an intermediate item variable (v) */ | |
136 | if(itemSize<=STACK_ITEM_SIZE) { | |
137 | pv=v; | |
138 | } else { | |
139 | pv=uprv_malloc(itemSize); | |
140 | if(pv==NULL) { | |
141 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
142 | return; | |
143 | } | |
144 | } | |
145 | ||
57a6839d | 146 | doInsertionSort(array, length, itemSize, cmp, context, pv); |
374ca955 A |
147 | |
148 | if(pv!=v) { | |
149 | uprv_free(pv); | |
150 | } | |
151 | } | |
152 | ||
153 | /* QuickSort ---------------------------------------------------------------- */ | |
154 | ||
155 | /* | |
156 | * This implementation is semi-recursive: | |
157 | * It recurses for the smaller sub-array to shorten the recursion depth, | |
158 | * and loops for the larger sub-array. | |
159 | * | |
160 | * Loosely after QuickSort algorithms in | |
161 | * Niklaus Wirth | |
162 | * Algorithmen und Datenstrukturen mit Modula-2 | |
163 | * B.G. Teubner Stuttgart | |
164 | * 4. Auflage 1986 | |
165 | * ISBN 3-519-02260-5 | |
166 | */ | |
167 | static void | |
168 | subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize, | |
169 | UComparator *cmp, const void *context, | |
170 | void *px, void *pw) { | |
171 | int32_t left, right; | |
172 | ||
173 | /* start and left are inclusive, limit and right are exclusive */ | |
174 | do { | |
175 | if((start+MIN_QSORT)>=limit) { | |
57a6839d | 176 | doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); |
374ca955 A |
177 | break; |
178 | } | |
179 | ||
180 | left=start; | |
181 | right=limit; | |
182 | ||
183 | /* x=array[middle] */ | |
a62d09fc | 184 | uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); |
374ca955 A |
185 | |
186 | do { | |
187 | while(/* array[left]<x */ | |
188 | cmp(context, array+left*itemSize, px)<0 | |
189 | ) { | |
190 | ++left; | |
191 | } | |
192 | while(/* x<array[right-1] */ | |
193 | cmp(context, px, array+(right-1)*itemSize)<0 | |
194 | ) { | |
195 | --right; | |
196 | } | |
197 | ||
198 | /* swap array[left] and array[right-1] via w; ++left; --right */ | |
199 | if(left<right) { | |
200 | --right; | |
201 | ||
202 | if(left<right) { | |
a62d09fc A |
203 | uprv_memcpy(pw, array+(size_t)left*itemSize, itemSize); |
204 | uprv_memcpy(array+(size_t)left*itemSize, array+(size_t)right*itemSize, itemSize); | |
205 | uprv_memcpy(array+(size_t)right*itemSize, pw, itemSize); | |
374ca955 A |
206 | } |
207 | ||
208 | ++left; | |
209 | } | |
210 | } while(left<right); | |
211 | ||
212 | /* sort sub-arrays */ | |
213 | if((right-start)<(limit-left)) { | |
214 | /* sort [start..right[ */ | |
215 | if(start<(right-1)) { | |
216 | subQuickSort(array, start, right, itemSize, cmp, context, px, pw); | |
217 | } | |
218 | ||
219 | /* sort [left..limit[ */ | |
220 | start=left; | |
221 | } else { | |
222 | /* sort [left..limit[ */ | |
223 | if(left<(limit-1)) { | |
224 | subQuickSort(array, left, limit, itemSize, cmp, context, px, pw); | |
225 | } | |
226 | ||
227 | /* sort [start..right[ */ | |
228 | limit=right; | |
229 | } | |
230 | } while(start<(limit-1)); | |
231 | } | |
232 | ||
233 | static void | |
234 | quickSort(char *array, int32_t length, int32_t itemSize, | |
235 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { | |
236 | UAlignedMemory xw[(2*STACK_ITEM_SIZE)/sizeof(UAlignedMemory)+1]; | |
237 | void *p; | |
238 | ||
239 | /* allocate two intermediate item variables (x and w) */ | |
240 | if(itemSize<=STACK_ITEM_SIZE) { | |
241 | p=xw; | |
242 | } else { | |
243 | p=uprv_malloc(2*itemSize); | |
244 | if(p==NULL) { | |
245 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
246 | return; | |
247 | } | |
248 | } | |
249 | ||
250 | subQuickSort(array, 0, length, itemSize, | |
251 | cmp, context, p, (char *)p+itemSize); | |
252 | ||
253 | if(p!=xw) { | |
254 | uprv_free(p); | |
255 | } | |
256 | } | |
257 | ||
258 | /* uprv_sortArray() API ----------------------------------------------------- */ | |
259 | ||
260 | /* | |
261 | * Check arguments, select an appropriate implementation, | |
262 | * cast the array to char * so that array+i*itemSize works. | |
263 | */ | |
264 | U_CAPI void U_EXPORT2 | |
265 | uprv_sortArray(void *array, int32_t length, int32_t itemSize, | |
266 | UComparator *cmp, const void *context, | |
267 | UBool sortStable, UErrorCode *pErrorCode) { | |
268 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
269 | return; | |
270 | } | |
271 | if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { | |
272 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
273 | return; | |
274 | } | |
275 | ||
276 | if(length<=1) { | |
277 | return; | |
278 | } else if(length<MIN_QSORT || sortStable) { | |
279 | insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode); | |
374ca955 A |
280 | } else { |
281 | quickSort((char *)array, length, itemSize, cmp, context, pErrorCode); | |
282 | } | |
283 | } |