]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /******************************************************************** |
4 | * COPYRIGHT: | |
57a6839d | 5 | * Copyright (c) 1997-2014, International Business Machines Corporation and |
b75a7d8f A |
6 | * others. All Rights Reserved. |
7 | ********************************************************************/ | |
46f4442e | 8 | /******************************************************************************* |
b75a7d8f A |
9 | * |
10 | * File CALLCOLL.C | |
11 | * | |
12 | * Modification History: | |
13 | * Name Description | |
14 | * Madhu Katragadda Ported for C API | |
46f4442e | 15 | ******************************************************************************** |
b75a7d8f A |
16 | */ |
17 | ||
18 | /* | |
19 | * Important: This file is included into intltest/allcoll.cpp so that the | |
20 | * test data is shared. This makes it easier to maintain the test data, | |
21 | * especially since the Unicode data must be portable and quoted character | |
22 | * literals will not work. | |
23 | * If it is included, then there will be a #define INCLUDE_CALLCOLL_C | |
24 | * that must prevent the actual code in here from being part of the | |
25 | * allcoll.cpp compilation. | |
26 | */ | |
27 | ||
28 | /** | |
29 | * CollationDummyTest is a third level test class. This tests creation of | |
30 | * a customized collator object. For example, number 1 to be sorted | |
31 | * equivalent to word 'one'. |
32 | */ | |
33 | ||
34 | #include <string.h> | |
35 | #include <stdlib.h> | |
36 | ||
37 | #include "unicode/utypes.h" | |
38 | ||
39 | #if !UCONFIG_NO_COLLATION | |
40 | ||
41 | #include "unicode/ucol.h" | |
42 | #include "unicode/uloc.h" | |
46f4442e A |
43 | #include "unicode/ures.h" |
44 | #include "unicode/udata.h" | |
b75a7d8f A |
45 | #include "unicode/ucoleitr.h" |
46 | #include "unicode/ustring.h" | |
374ca955 A |
47 | #include "unicode/uclean.h" |
48 | #include "unicode/putil.h" | |
46f4442e | 49 | #include "unicode/uenum.h" |
b75a7d8f A |
50 | |
51 | #include "cintltst.h" | |
52 | #include "ccolltst.h" | |
53 | #include "callcoll.h" | |
54 | #include "calldata.h" | |
55 | #include "cstring.h" | |
56 | #include "cmemory.h" | |
b75a7d8f | 57 | |
46f4442e A |
58 | /* set to 1 to test offsets in backAndForth() */ |
59 | #define TEST_OFFSETS 0 | |
60 | ||
b75a7d8f A |
61 | /* perform test with strength PRIMARY */ |
62 | static void TestPrimary(void); | |
63 | ||
64 | /* perform test with strength SECONDARY */ | |
65 | static void TestSecondary(void); | |
66 | ||
67 | /* perform test with strength tertiary */ | |
68 | static void TestTertiary(void); | |
69 | ||
70 | /*perform tests with strength Identical */ | |
71 | static void TestIdentical(void); | |
72 | ||
73 | /* perform extra tests */ | |
74 | static void TestExtra(void); | |
75 | ||
76 | /* Test jitterbug 581 */ | |
77 | static void TestJB581(void); | |
78 | ||
79 | /* Test jitterbug 1401 */ | |
80 | static void TestJB1401(void); | |
81 | ||
82 | /* Test [variable top] in the rule syntax */ | |
83 | static void TestVariableTop(void); | |
84 | ||
85 | /* Test surrogates */ | |
86 | static void TestSurrogates(void); | |
87 | ||
88 | static void TestInvalidRules(void); | |
89 | ||
90 | static void TestJitterbug1098(void); | |
91 | ||
374ca955 A |
92 | static void TestFCDCrash(void); |
93 | ||
46f4442e A |
94 | static void TestJ5298(void); |
95 | ||
b75a7d8f A |
96 | const UCollationResult results[] = { |
97 | UCOL_LESS, | |
98 | UCOL_LESS, /*UCOL_GREATER,*/ | |
99 | UCOL_LESS, | |
100 | UCOL_LESS, | |
101 | UCOL_LESS, | |
102 | UCOL_LESS, | |
103 | UCOL_LESS, | |
104 | UCOL_GREATER, | |
105 | UCOL_GREATER, | |
106 | UCOL_LESS, /* 10 */ | |
107 | UCOL_GREATER, | |
108 | UCOL_LESS, | |
109 | UCOL_GREATER, | |
110 | UCOL_GREATER, | |
111 | UCOL_LESS, | |
112 | UCOL_LESS, | |
113 | UCOL_LESS, | |
114 | /* test primary > 17 */ | |
115 | UCOL_EQUAL, | |
116 | UCOL_EQUAL, | |
117 | UCOL_EQUAL, /* 20 */ | |
118 | UCOL_LESS, | |
119 | UCOL_LESS, | |
120 | UCOL_EQUAL, | |
121 | UCOL_EQUAL, | |
122 | UCOL_EQUAL, | |
123 | UCOL_LESS, | |
124 | /* test secondary > 26 */ | |
125 | UCOL_EQUAL, | |
126 | UCOL_EQUAL, | |
127 | UCOL_EQUAL, | |
128 | UCOL_EQUAL, | |
129 | UCOL_EQUAL, /* 30 */ | |
130 | UCOL_EQUAL, | |
131 | UCOL_LESS, | |
132 | UCOL_EQUAL, /* 34 */ | |
133 | UCOL_EQUAL, | |
134 | UCOL_EQUAL, | |
135 | UCOL_LESS /* 37 */ | |
136 | }; | |
137 | ||
138 | ||
73c04bcf A |
139 | static |
140 | void uprv_appendByteToHexString(char *dst, uint8_t val) { | |
141 | uint32_t len = (uint32_t)uprv_strlen(dst); | |
142 | *(dst+len) = T_CString_itosOffset((val >> 4)); | |
143 | *(dst+len+1) = T_CString_itosOffset((val & 0xF)); | |
144 | *(dst+len+2) = 0; | |
145 | } | |
146 | ||
147 | /* this function makes a string with representation of a sortkey */ | |
148 | static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) { | |
46f4442e A |
149 | int32_t strength = UCOL_PRIMARY; |
150 | uint32_t res_size = 0; | |
151 | UBool doneCase = FALSE; | |
729e4ab9 | 152 | UErrorCode errorCode = U_ZERO_ERROR; |
73c04bcf | 153 | |
46f4442e A |
154 | char *current = buffer; |
155 | const uint8_t *currentSk = sortkey; | |
73c04bcf | 156 | |
46f4442e | 157 | uprv_strcpy(current, "["); |
73c04bcf | 158 | |
729e4ab9 | 159 | while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) { |
46f4442e A |
160 | if(strength > UCOL_PRIMARY) { |
161 | uprv_strcat(current, " . "); | |
162 | } | |
163 | while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */ | |
164 | uprv_appendByteToHexString(current, *currentSk++); | |
165 | uprv_strcat(current, " "); | |
166 | } | |
729e4ab9 | 167 | if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) { |
46f4442e | 168 | doneCase = TRUE; |
729e4ab9 | 169 | } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) { |
46f4442e A |
170 | strength ++; |
171 | } | |
172 | if (*currentSk) { | |
173 | uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */ | |
174 | } | |
729e4ab9 | 175 | if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) { |
46f4442e A |
176 | break; |
177 | } | |
73c04bcf | 178 | } |
73c04bcf | 179 | |
729e4ab9 | 180 | if(ucol_getStrength(coll) == UCOL_IDENTICAL) { |
46f4442e A |
181 | uprv_strcat(current, " . "); |
182 | while(*currentSk != 0) { | |
183 | uprv_appendByteToHexString(current, *currentSk++); | |
184 | uprv_strcat(current, " "); | |
185 | } | |
73c04bcf | 186 | |
46f4442e A |
187 | uprv_appendByteToHexString(current, *currentSk++); |
188 | } | |
189 | uprv_strcat(current, "]"); | |
73c04bcf | 190 | |
46f4442e A |
191 | if(res_size > *len) { |
192 | return NULL; | |
193 | } | |
73c04bcf | 194 | |
46f4442e | 195 | return buffer; |
73c04bcf A |
196 | } |
197 | ||
b75a7d8f A |
198 | void addAllCollTest(TestNode** root) |
199 | { | |
b75a7d8f A |
200 | addTest(root, &TestPrimary, "tscoll/callcoll/TestPrimary"); |
201 | addTest(root, &TestSecondary, "tscoll/callcoll/TestSecondary"); | |
202 | addTest(root, &TestTertiary, "tscoll/callcoll/TestTertiary"); | |
203 | addTest(root, &TestIdentical, "tscoll/callcoll/TestIdentical"); | |
204 | addTest(root, &TestExtra, "tscoll/callcoll/TestExtra"); | |
46f4442e A |
205 | addTest(root, &TestJB581, "tscoll/callcoll/TestJB581"); |
206 | addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop"); | |
b75a7d8f A |
207 | addTest(root, &TestSurrogates, "tscoll/callcoll/TestSurrogates"); |
208 | addTest(root, &TestInvalidRules, "tscoll/callcoll/TestInvalidRules"); | |
46f4442e A |
209 | addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401"); |
210 | addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098"); | |
374ca955 | 211 | addTest(root, &TestFCDCrash, "tscoll/callcoll/TestFCDCrash"); |
46f4442e A |
212 | addTest(root, &TestJ5298, "tscoll/callcoll/TestJ5298"); |
213 | } | |
b75a7d8f | 214 | |
374ca955 A |
215 | UBool hasCollationElements(const char *locName) { |
216 | ||
217 | UErrorCode status = U_ZERO_ERROR; | |
374ca955 | 218 | |
729e4ab9 | 219 | UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);; |
374ca955 A |
220 | |
221 | if(U_SUCCESS(status)) { | |
222 | status = U_ZERO_ERROR; | |
46f4442e A |
223 | loc = ures_getByKey(loc, "collations", loc, &status); |
224 | ures_close(loc); | |
374ca955 | 225 | if(status == U_ZERO_ERROR) { /* do the test - there are real elements */ |
374ca955 A |
226 | return TRUE; |
227 | } | |
374ca955 A |
228 | } |
229 | return FALSE; | |
230 | } | |
231 | ||
b75a7d8f A |
232 | static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) { |
233 | int32_t partialSKResult = 0; | |
234 | UCharIterator sIter, tIter; | |
235 | uint32_t sState[2], tState[2]; | |
236 | int32_t sSize = pieceSize, tSize = pieceSize; | |
374ca955 | 237 | /*int32_t i = 0;*/ |
b75a7d8f A |
238 | uint8_t sBuf[16384], tBuf[16384]; |
239 | if(pieceSize > 16384) { | |
240 | log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n"); | |
241 | *status = U_BUFFER_OVERFLOW_ERROR; | |
242 | return UCOL_EQUAL; | |
243 | } | |
244 | *status = U_ZERO_ERROR; | |
245 | sState[0] = 0; sState[1] = 0; | |
246 | tState[0] = 0; tState[1] = 0; | |
247 | while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) { | |
248 | uiter_setString(&sIter, source, sLen); | |
249 | uiter_setString(&tIter, target, tLen); | |
250 | sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status); | |
251 | tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status); | |
252 | ||
253 | if(sState[0] != 0 || tState[0] != 0) { | |
374ca955 | 254 | /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/ |
b75a7d8f | 255 | } |
374ca955 | 256 | /*log_verbose("%i ", i++);*/ |
b75a7d8f A |
257 | |
258 | partialSKResult = memcmp(sBuf, tBuf, pieceSize); | |
259 | } | |
260 | ||
261 | if(partialSKResult < 0) { | |
262 | return UCOL_LESS; | |
263 | } else if(partialSKResult > 0) { | |
264 | return UCOL_GREATER; | |
265 | } else { | |
266 | return UCOL_EQUAL; | |
267 | } | |
268 | } | |
269 | ||
270 | static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result) | |
271 | { | |
272 | int32_t sortklen1, sortklen2, sortklenmax, sortklenmin; | |
273 | int temp=0, gSortklen1=0,gSortklen2=0; | |
274 | UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result; | |
275 | uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a; | |
276 | uint32_t sLen = u_strlen(source); | |
277 | uint32_t tLen = u_strlen(target); | |
278 | char buffer[256]; | |
279 | uint32_t len; | |
280 | UErrorCode status = U_ZERO_ERROR; | |
281 | UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); | |
282 | ||
283 | UCharIterator sIter, tIter; | |
51004dcb A |
284 | |
285 | compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen); | |
286 | if (compareResult != result) { | |
287 | log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n", | |
288 | compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1)); | |
289 | } | |
290 | compareResulta = ucol_strcoll(myCollation, source, -1, target, -1); | |
291 | if (compareResulta != result) { | |
292 | log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n", | |
293 | compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1)); | |
294 | } | |
295 | ||
b75a7d8f A |
296 | uiter_setString(&sIter, source, sLen); |
297 | uiter_setString(&tIter, target, tLen); | |
298 | compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); | |
299 | if(compareResultIter != result) { | |
729e4ab9 | 300 | log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); |
b75a7d8f A |
301 | } |
302 | ||
51004dcb A |
303 | /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */ |
304 | { | |
305 | char utf8Source[256], utf8Target[256]; | |
306 | int32_t utf8SourceLen = 0, utf8TargetLen = 0; | |
307 | ||
308 | u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status); | |
309 | if(U_FAILURE(status)) { /* probably buffer is not big enough */ | |
310 | log_verbose("Src UTF-8 buffer too small! Will not compare!\n"); | |
b75a7d8f | 311 | } else { |
51004dcb A |
312 | u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status); |
313 | if(U_SUCCESS(status)) { | |
314 | { | |
315 | /* ucol_strcollUTF8 */ | |
316 | compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status); | |
317 | if (U_FAILURE(status)) { | |
318 | log_err("Error in ucol_strcollUTF8 with explicit length\n"); | |
319 | status = U_ZERO_ERROR; | |
320 | } else if (compareResulta != result) { | |
321 | log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n", | |
322 | compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1)); | |
323 | } | |
324 | compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status); | |
325 | if (U_FAILURE(status)) { | |
326 | log_err("Error in ucol_strcollUTF8 with null terminated strings\n"); | |
327 | status = U_ZERO_ERROR; | |
328 | } else if (compareResulta != result) { | |
329 | log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n", | |
330 | compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1)); | |
331 | } | |
332 | } | |
333 | ||
334 | { | |
335 | /* char iterator over UTF8 */ | |
336 | UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result; | |
337 | ||
338 | uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); | |
339 | uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); | |
340 | compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); | |
341 | ||
342 | ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
343 | sIter.move(&sIter, 0, UITER_START); | |
344 | tIter.move(&tIter, 0, UITER_START); | |
345 | compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); | |
346 | ||
347 | ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); | |
348 | if(compareResultUTF8Iter != compareResultIter) { | |
349 | log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); | |
350 | } | |
351 | if(compareResultUTF8Iter != compareResultUTF8IterNorm) { | |
352 | log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); | |
353 | } | |
354 | } | |
355 | } else { | |
356 | log_verbose("Target UTF-8 buffer too small! Did not compare!\n"); | |
357 | } | |
358 | if(U_FAILURE(status)) { | |
359 | log_verbose("UTF-8 strcoll failed! Ignoring result\n"); | |
360 | } | |
b75a7d8f | 361 | } |
b75a7d8f A |
362 | } |
363 | ||
364 | /* testing the partial sortkeys */ | |
365 | if(1) { /*!QUICK*/ | |
366 | int32_t i = 0; | |
367 | int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ | |
368 | int32_t partialSizesSize = 1; | |
729e4ab9 | 369 | if(getTestOption(QUICK_OPTION) <= 0) { |
b75a7d8f A |
370 | partialSizesSize = 7; |
371 | } | |
374ca955 | 372 | /*log_verbose("partial sortkey test piecesize=");*/ |
b75a7d8f A |
373 | for(i = 0; i < partialSizesSize; i++) { |
374 | UCollationResult partialSKResult = result, partialNormalizedSKResult = result; | |
374ca955 | 375 | /*log_verbose("%i ", partialSizes[i]);*/ |
b75a7d8f A |
376 | |
377 | partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status); | |
378 | if(partialSKResult != result) { | |
73c04bcf A |
379 | log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n", |
380 | partialSKResult, result, | |
b75a7d8f A |
381 | aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]); |
382 | } | |
383 | ||
729e4ab9 | 384 | if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) { |
374ca955 | 385 | /*log_verbose("N ");*/ |
b75a7d8f A |
386 | ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
387 | partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status); | |
388 | ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); | |
389 | if(partialSKResult != partialNormalizedSKResult) { | |
390 | log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n", | |
391 | aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]); | |
392 | } | |
393 | } | |
394 | } | |
374ca955 | 395 | /*log_verbose("\n");*/ |
b75a7d8f A |
396 | } |
397 | ||
b75a7d8f A |
398 | sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0); |
399 | sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0); | |
400 | ||
401 | sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2); | |
402 | sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2); | |
57a6839d | 403 | (void)sortklenmin; /* Suppress set but not used warning. */ |
b75a7d8f A |
404 | |
405 | sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); | |
406 | sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); | |
407 | ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1); | |
408 | ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1); | |
409 | ||
410 | sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); | |
411 | sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); | |
412 | ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1); | |
413 | ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1); | |
414 | ||
415 | /* Check that sort key generated with null terminated string is identical */ | |
416 | /* to that generted with a length specified. */ | |
417 | if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 || | |
418 | uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) { | |
419 | log_err("Sort Keys from null terminated and explicit length strings differ.\n"); | |
420 | } | |
421 | ||
422 | /*memcmp(sortKey1, sortKey2,sortklenmax);*/ | |
423 | temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2); | |
424 | gSortklen1 = uprv_strlen((const char *)sortKey1)+1; | |
425 | gSortklen2 = uprv_strlen((const char *)sortKey2)+1; | |
426 | if(sortklen1 != gSortklen1){ | |
427 | log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1); | |
73c04bcf | 428 | log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len)); |
b75a7d8f A |
429 | } |
430 | if(sortklen2!= gSortklen2){ | |
431 | log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2); | |
73c04bcf | 432 | log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len)); |
b75a7d8f A |
433 | } |
434 | ||
435 | if(temp < 0) { | |
436 | keyResult=UCOL_LESS; | |
437 | } | |
438 | else if(temp > 0) { | |
439 | keyResult= UCOL_GREATER; | |
440 | } | |
441 | else { | |
442 | keyResult = UCOL_EQUAL; | |
443 | } | |
444 | reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result ); | |
445 | free(sortKey1); | |
446 | free(sortKey2); | |
447 | free(sortKey1a); | |
448 | free(sortKey2a); | |
449 | ||
450 | } | |
451 | ||
452 | void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result) | |
453 | { | |
454 | if(myCollation) { | |
455 | doTestVariant(myCollation, source, target, result); | |
456 | if(result == UCOL_LESS) { | |
457 | doTestVariant(myCollation, target, source, UCOL_GREATER); | |
458 | } else if(result == UCOL_GREATER) { | |
459 | doTestVariant(myCollation, target, source, UCOL_LESS); | |
460 | } else { | |
461 | doTestVariant(myCollation, target, source, UCOL_EQUAL); | |
462 | } | |
463 | } else { | |
464 | log_data_err("No collator! Any data around?\n"); | |
465 | } | |
466 | } | |
467 | ||
374ca955 A |
468 | |
469 | /** | |
470 | * Return an integer array containing all of the collation orders | |
471 | * returned by calls to next on the specified iterator | |
472 | */ | |
46f4442e | 473 | OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength) |
374ca955 A |
474 | { |
475 | UErrorCode status; | |
476 | int32_t order; | |
477 | int32_t maxSize = 100; | |
478 | int32_t size = 0; | |
46f4442e A |
479 | int32_t offset = ucol_getOffset(iter); |
480 | OrderAndOffset *temp; | |
481 | OrderAndOffset *orders =(OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize); | |
374ca955 A |
482 | status= U_ZERO_ERROR; |
483 | ||
484 | ||
485 | while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER) | |
486 | { | |
487 | if (size == maxSize) | |
488 | { | |
489 | maxSize *= 2; | |
46f4442e | 490 | temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize); |
374ca955 | 491 | |
46f4442e | 492 | memcpy(temp, orders, size * sizeof(OrderAndOffset)); |
374ca955 A |
493 | free(orders); |
494 | orders = temp; | |
495 | ||
496 | } | |
497 | ||
46f4442e A |
498 | orders[size].order = order; |
499 | orders[size].offset = offset; | |
500 | ||
501 | offset = ucol_getOffset(iter); | |
502 | size += 1; | |
374ca955 A |
503 | } |
504 | ||
505 | if (maxSize > size && size > 0) | |
506 | { | |
46f4442e | 507 | temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * size); |
374ca955 | 508 | |
46f4442e | 509 | memcpy(temp, orders, size * sizeof(OrderAndOffset)); |
374ca955 A |
510 | free(orders); |
511 | orders = temp; | |
512 | ||
513 | ||
514 | } | |
515 | ||
516 | *orderLength = size; | |
517 | return orders; | |
518 | } | |
519 | ||
520 | ||
521 | void | |
522 | backAndForth(UCollationElements *iter) | |
523 | { | |
524 | /* Run through the iterator forwards and stick it into an array */ | |
51004dcb | 525 | int32_t idx, o; |
374ca955 A |
526 | UErrorCode status = U_ZERO_ERROR; |
527 | int32_t orderLength = 0; | |
46f4442e | 528 | OrderAndOffset *orders = getOrders(iter, &orderLength); |
374ca955 A |
529 | |
530 | ||
531 | /* Now go through it backwards and make sure we get the same values */ | |
51004dcb | 532 | idx = orderLength; |
374ca955 A |
533 | ucol_reset(iter); |
534 | ||
535 | /* synwee : changed */ | |
46f4442e | 536 | while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) { |
729e4ab9 A |
537 | #if TEST_OFFSETS |
538 | int32_t offset = | |
539 | #endif | |
540 | ucol_getOffset(iter); | |
46f4442e | 541 | |
51004dcb A |
542 | idx -= 1; |
543 | if (o != orders[idx].order) { | |
374ca955 | 544 | if (o == 0) |
51004dcb | 545 | idx ++; |
46f4442e | 546 | else { |
51004dcb | 547 | while (idx > 0 && orders[-- idx].order == 0) { |
46f4442e | 548 | /* nothing... */ |
374ca955 | 549 | } |
374ca955 | 550 | |
51004dcb A |
551 | if (o != orders[idx].order) { |
552 | log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", idx, | |
553 | orders[idx].order, o); | |
46f4442e A |
554 | goto bail; |
555 | } | |
374ca955 A |
556 | } |
557 | } | |
46f4442e A |
558 | |
559 | #if TEST_OFFSETS | |
51004dcb A |
560 | if (offset != orders[idx].offset) { |
561 | log_err("Mismatched offset at index %d: %d vs. %d\n", idx, | |
562 | orders[idx].offset, offset); | |
46f4442e A |
563 | goto bail; |
564 | } | |
565 | #endif | |
566 | ||
374ca955 A |
567 | } |
568 | ||
51004dcb A |
569 | while (idx != 0 && orders[idx - 1].order == 0) { |
570 | idx -= 1; | |
374ca955 A |
571 | } |
572 | ||
51004dcb A |
573 | if (idx != 0) { |
574 | log_err("Didn't get back to beginning - index is %d\n", idx); | |
374ca955 A |
575 | |
576 | ucol_reset(iter); | |
577 | log_err("\nnext: "); | |
46f4442e A |
578 | |
579 | if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER) { | |
374ca955 A |
580 | log_err("Error at %x\n", o); |
581 | } | |
46f4442e | 582 | |
374ca955 | 583 | log_err("\nprev: "); |
46f4442e A |
584 | |
585 | if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) { | |
374ca955 A |
586 | log_err("Error at %x\n", o); |
587 | } | |
46f4442e | 588 | |
374ca955 A |
589 | log_verbose("\n"); |
590 | } | |
591 | ||
46f4442e | 592 | bail: |
374ca955 A |
593 | free(orders); |
594 | } | |
595 | ||
46f4442e | 596 | void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result) { |
374ca955 A |
597 | UChar t1[2048] = {0}; |
598 | UChar t2[2048] = {0}; | |
599 | UCollationElements *iter; | |
600 | UErrorCode status = U_ZERO_ERROR; | |
601 | ||
602 | uint32_t i = 0, j = 0; | |
603 | log_verbose("testing sequence:\n"); | |
604 | for(i = 0; i < size; i++) { | |
605 | log_verbose("%s\n", s[i]); | |
606 | } | |
607 | ||
608 | iter = ucol_openElements(coll, t1, u_strlen(t1), &status); | |
609 | if (U_FAILURE(status)) { | |
610 | log_err("Creation of iterator failed\n"); | |
611 | } | |
612 | for(i = 0; i < size-1; i++) { | |
613 | for(j = i+1; j < size; j++) { | |
614 | u_unescape(s[i], t1, 2048); | |
615 | u_unescape(s[j], t2, 2048); | |
616 | doTest(coll, t1, t2, result); | |
617 | /* synwee : added collation element iterator test */ | |
618 | ucol_setText(iter, t1, u_strlen(t1), &status); | |
619 | backAndForth(iter); | |
620 | ucol_setText(iter, t2, u_strlen(t2), &status); | |
621 | backAndForth(iter); | |
622 | } | |
623 | } | |
624 | ucol_closeElements(iter); | |
625 | } | |
626 | ||
46f4442e | 627 | void genericOrderingTest(UCollator *coll, const char * const s[], uint32_t size) { |
374ca955 A |
628 | genericOrderingTestWithResult(coll, s, size, UCOL_LESS); |
629 | } | |
630 | ||
46f4442e | 631 | void genericLocaleStarter(const char *locale, const char * const s[], uint32_t size) { |
374ca955 A |
632 | UErrorCode status = U_ZERO_ERROR; |
633 | UCollator *coll = ucol_open(locale, &status); | |
634 | ||
635 | log_verbose("Locale starter for %s\n", locale); | |
636 | ||
637 | if(U_SUCCESS(status)) { | |
638 | genericOrderingTest(coll, s, size); | |
639 | } else if(status == U_FILE_ACCESS_ERROR) { | |
640 | log_data_err("Is your data around?\n"); | |
641 | return; | |
642 | } else { | |
643 | log_err("Unable to open collator for locale %s\n", locale); | |
644 | } | |
645 | ucol_close(coll); | |
646 | } | |
647 | ||
46f4442e | 648 | void genericLocaleStarterWithResult(const char *locale, const char * const s[], uint32_t size, UCollationResult result) { |
374ca955 A |
649 | UErrorCode status = U_ZERO_ERROR; |
650 | UCollator *coll = ucol_open(locale, &status); | |
651 | ||
652 | log_verbose("Locale starter for %s\n", locale); | |
653 | ||
654 | if(U_SUCCESS(status)) { | |
655 | genericOrderingTestWithResult(coll, s, size, result); | |
656 | } else if(status == U_FILE_ACCESS_ERROR) { | |
657 | log_data_err("Is your data around?\n"); | |
658 | return; | |
659 | } else { | |
660 | log_err("Unable to open collator for locale %s\n", locale); | |
661 | } | |
662 | ucol_close(coll); | |
663 | } | |
664 | ||
374ca955 | 665 | /* currently not used with options */ |
46f4442e | 666 | void genericRulesStarterWithOptionsAndResult(const char *rules, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) { |
374ca955 A |
667 | UErrorCode status = U_ZERO_ERROR; |
668 | UChar rlz[RULE_BUFFER_LEN] = { 0 }; | |
669 | uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN); | |
670 | uint32_t i; | |
671 | ||
672 | UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); | |
673 | ||
674 | log_verbose("Rules starter for %s\n", rules); | |
675 | ||
676 | if(U_SUCCESS(status)) { | |
677 | log_verbose("Setting attributes\n"); | |
678 | for(i = 0; i < attsize; i++) { | |
679 | ucol_setAttribute(coll, attrs[i], values[i], &status); | |
680 | } | |
681 | ||
73c04bcf | 682 | genericOrderingTestWithResult(coll, s, size, result); |
374ca955 | 683 | } else { |
729e4ab9 | 684 | log_err_status(status, "Unable to open collator with rules %s\n", rules); |
374ca955 A |
685 | } |
686 | ucol_close(coll); | |
687 | } | |
374ca955 | 688 | |
46f4442e | 689 | void genericLocaleStarterWithOptionsAndResult(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) { |
374ca955 A |
690 | UErrorCode status = U_ZERO_ERROR; |
691 | uint32_t i; | |
692 | ||
693 | UCollator *coll = ucol_open(locale, &status); | |
694 | ||
695 | log_verbose("Locale starter for %s\n", locale); | |
696 | ||
697 | if(U_SUCCESS(status)) { | |
698 | ||
699 | log_verbose("Setting attributes\n"); | |
700 | for(i = 0; i < attsize; i++) { | |
701 | ucol_setAttribute(coll, attrs[i], values[i], &status); | |
702 | } | |
703 | ||
73c04bcf | 704 | genericOrderingTestWithResult(coll, s, size, result); |
374ca955 | 705 | } else { |
729e4ab9 | 706 | log_err_status(status, "Unable to open collator for locale %s\n", locale); |
374ca955 A |
707 | } |
708 | ucol_close(coll); | |
709 | } | |
710 | ||
46f4442e | 711 | void genericLocaleStarterWithOptions(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) { |
73c04bcf A |
712 | genericLocaleStarterWithOptionsAndResult(locale, s, size, attrs, values, attsize, UCOL_LESS); |
713 | } | |
714 | ||
46f4442e | 715 | void genericRulesStarterWithResult(const char *rules, const char * const s[], uint32_t size, UCollationResult result) { |
374ca955 A |
716 | UErrorCode status = U_ZERO_ERROR; |
717 | UChar rlz[RULE_BUFFER_LEN] = { 0 }; | |
718 | uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN); | |
719 | ||
720 | UCollator *coll = NULL; | |
721 | coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); | |
722 | log_verbose("Rules starter for %s\n", rules); | |
723 | ||
724 | if(U_SUCCESS(status)) { | |
725 | genericOrderingTestWithResult(coll, s, size, result); | |
726 | ucol_close(coll); | |
727 | } else if(status == U_FILE_ACCESS_ERROR) { | |
728 | log_data_err("Is your data around?\n"); | |
729 | } else { | |
730 | log_err("Unable to open collator with rules %s\n", rules); | |
731 | } | |
732 | } | |
733 | ||
46f4442e | 734 | void genericRulesStarter(const char *rules, const char * const s[], uint32_t size) { |
73c04bcf | 735 | genericRulesStarterWithResult(rules, s, size, UCOL_LESS); |
374ca955 A |
736 | } |
737 | ||
b75a7d8f A |
738 | static void TestTertiary() |
739 | { | |
740 | int32_t len,i; | |
b75a7d8f A |
741 | UCollator *myCollation; |
742 | UErrorCode status=U_ZERO_ERROR; | |
46f4442e A |
743 | static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; |
744 | UChar rules[sizeof(str)]; | |
b75a7d8f | 745 | len = strlen(str); |
b75a7d8f A |
746 | u_uastrcpy(rules, str); |
747 | ||
748 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); | |
749 | if(U_FAILURE(status)){ | |
729e4ab9 | 750 | log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); |
46f4442e | 751 | return; |
b75a7d8f A |
752 | } |
753 | ||
754 | ucol_setStrength(myCollation, UCOL_TERTIARY); | |
755 | for (i = 0; i < 17 ; i++) | |
756 | { | |
757 | doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
758 | } | |
b75a7d8f A |
759 | ucol_close(myCollation); |
760 | myCollation = 0; | |
761 | } | |
762 | ||
763 | static void TestPrimary( ) | |
764 | { | |
765 | int32_t len,i; | |
b75a7d8f A |
766 | UCollator *myCollation; |
767 | UErrorCode status=U_ZERO_ERROR; | |
46f4442e A |
768 | static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; |
769 | UChar rules[sizeof(str)]; | |
b75a7d8f | 770 | len = strlen(str); |
b75a7d8f A |
771 | u_uastrcpy(rules, str); |
772 | ||
773 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); | |
774 | if(U_FAILURE(status)){ | |
729e4ab9 | 775 | log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); |
46f4442e | 776 | return; |
b75a7d8f A |
777 | } |
778 | ucol_setStrength(myCollation, UCOL_PRIMARY); | |
779 | ||
780 | for (i = 17; i < 26 ; i++) | |
781 | { | |
782 | ||
783 | doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
784 | } | |
b75a7d8f A |
785 | ucol_close(myCollation); |
786 | myCollation = 0; | |
787 | } | |
788 | ||
789 | static void TestSecondary() | |
790 | { | |
791 | int32_t i; | |
792 | int32_t len; | |
b75a7d8f A |
793 | UCollator *myCollation; |
794 | UErrorCode status=U_ZERO_ERROR; | |
46f4442e A |
795 | static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; |
796 | UChar rules[sizeof(str)]; | |
b75a7d8f | 797 | len = strlen(str); |
b75a7d8f A |
798 | u_uastrcpy(rules, str); |
799 | ||
800 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); | |
801 | if(U_FAILURE(status)){ | |
729e4ab9 | 802 | log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); |
46f4442e | 803 | return; |
b75a7d8f A |
804 | } |
805 | ucol_setStrength(myCollation, UCOL_SECONDARY); | |
806 | for (i = 26; i < 34 ; i++) | |
807 | { | |
808 | doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
809 | } | |
b75a7d8f A |
810 | ucol_close(myCollation); |
811 | myCollation = 0; | |
812 | } | |
813 | ||
814 | static void TestIdentical() | |
815 | { | |
816 | int32_t i; | |
817 | int32_t len; | |
b75a7d8f A |
818 | UCollator *myCollation; |
819 | UErrorCode status=U_ZERO_ERROR; | |
46f4442e A |
820 | static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; |
821 | UChar rules[sizeof(str)]; | |
b75a7d8f | 822 | len = strlen(str); |
b75a7d8f A |
823 | u_uastrcpy(rules, str); |
824 | ||
825 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status); | |
826 | if(U_FAILURE(status)){ | |
729e4ab9 | 827 | log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); |
46f4442e | 828 | return; |
b75a7d8f A |
829 | } |
830 | for(i= 34; i<37; i++) | |
831 | { | |
832 | doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
833 | } | |
b75a7d8f A |
834 | ucol_close(myCollation); |
835 | myCollation = 0; | |
836 | } | |
837 | ||
838 | static void TestExtra() | |
839 | { | |
840 | int32_t i, j; | |
841 | int32_t len; | |
b75a7d8f A |
842 | UCollator *myCollation; |
843 | UErrorCode status = U_ZERO_ERROR; | |
46f4442e A |
844 | static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; |
845 | UChar rules[sizeof(str)]; | |
b75a7d8f | 846 | len = strlen(str); |
b75a7d8f A |
847 | u_uastrcpy(rules, str); |
848 | ||
849 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); | |
850 | if(U_FAILURE(status)){ | |
729e4ab9 | 851 | log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); |
46f4442e | 852 | return; |
b75a7d8f A |
853 | } |
854 | ucol_setStrength(myCollation, UCOL_TERTIARY); | |
855 | for (i = 0; i < COUNT_TEST_CASES-1 ; i++) | |
856 | { | |
857 | for (j = i + 1; j < COUNT_TEST_CASES; j += 1) | |
858 | { | |
859 | ||
860 | doTest(myCollation, testCases[i], testCases[j], UCOL_LESS); | |
861 | } | |
862 | } | |
b75a7d8f A |
863 | ucol_close(myCollation); |
864 | myCollation = 0; | |
865 | } | |
866 | ||
867 | static void TestJB581(void) | |
868 | { | |
b75a7d8f A |
869 | int32_t bufferLen = 0; |
870 | UChar source [100]; | |
871 | UChar target [100]; | |
872 | UCollationResult result = UCOL_EQUAL; | |
873 | uint8_t sourceKeyArray [100]; | |
874 | uint8_t targetKeyArray [100]; | |
875 | int32_t sourceKeyOut = 0, | |
876 | targetKeyOut = 0; | |
877 | UCollator *myCollator = 0; | |
878 | UErrorCode status = U_ZERO_ERROR; | |
879 | ||
880 | /*u_uastrcpy(source, "This is a test.");*/ | |
881 | /*u_uastrcpy(target, "THISISATEST.");*/ | |
882 | u_uastrcpy(source, "THISISATEST."); | |
883 | u_uastrcpy(target, "Thisisatest."); | |
884 | ||
885 | myCollator = ucol_open("en_US", &status); | |
886 | if (U_FAILURE(status)){ | |
729e4ab9 | 887 | log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status)); |
b75a7d8f A |
888 | return; |
889 | } | |
890 | result = ucol_strcoll(myCollator, source, -1, target, -1); | |
891 | /* result is 1, secondary differences only for ignorable space characters*/ | |
892 | if (result != 1) | |
893 | { | |
894 | log_err("Comparing two strings with only secondary differences in C failed.\n"); | |
895 | } | |
896 | /* To compare them with just primary differences */ | |
897 | ucol_setStrength(myCollator, UCOL_PRIMARY); | |
898 | result = ucol_strcoll(myCollator, source, -1, target, -1); | |
899 | /* result is 0 */ | |
900 | if (result != 0) | |
901 | { | |
902 | log_err("Comparing two strings with no differences in C failed.\n"); | |
903 | } | |
904 | /* Now, do the same comparison with keys */ | |
905 | sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100); | |
57a6839d | 906 | (void)sourceKeyOut; /* Suppress set but not used warning. */ |
b75a7d8f | 907 | targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100); |
b75a7d8f | 908 | bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut); |
73c04bcf | 909 | if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0) |
b75a7d8f A |
910 | { |
911 | log_err("Comparing two strings with sort keys in C failed.\n"); | |
912 | } | |
913 | ucol_close(myCollator); | |
914 | } | |
915 | ||
916 | static void TestJB1401(void) | |
917 | { | |
918 | UCollator *myCollator = 0; | |
919 | UErrorCode status = U_ZERO_ERROR; | |
920 | static UChar NFD_UnsafeStartChars[] = { | |
921 | 0x0f73, /* Tibetan Vowel Sign II */ | |
922 | 0x0f75, /* Tibetan Vowel Sign UU */ | |
923 | 0x0f81, /* Tibetan Vowel Sign Reversed II */ | |
924 | 0 | |
925 | }; | |
926 | int i; | |
927 | ||
928 | ||
929 | myCollator = ucol_open("en_US", &status); | |
930 | if (U_FAILURE(status)){ | |
729e4ab9 | 931 | log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status)); |
b75a7d8f A |
932 | return; |
933 | } | |
934 | ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
935 | if (U_FAILURE(status)){ | |
936 | log_err("ERROR: Failed to set normalization mode ON for collator.\n"); | |
937 | return; | |
938 | } | |
939 | ||
940 | for (i=0; ; i++) { | |
941 | UChar c; | |
942 | UChar X[4]; | |
943 | UChar Y[20]; | |
944 | UChar Z[20]; | |
945 | ||
946 | /* Get the next funny character to be tested, and set up the | |
947 | * three test strings X, Y, Z, consisting of an A-grave + test char, | |
948 | * in original form, NFD, and then NFC form. | |
949 | */ | |
950 | c = NFD_UnsafeStartChars[i]; | |
951 | if (c==0) {break;} | |
952 | ||
953 | X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/ | |
954 | ||
955 | unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status); | |
956 | unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status); | |
957 | if (U_FAILURE(status)){ | |
958 | log_err("ERROR: Failed to normalize test of character %x\n", c); | |
959 | return; | |
960 | } | |
961 | ||
962 | /* Collation test. All three strings should be equal. | |
963 | * doTest does both strcoll and sort keys, with params in both orders. | |
964 | */ | |
965 | doTest(myCollator, X, Y, UCOL_EQUAL); | |
966 | doTest(myCollator, X, Z, UCOL_EQUAL); | |
967 | doTest(myCollator, Y, Z, UCOL_EQUAL); | |
968 | ||
969 | /* Run collation element iterators over the three strings. Results should be same for each. | |
970 | */ | |
971 | { | |
972 | UCollationElements *ceiX, *ceiY, *ceiZ; | |
973 | int32_t ceX, ceY, ceZ; | |
974 | int j; | |
975 | ||
976 | ceiX = ucol_openElements(myCollator, X, -1, &status); | |
977 | ceiY = ucol_openElements(myCollator, Y, -1, &status); | |
978 | ceiZ = ucol_openElements(myCollator, Z, -1, &status); | |
979 | if (U_FAILURE(status)) { | |
980 | log_err("ERROR: uucol_openElements failed.\n"); | |
981 | return; | |
982 | } | |
983 | ||
984 | for (j=0;; j++) { | |
985 | ceX = ucol_next(ceiX, &status); | |
986 | ceY = ucol_next(ceiY, &status); | |
987 | ceZ = ucol_next(ceiZ, &status); | |
988 | if (U_FAILURE(status)) { | |
989 | log_err("ERROR: ucol_next failed for iteration #%d.\n", j); | |
990 | break; | |
991 | } | |
992 | if (ceX != ceY || ceY != ceZ) { | |
993 | log_err("ERROR: ucol_next failed for iteration #%d.\n", j); | |
994 | break; | |
995 | } | |
996 | if (ceX == UCOL_NULLORDER) { | |
997 | break; | |
998 | } | |
999 | } | |
1000 | ucol_closeElements(ceiX); | |
1001 | ucol_closeElements(ceiY); | |
1002 | ucol_closeElements(ceiZ); | |
1003 | } | |
1004 | } | |
1005 | ucol_close(myCollator); | |
1006 | } | |
1007 | ||
1008 | ||
1009 | ||
1010 | /** | |
1011 | * Tests the [variable top] tag in rule syntax. Since the default [alternate] | |
1012 | * tag has the value shifted, any codepoints before [variable top] should give | |
1013 | * a primary ce of 0. | |
1014 | */ | |
static void TestVariableTop(void)
{
#if 0
    /*
     * Disabled: starting with ICU 53, setting the variable top via a pseudo
     * relation string is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char ruleText[] = "&z = [variable top]";
    UChar             ruleUnits[sizeof(ruleText)];
    int               ruleLength = strlen(ruleText);
    int               keyLength;
    UErrorCode        errorCode = U_ZERO_ERROR;
    UCollator        *english;
    UCollator        *tailored;
    UChar             oneUnit[1];
    UChar             letter;
    uint8_t           actualKey[20];
    uint8_t           zeroKey[20];

    u_uastrcpy(ruleUnits, ruleText);

    english = ucol_open("en_US", &errorCode);
    if (U_FAILURE(errorCode)) {
        log_err_status(errorCode, "ERROR: in creation of collator :%s\n",
            myErrorName(errorCode));
        return;
    }
    tailored = ucol_openRules(ruleUnits, ruleLength, UCOL_OFF,
                              UCOL_PRIMARY,NULL, &errorCode);
    if (U_FAILURE(errorCode)) {
        ucol_close(english);
        log_err("ERROR: in creation of rule based collator :%s\n",
            myErrorName(errorCode));
        return;
    }

    ucol_setStrength(english, UCOL_PRIMARY);
    ucol_setAttribute(english, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &errorCode);
    ucol_setAttribute(tailored, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &errorCode);

    if (ucol_getAttribute(tailored, UCOL_ALTERNATE_HANDLING, &errorCode) !=
        UCOL_SHIFTED || U_FAILURE(errorCode)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(zeroKey, 0, 20);

    /* space is supposed to be a variable */
    oneUnit[0] = ' ';
    keyLength = ucol_getSortKey(english, oneUnit, 1, actualKey,
                                sizeof(actualKey));
    if (uprv_memcmp(zeroKey, actualKey, keyLength) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* every letter from 'a' up to (not including) 'z' is checked with the tailored collator */
    for (letter = 'a'; letter < 'z'; letter ++) {
        oneUnit[0] = letter;
        keyLength = ucol_getSortKey(tailored, oneUnit, 1, actualKey,
                                    sizeof(actualKey));
        if (uprv_memcmp(zeroKey, actualKey, keyLength) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
                    letter);
        }
    }

    ucol_close(english);
    ucol_close(tailored);
    english = NULL;
    tailored = NULL;
#endif
}
1092 | ||
1093 | /** | |
1094 | * Tests surrogate support. | |
1095 | * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Deseret |
1096 | * Therefore, another (unassigned) code point was used for this test. | |
1097 | */ | |
1098 | static void TestSurrogates(void) | |
1099 | { | |
46f4442e | 1100 | static const char str[] = |
b75a7d8f A |
1101 | "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A"; |
1102 | int len = strlen(str); | |
1103 | int rlen = 0; | |
46f4442e | 1104 | UChar rules[sizeof(str)]; |
b75a7d8f A |
1105 | UCollator *myCollation; |
1106 | UCollator *enCollation; | |
1107 | UErrorCode status = U_ZERO_ERROR; | |
1108 | UChar source[][4] = | |
1109 | {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}}; | |
1110 | UChar target[][4] = | |
1111 | {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}}; | |
1112 | int count = 0; | |
1113 | uint8_t enresult[20], myresult[20]; | |
1114 | int enlen, mylen; | |
1115 | ||
1116 | /* tests for open rules with surrogate rules */ | |
b75a7d8f A |
1117 | rlen = u_unescape(str, rules, len); |
1118 | ||
1119 | enCollation = ucol_open("en_US", &status); | |
46f4442e | 1120 | if (U_FAILURE(status)) { |
729e4ab9 | 1121 | log_err_status(status, "ERROR: in creation of collator :%s\n", |
46f4442e A |
1122 | myErrorName(status)); |
1123 | return; | |
1124 | } | |
b75a7d8f A |
1125 | myCollation = ucol_openRules(rules, rlen, UCOL_OFF, |
1126 | UCOL_TERTIARY,NULL, &status); | |
1127 | if (U_FAILURE(status)) { | |
46f4442e | 1128 | ucol_close(enCollation); |
b75a7d8f A |
1129 | log_err("ERROR: in creation of rule based collator :%s\n", |
1130 | myErrorName(status)); | |
1131 | return; | |
1132 | } | |
1133 | ||
1134 | /* | |
1135 | this test is to verify the supplementary sort key order in the english | |
1136 | collator | |
1137 | */ | |
1138 | log_verbose("start of english collation supplementary characters test\n"); | |
1139 | while (count < 2) { | |
1140 | doTest(enCollation, source[count], target[count], UCOL_LESS); | |
1141 | count ++; | |
1142 | } | |
1143 | doTest(enCollation, source[count], target[count], UCOL_GREATER); | |
1144 | ||
1145 | log_verbose("start of tailored collation supplementary characters test\n"); | |
1146 | count = 0; | |
1147 | /* tests getting collation elements for surrogates for tailored rules */ | |
1148 | while (count < 4) { | |
1149 | doTest(myCollation, source[count], target[count], UCOL_LESS); | |
1150 | count ++; | |
1151 | } | |
1152 | ||
1153 | /* tests that \uD800\uDC02 still has the same value, not changed */ | |
1154 | enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20); | |
1155 | mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20); | |
1156 | if (enlen != mylen || | |
1157 | uprv_memcmp(enresult, myresult, enlen) != 0) { | |
1158 | log_verbose("Failed : non-tailored supplementary characters should have the same value\n"); | |
1159 | } | |
1160 | ||
b75a7d8f A |
1161 | ucol_close(enCollation); |
1162 | ucol_close(myCollation); | |
1163 | enCollation = NULL; | |
1164 | myCollation = NULL; | |
1165 | } | |
1166 | ||
1167 | /* | |
1168 | *### TODO: Add more invalid rules to test all different scenarios. | |
1169 | * | |
1170 | */ | |
1171 | static void | |
1172 | TestInvalidRules(){ | |
1173 | #define MAX_ERROR_STATES 2 | |
1174 | ||
1175 | static const char* rulesArr[MAX_ERROR_STATES] = { | |
1176 | "& C < ch, cH, Ch[this should fail]<d", | |
1177 | "& C < ch, cH, & Ch[variable top]" | |
1178 | }; | |
1179 | static const char* preContextArr[MAX_ERROR_STATES] = { | |
57a6839d A |
1180 | " C < ch, cH, Ch", |
1181 | "& C < ch, cH", | |
b75a7d8f A |
1182 | |
1183 | }; | |
1184 | static const char* postContextArr[MAX_ERROR_STATES] = { | |
57a6839d A |
1185 | "[this should fa", |
1186 | ", & Ch[variable" | |
b75a7d8f A |
1187 | }; |
1188 | int i; | |
1189 | ||
1190 | for(i = 0;i<MAX_ERROR_STATES;i++){ | |
1191 | UChar rules[1000] = { '\0' }; | |
1192 | UChar preContextExp[1000] = { '\0' }; | |
1193 | UChar postContextExp[1000] = { '\0' }; | |
1194 | UParseError parseError; | |
1195 | UErrorCode status = U_ZERO_ERROR; | |
1196 | UCollator* coll=0; | |
1197 | u_charsToUChars(rulesArr[i],rules,uprv_strlen(rulesArr[i])+1); | |
1198 | u_charsToUChars(preContextArr[i],preContextExp,uprv_strlen(preContextArr[i])+1); | |
1199 | u_charsToUChars(postContextArr[i],postContextExp,uprv_strlen(postContextArr[i])+1); | |
1200 | /* clean up stuff in parseError */ | |
1201 | u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN); | |
1202 | u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN); | |
1203 | /* open the rules and test */ | |
1204 | coll = ucol_openRules(rules,u_strlen(rules),UCOL_OFF,UCOL_DEFAULT_STRENGTH,&parseError,&status); | |
57a6839d | 1205 | (void)coll; /* Suppress set but not used warning. */ |
b75a7d8f | 1206 | if(u_strcmp(parseError.preContext,preContextExp)!=0){ |
57a6839d A |
1207 | log_err_status(status, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n", |
1208 | aescstrdup(parseError.preContext, -1)); | |
b75a7d8f A |
1209 | } |
1210 | if(u_strcmp(parseError.postContext,postContextExp)!=0){ | |
57a6839d A |
1211 | log_err_status(status, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n", |
1212 | aescstrdup(parseError.postContext, -1)); | |
b75a7d8f A |
1213 | } |
1214 | } | |
1215 | } | |
1216 | ||
1217 | static void | |
1218 | TestJitterbug1098(){ | |
1219 | UChar rule[1000]; | |
1220 | UCollator* c1 = NULL; | |
1221 | UErrorCode status = U_ZERO_ERROR; | |
1222 | UParseError parseError; | |
1223 | char preContext[200]={0}; | |
1224 | char postContext[200]={0}; | |
1225 | int i=0; | |
1226 | const char* rules[] = { | |
1227 | "&''<\\\\", | |
1228 | "&\\'<\\\\", | |
1229 | "&\\\"<'\\'", | |
1230 | "&'\"'<\\'", | |
57a6839d | 1231 | NULL |
b75a7d8f A |
1232 | |
1233 | }; | |
1234 | const UCollationResult results1098[] = { | |
1235 | UCOL_LESS, | |
1236 | UCOL_LESS, | |
1237 | UCOL_LESS, | |
1238 | UCOL_LESS, | |
1239 | }; | |
1240 | const UChar input[][2]= { | |
1241 | {0x0027,0x005c}, | |
1242 | {0x0027,0x005c}, | |
1243 | {0x0022,0x005c}, | |
1244 | {0x0022,0x0027}, | |
1245 | }; | |
1246 | UChar X[2] ={0}; | |
1247 | UChar Y[2] ={0}; | |
1248 | u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN); | |
1249 | u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN); | |
1250 | for(;rules[i]!=0;i++){ | |
1251 | u_uastrcpy(rule, rules[i]); | |
1252 | c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status); | |
1253 | if(U_FAILURE(status)){ | |
729e4ab9 | 1254 | log_err_status(status, "Could not parse the rules syntax. Error: %s\n", u_errorName(status)); |
374ca955 A |
1255 | |
1256 | if (status == U_PARSE_ERROR) { | |
1257 | u_UCharsToChars(parseError.preContext,preContext,20); | |
1258 | u_UCharsToChars(parseError.postContext,postContext,20); | |
1259 | log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext); | |
1260 | } | |
1261 | ||
b75a7d8f A |
1262 | return; |
1263 | } | |
1264 | X[0] = input[i][0]; | |
1265 | Y[0] = input[i][1]; | |
1266 | doTest(c1,X,Y,results1098[i]); | |
1267 | ucol_close(c1); | |
1268 | } | |
1269 | } | |
1270 | ||
374ca955 A |
1271 | static void |
1272 | TestFCDCrash(void) { | |
1273 | static const char *test[] = { | |
1274 | "Gr\\u00F6\\u00DFe", | |
1275 | "Grossist" | |
1276 | }; | |
1277 | ||
374ca955 A |
1278 | UErrorCode status = U_ZERO_ERROR; |
1279 | UCollator *coll = ucol_open("es", &status); | |
1280 | if(U_FAILURE(status)) { | |
729e4ab9 | 1281 | log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); |
374ca955 A |
1282 | return; |
1283 | } | |
1284 | ucol_close(coll); | |
1285 | coll = NULL; | |
46f4442e | 1286 | ctest_resetICU(); |
374ca955 A |
1287 | coll = ucol_open("de_DE", &status); |
1288 | if(U_FAILURE(status)) { | |
729e4ab9 | 1289 | log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); |
374ca955 A |
1290 | return; |
1291 | } | |
1292 | ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
1293 | genericOrderingTest(coll, test, 2); | |
1294 | ucol_close(coll); | |
374ca955 | 1295 | } |
b75a7d8f | 1296 | |
46f4442e A |
1297 | /*static UBool |
1298 | find(UEnumeration* list, const char* str, UErrorCode* status){ | |
1299 | const char* value = NULL; | |
1300 | int32_t length=0; | |
1301 | if(U_FAILURE(*status)){ | |
1302 | return FALSE; | |
1303 | } | |
1304 | uenum_reset(list, status); | |
1305 | while( (value= uenum_next(list, &length, status))!=NULL){ | |
1306 | if(strcmp(value, str)==0){ | |
1307 | return TRUE; | |
1308 | } | |
1309 | } | |
1310 | return FALSE; | |
1311 | }*/ | |
1312 | ||
1313 | static void TestJ5298(void) | |
1314 | { | |
1315 | UErrorCode status = U_ZERO_ERROR; | |
1316 | char input[256], output[256]; | |
1317 | UBool isAvailable; | |
1318 | int32_t i = 0; | |
1319 | UEnumeration* values = NULL; | |
1320 | const char *keywordValue = NULL; | |
1321 | log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable()); | |
1322 | values = ucol_getKeywordValues("collation", &status); | |
b331163b A |
1323 | while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) { |
1324 | if (strncmp(keywordValue, "private-", 8) == 0) { | |
1325 | log_err("ucol_getKeywordValues() returns private collation keyword: %s\n", keywordValue); | |
1326 | } | |
1327 | } | |
46f4442e A |
1328 | for (i = 0; i < ucol_countAvailable(); i++) { |
1329 | uenum_reset(values, &status); | |
1330 | while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) { | |
1331 | strcpy(input, ucol_getAvailable(i)); | |
1332 | if (strcmp(keywordValue, "standard") != 0) { | |
1333 | strcat(input, "@collation="); | |
1334 | strcat(input, keywordValue); | |
1335 | } | |
1336 | ||
1337 | ucol_getFunctionalEquivalent(output, 256, "collation", input, &isAvailable, &status); | |
1338 | if (strcmp(input, output) == 0) { /* Unique locale, print it out */ | |
1339 | log_verbose("%s, \n", output); | |
1340 | } | |
1341 | } | |
1342 | } | |
1343 | uenum_close(values); | |
1344 | log_verbose("\n"); | |
1345 | } | |
b75a7d8f | 1346 | #endif /* #if !UCONFIG_NO_COLLATION */ |