]>
Commit | Line | Data |
---|---|---|
1 | /******************************************************************** | |
2 | * COPYRIGHT: | |
3 | * Copyright (c) 1997-2003, International Business Machines Corporation and | |
4 | * others. All Rights Reserved. | |
5 | ********************************************************************/ | |
6 | /******************************************************************************** | |
7 | * | |
8 | * File CALLCOLL.C | |
9 | * | |
10 | * Modification History: | |
11 | * Name Description | |
12 | * Madhu Katragadda Ported for C API | |
13 | ********************************************************************************* | |
14 | */ | |
15 | ||
16 | /* | |
17 | * Important: This file is included into intltest/allcoll.cpp so that the | |
18 | * test data is shared. This makes it easier to maintain the test data, | |
19 | * especially since the Unicode data must be portable and quoted character | |
20 | * literals will not work. | |
21 | * If it is included, then there will be a #define INCLUDE_CALLCOLL_C | |
22 | * that must prevent the actual code in here from being part of the | |
23 | * allcoll.cpp compilation. | |
24 | */ | |
25 | ||
26 | /** | |
27 | * CollationDummyTest is a third level test class. This tests creation of | |
28 | * a customized collator object. For example, number 1 to be sorted | |
29 | * equivalent to word 'one'. | |
30 | */ | |
31 | ||
32 | #include <string.h> | |
33 | #include <stdlib.h> | |
34 | ||
35 | #include "unicode/utypes.h" | |
36 | ||
37 | #if !UCONFIG_NO_COLLATION | |
38 | ||
39 | #include "unicode/ucol.h" | |
40 | #include "unicode/uloc.h" | |
41 | #include "unicode/ucoleitr.h" | |
42 | #include "unicode/ustring.h" | |
43 | ||
44 | #include "cintltst.h" | |
45 | #include "ccolltst.h" | |
46 | #include "callcoll.h" | |
47 | #include "calldata.h" | |
48 | #include "cstring.h" | |
49 | #include "cmemory.h" | |
50 | #include "ucol_imp.h" | |
51 | ||
52 | /* perform test with strength PRIMARY */ | |
53 | static void TestPrimary(void); | |
54 | ||
55 | /* perform test with strength SECONDARY */ | |
56 | static void TestSecondary(void); | |
57 | ||
58 | /* perform test with strength tertiary */ | |
59 | static void TestTertiary(void); | |
60 | ||
61 | /*perform tests with strength Identical */ | |
62 | static void TestIdentical(void); | |
63 | ||
64 | /* perform extra tests */ | |
65 | static void TestExtra(void); | |
66 | ||
67 | /* Test jitterbug 581 */ | |
68 | static void TestJB581(void); | |
69 | ||
70 | /* Test jitterbug 1401 */ | |
71 | static void TestJB1401(void); | |
72 | ||
73 | /* Test [variable top] in the rule syntax */ | |
74 | static void TestVariableTop(void); | |
75 | ||
76 | /* Test surrogates */ | |
77 | static void TestSurrogates(void); | |
78 | ||
79 | static void TestInvalidRules(void); | |
80 | ||
81 | static void TestJitterbug1098(void); | |
82 | ||
83 | const UCollationResult results[] = { | |
84 | UCOL_LESS, | |
85 | UCOL_LESS, /*UCOL_GREATER,*/ | |
86 | UCOL_LESS, | |
87 | UCOL_LESS, | |
88 | UCOL_LESS, | |
89 | UCOL_LESS, | |
90 | UCOL_LESS, | |
91 | UCOL_GREATER, | |
92 | UCOL_GREATER, | |
93 | UCOL_LESS, /* 10 */ | |
94 | UCOL_GREATER, | |
95 | UCOL_LESS, | |
96 | UCOL_GREATER, | |
97 | UCOL_GREATER, | |
98 | UCOL_LESS, | |
99 | UCOL_LESS, | |
100 | UCOL_LESS, | |
101 | /* test primary > 17 */ | |
102 | UCOL_EQUAL, | |
103 | UCOL_EQUAL, | |
104 | UCOL_EQUAL, /* 20 */ | |
105 | UCOL_LESS, | |
106 | UCOL_LESS, | |
107 | UCOL_EQUAL, | |
108 | UCOL_EQUAL, | |
109 | UCOL_EQUAL, | |
110 | UCOL_LESS, | |
111 | /* test secondary > 26 */ | |
112 | UCOL_EQUAL, | |
113 | UCOL_EQUAL, | |
114 | UCOL_EQUAL, | |
115 | UCOL_EQUAL, | |
116 | UCOL_EQUAL, /* 30 */ | |
117 | UCOL_EQUAL, | |
118 | UCOL_LESS, | |
119 | UCOL_EQUAL, /* 34 */ | |
120 | UCOL_EQUAL, | |
121 | UCOL_EQUAL, | |
122 | UCOL_LESS /* 37 */ | |
123 | }; | |
124 | ||
125 | ||
126 | void addAllCollTest(TestNode** root) | |
127 | { | |
128 | ||
129 | ||
130 | addTest(root, &TestPrimary, "tscoll/callcoll/TestPrimary"); | |
131 | addTest(root, &TestSecondary, "tscoll/callcoll/TestSecondary"); | |
132 | addTest(root, &TestTertiary, "tscoll/callcoll/TestTertiary"); | |
133 | addTest(root, &TestIdentical, "tscoll/callcoll/TestIdentical"); | |
134 | addTest(root, &TestExtra, "tscoll/callcoll/TestExtra"); | |
135 | addTest(root, &TestJB581, "tscoll/callcoll/TestJB581"); | |
136 | addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop"); | |
137 | addTest(root, &TestSurrogates, "tscoll/callcoll/TestSurrogates"); | |
138 | addTest(root, &TestInvalidRules, "tscoll/callcoll/TestInvalidRules"); | |
139 | addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401"); | |
140 | addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098"); | |
141 | ||
142 | } | |
143 | ||
144 | static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) { | |
145 | int32_t partialSKResult = 0; | |
146 | UCharIterator sIter, tIter; | |
147 | uint32_t sState[2], tState[2]; | |
148 | int32_t sSize = pieceSize, tSize = pieceSize; | |
149 | int32_t i = 0; | |
150 | uint8_t sBuf[16384], tBuf[16384]; | |
151 | if(pieceSize > 16384) { | |
152 | log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n"); | |
153 | *status = U_BUFFER_OVERFLOW_ERROR; | |
154 | return UCOL_EQUAL; | |
155 | } | |
156 | *status = U_ZERO_ERROR; | |
157 | sState[0] = 0; sState[1] = 0; | |
158 | tState[0] = 0; tState[1] = 0; | |
159 | while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) { | |
160 | uiter_setString(&sIter, source, sLen); | |
161 | uiter_setString(&tIter, target, tLen); | |
162 | sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status); | |
163 | tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status); | |
164 | ||
165 | if(sState[0] != 0 || tState[0] != 0) { | |
166 | log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]); | |
167 | } | |
168 | log_verbose("%i ", i++); | |
169 | ||
170 | partialSKResult = memcmp(sBuf, tBuf, pieceSize); | |
171 | } | |
172 | ||
173 | if(partialSKResult < 0) { | |
174 | return UCOL_LESS; | |
175 | } else if(partialSKResult > 0) { | |
176 | return UCOL_GREATER; | |
177 | } else { | |
178 | return UCOL_EQUAL; | |
179 | } | |
180 | } | |
181 | ||
182 | static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result) | |
183 | { | |
184 | int32_t sortklen1, sortklen2, sortklenmax, sortklenmin; | |
185 | int temp=0, gSortklen1=0,gSortklen2=0; | |
186 | UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result; | |
187 | uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a; | |
188 | uint32_t sLen = u_strlen(source); | |
189 | uint32_t tLen = u_strlen(target); | |
190 | char buffer[256]; | |
191 | uint32_t len; | |
192 | UErrorCode status = U_ZERO_ERROR; | |
193 | UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); | |
194 | ||
195 | UCharIterator sIter, tIter; | |
196 | uiter_setString(&sIter, source, sLen); | |
197 | uiter_setString(&tIter, target, tLen); | |
198 | compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); | |
199 | if(compareResultIter != result) { | |
200 | log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); | |
201 | } | |
202 | ||
203 | /* convert the strings to UTF-8 and do try comparing with char iterator */ | |
204 | if(QUICK <= 0) { /*!QUICK*/ | |
205 | char utf8Source[256], utf8Target[256]; | |
206 | int32_t utf8SourceLen = 0, utf8TargetLen = 0; | |
207 | u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status); | |
208 | if(U_FAILURE(status)) { /* probably buffer is not big enough */ | |
209 | log_verbose("Src UTF-8 buffer too small! Will not compare!\n"); | |
210 | } else { | |
211 | u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status); | |
212 | if(U_SUCCESS(status)) { /* probably buffer is not big enough */ | |
213 | UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result; | |
214 | /*UCharIterator sIter, tIter;*/ | |
215 | /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/ | |
216 | uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); | |
217 | uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); | |
218 | /*uiter_setString(&sIter, source, sLen); | |
219 | uiter_setString(&tIter, target, tLen);*/ | |
220 | compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status); | |
221 | ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
222 | sIter.move(&sIter, 0, UITER_START); | |
223 | tIter.move(&tIter, 0, UITER_START); | |
224 | compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); | |
225 | ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); | |
226 | if(compareResultUTF8 != compareResultIter) { | |
227 | log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); | |
228 | } | |
229 | if(compareResultUTF8 != compareResultUTF8Norm) { | |
230 | log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1)); | |
231 | } | |
232 | } else { | |
233 | log_verbose("Target UTF-8 buffer too small! Did not compare!\n"); | |
234 | } | |
235 | if(U_FAILURE(status)) { | |
236 | log_verbose("UTF-8 strcoll failed! Ignoring result\n"); | |
237 | } | |
238 | } | |
239 | } | |
240 | ||
241 | /* testing the partial sortkeys */ | |
242 | if(1) { /*!QUICK*/ | |
243 | int32_t i = 0; | |
244 | int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ | |
245 | int32_t partialSizesSize = 1; | |
246 | if(QUICK <= 0) { | |
247 | partialSizesSize = 7; | |
248 | } | |
249 | log_verbose("partial sortkey test piecesize="); | |
250 | for(i = 0; i < partialSizesSize; i++) { | |
251 | UCollationResult partialSKResult = result, partialNormalizedSKResult = result; | |
252 | log_verbose("%i ", partialSizes[i]); | |
253 | ||
254 | partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status); | |
255 | if(partialSKResult != result) { | |
256 | log_err("Partial sortkey comparison returned wrong result: %s, %s (size %i)\n", | |
257 | aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]); | |
258 | } | |
259 | ||
260 | if(QUICK <= 0 && norm != UCOL_ON) { | |
261 | log_verbose("N "); | |
262 | ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
263 | partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status); | |
264 | ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); | |
265 | if(partialSKResult != partialNormalizedSKResult) { | |
266 | log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n", | |
267 | aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]); | |
268 | } | |
269 | } | |
270 | } | |
271 | log_verbose("\n"); | |
272 | } | |
273 | ||
274 | ||
275 | compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen); | |
276 | compareResulta = ucol_strcoll(myCollation, source, -1, target, -1); | |
277 | if (compareResult != compareResulta) { | |
278 | log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n"); | |
279 | } | |
280 | ||
281 | sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0); | |
282 | sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0); | |
283 | ||
284 | sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2); | |
285 | sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2); | |
286 | ||
287 | sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); | |
288 | sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); | |
289 | ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1); | |
290 | ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1); | |
291 | ||
292 | sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); | |
293 | sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1)); | |
294 | ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1); | |
295 | ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1); | |
296 | ||
297 | /* Check that sort key generated with null terminated string is identical */ | |
298 | /* to that generted with a length specified. */ | |
299 | if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 || | |
300 | uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) { | |
301 | log_err("Sort Keys from null terminated and explicit length strings differ.\n"); | |
302 | } | |
303 | ||
304 | /*memcmp(sortKey1, sortKey2,sortklenmax);*/ | |
305 | temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2); | |
306 | gSortklen1 = uprv_strlen((const char *)sortKey1)+1; | |
307 | gSortklen2 = uprv_strlen((const char *)sortKey2)+1; | |
308 | if(sortklen1 != gSortklen1){ | |
309 | log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1); | |
310 | log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation, sortKey1, buffer, &len)); | |
311 | } | |
312 | if(sortklen2!= gSortklen2){ | |
313 | log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2); | |
314 | log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation, sortKey2, buffer, &len)); | |
315 | } | |
316 | ||
317 | if(temp < 0) { | |
318 | keyResult=UCOL_LESS; | |
319 | } | |
320 | else if(temp > 0) { | |
321 | keyResult= UCOL_GREATER; | |
322 | } | |
323 | else { | |
324 | keyResult = UCOL_EQUAL; | |
325 | } | |
326 | reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result ); | |
327 | free(sortKey1); | |
328 | free(sortKey2); | |
329 | free(sortKey1a); | |
330 | free(sortKey2a); | |
331 | ||
332 | } | |
333 | ||
334 | void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result) | |
335 | { | |
336 | if(myCollation) { | |
337 | doTestVariant(myCollation, source, target, result); | |
338 | if(result == UCOL_LESS) { | |
339 | doTestVariant(myCollation, target, source, UCOL_GREATER); | |
340 | } else if(result == UCOL_GREATER) { | |
341 | doTestVariant(myCollation, target, source, UCOL_LESS); | |
342 | } else { | |
343 | doTestVariant(myCollation, target, source, UCOL_EQUAL); | |
344 | } | |
345 | } else { | |
346 | log_data_err("No collator! Any data around?\n"); | |
347 | } | |
348 | } | |
349 | ||
350 | static void TestTertiary() | |
351 | { | |
352 | int32_t len,i; | |
353 | UChar *rules; | |
354 | UCollator *myCollation; | |
355 | UErrorCode status=U_ZERO_ERROR; | |
356 | const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; | |
357 | len = strlen(str); | |
358 | rules=(UChar*)malloc(sizeof(UChar*) * (len+1)); | |
359 | u_uastrcpy(rules, str); | |
360 | ||
361 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); | |
362 | if(U_FAILURE(status)){ | |
363 | log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); | |
364 | } | |
365 | ||
366 | ucol_setStrength(myCollation, UCOL_TERTIARY); | |
367 | for (i = 0; i < 17 ; i++) | |
368 | { | |
369 | doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
370 | } | |
371 | free(rules); | |
372 | ucol_close(myCollation); | |
373 | myCollation = 0; | |
374 | } | |
375 | ||
376 | static void TestPrimary( ) | |
377 | { | |
378 | int32_t len,i; | |
379 | UChar *rules; | |
380 | UCollator *myCollation; | |
381 | UErrorCode status=U_ZERO_ERROR; | |
382 | const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; | |
383 | len = strlen(str); | |
384 | rules=(UChar*)malloc(sizeof(UChar*) * (len+1)); | |
385 | u_uastrcpy(rules, str); | |
386 | ||
387 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); | |
388 | if(U_FAILURE(status)){ | |
389 | log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); | |
390 | } | |
391 | ucol_setStrength(myCollation, UCOL_PRIMARY); | |
392 | ||
393 | for (i = 17; i < 26 ; i++) | |
394 | { | |
395 | ||
396 | doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
397 | } | |
398 | free(rules); | |
399 | ucol_close(myCollation); | |
400 | myCollation = 0; | |
401 | } | |
402 | ||
403 | static void TestSecondary() | |
404 | { | |
405 | int32_t i; | |
406 | int32_t len; | |
407 | UChar *rules; | |
408 | UCollator *myCollation; | |
409 | UErrorCode status=U_ZERO_ERROR; | |
410 | const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; | |
411 | len = strlen(str); | |
412 | rules=(UChar*)malloc(sizeof(UChar*) * (len+1)); | |
413 | u_uastrcpy(rules, str); | |
414 | ||
415 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); | |
416 | if(U_FAILURE(status)){ | |
417 | log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); | |
418 | } | |
419 | ucol_setStrength(myCollation, UCOL_SECONDARY); | |
420 | for (i = 26; i < 34 ; i++) | |
421 | { | |
422 | doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
423 | } | |
424 | free(rules); | |
425 | ucol_close(myCollation); | |
426 | myCollation = 0; | |
427 | } | |
428 | ||
429 | static void TestIdentical() | |
430 | { | |
431 | int32_t i; | |
432 | int32_t len; | |
433 | UChar *rules = 0; | |
434 | UCollator *myCollation; | |
435 | UErrorCode status=U_ZERO_ERROR; | |
436 | const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; | |
437 | len = strlen(str); | |
438 | rules=(UChar*)malloc(sizeof(UChar*) * (len+1)); | |
439 | u_uastrcpy(rules, str); | |
440 | ||
441 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status); | |
442 | if(U_FAILURE(status)){ | |
443 | log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); | |
444 | } | |
445 | for(i= 34; i<37; i++) | |
446 | { | |
447 | doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
448 | } | |
449 | free(rules); | |
450 | ucol_close(myCollation); | |
451 | myCollation = 0; | |
452 | } | |
453 | ||
454 | static void TestExtra() | |
455 | { | |
456 | int32_t i, j; | |
457 | int32_t len; | |
458 | UChar *rules; | |
459 | UCollator *myCollation; | |
460 | UErrorCode status = U_ZERO_ERROR; | |
461 | const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; | |
462 | len = strlen(str); | |
463 | rules=(UChar*)malloc(sizeof(UChar*) * (len+1)); | |
464 | u_uastrcpy(rules, str); | |
465 | ||
466 | myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); | |
467 | if(U_FAILURE(status)){ | |
468 | log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); | |
469 | } | |
470 | ucol_setStrength(myCollation, UCOL_TERTIARY); | |
471 | for (i = 0; i < COUNT_TEST_CASES-1 ; i++) | |
472 | { | |
473 | for (j = i + 1; j < COUNT_TEST_CASES; j += 1) | |
474 | { | |
475 | ||
476 | doTest(myCollation, testCases[i], testCases[j], UCOL_LESS); | |
477 | } | |
478 | } | |
479 | free(rules); | |
480 | ucol_close(myCollation); | |
481 | myCollation = 0; | |
482 | } | |
483 | ||
484 | static void TestJB581(void) | |
485 | { | |
486 | UChar dispName [100]; | |
487 | int32_t bufferLen = 0; | |
488 | UChar source [100]; | |
489 | UChar target [100]; | |
490 | UCollationResult result = UCOL_EQUAL; | |
491 | uint8_t sourceKeyArray [100]; | |
492 | uint8_t targetKeyArray [100]; | |
493 | int32_t sourceKeyOut = 0, | |
494 | targetKeyOut = 0; | |
495 | UCollator *myCollator = 0; | |
496 | UErrorCode status = U_ZERO_ERROR; | |
497 | ||
498 | /*u_uastrcpy(source, "This is a test.");*/ | |
499 | /*u_uastrcpy(target, "THISISATEST.");*/ | |
500 | u_uastrcpy(source, "THISISATEST."); | |
501 | u_uastrcpy(target, "Thisisatest."); | |
502 | ||
503 | myCollator = ucol_open("en_US", &status); | |
504 | if (U_FAILURE(status)){ | |
505 | bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status); | |
506 | /*Report the error with display name... */ | |
507 | log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName); | |
508 | return; | |
509 | } | |
510 | result = ucol_strcoll(myCollator, source, -1, target, -1); | |
511 | /* result is 1, secondary differences only for ignorable space characters*/ | |
512 | if (result != 1) | |
513 | { | |
514 | log_err("Comparing two strings with only secondary differences in C failed.\n"); | |
515 | } | |
516 | /* To compare them with just primary differences */ | |
517 | ucol_setStrength(myCollator, UCOL_PRIMARY); | |
518 | result = ucol_strcoll(myCollator, source, -1, target, -1); | |
519 | /* result is 0 */ | |
520 | if (result != 0) | |
521 | { | |
522 | log_err("Comparing two strings with no differences in C failed.\n"); | |
523 | } | |
524 | /* Now, do the same comparison with keys */ | |
525 | sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100); | |
526 | targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100); | |
527 | result = 0; | |
528 | bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut); | |
529 | result = memcmp(sourceKeyArray, targetKeyArray, bufferLen); | |
530 | if (result != 0) | |
531 | { | |
532 | log_err("Comparing two strings with sort keys in C failed.\n"); | |
533 | } | |
534 | ucol_close(myCollator); | |
535 | } | |
536 | ||
537 | static void TestJB1401(void) | |
538 | { | |
539 | UCollator *myCollator = 0; | |
540 | UErrorCode status = U_ZERO_ERROR; | |
541 | static UChar NFD_UnsafeStartChars[] = { | |
542 | 0x0f73, /* Tibetan Vowel Sign II */ | |
543 | 0x0f75, /* Tibetan Vowel Sign UU */ | |
544 | 0x0f81, /* Tibetan Vowel Sign Reversed II */ | |
545 | 0 | |
546 | }; | |
547 | int i; | |
548 | ||
549 | ||
550 | myCollator = ucol_open("en_US", &status); | |
551 | if (U_FAILURE(status)){ | |
552 | int32_t bufferLen = 0; | |
553 | UChar dispName [100]; | |
554 | bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status); | |
555 | /*Report the error with display name... */ | |
556 | log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName); | |
557 | return; | |
558 | } | |
559 | ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
560 | if (U_FAILURE(status)){ | |
561 | log_err("ERROR: Failed to set normalization mode ON for collator.\n"); | |
562 | return; | |
563 | } | |
564 | ||
565 | for (i=0; ; i++) { | |
566 | UChar c; | |
567 | UChar X[4]; | |
568 | UChar Y[20]; | |
569 | UChar Z[20]; | |
570 | ||
571 | /* Get the next funny character to be tested, and set up the | |
572 | * three test strings X, Y, Z, consisting of an A-grave + test char, | |
573 | * in original form, NFD, and then NFC form. | |
574 | */ | |
575 | c = NFD_UnsafeStartChars[i]; | |
576 | if (c==0) {break;} | |
577 | ||
578 | X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/ | |
579 | ||
580 | unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status); | |
581 | unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status); | |
582 | if (U_FAILURE(status)){ | |
583 | log_err("ERROR: Failed to normalize test of character %x\n", c); | |
584 | return; | |
585 | } | |
586 | ||
587 | /* Collation test. All three strings should be equal. | |
588 | * doTest does both strcoll and sort keys, with params in both orders. | |
589 | */ | |
590 | doTest(myCollator, X, Y, UCOL_EQUAL); | |
591 | doTest(myCollator, X, Z, UCOL_EQUAL); | |
592 | doTest(myCollator, Y, Z, UCOL_EQUAL); | |
593 | ||
594 | /* Run collation element iterators over the three strings. Results should be same for each. | |
595 | */ | |
596 | { | |
597 | UCollationElements *ceiX, *ceiY, *ceiZ; | |
598 | int32_t ceX, ceY, ceZ; | |
599 | int j; | |
600 | ||
601 | ceiX = ucol_openElements(myCollator, X, -1, &status); | |
602 | ceiY = ucol_openElements(myCollator, Y, -1, &status); | |
603 | ceiZ = ucol_openElements(myCollator, Z, -1, &status); | |
604 | if (U_FAILURE(status)) { | |
605 | log_err("ERROR: uucol_openElements failed.\n"); | |
606 | return; | |
607 | } | |
608 | ||
609 | for (j=0;; j++) { | |
610 | ceX = ucol_next(ceiX, &status); | |
611 | ceY = ucol_next(ceiY, &status); | |
612 | ceZ = ucol_next(ceiZ, &status); | |
613 | if (U_FAILURE(status)) { | |
614 | log_err("ERROR: ucol_next failed for iteration #%d.\n", j); | |
615 | break; | |
616 | } | |
617 | if (ceX != ceY || ceY != ceZ) { | |
618 | log_err("ERROR: ucol_next failed for iteration #%d.\n", j); | |
619 | break; | |
620 | } | |
621 | if (ceX == UCOL_NULLORDER) { | |
622 | break; | |
623 | } | |
624 | } | |
625 | ucol_closeElements(ceiX); | |
626 | ucol_closeElements(ceiY); | |
627 | ucol_closeElements(ceiZ); | |
628 | } | |
629 | } | |
630 | ucol_close(myCollator); | |
631 | } | |
632 | ||
633 | ||
634 | ||
635 | /** | |
636 | * Tests the [variable top] tag in rule syntax. Since the default [alternate] | |
637 | * tag has the value shifted, any codepoints before [variable top] should give | |
638 | * a primary ce of 0. | |
639 | */ | |
640 | static void TestVariableTop(void) | |
641 | { | |
642 | const char *str = "&z = [variable top]"; | |
643 | int len = strlen(str); | |
644 | UChar *rules; | |
645 | UCollator *myCollation; | |
646 | UCollator *enCollation; | |
647 | UErrorCode status = U_ZERO_ERROR; | |
648 | UChar source[1]; | |
649 | UChar ch; | |
650 | uint8_t result[20]; | |
651 | uint8_t expected[20]; | |
652 | ||
653 | rules = (UChar*)malloc(sizeof(UChar*) * (len + 1)); | |
654 | u_uastrcpy(rules, str); | |
655 | ||
656 | enCollation = ucol_open("en_US", &status); | |
657 | myCollation = ucol_openRules(rules, len, UCOL_OFF, | |
658 | UCOL_PRIMARY,NULL, &status); | |
659 | if (U_FAILURE(status)) { | |
660 | log_err("ERROR: in creation of rule based collator :%s\n", | |
661 | myErrorName(status)); | |
662 | return; | |
663 | } | |
664 | ||
665 | ucol_setStrength(enCollation, UCOL_PRIMARY); | |
666 | ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, | |
667 | &status); | |
668 | ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, | |
669 | &status); | |
670 | ||
671 | if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) != | |
672 | UCOL_SHIFTED || U_FAILURE(status)) { | |
673 | log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n"); | |
674 | } | |
675 | ||
676 | uprv_memset(expected, 0, 20); | |
677 | ||
678 | /* space is supposed to be a variable */ | |
679 | source[0] = ' '; | |
680 | len = ucol_getSortKey(enCollation, source, 1, result, | |
681 | sizeof(result)); | |
682 | ||
683 | if (uprv_memcmp(expected, result, len) != 0) { | |
684 | log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n"); | |
685 | } | |
686 | ||
687 | ch = 'a'; | |
688 | while (ch < 'z') { | |
689 | source[0] = ch; | |
690 | len = ucol_getSortKey(myCollation, source, 1, result, | |
691 | sizeof(result)); | |
692 | if (uprv_memcmp(expected, result, len) != 0) { | |
693 | log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", | |
694 | ch); | |
695 | } | |
696 | ch ++; | |
697 | } | |
698 | ||
699 | free(rules); | |
700 | ucol_close(enCollation); | |
701 | ucol_close(myCollation); | |
702 | enCollation = NULL; | |
703 | myCollation = NULL; | |
704 | } | |
705 | ||
706 | /** | |
707 | * Tests surrogate support. | |
708 | * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret | |
709 | * Therefore, another (unassigned) code point was used for this test. | |
710 | */ | |
711 | static void TestSurrogates(void) | |
712 | { | |
713 | const char *str = | |
714 | "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A"; | |
715 | int len = strlen(str); | |
716 | int rlen = 0; | |
717 | UChar *rules; | |
718 | UCollator *myCollation; | |
719 | UCollator *enCollation; | |
720 | UErrorCode status = U_ZERO_ERROR; | |
721 | UChar source[][4] = | |
722 | {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}}; | |
723 | UChar target[][4] = | |
724 | {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}}; | |
725 | int count = 0; | |
726 | uint8_t enresult[20], myresult[20]; | |
727 | int enlen, mylen; | |
728 | ||
729 | /* tests for open rules with surrogate rules */ | |
730 | rules = (UChar*)malloc(sizeof(UChar*) * (len + 1)); | |
731 | rlen = u_unescape(str, rules, len); | |
732 | ||
733 | enCollation = ucol_open("en_US", &status); | |
734 | myCollation = ucol_openRules(rules, rlen, UCOL_OFF, | |
735 | UCOL_TERTIARY,NULL, &status); | |
736 | if (U_FAILURE(status)) { | |
737 | log_err("ERROR: in creation of rule based collator :%s\n", | |
738 | myErrorName(status)); | |
739 | return; | |
740 | } | |
741 | ||
742 | /* | |
743 | this test is to verify the supplementary sort key order in the english | |
744 | collator | |
745 | */ | |
746 | log_verbose("start of english collation supplementary characters test\n"); | |
747 | while (count < 2) { | |
748 | doTest(enCollation, source[count], target[count], UCOL_LESS); | |
749 | count ++; | |
750 | } | |
751 | doTest(enCollation, source[count], target[count], UCOL_GREATER); | |
752 | ||
753 | log_verbose("start of tailored collation supplementary characters test\n"); | |
754 | count = 0; | |
755 | /* tests getting collation elements for surrogates for tailored rules */ | |
756 | while (count < 4) { | |
757 | doTest(myCollation, source[count], target[count], UCOL_LESS); | |
758 | count ++; | |
759 | } | |
760 | ||
761 | /* tests that \uD800\uDC02 still has the same value, not changed */ | |
762 | enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20); | |
763 | mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20); | |
764 | if (enlen != mylen || | |
765 | uprv_memcmp(enresult, myresult, enlen) != 0) { | |
766 | log_verbose("Failed : non-tailored supplementary characters should have the same value\n"); | |
767 | } | |
768 | ||
769 | free(rules); | |
770 | ucol_close(enCollation); | |
771 | ucol_close(myCollation); | |
772 | enCollation = NULL; | |
773 | myCollation = NULL; | |
774 | } | |
775 | ||
776 | /* | |
777 | *### TODO: Add more invalid rules to test all different scenarios. | |
778 | * | |
779 | */ | |
780 | static void | |
781 | TestInvalidRules(){ | |
782 | #define MAX_ERROR_STATES 2 | |
783 | ||
784 | static const char* rulesArr[MAX_ERROR_STATES] = { | |
785 | "& C < ch, cH, Ch[this should fail]<d", | |
786 | "& C < ch, cH, & Ch[variable top]" | |
787 | }; | |
788 | static const char* preContextArr[MAX_ERROR_STATES] = { | |
789 | "his should fail", | |
790 | "& C < ch, cH, ", | |
791 | ||
792 | }; | |
793 | static const char* postContextArr[MAX_ERROR_STATES] = { | |
794 | "<d", | |
795 | " Ch[variable t" | |
796 | }; | |
797 | int i; | |
798 | ||
799 | for(i = 0;i<MAX_ERROR_STATES;i++){ | |
800 | UChar rules[1000] = { '\0' }; | |
801 | UChar preContextExp[1000] = { '\0' }; | |
802 | UChar postContextExp[1000] = { '\0' }; | |
803 | UParseError parseError; | |
804 | UErrorCode status = U_ZERO_ERROR; | |
805 | UCollator* coll=0; | |
806 | u_charsToUChars(rulesArr[i],rules,uprv_strlen(rulesArr[i])+1); | |
807 | u_charsToUChars(preContextArr[i],preContextExp,uprv_strlen(preContextArr[i])+1); | |
808 | u_charsToUChars(postContextArr[i],postContextExp,uprv_strlen(postContextArr[i])+1); | |
809 | /* clean up stuff in parseError */ | |
810 | u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN); | |
811 | u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN); | |
812 | /* open the rules and test */ | |
813 | coll = ucol_openRules(rules,u_strlen(rules),UCOL_OFF,UCOL_DEFAULT_STRENGTH,&parseError,&status); | |
814 | if(u_strcmp(parseError.preContext,preContextExp)!=0){ | |
815 | log_err("preContext in UParseError for ucol_openRules does not match\n"); | |
816 | } | |
817 | if(u_strcmp(parseError.postContext,postContextExp)!=0){ | |
818 | log_err("postContext in UParseError for ucol_openRules does not match\n"); | |
819 | } | |
820 | } | |
821 | } | |
822 | ||
823 | static void | |
824 | TestJitterbug1098(){ | |
825 | UChar rule[1000]; | |
826 | UCollator* c1 = NULL; | |
827 | UErrorCode status = U_ZERO_ERROR; | |
828 | UParseError parseError; | |
829 | char preContext[200]={0}; | |
830 | char postContext[200]={0}; | |
831 | int i=0; | |
832 | const char* rules[] = { | |
833 | "&''<\\\\", | |
834 | "&\\'<\\\\", | |
835 | "&\\\"<'\\'", | |
836 | "&'\"'<\\'", | |
837 | '\0' | |
838 | ||
839 | }; | |
840 | const UCollationResult results1098[] = { | |
841 | UCOL_LESS, | |
842 | UCOL_LESS, | |
843 | UCOL_LESS, | |
844 | UCOL_LESS, | |
845 | }; | |
846 | const UChar input[][2]= { | |
847 | {0x0027,0x005c}, | |
848 | {0x0027,0x005c}, | |
849 | {0x0022,0x005c}, | |
850 | {0x0022,0x0027}, | |
851 | }; | |
852 | UChar X[2] ={0}; | |
853 | UChar Y[2] ={0}; | |
854 | u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN); | |
855 | u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN); | |
856 | for(;rules[i]!=0;i++){ | |
857 | u_uastrcpy(rule, rules[i]); | |
858 | c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status); | |
859 | if(U_FAILURE(status)){ | |
860 | u_UCharsToChars(parseError.preContext,preContext,20); | |
861 | u_UCharsToChars(parseError.postContext,postContext,20); | |
862 | log_err("Could not parse the rules syntax. Error: %s ", u_errorName(status)); | |
863 | log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext); | |
864 | return; | |
865 | } | |
866 | X[0] = input[i][0]; | |
867 | Y[0] = input[i][1]; | |
868 | doTest(c1,X,Y,results1098[i]); | |
869 | ucol_close(c1); | |
870 | } | |
871 | } | |
872 | ||
873 | ||
874 | #endif /* #if !UCONFIG_NO_COLLATION */ |