]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/callcoll.c
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / cintltst / callcoll.c
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/********************************************************************
4 * COPYRIGHT:
57a6839d 5 * Copyright (c) 1997-2014, International Business Machines Corporation and
b75a7d8f
A
6 * others. All Rights Reserved.
7 ********************************************************************/
46f4442e 8/*******************************************************************************
b75a7d8f
A
9*
10* File CALLCOLL.C
11*
12* Modification History:
13* Name Description
14* Madhu Katragadda Ported for C API
46f4442e 15********************************************************************************
b75a7d8f
A
16*/
17
18/*
19 * Important: This file is included into intltest/allcoll.cpp so that the
20 * test data is shared. This makes it easier to maintain the test data,
21 * especially since the Unicode data must be portable and quoted character
22 * literals will not work.
23 * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
24 * that must prevent the actual code in here from being part of the
25 * allcoll.cpp compilation.
26 */
27
28/**
29 * CollationDummyTest is a third level test class. This tests creation of
30 * a customized collator object. For example, number 1 to be sorted
31 * equivalent to word 'one'.
32 */
33
34#include <string.h>
35#include <stdlib.h>
36
37#include "unicode/utypes.h"
38
39#if !UCONFIG_NO_COLLATION
40
41#include "unicode/ucol.h"
42#include "unicode/uloc.h"
46f4442e
A
43#include "unicode/ures.h"
44#include "unicode/udata.h"
b75a7d8f
A
45#include "unicode/ucoleitr.h"
46#include "unicode/ustring.h"
374ca955
A
47#include "unicode/uclean.h"
48#include "unicode/putil.h"
46f4442e 49#include "unicode/uenum.h"
b75a7d8f
A
50
51#include "cintltst.h"
52#include "ccolltst.h"
53#include "callcoll.h"
54#include "calldata.h"
55#include "cstring.h"
56#include "cmemory.h"
b75a7d8f 57
46f4442e
A
58/* set to 1 to test offsets in backAndForth() */
59#define TEST_OFFSETS 0
60
b75a7d8f
A
61/* perform test with strength PRIMARY */
62static void TestPrimary(void);
63
64/* perform test with strength SECONDARY */
65static void TestSecondary(void);
66
67/* perform test with strength tertiary */
68static void TestTertiary(void);
69
70/*perform tests with strength Identical */
71static void TestIdentical(void);
72
73/* perform extra tests */
74static void TestExtra(void);
75
76/* Test jitterbug 581 */
77static void TestJB581(void);
78
79/* Test jitterbug 1401 */
80static void TestJB1401(void);
81
82/* Test [variable top] in the rule syntax */
83static void TestVariableTop(void);
84
85/* Test surrogates */
86static void TestSurrogates(void);
87
88static void TestInvalidRules(void);
89
90static void TestJitterbug1098(void);
91
374ca955
A
92static void TestFCDCrash(void);
93
46f4442e
A
94static void TestJ5298(void);
95
0f5d89e8
A
96static void TestBadKey(void);
97
b75a7d8f
A
98const UCollationResult results[] = {
99 UCOL_LESS,
100 UCOL_LESS, /*UCOL_GREATER,*/
101 UCOL_LESS,
102 UCOL_LESS,
103 UCOL_LESS,
104 UCOL_LESS,
105 UCOL_LESS,
106 UCOL_GREATER,
107 UCOL_GREATER,
108 UCOL_LESS, /* 10 */
109 UCOL_GREATER,
110 UCOL_LESS,
111 UCOL_GREATER,
112 UCOL_GREATER,
113 UCOL_LESS,
114 UCOL_LESS,
115 UCOL_LESS,
116 /* test primary > 17 */
117 UCOL_EQUAL,
118 UCOL_EQUAL,
119 UCOL_EQUAL, /* 20 */
120 UCOL_LESS,
121 UCOL_LESS,
122 UCOL_EQUAL,
123 UCOL_EQUAL,
124 UCOL_EQUAL,
125 UCOL_LESS,
126 /* test secondary > 26 */
127 UCOL_EQUAL,
128 UCOL_EQUAL,
129 UCOL_EQUAL,
130 UCOL_EQUAL,
131 UCOL_EQUAL, /* 30 */
132 UCOL_EQUAL,
133 UCOL_LESS,
134 UCOL_EQUAL, /* 34 */
135 UCOL_EQUAL,
136 UCOL_EQUAL,
137 UCOL_LESS /* 37 */
138};
139
140
73c04bcf
A
/*
 * Appends two characters to the NUL-terminated string dst: the results of
 * T_CString_itosOffset() for the high and the low nibble of val, followed by
 * a new terminator. dst must have room for three more bytes.
 */
static
void uprv_appendByteToHexString(char *dst, uint8_t val) {
    char *tail = dst + (uint32_t)uprv_strlen(dst);

    tail[0] = T_CString_itosOffset((val >> 4));
    tail[1] = T_CString_itosOffset((val & 0xF));
    tail[2] = 0;
}
148
149/* this function makes a string with representation of a sortkey */
150static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
46f4442e
A
151 int32_t strength = UCOL_PRIMARY;
152 uint32_t res_size = 0;
153 UBool doneCase = FALSE;
729e4ab9 154 UErrorCode errorCode = U_ZERO_ERROR;
73c04bcf 155
46f4442e
A
156 char *current = buffer;
157 const uint8_t *currentSk = sortkey;
73c04bcf 158
46f4442e 159 uprv_strcpy(current, "[");
73c04bcf 160
729e4ab9 161 while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) {
46f4442e
A
162 if(strength > UCOL_PRIMARY) {
163 uprv_strcat(current, " . ");
164 }
165 while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
166 uprv_appendByteToHexString(current, *currentSk++);
167 uprv_strcat(current, " ");
168 }
729e4ab9 169 if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
46f4442e 170 doneCase = TRUE;
729e4ab9 171 } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
46f4442e
A
172 strength ++;
173 }
174 if (*currentSk) {
175 uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
176 }
729e4ab9 177 if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) {
46f4442e
A
178 break;
179 }
73c04bcf 180 }
73c04bcf 181
729e4ab9 182 if(ucol_getStrength(coll) == UCOL_IDENTICAL) {
46f4442e
A
183 uprv_strcat(current, " . ");
184 while(*currentSk != 0) {
185 uprv_appendByteToHexString(current, *currentSk++);
186 uprv_strcat(current, " ");
187 }
73c04bcf 188
46f4442e
A
189 uprv_appendByteToHexString(current, *currentSk++);
190 }
191 uprv_strcat(current, "]");
73c04bcf 192
46f4442e
A
193 if(res_size > *len) {
194 return NULL;
195 }
73c04bcf 196
46f4442e 197 return buffer;
73c04bcf
A
198}
199
b75a7d8f
A
200void addAllCollTest(TestNode** root)
201{
b75a7d8f
A
202 addTest(root, &TestPrimary, "tscoll/callcoll/TestPrimary");
203 addTest(root, &TestSecondary, "tscoll/callcoll/TestSecondary");
204 addTest(root, &TestTertiary, "tscoll/callcoll/TestTertiary");
205 addTest(root, &TestIdentical, "tscoll/callcoll/TestIdentical");
206 addTest(root, &TestExtra, "tscoll/callcoll/TestExtra");
46f4442e
A
207 addTest(root, &TestJB581, "tscoll/callcoll/TestJB581");
208 addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop");
b75a7d8f
A
209 addTest(root, &TestSurrogates, "tscoll/callcoll/TestSurrogates");
210 addTest(root, &TestInvalidRules, "tscoll/callcoll/TestInvalidRules");
46f4442e
A
211 addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401");
212 addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098");
374ca955 213 addTest(root, &TestFCDCrash, "tscoll/callcoll/TestFCDCrash");
46f4442e 214 addTest(root, &TestJ5298, "tscoll/callcoll/TestJ5298");
0f5d89e8 215 addTest(root, &TestBadKey, "tscoll/callcoll/TestBadKey");
46f4442e 216}
b75a7d8f 217
374ca955
A
218UBool hasCollationElements(const char *locName) {
219
220 UErrorCode status = U_ZERO_ERROR;
374ca955 221
340931cb 222 UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);
374ca955
A
223
224 if(U_SUCCESS(status)) {
225 status = U_ZERO_ERROR;
46f4442e
A
226 loc = ures_getByKey(loc, "collations", loc, &status);
227 ures_close(loc);
374ca955 228 if(status == U_ZERO_ERROR) { /* do the test - there are real elements */
374ca955
A
229 return TRUE;
230 }
374ca955
A
231 }
232 return FALSE;
233}
234
b75a7d8f
A
235static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
236 int32_t partialSKResult = 0;
237 UCharIterator sIter, tIter;
238 uint32_t sState[2], tState[2];
239 int32_t sSize = pieceSize, tSize = pieceSize;
374ca955 240 /*int32_t i = 0;*/
b75a7d8f
A
241 uint8_t sBuf[16384], tBuf[16384];
242 if(pieceSize > 16384) {
243 log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
244 *status = U_BUFFER_OVERFLOW_ERROR;
245 return UCOL_EQUAL;
246 }
247 *status = U_ZERO_ERROR;
248 sState[0] = 0; sState[1] = 0;
249 tState[0] = 0; tState[1] = 0;
250 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
251 uiter_setString(&sIter, source, sLen);
252 uiter_setString(&tIter, target, tLen);
253 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
254 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);
255
256 if(sState[0] != 0 || tState[0] != 0) {
374ca955 257 /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/
b75a7d8f 258 }
374ca955 259 /*log_verbose("%i ", i++);*/
b75a7d8f
A
260
261 partialSKResult = memcmp(sBuf, tBuf, pieceSize);
262 }
263
264 if(partialSKResult < 0) {
265 return UCOL_LESS;
266 } else if(partialSKResult > 0) {
267 return UCOL_GREATER;
268 } else {
269 return UCOL_EQUAL;
270 }
271}
272
273static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
274{
275 int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
276 int temp=0, gSortklen1=0,gSortklen2=0;
277 UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
278 uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
279 uint32_t sLen = u_strlen(source);
280 uint32_t tLen = u_strlen(target);
281 char buffer[256];
282 uint32_t len;
283 UErrorCode status = U_ZERO_ERROR;
284 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
285
286 UCharIterator sIter, tIter;
51004dcb
A
287
288 compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
289 if (compareResult != result) {
290 log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n",
291 compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1));
292 }
293 compareResulta = ucol_strcoll(myCollation, source, -1, target, -1);
294 if (compareResulta != result) {
295 log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
296 compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1));
297 }
298
b75a7d8f
A
299 uiter_setString(&sIter, source, sLen);
300 uiter_setString(&tIter, target, tLen);
301 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
302 if(compareResultIter != result) {
729e4ab9 303 log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
b75a7d8f
A
304 }
305
51004dcb
A
306 /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */
307 {
308 char utf8Source[256], utf8Target[256];
309 int32_t utf8SourceLen = 0, utf8TargetLen = 0;
310
311 u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
312 if(U_FAILURE(status)) { /* probably buffer is not big enough */
313 log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
b75a7d8f 314 } else {
51004dcb
A
315 u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
316 if(U_SUCCESS(status)) {
317 {
318 /* ucol_strcollUTF8 */
319 compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status);
320 if (U_FAILURE(status)) {
321 log_err("Error in ucol_strcollUTF8 with explicit length\n");
322 status = U_ZERO_ERROR;
323 } else if (compareResulta != result) {
324 log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n",
325 compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
326 }
327 compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status);
328 if (U_FAILURE(status)) {
329 log_err("Error in ucol_strcollUTF8 with null terminated strings\n");
330 status = U_ZERO_ERROR;
331 } else if (compareResulta != result) {
332 log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
333 compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
334 }
335 }
336
337 {
338 /* char iterator over UTF8 */
339 UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result;
340
341 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
342 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
343 compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
344
345 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
346 sIter.move(&sIter, 0, UITER_START);
347 tIter.move(&tIter, 0, UITER_START);
348 compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
349
350 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
351 if(compareResultUTF8Iter != compareResultIter) {
352 log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
353 }
354 if(compareResultUTF8Iter != compareResultUTF8IterNorm) {
355 log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
356 }
357 }
358 } else {
359 log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
360 }
361 if(U_FAILURE(status)) {
362 log_verbose("UTF-8 strcoll failed! Ignoring result\n");
363 }
b75a7d8f 364 }
b75a7d8f
A
365 }
366
367 /* testing the partial sortkeys */
368 if(1) { /*!QUICK*/
369 int32_t i = 0;
370 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
371 int32_t partialSizesSize = 1;
729e4ab9 372 if(getTestOption(QUICK_OPTION) <= 0) {
b75a7d8f
A
373 partialSizesSize = 7;
374 }
374ca955 375 /*log_verbose("partial sortkey test piecesize=");*/
b75a7d8f
A
376 for(i = 0; i < partialSizesSize; i++) {
377 UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
374ca955 378 /*log_verbose("%i ", partialSizes[i]);*/
b75a7d8f
A
379
380 partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
381 if(partialSKResult != result) {
73c04bcf
A
382 log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
383 partialSKResult, result,
b75a7d8f
A
384 aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
385 }
386
729e4ab9 387 if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) {
374ca955 388 /*log_verbose("N ");*/
b75a7d8f
A
389 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
390 partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
391 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
392 if(partialSKResult != partialNormalizedSKResult) {
393 log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
394 aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
395 }
396 }
397 }
374ca955 398 /*log_verbose("\n");*/
b75a7d8f
A
399 }
400
b75a7d8f
A
401 sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0);
402 sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);
403
404 sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
405 sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);
57a6839d 406 (void)sortklenmin; /* Suppress set but not used warning. */
b75a7d8f
A
407
408 sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
409 sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
410 ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
411 ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1);
412
413 sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
414 sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
415 ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
416 ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1);
417
418 /* Check that sort key generated with null terminated string is identical */
3d1f044b 419 /* to that generated with a length specified. */
b75a7d8f
A
420 if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
421 uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
422 log_err("Sort Keys from null terminated and explicit length strings differ.\n");
423 }
424
425 /*memcmp(sortKey1, sortKey2,sortklenmax);*/
426 temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
3d1f044b
A
427 gSortklen1 = (int)uprv_strlen((const char *)sortKey1)+1;
428 gSortklen2 = (int)uprv_strlen((const char *)sortKey2)+1;
b75a7d8f
A
429 if(sortklen1 != gSortklen1){
430 log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
73c04bcf 431 log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
b75a7d8f
A
432 }
433 if(sortklen2!= gSortklen2){
434 log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
73c04bcf 435 log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
b75a7d8f
A
436 }
437
438 if(temp < 0) {
439 keyResult=UCOL_LESS;
440 }
441 else if(temp > 0) {
442 keyResult= UCOL_GREATER;
443 }
444 else {
445 keyResult = UCOL_EQUAL;
446 }
447 reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
448 free(sortKey1);
449 free(sortKey2);
450 free(sortKey1a);
451 free(sortKey2a);
452
453}
454
455void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
456{
457 if(myCollation) {
458 doTestVariant(myCollation, source, target, result);
459 if(result == UCOL_LESS) {
460 doTestVariant(myCollation, target, source, UCOL_GREATER);
461 } else if(result == UCOL_GREATER) {
462 doTestVariant(myCollation, target, source, UCOL_LESS);
463 } else {
464 doTestVariant(myCollation, target, source, UCOL_EQUAL);
465 }
466 } else {
467 log_data_err("No collator! Any data around?\n");
468 }
469}
470
374ca955
A
471
472/**
473 * Return an integer array containing all of the collation orders
474 * returned by calls to next on the specified iterator
475 */
46f4442e 476OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength)
374ca955
A
477{
478 UErrorCode status;
479 int32_t order;
480 int32_t maxSize = 100;
481 int32_t size = 0;
46f4442e
A
482 int32_t offset = ucol_getOffset(iter);
483 OrderAndOffset *temp;
484 OrderAndOffset *orders =(OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
374ca955
A
485 status= U_ZERO_ERROR;
486
487
488 while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
489 {
490 if (size == maxSize)
491 {
492 maxSize *= 2;
46f4442e 493 temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
374ca955 494
46f4442e 495 memcpy(temp, orders, size * sizeof(OrderAndOffset));
374ca955
A
496 free(orders);
497 orders = temp;
498
499 }
500
46f4442e
A
501 orders[size].order = order;
502 orders[size].offset = offset;
503
504 offset = ucol_getOffset(iter);
505 size += 1;
374ca955
A
506 }
507
508 if (maxSize > size && size > 0)
509 {
46f4442e 510 temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * size);
374ca955 511
46f4442e 512 memcpy(temp, orders, size * sizeof(OrderAndOffset));
374ca955
A
513 free(orders);
514 orders = temp;
515
516
517 }
518
519 *orderLength = size;
520 return orders;
521}
522
523
524void
525backAndForth(UCollationElements *iter)
526{
527 /* Run through the iterator forwards and stick it into an array */
51004dcb 528 int32_t idx, o;
374ca955
A
529 UErrorCode status = U_ZERO_ERROR;
530 int32_t orderLength = 0;
46f4442e 531 OrderAndOffset *orders = getOrders(iter, &orderLength);
374ca955
A
532
533
534 /* Now go through it backwards and make sure we get the same values */
51004dcb 535 idx = orderLength;
374ca955
A
536 ucol_reset(iter);
537
538 /* synwee : changed */
46f4442e 539 while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
729e4ab9
A
540#if TEST_OFFSETS
541 int32_t offset =
542#endif
543 ucol_getOffset(iter);
46f4442e 544
51004dcb
A
545 idx -= 1;
546 if (o != orders[idx].order) {
374ca955 547 if (o == 0)
51004dcb 548 idx ++;
46f4442e 549 else {
51004dcb 550 while (idx > 0 && orders[-- idx].order == 0) {
46f4442e 551 /* nothing... */
374ca955 552 }
374ca955 553
51004dcb
A
554 if (o != orders[idx].order) {
555 log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", idx,
556 orders[idx].order, o);
46f4442e
A
557 goto bail;
558 }
374ca955
A
559 }
560 }
46f4442e
A
561
562#if TEST_OFFSETS
51004dcb
A
563 if (offset != orders[idx].offset) {
564 log_err("Mismatched offset at index %d: %d vs. %d\n", idx,
565 orders[idx].offset, offset);
46f4442e
A
566 goto bail;
567 }
568#endif
569
374ca955
A
570 }
571
51004dcb
A
572 while (idx != 0 && orders[idx - 1].order == 0) {
573 idx -= 1;
374ca955
A
574 }
575
51004dcb
A
576 if (idx != 0) {
577 log_err("Didn't get back to beginning - index is %d\n", idx);
374ca955
A
578
579 ucol_reset(iter);
580 log_err("\nnext: ");
46f4442e
A
581
582 if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER) {
374ca955
A
583 log_err("Error at %x\n", o);
584 }
46f4442e 585
374ca955 586 log_err("\nprev: ");
46f4442e
A
587
588 if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
374ca955
A
589 log_err("Error at %x\n", o);
590 }
46f4442e 591
374ca955
A
592 log_verbose("\n");
593 }
594
46f4442e 595bail:
374ca955
A
596 free(orders);
597}
598
46f4442e 599void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result) {
374ca955
A
600 UChar t1[2048] = {0};
601 UChar t2[2048] = {0};
602 UCollationElements *iter;
603 UErrorCode status = U_ZERO_ERROR;
604
605 uint32_t i = 0, j = 0;
606 log_verbose("testing sequence:\n");
607 for(i = 0; i < size; i++) {
608 log_verbose("%s\n", s[i]);
609 }
610
611 iter = ucol_openElements(coll, t1, u_strlen(t1), &status);
612 if (U_FAILURE(status)) {
613 log_err("Creation of iterator failed\n");
614 }
615 for(i = 0; i < size-1; i++) {
616 for(j = i+1; j < size; j++) {
617 u_unescape(s[i], t1, 2048);
618 u_unescape(s[j], t2, 2048);
619 doTest(coll, t1, t2, result);
620 /* synwee : added collation element iterator test */
621 ucol_setText(iter, t1, u_strlen(t1), &status);
622 backAndForth(iter);
623 ucol_setText(iter, t2, u_strlen(t2), &status);
624 backAndForth(iter);
625 }
626 }
627 ucol_closeElements(iter);
628}
629
46f4442e 630void genericOrderingTest(UCollator *coll, const char * const s[], uint32_t size) {
374ca955
A
631 genericOrderingTestWithResult(coll, s, size, UCOL_LESS);
632}
633
46f4442e 634void genericLocaleStarter(const char *locale, const char * const s[], uint32_t size) {
374ca955
A
635 UErrorCode status = U_ZERO_ERROR;
636 UCollator *coll = ucol_open(locale, &status);
637
638 log_verbose("Locale starter for %s\n", locale);
639
640 if(U_SUCCESS(status)) {
641 genericOrderingTest(coll, s, size);
642 } else if(status == U_FILE_ACCESS_ERROR) {
643 log_data_err("Is your data around?\n");
644 return;
645 } else {
646 log_err("Unable to open collator for locale %s\n", locale);
647 }
648 ucol_close(coll);
649}
650
46f4442e 651void genericLocaleStarterWithResult(const char *locale, const char * const s[], uint32_t size, UCollationResult result) {
374ca955
A
652 UErrorCode status = U_ZERO_ERROR;
653 UCollator *coll = ucol_open(locale, &status);
654
655 log_verbose("Locale starter for %s\n", locale);
656
657 if(U_SUCCESS(status)) {
658 genericOrderingTestWithResult(coll, s, size, result);
659 } else if(status == U_FILE_ACCESS_ERROR) {
660 log_data_err("Is your data around?\n");
661 return;
662 } else {
663 log_err("Unable to open collator for locale %s\n", locale);
664 }
665 ucol_close(coll);
666}
667
374ca955 668/* currently not used with options */
46f4442e 669void genericRulesStarterWithOptionsAndResult(const char *rules, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
374ca955
A
670 UErrorCode status = U_ZERO_ERROR;
671 UChar rlz[RULE_BUFFER_LEN] = { 0 };
672 uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
673 uint32_t i;
674
675 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
676
677 log_verbose("Rules starter for %s\n", rules);
678
679 if(U_SUCCESS(status)) {
680 log_verbose("Setting attributes\n");
681 for(i = 0; i < attsize; i++) {
682 ucol_setAttribute(coll, attrs[i], values[i], &status);
683 }
684
73c04bcf 685 genericOrderingTestWithResult(coll, s, size, result);
374ca955 686 } else {
729e4ab9 687 log_err_status(status, "Unable to open collator with rules %s\n", rules);
374ca955
A
688 }
689 ucol_close(coll);
690}
374ca955 691
46f4442e 692void genericLocaleStarterWithOptionsAndResult(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
374ca955
A
693 UErrorCode status = U_ZERO_ERROR;
694 uint32_t i;
695
696 UCollator *coll = ucol_open(locale, &status);
697
698 log_verbose("Locale starter for %s\n", locale);
699
700 if(U_SUCCESS(status)) {
701
702 log_verbose("Setting attributes\n");
703 for(i = 0; i < attsize; i++) {
704 ucol_setAttribute(coll, attrs[i], values[i], &status);
705 }
706
73c04bcf 707 genericOrderingTestWithResult(coll, s, size, result);
374ca955 708 } else {
729e4ab9 709 log_err_status(status, "Unable to open collator for locale %s\n", locale);
374ca955
A
710 }
711 ucol_close(coll);
712}
713
46f4442e 714void genericLocaleStarterWithOptions(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
73c04bcf
A
715 genericLocaleStarterWithOptionsAndResult(locale, s, size, attrs, values, attsize, UCOL_LESS);
716}
717
46f4442e 718void genericRulesStarterWithResult(const char *rules, const char * const s[], uint32_t size, UCollationResult result) {
374ca955
A
719 UErrorCode status = U_ZERO_ERROR;
720 UChar rlz[RULE_BUFFER_LEN] = { 0 };
721 uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
722
723 UCollator *coll = NULL;
724 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
725 log_verbose("Rules starter for %s\n", rules);
726
727 if(U_SUCCESS(status)) {
728 genericOrderingTestWithResult(coll, s, size, result);
729 ucol_close(coll);
730 } else if(status == U_FILE_ACCESS_ERROR) {
731 log_data_err("Is your data around?\n");
732 } else {
733 log_err("Unable to open collator with rules %s\n", rules);
734 }
735}
736
46f4442e 737void genericRulesStarter(const char *rules, const char * const s[], uint32_t size) {
73c04bcf 738 genericRulesStarterWithResult(rules, s, size, UCOL_LESS);
374ca955
A
739}
740
b75a7d8f
A
741static void TestTertiary()
742{
743 int32_t len,i;
b75a7d8f
A
744 UCollator *myCollation;
745 UErrorCode status=U_ZERO_ERROR;
46f4442e
A
746 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
747 UChar rules[sizeof(str)];
3d1f044b 748 len = (int32_t)strlen(str);
b75a7d8f
A
749 u_uastrcpy(rules, str);
750
751 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
752 if(U_FAILURE(status)){
729e4ab9 753 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
46f4442e 754 return;
b75a7d8f
A
755 }
756
757 ucol_setStrength(myCollation, UCOL_TERTIARY);
758 for (i = 0; i < 17 ; i++)
759 {
760 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
761 }
b75a7d8f
A
762 ucol_close(myCollation);
763 myCollation = 0;
764}
765
766static void TestPrimary( )
767{
768 int32_t len,i;
b75a7d8f
A
769 UCollator *myCollation;
770 UErrorCode status=U_ZERO_ERROR;
46f4442e
A
771 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
772 UChar rules[sizeof(str)];
3d1f044b 773 len = (int32_t)strlen(str);
b75a7d8f
A
774 u_uastrcpy(rules, str);
775
776 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
777 if(U_FAILURE(status)){
729e4ab9 778 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
46f4442e 779 return;
b75a7d8f
A
780 }
781 ucol_setStrength(myCollation, UCOL_PRIMARY);
782
783 for (i = 17; i < 26 ; i++)
784 {
785
786 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
787 }
b75a7d8f
A
788 ucol_close(myCollation);
789 myCollation = 0;
790}
791
792static void TestSecondary()
793{
794 int32_t i;
795 int32_t len;
b75a7d8f
A
796 UCollator *myCollation;
797 UErrorCode status=U_ZERO_ERROR;
46f4442e
A
798 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
799 UChar rules[sizeof(str)];
3d1f044b 800 len = (int32_t)strlen(str);
b75a7d8f
A
801 u_uastrcpy(rules, str);
802
803 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
804 if(U_FAILURE(status)){
729e4ab9 805 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
46f4442e 806 return;
b75a7d8f
A
807 }
808 ucol_setStrength(myCollation, UCOL_SECONDARY);
809 for (i = 26; i < 34 ; i++)
810 {
811 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
812 }
b75a7d8f
A
813 ucol_close(myCollation);
814 myCollation = 0;
815}
816
817static void TestIdentical()
818{
819 int32_t i;
820 int32_t len;
b75a7d8f
A
821 UCollator *myCollation;
822 UErrorCode status=U_ZERO_ERROR;
46f4442e
A
823 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
824 UChar rules[sizeof(str)];
3d1f044b 825 len = (int32_t)strlen(str);
b75a7d8f
A
826 u_uastrcpy(rules, str);
827
828 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
829 if(U_FAILURE(status)){
729e4ab9 830 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
46f4442e 831 return;
b75a7d8f
A
832 }
833 for(i= 34; i<37; i++)
834 {
835 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
836 }
b75a7d8f
A
837 ucol_close(myCollation);
838 myCollation = 0;
839}
840
841static void TestExtra()
842{
843 int32_t i, j;
844 int32_t len;
b75a7d8f
A
845 UCollator *myCollation;
846 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
847 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
848 UChar rules[sizeof(str)];
3d1f044b 849 len = (int32_t)strlen(str);
b75a7d8f
A
850 u_uastrcpy(rules, str);
851
852 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
853 if(U_FAILURE(status)){
729e4ab9 854 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
46f4442e 855 return;
b75a7d8f
A
856 }
857 ucol_setStrength(myCollation, UCOL_TERTIARY);
858 for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
859 {
860 for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
861 {
862
863 doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
864 }
865 }
b75a7d8f
A
866 ucol_close(myCollation);
867 myCollation = 0;
868}
869
870static void TestJB581(void)
871{
b75a7d8f
A
872 int32_t bufferLen = 0;
873 UChar source [100];
874 UChar target [100];
875 UCollationResult result = UCOL_EQUAL;
876 uint8_t sourceKeyArray [100];
877 uint8_t targetKeyArray [100];
878 int32_t sourceKeyOut = 0,
879 targetKeyOut = 0;
880 UCollator *myCollator = 0;
881 UErrorCode status = U_ZERO_ERROR;
882
883 /*u_uastrcpy(source, "This is a test.");*/
884 /*u_uastrcpy(target, "THISISATEST.");*/
885 u_uastrcpy(source, "THISISATEST.");
886 u_uastrcpy(target, "Thisisatest.");
887
888 myCollator = ucol_open("en_US", &status);
889 if (U_FAILURE(status)){
729e4ab9 890 log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
b75a7d8f
A
891 return;
892 }
893 result = ucol_strcoll(myCollator, source, -1, target, -1);
894 /* result is 1, secondary differences only for ignorable space characters*/
895 if (result != 1)
896 {
897 log_err("Comparing two strings with only secondary differences in C failed.\n");
898 }
899 /* To compare them with just primary differences */
900 ucol_setStrength(myCollator, UCOL_PRIMARY);
901 result = ucol_strcoll(myCollator, source, -1, target, -1);
902 /* result is 0 */
903 if (result != 0)
904 {
905 log_err("Comparing two strings with no differences in C failed.\n");
906 }
907 /* Now, do the same comparison with keys */
908 sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
57a6839d 909 (void)sourceKeyOut; /* Suppress set but not used warning. */
b75a7d8f 910 targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
b75a7d8f 911 bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
73c04bcf 912 if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0)
b75a7d8f
A
913 {
914 log_err("Comparing two strings with sort keys in C failed.\n");
915 }
916 ucol_close(myCollator);
917}
918
919static void TestJB1401(void)
920{
921 UCollator *myCollator = 0;
922 UErrorCode status = U_ZERO_ERROR;
923 static UChar NFD_UnsafeStartChars[] = {
924 0x0f73, /* Tibetan Vowel Sign II */
925 0x0f75, /* Tibetan Vowel Sign UU */
926 0x0f81, /* Tibetan Vowel Sign Reversed II */
927 0
928 };
929 int i;
930
931
932 myCollator = ucol_open("en_US", &status);
933 if (U_FAILURE(status)){
729e4ab9 934 log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
b75a7d8f
A
935 return;
936 }
937 ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
938 if (U_FAILURE(status)){
939 log_err("ERROR: Failed to set normalization mode ON for collator.\n");
940 return;
941 }
942
943 for (i=0; ; i++) {
944 UChar c;
945 UChar X[4];
946 UChar Y[20];
947 UChar Z[20];
948
949 /* Get the next funny character to be tested, and set up the
950 * three test strings X, Y, Z, consisting of an A-grave + test char,
951 * in original form, NFD, and then NFC form.
952 */
953 c = NFD_UnsafeStartChars[i];
954 if (c==0) {break;}
955
956 X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/
957
958 unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status);
959 unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status);
960 if (U_FAILURE(status)){
961 log_err("ERROR: Failed to normalize test of character %x\n", c);
962 return;
963 }
964
965 /* Collation test. All three strings should be equal.
966 * doTest does both strcoll and sort keys, with params in both orders.
967 */
968 doTest(myCollator, X, Y, UCOL_EQUAL);
969 doTest(myCollator, X, Z, UCOL_EQUAL);
970 doTest(myCollator, Y, Z, UCOL_EQUAL);
971
972 /* Run collation element iterators over the three strings. Results should be same for each.
973 */
974 {
975 UCollationElements *ceiX, *ceiY, *ceiZ;
976 int32_t ceX, ceY, ceZ;
977 int j;
978
979 ceiX = ucol_openElements(myCollator, X, -1, &status);
980 ceiY = ucol_openElements(myCollator, Y, -1, &status);
981 ceiZ = ucol_openElements(myCollator, Z, -1, &status);
982 if (U_FAILURE(status)) {
983 log_err("ERROR: uucol_openElements failed.\n");
984 return;
985 }
986
987 for (j=0;; j++) {
988 ceX = ucol_next(ceiX, &status);
989 ceY = ucol_next(ceiY, &status);
990 ceZ = ucol_next(ceiZ, &status);
991 if (U_FAILURE(status)) {
992 log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
993 break;
994 }
995 if (ceX != ceY || ceY != ceZ) {
996 log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
997 break;
998 }
999 if (ceX == UCOL_NULLORDER) {
1000 break;
1001 }
1002 }
1003 ucol_closeElements(ceiX);
1004 ucol_closeElements(ceiY);
1005 ucol_closeElements(ceiZ);
1006 }
1007 }
1008 ucol_close(myCollator);
1009}
1010
1011
1012
/**
 * Placeholder for the former test of the [variable top] tag in rule syntax.
 * The idea of the disabled code: with alternate handling set to shifted,
 * code points before [variable top] should produce a primary CE of 0.
 *
 * The entire body is compiled out with #if 0, so this function currently
 * does nothing.
 */
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char str[] = "&z = [variable top]";
    UChar rules[sizeof(str)];
    int len = strlen(str);
    UErrorCode status = U_ZERO_ERROR;
    UCollator *enCollation;
    UCollator *myCollation;
    UChar source[1];
    UChar ch;
    uint8_t expected[20];
    uint8_t result[20];

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n",
                       myErrorName(status));
        return;
    }

    myCollation = ucol_openRules(rules, len, UCOL_OFF,
                                 UCOL_PRIMARY, NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n",
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);

    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result,
                          sizeof(result));
    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    for (ch = 'a'; ch < 'z'; ch++) {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
                    ch);
        }
    }

    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
1095
1096/**
1097 * Tests surrogate support.
1098 * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
1099 * Therefore, another (unassigned) code point was used for this test.
1100 */
1101static void TestSurrogates(void)
1102{
46f4442e 1103 static const char str[] =
b75a7d8f 1104 "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
3d1f044b 1105 int len = (int)strlen(str);
b75a7d8f 1106 int rlen = 0;
46f4442e 1107 UChar rules[sizeof(str)];
b75a7d8f
A
1108 UCollator *myCollation;
1109 UCollator *enCollation;
1110 UErrorCode status = U_ZERO_ERROR;
1111 UChar source[][4] =
1112 {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
1113 UChar target[][4] =
1114 {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
1115 int count = 0;
1116 uint8_t enresult[20], myresult[20];
1117 int enlen, mylen;
1118
1119 /* tests for open rules with surrogate rules */
b75a7d8f
A
1120 rlen = u_unescape(str, rules, len);
1121
1122 enCollation = ucol_open("en_US", &status);
46f4442e 1123 if (U_FAILURE(status)) {
729e4ab9 1124 log_err_status(status, "ERROR: in creation of collator :%s\n",
46f4442e
A
1125 myErrorName(status));
1126 return;
1127 }
b75a7d8f
A
1128 myCollation = ucol_openRules(rules, rlen, UCOL_OFF,
1129 UCOL_TERTIARY,NULL, &status);
1130 if (U_FAILURE(status)) {
46f4442e 1131 ucol_close(enCollation);
b75a7d8f
A
1132 log_err("ERROR: in creation of rule based collator :%s\n",
1133 myErrorName(status));
1134 return;
1135 }
1136
1137 /*
1138 this test is to verify the supplementary sort key order in the english
1139 collator
1140 */
1141 log_verbose("start of english collation supplementary characters test\n");
1142 while (count < 2) {
1143 doTest(enCollation, source[count], target[count], UCOL_LESS);
1144 count ++;
1145 }
1146 doTest(enCollation, source[count], target[count], UCOL_GREATER);
1147
1148 log_verbose("start of tailored collation supplementary characters test\n");
1149 count = 0;
1150 /* tests getting collation elements for surrogates for tailored rules */
1151 while (count < 4) {
1152 doTest(myCollation, source[count], target[count], UCOL_LESS);
1153 count ++;
1154 }
1155
1156 /* tests that \uD800\uDC02 still has the same value, not changed */
1157 enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
1158 mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
1159 if (enlen != mylen ||
1160 uprv_memcmp(enresult, myresult, enlen) != 0) {
1161 log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
1162 }
1163
b75a7d8f
A
1164 ucol_close(enCollation);
1165 ucol_close(myCollation);
1166 enCollation = NULL;
1167 myCollation = NULL;
1168}
1169
1170/*
1171 *### TODO: Add more invalid rules to test all different scenarios.
1172 *
1173 */
1174static void
1175TestInvalidRules(){
1176#define MAX_ERROR_STATES 2
1177
1178 static const char* rulesArr[MAX_ERROR_STATES] = {
1179 "& C < ch, cH, Ch[this should fail]<d",
1180 "& C < ch, cH, & Ch[variable top]"
1181 };
1182 static const char* preContextArr[MAX_ERROR_STATES] = {
57a6839d
A
1183 " C < ch, cH, Ch",
1184 "& C < ch, cH",
b75a7d8f
A
1185
1186 };
1187 static const char* postContextArr[MAX_ERROR_STATES] = {
57a6839d
A
1188 "[this should fa",
1189 ", & Ch[variable"
b75a7d8f
A
1190 };
1191 int i;
1192
1193 for(i = 0;i<MAX_ERROR_STATES;i++){
1194 UChar rules[1000] = { '\0' };
1195 UChar preContextExp[1000] = { '\0' };
1196 UChar postContextExp[1000] = { '\0' };
1197 UParseError parseError;
1198 UErrorCode status = U_ZERO_ERROR;
1199 UCollator* coll=0;
3d1f044b
A
1200 u_charsToUChars(rulesArr[i], rules, (int32_t)uprv_strlen(rulesArr[i]) + 1);
1201 u_charsToUChars(preContextArr[i], preContextExp, (int32_t)uprv_strlen(preContextArr[i]) + 1);
1202 u_charsToUChars(postContextArr[i], postContextExp, (int32_t)uprv_strlen(postContextArr[i]) + 1);
b75a7d8f 1203 /* clean up stuff in parseError */
3d1f044b
A
1204 u_memset(parseError.preContext, 0x0000, U_PARSE_CONTEXT_LEN);
1205 u_memset(parseError.postContext, 0x0000, U_PARSE_CONTEXT_LEN);
b75a7d8f 1206 /* open the rules and test */
3d1f044b 1207 coll = ucol_openRules(rules, u_strlen(rules), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
57a6839d 1208 (void)coll; /* Suppress set but not used warning. */
b75a7d8f 1209 if(u_strcmp(parseError.preContext,preContextExp)!=0){
57a6839d
A
1210 log_err_status(status, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1211 aescstrdup(parseError.preContext, -1));
b75a7d8f
A
1212 }
1213 if(u_strcmp(parseError.postContext,postContextExp)!=0){
57a6839d
A
1214 log_err_status(status, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1215 aescstrdup(parseError.postContext, -1));
b75a7d8f
A
1216 }
1217 }
1218}
1219
1220static void
1221TestJitterbug1098(){
1222 UChar rule[1000];
1223 UCollator* c1 = NULL;
1224 UErrorCode status = U_ZERO_ERROR;
1225 UParseError parseError;
1226 char preContext[200]={0};
1227 char postContext[200]={0};
1228 int i=0;
1229 const char* rules[] = {
1230 "&''<\\\\",
1231 "&\\'<\\\\",
1232 "&\\\"<'\\'",
1233 "&'\"'<\\'",
57a6839d 1234 NULL
b75a7d8f
A
1235
1236 };
1237 const UCollationResult results1098[] = {
1238 UCOL_LESS,
1239 UCOL_LESS,
1240 UCOL_LESS,
1241 UCOL_LESS,
1242 };
1243 const UChar input[][2]= {
1244 {0x0027,0x005c},
1245 {0x0027,0x005c},
1246 {0x0022,0x005c},
1247 {0x0022,0x0027},
1248 };
1249 UChar X[2] ={0};
1250 UChar Y[2] ={0};
1251 u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
1252 u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
1253 for(;rules[i]!=0;i++){
1254 u_uastrcpy(rule, rules[i]);
1255 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
1256 if(U_FAILURE(status)){
729e4ab9 1257 log_err_status(status, "Could not parse the rules syntax. Error: %s\n", u_errorName(status));
374ca955
A
1258
1259 if (status == U_PARSE_ERROR) {
1260 u_UCharsToChars(parseError.preContext,preContext,20);
1261 u_UCharsToChars(parseError.postContext,postContext,20);
1262 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
1263 }
1264
b75a7d8f
A
1265 return;
1266 }
1267 X[0] = input[i][0];
1268 Y[0] = input[i][1];
1269 doTest(c1,X,Y,results1098[i]);
1270 ucol_close(c1);
1271 }
1272}
1273
374ca955
A
1274static void
1275TestFCDCrash(void) {
1276 static const char *test[] = {
1277 "Gr\\u00F6\\u00DFe",
1278 "Grossist"
1279 };
1280
374ca955
A
1281 UErrorCode status = U_ZERO_ERROR;
1282 UCollator *coll = ucol_open("es", &status);
1283 if(U_FAILURE(status)) {
729e4ab9 1284 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
374ca955
A
1285 return;
1286 }
1287 ucol_close(coll);
1288 coll = NULL;
46f4442e 1289 ctest_resetICU();
374ca955
A
1290 coll = ucol_open("de_DE", &status);
1291 if(U_FAILURE(status)) {
729e4ab9 1292 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
374ca955
A
1293 return;
1294 }
1295 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1296 genericOrderingTest(coll, test, 2);
1297 ucol_close(coll);
374ca955 1298}
b75a7d8f 1299
46f4442e
A
1300/*static UBool
1301find(UEnumeration* list, const char* str, UErrorCode* status){
1302 const char* value = NULL;
1303 int32_t length=0;
1304 if(U_FAILURE(*status)){
1305 return FALSE;
1306 }
1307 uenum_reset(list, status);
1308 while( (value= uenum_next(list, &length, status))!=NULL){
1309 if(strcmp(value, str)==0){
1310 return TRUE;
1311 }
1312 }
1313 return FALSE;
1314}*/
1315
1316static void TestJ5298(void)
1317{
1318 UErrorCode status = U_ZERO_ERROR;
1319 char input[256], output[256];
1320 UBool isAvailable;
1321 int32_t i = 0;
1322 UEnumeration* values = NULL;
1323 const char *keywordValue = NULL;
1324 log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable());
1325 values = ucol_getKeywordValues("collation", &status);
b331163b
A
1326 while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
1327 if (strncmp(keywordValue, "private-", 8) == 0) {
1328 log_err("ucol_getKeywordValues() returns private collation keyword: %s\n", keywordValue);
1329 }
1330 }
46f4442e
A
1331 for (i = 0; i < ucol_countAvailable(); i++) {
1332 uenum_reset(values, &status);
1333 while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
1334 strcpy(input, ucol_getAvailable(i));
1335 if (strcmp(keywordValue, "standard") != 0) {
1336 strcat(input, "@collation=");
1337 strcat(input, keywordValue);
1338 }
1339
1340 ucol_getFunctionalEquivalent(output, 256, "collation", input, &isAvailable, &status);
1341 if (strcmp(input, output) == 0) { /* Unique locale, print it out */
1342 log_verbose("%s, \n", output);
1343 }
1344 }
1345 }
1346 uenum_close(values);
1347 log_verbose("\n");
1348}
0f5d89e8
A
1349
/*
 * NULL-terminated list of locale IDs with unusual or invalid keyword values.
 * The trailing comment on each entry records the outcome that was observed
 * for ucol_open() with that ID.
 */
static const char* badKeyLocales[] = {
    "@calendar=japanese;collation=search",  /* ucol_open OK */
    "@calendar=japanese",                   /* ucol_open OK */
    "en@calendar=x",                        /* ucol_open OK */
    "ja@calendar=x",                        /* ucol_open OK */
    "en@collation=x",                       /* ucol_open OK */
    "ja@collation=x",                       /* ucol_open OK */
    "en@calendar=\x81",                     /* ucol_open OK */
    "ja@collation=private-kana",            /* ucol_open fails, verify it does not crash */
    "en@collation=\x80",                    /* (x80 undef in ASCII,EBCDIC) ucol_open fails, verify it does not crash */
    "ja@calendar=japanese;collation=\x81",  /* ucol_open fails, verify it does not crash */
    NULL
};
1363
3d1f044b
A
1364// Mainly this is to check that we don't have a crash, but we check
1365// for correct NULL return and FAILURE/SUCCESS status as a bonus.
0f5d89e8
A
1366static void TestBadKey(void)
1367{
1368 const char* badLoc;
1369 const char** badLocsPtr = badKeyLocales;
1370 while ((badLoc = *badLocsPtr++) != NULL) {
1371 UErrorCode status = U_ZERO_ERROR;
1372 UCollator* uc = ucol_open(badLoc, &status);
1373 if ( U_SUCCESS(status) ) {
1374 if (uc == NULL) {
1375 log_err("ucol_open sets SUCCESS but returns NULL, locale: %s\n", badLoc);
1376 }
1377 ucol_close(uc);
1378 } else if (uc != NULL) {
1379 log_err("ucol_open sets FAILURE but returns non-NULL, locale: %s\n", badLoc);
1380 }
1381 }
1382}
b75a7d8f 1383#endif /* #if !UCONFIG_NO_COLLATION */