]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/callcoll.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / test / cintltst / callcoll.c
CommitLineData
b75a7d8f
A
1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CALLCOLL.C
9*
10* Modification History:
11* Name Description
12* Madhu Katragadda Ported for C API
13*********************************************************************************
14*/
15
16/*
17 * Important: This file is included into intltest/allcoll.cpp so that the
18 * test data is shared. This makes it easier to maintain the test data,
19 * especially since the Unicode data must be portable and quoted character
20 * literals will not work.
21 * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
22 * that must prevent the actual code in here from being part of the
23 * allcoll.cpp compilation.
24 */
25
/**
 * CollationDummyTest is a third level test class. This tests creation of
 * a customized collator object. For example, number 1 to be sorted
 * equivalent to word 'one'.
 */
31
32#include <string.h>
33#include <stdlib.h>
34
35#include "unicode/utypes.h"
36
37#if !UCONFIG_NO_COLLATION
38
39#include "unicode/ucol.h"
40#include "unicode/uloc.h"
41#include "unicode/ucoleitr.h"
42#include "unicode/ustring.h"
43
44#include "cintltst.h"
45#include "ccolltst.h"
46#include "callcoll.h"
47#include "calldata.h"
48#include "cstring.h"
49#include "cmemory.h"
50#include "ucol_imp.h"
51
/*
 * Forward declarations of the test cases defined in this file.
 */

/* Comparisons at PRIMARY, SECONDARY, TERTIARY and IDENTICAL strength. */
static void TestPrimary(void);
static void TestSecondary(void);
static void TestTertiary(void);
static void TestIdentical(void);

/* Extra tests. */
static void TestExtra(void);

/* Regression tests for Jitterbug 581, 1401 and 1098. */
static void TestJB581(void);
static void TestJB1401(void);
static void TestJitterbug1098(void);

/* [variable top] in the rule syntax. */
static void TestVariableTop(void);

/* Surrogate pairs. */
static void TestSurrogates(void);

/* Rules that are expected to be rejected by the rule parser. */
static void TestInvalidRules(void);
82
83const UCollationResult results[] = {
84 UCOL_LESS,
85 UCOL_LESS, /*UCOL_GREATER,*/
86 UCOL_LESS,
87 UCOL_LESS,
88 UCOL_LESS,
89 UCOL_LESS,
90 UCOL_LESS,
91 UCOL_GREATER,
92 UCOL_GREATER,
93 UCOL_LESS, /* 10 */
94 UCOL_GREATER,
95 UCOL_LESS,
96 UCOL_GREATER,
97 UCOL_GREATER,
98 UCOL_LESS,
99 UCOL_LESS,
100 UCOL_LESS,
101 /* test primary > 17 */
102 UCOL_EQUAL,
103 UCOL_EQUAL,
104 UCOL_EQUAL, /* 20 */
105 UCOL_LESS,
106 UCOL_LESS,
107 UCOL_EQUAL,
108 UCOL_EQUAL,
109 UCOL_EQUAL,
110 UCOL_LESS,
111 /* test secondary > 26 */
112 UCOL_EQUAL,
113 UCOL_EQUAL,
114 UCOL_EQUAL,
115 UCOL_EQUAL,
116 UCOL_EQUAL, /* 30 */
117 UCOL_EQUAL,
118 UCOL_LESS,
119 UCOL_EQUAL, /* 34 */
120 UCOL_EQUAL,
121 UCOL_EQUAL,
122 UCOL_LESS /* 37 */
123};
124
125
126void addAllCollTest(TestNode** root)
127{
128
129
130 addTest(root, &TestPrimary, "tscoll/callcoll/TestPrimary");
131 addTest(root, &TestSecondary, "tscoll/callcoll/TestSecondary");
132 addTest(root, &TestTertiary, "tscoll/callcoll/TestTertiary");
133 addTest(root, &TestIdentical, "tscoll/callcoll/TestIdentical");
134 addTest(root, &TestExtra, "tscoll/callcoll/TestExtra");
135 addTest(root, &TestJB581, "tscoll/callcoll/TestJB581");
136 addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop");
137 addTest(root, &TestSurrogates, "tscoll/callcoll/TestSurrogates");
138 addTest(root, &TestInvalidRules, "tscoll/callcoll/TestInvalidRules");
139 addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401");
140 addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098");
141
142 }
143
144static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
145 int32_t partialSKResult = 0;
146 UCharIterator sIter, tIter;
147 uint32_t sState[2], tState[2];
148 int32_t sSize = pieceSize, tSize = pieceSize;
149 int32_t i = 0;
150 uint8_t sBuf[16384], tBuf[16384];
151 if(pieceSize > 16384) {
152 log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
153 *status = U_BUFFER_OVERFLOW_ERROR;
154 return UCOL_EQUAL;
155 }
156 *status = U_ZERO_ERROR;
157 sState[0] = 0; sState[1] = 0;
158 tState[0] = 0; tState[1] = 0;
159 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
160 uiter_setString(&sIter, source, sLen);
161 uiter_setString(&tIter, target, tLen);
162 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
163 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);
164
165 if(sState[0] != 0 || tState[0] != 0) {
166 log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);
167 }
168 log_verbose("%i ", i++);
169
170 partialSKResult = memcmp(sBuf, tBuf, pieceSize);
171 }
172
173 if(partialSKResult < 0) {
174 return UCOL_LESS;
175 } else if(partialSKResult > 0) {
176 return UCOL_GREATER;
177 } else {
178 return UCOL_EQUAL;
179 }
180}
181
182static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
183{
184 int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
185 int temp=0, gSortklen1=0,gSortklen2=0;
186 UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
187 uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
188 uint32_t sLen = u_strlen(source);
189 uint32_t tLen = u_strlen(target);
190 char buffer[256];
191 uint32_t len;
192 UErrorCode status = U_ZERO_ERROR;
193 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
194
195 UCharIterator sIter, tIter;
196 uiter_setString(&sIter, source, sLen);
197 uiter_setString(&tIter, target, tLen);
198 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
199 if(compareResultIter != result) {
200 log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
201 }
202
203 /* convert the strings to UTF-8 and do try comparing with char iterator */
204 if(QUICK <= 0) { /*!QUICK*/
205 char utf8Source[256], utf8Target[256];
206 int32_t utf8SourceLen = 0, utf8TargetLen = 0;
207 u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
208 if(U_FAILURE(status)) { /* probably buffer is not big enough */
209 log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
210 } else {
211 u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
212 if(U_SUCCESS(status)) { /* probably buffer is not big enough */
213 UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;
214 /*UCharIterator sIter, tIter;*/
215 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
216 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
217 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
218 /*uiter_setString(&sIter, source, sLen);
219 uiter_setString(&tIter, target, tLen);*/
220 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
221 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
222 sIter.move(&sIter, 0, UITER_START);
223 tIter.move(&tIter, 0, UITER_START);
224 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
225 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
226 if(compareResultUTF8 != compareResultIter) {
227 log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
228 }
229 if(compareResultUTF8 != compareResultUTF8Norm) {
230 log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
231 }
232 } else {
233 log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
234 }
235 if(U_FAILURE(status)) {
236 log_verbose("UTF-8 strcoll failed! Ignoring result\n");
237 }
238 }
239 }
240
241 /* testing the partial sortkeys */
242 if(1) { /*!QUICK*/
243 int32_t i = 0;
244 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
245 int32_t partialSizesSize = 1;
246 if(QUICK <= 0) {
247 partialSizesSize = 7;
248 }
249 log_verbose("partial sortkey test piecesize=");
250 for(i = 0; i < partialSizesSize; i++) {
251 UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
252 log_verbose("%i ", partialSizes[i]);
253
254 partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
255 if(partialSKResult != result) {
256 log_err("Partial sortkey comparison returned wrong result: %s, %s (size %i)\n",
257 aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
258 }
259
260 if(QUICK <= 0 && norm != UCOL_ON) {
261 log_verbose("N ");
262 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
263 partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
264 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
265 if(partialSKResult != partialNormalizedSKResult) {
266 log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
267 aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
268 }
269 }
270 }
271 log_verbose("\n");
272 }
273
274
275 compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
276 compareResulta = ucol_strcoll(myCollation, source, -1, target, -1);
277 if (compareResult != compareResulta) {
278 log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
279 }
280
281 sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0);
282 sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);
283
284 sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
285 sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);
286
287 sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
288 sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
289 ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
290 ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1);
291
292 sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
293 sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
294 ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
295 ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1);
296
297 /* Check that sort key generated with null terminated string is identical */
298 /* to that generted with a length specified. */
299 if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
300 uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
301 log_err("Sort Keys from null terminated and explicit length strings differ.\n");
302 }
303
304 /*memcmp(sortKey1, sortKey2,sortklenmax);*/
305 temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
306 gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
307 gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
308 if(sortklen1 != gSortklen1){
309 log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
310 log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation, sortKey1, buffer, &len));
311 }
312 if(sortklen2!= gSortklen2){
313 log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
314 log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation, sortKey2, buffer, &len));
315 }
316
317 if(temp < 0) {
318 keyResult=UCOL_LESS;
319 }
320 else if(temp > 0) {
321 keyResult= UCOL_GREATER;
322 }
323 else {
324 keyResult = UCOL_EQUAL;
325 }
326 reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
327 free(sortKey1);
328 free(sortKey2);
329 free(sortKey1a);
330 free(sortKey2a);
331
332}
333
334void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
335{
336 if(myCollation) {
337 doTestVariant(myCollation, source, target, result);
338 if(result == UCOL_LESS) {
339 doTestVariant(myCollation, target, source, UCOL_GREATER);
340 } else if(result == UCOL_GREATER) {
341 doTestVariant(myCollation, target, source, UCOL_LESS);
342 } else {
343 doTestVariant(myCollation, target, source, UCOL_EQUAL);
344 }
345 } else {
346 log_data_err("No collator! Any data around?\n");
347 }
348}
349
350static void TestTertiary()
351{
352 int32_t len,i;
353 UChar *rules;
354 UCollator *myCollation;
355 UErrorCode status=U_ZERO_ERROR;
356 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
357 len = strlen(str);
358 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
359 u_uastrcpy(rules, str);
360
361 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
362 if(U_FAILURE(status)){
363 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
364 }
365
366 ucol_setStrength(myCollation, UCOL_TERTIARY);
367 for (i = 0; i < 17 ; i++)
368 {
369 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
370 }
371 free(rules);
372 ucol_close(myCollation);
373 myCollation = 0;
374}
375
376static void TestPrimary( )
377{
378 int32_t len,i;
379 UChar *rules;
380 UCollator *myCollation;
381 UErrorCode status=U_ZERO_ERROR;
382 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
383 len = strlen(str);
384 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
385 u_uastrcpy(rules, str);
386
387 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
388 if(U_FAILURE(status)){
389 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
390 }
391 ucol_setStrength(myCollation, UCOL_PRIMARY);
392
393 for (i = 17; i < 26 ; i++)
394 {
395
396 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
397 }
398 free(rules);
399 ucol_close(myCollation);
400 myCollation = 0;
401}
402
403static void TestSecondary()
404{
405 int32_t i;
406 int32_t len;
407 UChar *rules;
408 UCollator *myCollation;
409 UErrorCode status=U_ZERO_ERROR;
410 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
411 len = strlen(str);
412 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
413 u_uastrcpy(rules, str);
414
415 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
416 if(U_FAILURE(status)){
417 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
418 }
419 ucol_setStrength(myCollation, UCOL_SECONDARY);
420 for (i = 26; i < 34 ; i++)
421 {
422 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
423 }
424 free(rules);
425 ucol_close(myCollation);
426 myCollation = 0;
427}
428
429static void TestIdentical()
430{
431 int32_t i;
432 int32_t len;
433 UChar *rules = 0;
434 UCollator *myCollation;
435 UErrorCode status=U_ZERO_ERROR;
436 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
437 len = strlen(str);
438 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
439 u_uastrcpy(rules, str);
440
441 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
442 if(U_FAILURE(status)){
443 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
444 }
445 for(i= 34; i<37; i++)
446 {
447 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
448 }
449 free(rules);
450 ucol_close(myCollation);
451 myCollation = 0;
452}
453
454static void TestExtra()
455{
456 int32_t i, j;
457 int32_t len;
458 UChar *rules;
459 UCollator *myCollation;
460 UErrorCode status = U_ZERO_ERROR;
461 const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
462 len = strlen(str);
463 rules=(UChar*)malloc(sizeof(UChar*) * (len+1));
464 u_uastrcpy(rules, str);
465
466 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
467 if(U_FAILURE(status)){
468 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
469 }
470 ucol_setStrength(myCollation, UCOL_TERTIARY);
471 for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
472 {
473 for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
474 {
475
476 doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
477 }
478 }
479 free(rules);
480 ucol_close(myCollation);
481 myCollation = 0;
482}
483
484static void TestJB581(void)
485{
486 UChar dispName [100];
487 int32_t bufferLen = 0;
488 UChar source [100];
489 UChar target [100];
490 UCollationResult result = UCOL_EQUAL;
491 uint8_t sourceKeyArray [100];
492 uint8_t targetKeyArray [100];
493 int32_t sourceKeyOut = 0,
494 targetKeyOut = 0;
495 UCollator *myCollator = 0;
496 UErrorCode status = U_ZERO_ERROR;
497
498 /*u_uastrcpy(source, "This is a test.");*/
499 /*u_uastrcpy(target, "THISISATEST.");*/
500 u_uastrcpy(source, "THISISATEST.");
501 u_uastrcpy(target, "Thisisatest.");
502
503 myCollator = ucol_open("en_US", &status);
504 if (U_FAILURE(status)){
505 bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status);
506 /*Report the error with display name... */
507 log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName);
508 return;
509 }
510 result = ucol_strcoll(myCollator, source, -1, target, -1);
511 /* result is 1, secondary differences only for ignorable space characters*/
512 if (result != 1)
513 {
514 log_err("Comparing two strings with only secondary differences in C failed.\n");
515 }
516 /* To compare them with just primary differences */
517 ucol_setStrength(myCollator, UCOL_PRIMARY);
518 result = ucol_strcoll(myCollator, source, -1, target, -1);
519 /* result is 0 */
520 if (result != 0)
521 {
522 log_err("Comparing two strings with no differences in C failed.\n");
523 }
524 /* Now, do the same comparison with keys */
525 sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
526 targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
527 result = 0;
528 bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
529 result = memcmp(sourceKeyArray, targetKeyArray, bufferLen);
530 if (result != 0)
531 {
532 log_err("Comparing two strings with sort keys in C failed.\n");
533 }
534 ucol_close(myCollator);
535}
536
537static void TestJB1401(void)
538{
539 UCollator *myCollator = 0;
540 UErrorCode status = U_ZERO_ERROR;
541 static UChar NFD_UnsafeStartChars[] = {
542 0x0f73, /* Tibetan Vowel Sign II */
543 0x0f75, /* Tibetan Vowel Sign UU */
544 0x0f81, /* Tibetan Vowel Sign Reversed II */
545 0
546 };
547 int i;
548
549
550 myCollator = ucol_open("en_US", &status);
551 if (U_FAILURE(status)){
552 int32_t bufferLen = 0;
553 UChar dispName [100];
554 bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status);
555 /*Report the error with display name... */
556 log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName);
557 return;
558 }
559 ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
560 if (U_FAILURE(status)){
561 log_err("ERROR: Failed to set normalization mode ON for collator.\n");
562 return;
563 }
564
565 for (i=0; ; i++) {
566 UChar c;
567 UChar X[4];
568 UChar Y[20];
569 UChar Z[20];
570
571 /* Get the next funny character to be tested, and set up the
572 * three test strings X, Y, Z, consisting of an A-grave + test char,
573 * in original form, NFD, and then NFC form.
574 */
575 c = NFD_UnsafeStartChars[i];
576 if (c==0) {break;}
577
578 X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/
579
580 unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status);
581 unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status);
582 if (U_FAILURE(status)){
583 log_err("ERROR: Failed to normalize test of character %x\n", c);
584 return;
585 }
586
587 /* Collation test. All three strings should be equal.
588 * doTest does both strcoll and sort keys, with params in both orders.
589 */
590 doTest(myCollator, X, Y, UCOL_EQUAL);
591 doTest(myCollator, X, Z, UCOL_EQUAL);
592 doTest(myCollator, Y, Z, UCOL_EQUAL);
593
594 /* Run collation element iterators over the three strings. Results should be same for each.
595 */
596 {
597 UCollationElements *ceiX, *ceiY, *ceiZ;
598 int32_t ceX, ceY, ceZ;
599 int j;
600
601 ceiX = ucol_openElements(myCollator, X, -1, &status);
602 ceiY = ucol_openElements(myCollator, Y, -1, &status);
603 ceiZ = ucol_openElements(myCollator, Z, -1, &status);
604 if (U_FAILURE(status)) {
605 log_err("ERROR: uucol_openElements failed.\n");
606 return;
607 }
608
609 for (j=0;; j++) {
610 ceX = ucol_next(ceiX, &status);
611 ceY = ucol_next(ceiY, &status);
612 ceZ = ucol_next(ceiZ, &status);
613 if (U_FAILURE(status)) {
614 log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
615 break;
616 }
617 if (ceX != ceY || ceY != ceZ) {
618 log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
619 break;
620 }
621 if (ceX == UCOL_NULLORDER) {
622 break;
623 }
624 }
625 ucol_closeElements(ceiX);
626 ucol_closeElements(ceiY);
627 ucol_closeElements(ceiZ);
628 }
629 }
630 ucol_close(myCollator);
631}
632
633
634
635/**
636* Tests the [variable top] tag in rule syntax. Since the default [alternate]
637* tag has the value shifted, any codepoints before [variable top] should give
638* a primary ce of 0.
639*/
640static void TestVariableTop(void)
641{
642 const char *str = "&z = [variable top]";
643 int len = strlen(str);
644 UChar *rules;
645 UCollator *myCollation;
646 UCollator *enCollation;
647 UErrorCode status = U_ZERO_ERROR;
648 UChar source[1];
649 UChar ch;
650 uint8_t result[20];
651 uint8_t expected[20];
652
653 rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
654 u_uastrcpy(rules, str);
655
656 enCollation = ucol_open("en_US", &status);
657 myCollation = ucol_openRules(rules, len, UCOL_OFF,
658 UCOL_PRIMARY,NULL, &status);
659 if (U_FAILURE(status)) {
660 log_err("ERROR: in creation of rule based collator :%s\n",
661 myErrorName(status));
662 return;
663 }
664
665 ucol_setStrength(enCollation, UCOL_PRIMARY);
666 ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
667 &status);
668 ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
669 &status);
670
671 if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
672 UCOL_SHIFTED || U_FAILURE(status)) {
673 log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
674 }
675
676 uprv_memset(expected, 0, 20);
677
678 /* space is supposed to be a variable */
679 source[0] = ' ';
680 len = ucol_getSortKey(enCollation, source, 1, result,
681 sizeof(result));
682
683 if (uprv_memcmp(expected, result, len) != 0) {
684 log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
685 }
686
687 ch = 'a';
688 while (ch < 'z') {
689 source[0] = ch;
690 len = ucol_getSortKey(myCollation, source, 1, result,
691 sizeof(result));
692 if (uprv_memcmp(expected, result, len) != 0) {
693 log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
694 ch);
695 }
696 ch ++;
697 }
698
699 free(rules);
700 ucol_close(enCollation);
701 ucol_close(myCollation);
702 enCollation = NULL;
703 myCollation = NULL;
704}
705
706/**
707 * Tests surrogate support.
708 * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
709 * Therefore, another (unassigned) code point was used for this test.
710 */
711static void TestSurrogates(void)
712{
713 const char *str =
714 "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
715 int len = strlen(str);
716 int rlen = 0;
717 UChar *rules;
718 UCollator *myCollation;
719 UCollator *enCollation;
720 UErrorCode status = U_ZERO_ERROR;
721 UChar source[][4] =
722 {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
723 UChar target[][4] =
724 {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
725 int count = 0;
726 uint8_t enresult[20], myresult[20];
727 int enlen, mylen;
728
729 /* tests for open rules with surrogate rules */
730 rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
731 rlen = u_unescape(str, rules, len);
732
733 enCollation = ucol_open("en_US", &status);
734 myCollation = ucol_openRules(rules, rlen, UCOL_OFF,
735 UCOL_TERTIARY,NULL, &status);
736 if (U_FAILURE(status)) {
737 log_err("ERROR: in creation of rule based collator :%s\n",
738 myErrorName(status));
739 return;
740 }
741
742 /*
743 this test is to verify the supplementary sort key order in the english
744 collator
745 */
746 log_verbose("start of english collation supplementary characters test\n");
747 while (count < 2) {
748 doTest(enCollation, source[count], target[count], UCOL_LESS);
749 count ++;
750 }
751 doTest(enCollation, source[count], target[count], UCOL_GREATER);
752
753 log_verbose("start of tailored collation supplementary characters test\n");
754 count = 0;
755 /* tests getting collation elements for surrogates for tailored rules */
756 while (count < 4) {
757 doTest(myCollation, source[count], target[count], UCOL_LESS);
758 count ++;
759 }
760
761 /* tests that \uD800\uDC02 still has the same value, not changed */
762 enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
763 mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
764 if (enlen != mylen ||
765 uprv_memcmp(enresult, myresult, enlen) != 0) {
766 log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
767 }
768
769 free(rules);
770 ucol_close(enCollation);
771 ucol_close(myCollation);
772 enCollation = NULL;
773 myCollation = NULL;
774}
775
776/*
777 *### TODO: Add more invalid rules to test all different scenarios.
778 *
779 */
780static void
781TestInvalidRules(){
782#define MAX_ERROR_STATES 2
783
784 static const char* rulesArr[MAX_ERROR_STATES] = {
785 "& C < ch, cH, Ch[this should fail]<d",
786 "& C < ch, cH, & Ch[variable top]"
787 };
788 static const char* preContextArr[MAX_ERROR_STATES] = {
789 "his should fail",
790 "& C < ch, cH, ",
791
792 };
793 static const char* postContextArr[MAX_ERROR_STATES] = {
794 "<d",
795 " Ch[variable t"
796 };
797 int i;
798
799 for(i = 0;i<MAX_ERROR_STATES;i++){
800 UChar rules[1000] = { '\0' };
801 UChar preContextExp[1000] = { '\0' };
802 UChar postContextExp[1000] = { '\0' };
803 UParseError parseError;
804 UErrorCode status = U_ZERO_ERROR;
805 UCollator* coll=0;
806 u_charsToUChars(rulesArr[i],rules,uprv_strlen(rulesArr[i])+1);
807 u_charsToUChars(preContextArr[i],preContextExp,uprv_strlen(preContextArr[i])+1);
808 u_charsToUChars(postContextArr[i],postContextExp,uprv_strlen(postContextArr[i])+1);
809 /* clean up stuff in parseError */
810 u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
811 u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
812 /* open the rules and test */
813 coll = ucol_openRules(rules,u_strlen(rules),UCOL_OFF,UCOL_DEFAULT_STRENGTH,&parseError,&status);
814 if(u_strcmp(parseError.preContext,preContextExp)!=0){
815 log_err("preContext in UParseError for ucol_openRules does not match\n");
816 }
817 if(u_strcmp(parseError.postContext,postContextExp)!=0){
818 log_err("postContext in UParseError for ucol_openRules does not match\n");
819 }
820 }
821}
822
823static void
824TestJitterbug1098(){
825 UChar rule[1000];
826 UCollator* c1 = NULL;
827 UErrorCode status = U_ZERO_ERROR;
828 UParseError parseError;
829 char preContext[200]={0};
830 char postContext[200]={0};
831 int i=0;
832 const char* rules[] = {
833 "&''<\\\\",
834 "&\\'<\\\\",
835 "&\\\"<'\\'",
836 "&'\"'<\\'",
837 '\0'
838
839 };
840 const UCollationResult results1098[] = {
841 UCOL_LESS,
842 UCOL_LESS,
843 UCOL_LESS,
844 UCOL_LESS,
845 };
846 const UChar input[][2]= {
847 {0x0027,0x005c},
848 {0x0027,0x005c},
849 {0x0022,0x005c},
850 {0x0022,0x0027},
851 };
852 UChar X[2] ={0};
853 UChar Y[2] ={0};
854 u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
855 u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
856 for(;rules[i]!=0;i++){
857 u_uastrcpy(rule, rules[i]);
858 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
859 if(U_FAILURE(status)){
860 u_UCharsToChars(parseError.preContext,preContext,20);
861 u_UCharsToChars(parseError.postContext,postContext,20);
862 log_err("Could not parse the rules syntax. Error: %s ", u_errorName(status));
863 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
864 return;
865 }
866 X[0] = input[i][0];
867 Y[0] = input[i][1];
868 doTest(c1,X,Y,results1098[i]);
869 ucol_close(c1);
870 }
871}
872
873
874#endif /* #if !UCONFIG_NO_COLLATION */