]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/utransts.c
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / utransts.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2016 International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * Date Name Description
9 * 06/23/00 aliu Creation.
10 *******************************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_TRANSLITERATION
16
17 #include <stdlib.h>
18 #include <string.h>
19 #include "unicode/utrans.h"
20 #include "unicode/ustring.h"
21 #include "unicode/uset.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24
25 #define TEST(x) addTest(root, &x, "utrans/" # x)
26
27 static void TestAPI(void);
28 static void TestSimpleRules(void);
29 static void TestFilter(void);
30 static void TestOpenInverse(void);
31 static void TestClone(void);
32 static void TestRegisterUnregister(void);
33 static void TestExtractBetween(void);
34 static void TestUnicodeIDs(void);
35 static void TestGetRulesAndSourceSet(void);
36 static void TestDataVariantsCompounds(void);
37
38 static void _expectRules(const char*, const char*, const char*);
39 static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto);
40
41 void addUTransTest(TestNode** root);
42
43
44 void
45 addUTransTest(TestNode** root) {
46 TEST(TestAPI);
47 TEST(TestSimpleRules);
48 TEST(TestFilter);
49 TEST(TestOpenInverse);
50 TEST(TestClone);
51 TEST(TestRegisterUnregister);
52 TEST(TestExtractBetween);
53 TEST(TestUnicodeIDs);
54 TEST(TestGetRulesAndSourceSet);
55 TEST(TestDataVariantsCompounds);
56 }
57
58 /*------------------------------------------------------------------
59 * Replaceable glue
60 *
61 * To test the Replaceable glue we have to dummy up a C-based
62 * Replaceable callback. This code is for testing purposes only.
63 *------------------------------------------------------------------*/
64
65 typedef struct XReplaceable {
66 UChar* text; /* MUST BE null-terminated */
67 } XReplaceable;
68
69 static void InitXReplaceable(XReplaceable* rep, const char* cstring) {
70 rep->text = malloc(sizeof(UChar) * (strlen(cstring)+1));
71 u_uastrcpy(rep->text, cstring);
72 }
73
74 static void FreeXReplaceable(XReplaceable* rep) {
75 if (rep->text != NULL) {
76 free(rep->text);
77 rep->text = NULL;
78 }
79 }
80
81 /* UReplaceableCallbacks callback */
82 static int32_t Xlength(const UReplaceable* rep) {
83 const XReplaceable* x = (const XReplaceable*)rep;
84 return u_strlen(x->text);
85 }
86
87 /* UReplaceableCallbacks callback */
88 static UChar XcharAt(const UReplaceable* rep, int32_t offset) {
89 const XReplaceable* x = (const XReplaceable*)rep;
90 return x->text[offset];
91 }
92
93 /* UReplaceableCallbacks callback */
94 static UChar32 Xchar32At(const UReplaceable* rep, int32_t offset) {
95 const XReplaceable* x = (const XReplaceable*)rep;
96 return x->text[offset];
97 }
98
99 /* UReplaceableCallbacks callback */
100 static void Xreplace(UReplaceable* rep, int32_t start, int32_t limit,
101 const UChar* text, int32_t textLength) {
102 XReplaceable* x = (XReplaceable*)rep;
103 int32_t newLen = Xlength(rep) + limit - start + textLength;
104 UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
105 u_strncpy(newText, x->text, start);
106 u_strncpy(newText + start, text, textLength);
107 u_strcpy(newText + start + textLength, x->text + limit);
108 free(x->text);
109 x->text = newText;
110 }
111
112 /* UReplaceableCallbacks callback */
113 static void Xcopy(UReplaceable* rep, int32_t start, int32_t limit, int32_t dest) {
114 XReplaceable* x = (XReplaceable*)rep;
115 int32_t newLen = Xlength(rep) + limit - start;
116 UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1));
117 u_strncpy(newText, x->text, dest);
118 u_strncpy(newText + dest, x->text + start, limit - start);
119 u_strcpy(newText + dest + limit - start, x->text + dest);
120 free(x->text);
121 x->text = newText;
122 }
123
124 /* UReplaceableCallbacks callback */
125 static void Xextract(UReplaceable* rep, int32_t start, int32_t limit, UChar* dst) {
126 XReplaceable* x = (XReplaceable*)rep;
127 int32_t len = limit - start;
128 u_strncpy(dst, x->text, len);
129 }
130
131 static void InitXReplaceableCallbacks(UReplaceableCallbacks* callbacks) {
132 callbacks->length = Xlength;
133 callbacks->charAt = XcharAt;
134 callbacks->char32At = Xchar32At;
135 callbacks->replace = Xreplace;
136 callbacks->extract = Xextract;
137 callbacks->copy = Xcopy;
138 }
139
140 /*------------------------------------------------------------------
141 * Tests
142 *------------------------------------------------------------------*/
143
144 static void TestAPI() {
145 enum { BUF_CAP = 128 };
146 char buf[BUF_CAP], buf2[BUF_CAP];
147 UErrorCode status = U_ZERO_ERROR;
148 UTransliterator* trans = NULL;
149 int32_t i, n;
150
151 /* Test getAvailableIDs */
152 n = utrans_countAvailableIDs();
153 if (n < 1) {
154 log_err("FAIL: utrans_countAvailableIDs() returned %d\n", n);
155 } else {
156 log_verbose("System ID count: %d\n", n);
157 }
158 for (i=0; i<n; ++i) {
159 utrans_getAvailableID(i, buf, BUF_CAP);
160 if (*buf == 0) {
161 log_err("FAIL: System transliterator %d: \"\"\n", i);
162 } else {
163 log_verbose("System transliterator %d: \"%s\"\n", i, buf);
164 }
165 }
166
167 /* Test open */
168 utrans_getAvailableID(0, buf, BUF_CAP);
169 trans = utrans_open(buf, UTRANS_FORWARD,NULL,0,NULL, &status);
170 if (U_FAILURE(status)) {
171 log_err("FAIL: utrans_open(%s) failed, error=%s\n",
172 buf, u_errorName(status));
173 }
174
175 else {
176 /* Test getID */
177 utrans_getID(trans, buf2, BUF_CAP);
178 if (0 != strcmp(buf, buf2)) {
179 log_err("FAIL: utrans_getID(%s) returned %s\n",
180 buf, buf2);
181 }
182 utrans_close(trans);
183 }
184 }
185
186 static void TestUnicodeIDs() {
187 UEnumeration *uenum;
188 UTransliterator *utrans;
189 const UChar *id, *id2;
190 int32_t idLength, id2Length, count, count2;
191
192 UErrorCode errorCode;
193
194 errorCode=U_ZERO_ERROR;
195 uenum=utrans_openIDs(&errorCode);
196 if(U_FAILURE(errorCode)) {
197 log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode));
198 return;
199 }
200
201 count=uenum_count(uenum, &errorCode);
202 if(U_FAILURE(errorCode) || count<1) {
203 log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode));
204 }
205
206 count=0;
207 for(;;) {
208 id=uenum_unext(uenum, &idLength, &errorCode);
209 if(U_FAILURE(errorCode)) {
210 log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode));
211 break;
212 }
213 if(id==NULL) {
214 break;
215 }
216
217 if(++count>10) {
218 /* try to actually open only a few transliterators */
219 continue;
220 }
221
222 utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
223 if(U_FAILURE(errorCode)) {
224 log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode));
225 continue;
226 }
227
228 id2=utrans_getUnicodeID(utrans, &id2Length);
229 if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) {
230 log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength));
231 }
232
233 utrans_close(utrans);
234 }
235
236 uenum_reset(uenum, &errorCode);
237 if(U_FAILURE(errorCode) || count<1) {
238 log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode));
239 } else {
240 count2=uenum_count(uenum, &errorCode);
241 if(U_FAILURE(errorCode) || count<1) {
242 log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode));
243 } else if(count!=count2) {
244 log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2);
245 }
246 }
247
248 uenum_close(uenum);
249 }
250
251 static void TestOpenInverse(){
252 UErrorCode status=U_ZERO_ERROR;
253 UTransliterator* t1=NULL;
254 UTransliterator* inverse1=NULL;
255 enum { BUF_CAP = 128 };
256 char buf1[BUF_CAP];
257 int32_t i=0;
258
259 const char TransID[][25]={
260 "Halfwidth-Fullwidth",
261 "Fullwidth-Halfwidth",
262 "Greek-Latin" ,
263 "Latin-Greek",
264 /*"Arabic-Latin", // Removed in 2.0*/
265 /*"Latin-Arabic", // Removed in 2.0*/
266 "Katakana-Latin",
267 "Latin-Katakana",
268 /*"Hebrew-Latin", // Removed in 2.0*/
269 /*"Latin-Hebrew", // Removed in 2.0*/
270 "Cyrillic-Latin",
271 "Latin-Cyrillic",
272 "Devanagari-Latin",
273 "Latin-Devanagari",
274 "Any-Hex",
275 "Hex-Any"
276 };
277
278 for(i=0; i<UPRV_LENGTHOF(TransID); i=i+2){
279 status = U_ZERO_ERROR;
280 t1=utrans_open(TransID[i], UTRANS_FORWARD,NULL,0,NULL, &status);
281 if(t1 == NULL || U_FAILURE(status)){
282 log_data_err("FAIL: in instantiation for id=%s -> %s (Are you missing data?)\n", TransID[i], u_errorName(status));
283 continue;
284 }
285 inverse1=utrans_openInverse(t1, &status);
286 if(U_FAILURE(status)){
287 log_err("FAIL: utrans_openInverse() failed for id=%s. Error=%s\n", TransID[i], myErrorName(status));
288 continue;
289 }
290 utrans_getID(inverse1, buf1, BUF_CAP);
291 if(strcmp(buf1, TransID[i+1]) != 0){
292 log_err("FAIL :openInverse() for %s returned %s instead of %s\n", TransID[i], buf1, TransID[i+1]);
293 }
294 utrans_close(t1);
295 utrans_close(inverse1);
296 }
297 }
298
299 static void TestClone(){
300 UErrorCode status=U_ZERO_ERROR;
301 UTransliterator* t1=NULL;
302 UTransliterator* t2=NULL;
303 UTransliterator* t3=NULL;
304 UTransliterator* t4=NULL;
305 enum { BUF_CAP = 128 };
306 char buf1[BUF_CAP], buf2[BUF_CAP], buf3[BUF_CAP];
307
308 t1=utrans_open("Latin-Devanagari", UTRANS_FORWARD, NULL,0,NULL,&status);
309 if(U_FAILURE(status)){
310 log_data_err("FAIL: construction -> %s (Are you missing data?)\n", u_errorName(status));
311 return;
312 }
313 t2=utrans_open("Latin-Greek", UTRANS_FORWARD, NULL,0,NULL,&status);
314 if(U_FAILURE(status)){
315 log_err("FAIL: construction\n");
316 utrans_close(t1);
317 return;
318 }
319
320 t3=utrans_clone(t1, &status);
321 t4=utrans_clone(t2, &status);
322
323 utrans_getID(t1, buf1, BUF_CAP);
324 utrans_getID(t2, buf2, BUF_CAP);
325 utrans_getID(t3, buf3, BUF_CAP);
326
327 if(strcmp(buf1, buf3) != 0 ||
328 strcmp(buf1, buf2) == 0) {
329 log_err("FAIL: utrans_clone() failed\n");
330 }
331
332 utrans_getID(t4, buf3, BUF_CAP);
333
334 if(strcmp(buf2, buf3) != 0 ||
335 strcmp(buf1, buf3) == 0) {
336 log_err("FAIL: utrans_clone() failed\n");
337 }
338
339 utrans_close(t1);
340 utrans_close(t2);
341 utrans_close(t3);
342 utrans_close(t4);
343
344 }
345
346 static void TestRegisterUnregister(){
347 UErrorCode status=U_ZERO_ERROR;
348 UTransliterator* t1=NULL;
349 UTransliterator* rules=NULL, *rules2;
350 UTransliterator* inverse1=NULL;
351 UChar rule[]={ 0x0061, 0x003c, 0x003e, 0x0063}; /*a<>b*/
352
353 U_STRING_DECL(ID, "TestA-TestB", 11);
354 U_STRING_INIT(ID, "TestA-TestB", 11);
355
356 /* Make sure it doesn't exist */
357 t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
358 if(t1 != NULL || U_SUCCESS(status)) {
359 log_err("FAIL: TestA-TestB already registered\n");
360 return;
361 }
362 status=U_ZERO_ERROR;
363 /* Check inverse too */
364 inverse1=utrans_open("TestA-TestB", UTRANS_REVERSE, NULL,0,NULL,&status);
365 if(inverse1 != NULL || U_SUCCESS(status)) {
366 log_err("FAIL: TestA-TestB already registered\n");
367 return;
368 }
369 status=U_ZERO_ERROR;
370 /* Create it */
371 rules=utrans_open("TestA-TestB",UTRANS_FORWARD, rule, 4, NULL, &status);
372 if(U_FAILURE(status)){
373 log_err("FAIL: utrans_openRules(a<>B) failed with error=%s\n", myErrorName(status));
374 return;
375 }
376
377 /* clone it so we can register it a second time */
378 rules2=utrans_clone(rules, &status);
379 if(U_FAILURE(status)) {
380 log_err("FAIL: utrans_clone(a<>B) failed with error=%s\n", myErrorName(status));
381 return;
382 }
383
384 status=U_ZERO_ERROR;
385 /* Register it */
386 utrans_register(rules, &status);
387 if(U_FAILURE(status)){
388 log_err("FAIL: utrans_register failed with error=%s\n", myErrorName(status));
389 return;
390 }
391 status=U_ZERO_ERROR;
392 /* Now check again -- should exist now*/
393 t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
394 if(U_FAILURE(status) || t1 == NULL){
395 log_err("FAIL: TestA-TestB not registered\n");
396 return;
397 }
398 utrans_close(t1);
399
400 /*unregister the instance*/
401 status=U_ZERO_ERROR;
402 utrans_unregister("TestA-TestB");
403 /* now Make sure it doesn't exist */
404 t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status);
405 if(U_SUCCESS(status) || t1 != NULL) {
406 log_err("FAIL: TestA-TestB isn't unregistered\n");
407 return;
408 }
409 utrans_close(t1);
410
411 /* now with utrans_unregisterID(const UChar *) */
412 status=U_ZERO_ERROR;
413 utrans_register(rules2, &status);
414 if(U_FAILURE(status)){
415 log_err("FAIL: 2nd utrans_register failed with error=%s\n", myErrorName(status));
416 return;
417 }
418 status=U_ZERO_ERROR;
419 /* Now check again -- should exist now*/
420 t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status);
421 if(U_FAILURE(status) || t1 == NULL){
422 log_err("FAIL: 2nd TestA-TestB not registered\n");
423 return;
424 }
425 utrans_close(t1);
426
427 /*unregister the instance*/
428 status=U_ZERO_ERROR;
429 utrans_unregisterID(ID, -1);
430 /* now Make sure it doesn't exist */
431 t1=utrans_openU(ID, -1, UTRANS_FORWARD,NULL,0,NULL, &status);
432 if(U_SUCCESS(status) || t1 != NULL) {
433 log_err("FAIL: 2nd TestA-TestB isn't unregistered\n");
434 return;
435 }
436
437 utrans_close(t1);
438 utrans_close(inverse1);
439 }
440
441 static void TestSimpleRules() {
442 /* Test rules */
443 /* Example: rules 1. ab>x|y
444 * 2. yc>z
445 *
446 * []|eabcd start - no match, copy e to tranlated buffer
447 * [e]|abcd match rule 1 - copy output & adjust cursor
448 * [ex|y]cd match rule 2 - copy output & adjust cursor
449 * [exz]|d no match, copy d to transliterated buffer
450 * [exzd]| done
451 */
452 _expectRules("ab>x|y;"
453 "yc>z",
454 "eabcd", "exzd");
455
456 /* Another set of rules:
457 * 1. ab>x|yzacw
458 * 2. za>q
459 * 3. qc>r
460 * 4. cw>n
461 *
462 * []|ab Rule 1
463 * [x|yzacw] No match
464 * [xy|zacw] Rule 2
465 * [xyq|cw] Rule 4
466 * [xyqn]| Done
467 */
468 _expectRules("ab>x|yzacw;"
469 "za>q;"
470 "qc>r;"
471 "cw>n",
472 "ab", "xyqn");
473
474 /* Test categories
475 */
476 _expectRules("$dummy=" "\\uE100" ";" /* careful here with E100 */
477 "$vowel=[aeiouAEIOU];"
478 "$lu=[:Lu:];"
479 "$vowel } $lu > '!';"
480 "$vowel > '&';"
481 "'!' { $lu > '^';"
482 "$lu > '*';"
483 "a > ERROR",
484 "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
485
486 /* Test multiple passes
487 */
488 _expectRules("abc > xy;"
489 "::Null;"
490 "aba > z;",
491 "abc ababc aba", "xy abxy z");
492 }
493
494 static void TestFilter() {
495 UErrorCode status = U_ZERO_ERROR;
496 UChar filt[128];
497 UChar buf[128];
498 UChar exp[128];
499 char *cbuf;
500 int32_t limit;
501 const char* DATA[] = {
502 "[^c]", /* Filter out 'c' */
503 "abcde",
504 "\\u0061\\u0062c\\u0064\\u0065",
505
506 "", /* No filter */
507 "abcde",
508 "\\u0061\\u0062\\u0063\\u0064\\u0065"
509 };
510 int32_t DATA_length = UPRV_LENGTHOF(DATA);
511 int32_t i;
512
513 UTransliterator* hex = utrans_open("Any-Hex", UTRANS_FORWARD, NULL,0,NULL,&status);
514
515 if (hex == 0 || U_FAILURE(status)) {
516 log_err("FAIL: utrans_open(Unicode-Hex) failed, error=%s\n",
517 u_errorName(status));
518 goto exit;
519 }
520
521 for (i=0; i<DATA_length; i+=3) {
522 /*u_uastrcpy(filt, DATA[i]);*/
523 u_charsToUChars(DATA[i], filt, (int32_t)strlen(DATA[i])+1);
524 utrans_setFilter(hex, filt, -1, &status);
525
526 if (U_FAILURE(status)) {
527 log_err("FAIL: utrans_setFilter() failed, error=%s\n",
528 u_errorName(status));
529 goto exit;
530 }
531
532 /*u_uastrcpy(buf, DATA[i+1]);*/
533 u_charsToUChars(DATA[i+1], buf, (int32_t)strlen(DATA[i+1])+1);
534 limit = 5;
535 utrans_transUChars(hex, buf, NULL, 128, 0, &limit, &status);
536
537 if (U_FAILURE(status)) {
538 log_err("FAIL: utrans_transUChars() failed, error=%s\n",
539 u_errorName(status));
540 goto exit;
541 }
542
543 cbuf=aescstrdup(buf, -1);
544 u_charsToUChars(DATA[i+2], exp, (int32_t)strlen(DATA[i+2])+1);
545 if (0 == u_strcmp(buf, exp)) {
546 log_verbose("Ok: %s | %s -> %s\n", DATA[i+1], DATA[i], cbuf);
547 } else {
548 log_err("FAIL: %s | %s -> %s, expected %s\n", DATA[i+1], DATA[i], cbuf, DATA[i+2]);
549 }
550 }
551
552 exit:
553 utrans_close(hex);
554 }
555
556 /**
557 * Test the UReplaceableCallback extractBetween support. We use a
558 * transliterator known to rely on this call.
559 */
560 static void TestExtractBetween() {
561
562 UTransliterator *trans;
563 UErrorCode status = U_ZERO_ERROR;
564 UParseError parseErr;
565
566 trans = utrans_open("Lower", UTRANS_FORWARD, NULL, -1,
567 &parseErr, &status);
568
569 if (U_FAILURE(status)) {
570 log_err("FAIL: utrans_open(Lower) failed, error=%s\n",
571 u_errorName(status));
572 } else {
573 _expect(trans, "ABC", "abc");
574
575 utrans_close(trans);
576 }
577 }
578
579 /**
580 * Test utrans_toRules, utrans_getSourceSet
581 */
582
583 /* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped,
584 filter & source set 4-20 chars */
585 static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */
586 static const char* transSimpleCName = "yo-yo_BJ";
587
588 enum { kUBufMax = 256 };
589 static void TestGetRulesAndSourceSet() {
590 UErrorCode status = U_ZERO_ERROR;
591 UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
592 if ( U_SUCCESS(status) ) {
593 USet* uset;
594 UChar ubuf[kUBufMax];
595 int32_t ulen;
596
597 status = U_ZERO_ERROR;
598 ulen = utrans_toRules(utrans, FALSE, ubuf, kUBufMax, &status);
599 if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) {
600 log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n",
601 u_errorName(status), ulen);
602 }
603
604 status = U_ZERO_ERROR;
605 ulen = utrans_toRules(utrans, FALSE, NULL, 0, &status);
606 if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) {
607 log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n",
608 u_errorName(status), ulen);
609 }
610
611 status = U_ZERO_ERROR;
612 ulen = utrans_toRules(utrans, TRUE, ubuf, kUBufMax, &status);
613 if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) {
614 log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n",
615 u_errorName(status), ulen);
616 }
617
618 status = U_ZERO_ERROR;
619 uset = utrans_getSourceSet(utrans, FALSE, NULL, &status);
620 ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
621 uset_close(uset);
622 if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
623 log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
624 u_errorName(status), ulen);
625 }
626
627 status = U_ZERO_ERROR;
628 uset = utrans_getSourceSet(utrans, TRUE, NULL, &status);
629 ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status);
630 uset_close(uset);
631 if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) {
632 log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n",
633 u_errorName(status), ulen);
634 }
635
636 utrans_close(utrans);
637 } else {
638 log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
639 transSimpleCName, u_errorName(status));
640 }
641 }
642
643 typedef struct {
644 const UChar * transID;
645 const char * sourceText;
646 const char * targetText;
647 } TransIDSourceTarg;
648
/*
 * Cases for TestDataVariantsCompounds(). Source and target strings use
 * \uXXXX escapes (the doubled backslash keeps them literal until
 * u_unescape() is applied). The list ends with an all-NULL sentinel.
 */
static const TransIDSourceTarg dataVarCompItems[] = {
    { u"Simplified-Traditional",
      "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
      "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002" },
    { u"Halfwidth-Fullwidth",
      "Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
      "\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E" },
    { u"Han-Latin/Names; Latin-Bopomofo",
      "\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
      "\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA\\u3001 \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA\\u3001 \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB\\u3001 \\u3129\\u311D\\u02CB \\u3108\\u311F" },
    { u"Han-Latin",
      "\\u85CF.\\u92BA.\\u85CF\\u6587.\\u85CF\\u8BED.",
      "c\\u00E1ng. z\\u00E0ng. z\\u00E0ng w\\u00E9n. z\\u00E0ng y\\u01D4." },
    { u"Greek-Latin",
      "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
      "A \\u0100I H\\u0100I RH" },
    /* The following transform is provisional and not present in ICU 60
    { "Greek-Latin/BGN",
    "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
    "A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC" },
    */
    { u"Greek-Latin/UNGEGN",
      "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
      "A A A R" },

    /* Compound IDs: several steps separated by ';', one with a filter set */
    { u"NFD; [[:Mn:]&[:Diacritic:]] Remove; [ÐØøĐđĦħŁłŦŧƀƗƵƶǤǥȺȻȼȾɃɆɇɈɉɌɍɎɏɟɨᴌᵻᵽᵾⱣⱥⱦꝀꝁꝂꝃꝄꝅꝈꝉꝊꝋꝐꝑꝖꝗꝞꝟꞠꞡꞢꞣꞤꞥꞦꞧꞨꞩ] Latin-ASCII; NFC; Upper",
      "\\u0248 \\u0249 \\u00C6 \\u00E6",
      "J J \\u00C6 \\u00C6" },
    { u"NFD; [[:Mn:]&[:Diacritic:]] Remove; [ÐØøĐđĦħŁłŦŧƀƗƵƶǤǥȺȻȼȾɃɆɇɈɉɌɍɎɏɟɨᴌᵻᵽᵾⱣⱥⱦꝀꝁꝂꝃꝄꝅꝈꝉꝊꝋꝐꝑꝖꝗꝞꝟꞠꞡꞢꞣꞤꞥꞦꞧꞨꞩ] Latin-ASCII; NFC; Lower",
      "\\u0248 \\u0249 \\u00C6 \\u00E6",
      "j j \\u00E6 \\u00E6" },

    { u"Hiragana-Katakana", // rdar://52039352
      "\\u309B\\u309C \\u308F\\u3099 \\u309F",
      "\\u309B\\u309C \\u30F7 \\u30E8\\u30EA" },

    /* sentinel: transID == NULL ends the list */
    { NULL, NULL, NULL }
};

/* Capacity, in bytes, for char buffers holding converted test text */
enum { kBBufMax = 384 };
689 static void TestDataVariantsCompounds() {
690 const TransIDSourceTarg* itemsPtr;
691 for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) {
692 UErrorCode status = U_ZERO_ERROR;
693 char btrid[kUBufMax];
694 u_austrcpy(btrid, itemsPtr->transID);
695 UTransliterator* utrans = utrans_openU(itemsPtr->transID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status);
696 if (U_FAILURE(status)) {
697 log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", btrid, u_errorName(status));
698 continue;
699 }
700 UChar text[kUBufMax];
701 int32_t textLen = u_unescape(itemsPtr->sourceText, text, kUBufMax);
702 int32_t textLim = textLen;
703 utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status);
704 if (U_FAILURE(status)) {
705 log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status));
706 } else {
707 UChar expect[kUBufMax];
708 int32_t expectLen = u_unescape(itemsPtr->targetText, expect, kUBufMax);
709 if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) {
710 char btext[kBBufMax], bexpect[kBBufMax];
711 u_austrncpy(btext, text, textLen);
712 u_austrncpy(bexpect, expect, expectLen);
713 log_err("FAIL: utrans_transUChars(%s),\n expect %s\n get %s\n", btrid, bexpect, btext);
714 }
715 }
716 utrans_close(utrans);
717 }
718 }
719
720 static void _expectRules(const char* crules,
721 const char* cfrom,
722 const char* cto) {
723 /* u_uastrcpy has no capacity param for the buffer -- so just
724 * make all buffers way too big */
725 enum { CAP = 256 };
726 UChar rules[CAP];
727 UTransliterator *trans;
728 UErrorCode status = U_ZERO_ERROR;
729 UParseError parseErr;
730
731 u_uastrcpy(rules, crules);
732
733 trans = utrans_open(crules /*use rules as ID*/, UTRANS_FORWARD, rules, -1,
734 &parseErr, &status);
735 if (U_FAILURE(status)) {
736 utrans_close(trans);
737 log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n",
738 crules, u_errorName(status));
739 return;
740 }
741
742 _expect(trans, cfrom, cto);
743
744 utrans_close(trans);
745 }
746
747 static void _expect(const UTransliterator* trans,
748 const char* cfrom,
749 const char* cto) {
750 /* u_uastrcpy has no capacity param for the buffer -- so just
751 * make all buffers way too big */
752 enum { CAP = 256 };
753 UChar from[CAP];
754 UChar to[CAP];
755 UChar buf[CAP];
756 const UChar *ID;
757 int32_t IDLength;
758 const char *id;
759
760 UErrorCode status = U_ZERO_ERROR;
761 int32_t limit;
762 UTransPosition pos;
763 XReplaceable xrep;
764 XReplaceable *xrepPtr = &xrep;
765 UReplaceableCallbacks xrepVtable;
766
767 u_uastrcpy(from, cfrom);
768 u_uastrcpy(to, cto);
769
770 ID = utrans_getUnicodeID(trans, &IDLength);
771 id = aescstrdup(ID, IDLength);
772
773 /* utrans_transUChars() */
774 u_strcpy(buf, from);
775 limit = u_strlen(buf);
776 utrans_transUChars(trans, buf, NULL, CAP, 0, &limit, &status);
777 if (U_FAILURE(status)) {
778 log_err("FAIL: utrans_transUChars() failed, error=%s\n",
779 u_errorName(status));
780 return;
781 }
782
783 if (0 == u_strcmp(buf, to)) {
784 log_verbose("Ok: utrans_transUChars(%s) x %s -> %s\n",
785 id, cfrom, cto);
786 } else {
787 char actual[CAP];
788 u_austrcpy(actual, buf);
789 log_err("FAIL: utrans_transUChars(%s) x %s -> %s, expected %s\n",
790 id, cfrom, actual, cto);
791 }
792
793 /* utrans_transIncrementalUChars() */
794 u_strcpy(buf, from);
795 pos.start = pos.contextStart = 0;
796 pos.limit = pos.contextLimit = u_strlen(buf);
797 utrans_transIncrementalUChars(trans, buf, NULL, CAP, &pos, &status);
798 utrans_transUChars(trans, buf, NULL, CAP, pos.start, &pos.limit, &status);
799 if (U_FAILURE(status)) {
800 log_err("FAIL: utrans_transIncrementalUChars() failed, error=%s\n",
801 u_errorName(status));
802 return;
803 }
804
805 if (0 == u_strcmp(buf, to)) {
806 log_verbose("Ok: utrans_transIncrementalUChars(%s) x %s -> %s\n",
807 id, cfrom, cto);
808 } else {
809 char actual[CAP];
810 u_austrcpy(actual, buf);
811 log_err("FAIL: utrans_transIncrementalUChars(%s) x %s -> %s, expected %s\n",
812 id, cfrom, actual, cto);
813 }
814
815 /* utrans_trans() */
816 InitXReplaceableCallbacks(&xrepVtable);
817 InitXReplaceable(&xrep, cfrom);
818 limit = u_strlen(from);
819 utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, 0, &limit, &status);
820 if (U_FAILURE(status)) {
821 log_err("FAIL: utrans_trans() failed, error=%s\n",
822 u_errorName(status));
823 FreeXReplaceable(&xrep);
824 return;
825 }
826
827 if (0 == u_strcmp(xrep.text, to)) {
828 log_verbose("Ok: utrans_trans(%s) x %s -> %s\n",
829 id, cfrom, cto);
830 } else {
831 char actual[CAP];
832 u_austrcpy(actual, xrep.text);
833 log_err("FAIL: utrans_trans(%s) x %s -> %s, expected %s\n",
834 id, cfrom, actual, cto);
835 }
836 FreeXReplaceable(&xrep);
837
838 /* utrans_transIncremental() */
839 InitXReplaceable(&xrep, cfrom);
840 pos.start = pos.contextStart = 0;
841 pos.limit = pos.contextLimit = u_strlen(from);
842 utrans_transIncremental(trans, (UReplaceable*)xrepPtr, &xrepVtable, &pos, &status);
843 utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, pos.start, &pos.limit, &status);
844 if (U_FAILURE(status)) {
845 log_err("FAIL: utrans_transIncremental() failed, error=%s\n",
846 u_errorName(status));
847 FreeXReplaceable(&xrep);
848 return;
849 }
850
851 if (0 == u_strcmp(xrep.text, to)) {
852 log_verbose("Ok: utrans_transIncremental(%s) x %s -> %s\n",
853 id, cfrom, cto);
854 } else {
855 char actual[CAP];
856 u_austrcpy(actual, xrep.text);
857 log_err("FAIL: utrans_transIncremental(%s) x %s -> %s, expected %s\n",
858 id, cfrom, actual, cto);
859 }
860 FreeXReplaceable(&xrep);
861 }
862
863 #endif /* #if !UCONFIG_NO_TRANSLITERATION */