]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f | 3 | /* |
374ca955 | 4 | ******************************************************************************* |
2ca993e8 | 5 | * Copyright (C) 1997-2016 International Business Machines |
374ca955 A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * Date Name Description | |
9 | * 06/23/00 aliu Creation. | |
10 | ******************************************************************************* | |
11 | */ | |
b75a7d8f A |
12 | |
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_TRANSLITERATION | |
16 | ||
17 | #include <stdlib.h> | |
18 | #include <string.h> | |
19 | #include "unicode/utrans.h" | |
20 | #include "unicode/ustring.h" | |
57a6839d | 21 | #include "unicode/uset.h" |
b75a7d8f | 22 | #include "cintltst.h" |
2ca993e8 | 23 | #include "cmemory.h" |
b75a7d8f A |
24 | |
25 | #define TEST(x) addTest(root, &x, "utrans/" # x) | |
26 | ||
27 | static void TestAPI(void); | |
28 | static void TestSimpleRules(void); | |
29 | static void TestFilter(void); | |
30 | static void TestOpenInverse(void); | |
31 | static void TestClone(void); | |
32 | static void TestRegisterUnregister(void); | |
33 | static void TestExtractBetween(void); | |
374ca955 | 34 | static void TestUnicodeIDs(void); |
57a6839d | 35 | static void TestGetRulesAndSourceSet(void); |
2ca993e8 | 36 | static void TestDataVariantsCompounds(void); |
b75a7d8f A |
37 | |
38 | static void _expectRules(const char*, const char*, const char*); | |
39 | static void _expect(const UTransliterator* trans, const char* cfrom, const char* cto); | |
40 | ||
41 | void addUTransTest(TestNode** root); | |
42 | ||
43 | ||
44 | void | |
45 | addUTransTest(TestNode** root) { | |
46 | TEST(TestAPI); | |
47 | TEST(TestSimpleRules); | |
48 | TEST(TestFilter); | |
49 | TEST(TestOpenInverse); | |
50 | TEST(TestClone); | |
51 | TEST(TestRegisterUnregister); | |
52 | TEST(TestExtractBetween); | |
374ca955 | 53 | TEST(TestUnicodeIDs); |
57a6839d | 54 | TEST(TestGetRulesAndSourceSet); |
2ca993e8 | 55 | TEST(TestDataVariantsCompounds); |
b75a7d8f A |
56 | } |
57 | ||
58 | /*------------------------------------------------------------------ | |
59 | * Replaceable glue | |
60 | * | |
61 | * To test the Replaceable glue we have to dummy up a C-based | |
62 | * Replaceable callback. This code is for testing purposes only. | |
63 | *------------------------------------------------------------------*/ | |
64 | ||
65 | typedef struct XReplaceable { | |
66 | UChar* text; /* MUST BE null-terminated */ | |
67 | } XReplaceable; | |
68 | ||
69 | static void InitXReplaceable(XReplaceable* rep, const char* cstring) { | |
70 | rep->text = malloc(sizeof(UChar) * (strlen(cstring)+1)); | |
71 | u_uastrcpy(rep->text, cstring); | |
72 | } | |
73 | ||
74 | static void FreeXReplaceable(XReplaceable* rep) { | |
75 | if (rep->text != NULL) { | |
76 | free(rep->text); | |
77 | rep->text = NULL; | |
78 | } | |
79 | } | |
80 | ||
81 | /* UReplaceableCallbacks callback */ | |
82 | static int32_t Xlength(const UReplaceable* rep) { | |
83 | const XReplaceable* x = (const XReplaceable*)rep; | |
84 | return u_strlen(x->text); | |
85 | } | |
86 | ||
87 | /* UReplaceableCallbacks callback */ | |
88 | static UChar XcharAt(const UReplaceable* rep, int32_t offset) { | |
89 | const XReplaceable* x = (const XReplaceable*)rep; | |
90 | return x->text[offset]; | |
91 | } | |
92 | ||
93 | /* UReplaceableCallbacks callback */ | |
94 | static UChar32 Xchar32At(const UReplaceable* rep, int32_t offset) { | |
95 | const XReplaceable* x = (const XReplaceable*)rep; | |
96 | return x->text[offset]; | |
97 | } | |
98 | ||
99 | /* UReplaceableCallbacks callback */ | |
100 | static void Xreplace(UReplaceable* rep, int32_t start, int32_t limit, | |
101 | const UChar* text, int32_t textLength) { | |
102 | XReplaceable* x = (XReplaceable*)rep; | |
103 | int32_t newLen = Xlength(rep) + limit - start + textLength; | |
104 | UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1)); | |
105 | u_strncpy(newText, x->text, start); | |
106 | u_strncpy(newText + start, text, textLength); | |
107 | u_strcpy(newText + start + textLength, x->text + limit); | |
108 | free(x->text); | |
109 | x->text = newText; | |
110 | } | |
111 | ||
112 | /* UReplaceableCallbacks callback */ | |
113 | static void Xcopy(UReplaceable* rep, int32_t start, int32_t limit, int32_t dest) { | |
114 | XReplaceable* x = (XReplaceable*)rep; | |
115 | int32_t newLen = Xlength(rep) + limit - start; | |
116 | UChar* newText = (UChar*) malloc(sizeof(UChar) * (newLen+1)); | |
117 | u_strncpy(newText, x->text, dest); | |
118 | u_strncpy(newText + dest, x->text + start, limit - start); | |
119 | u_strcpy(newText + dest + limit - start, x->text + dest); | |
120 | free(x->text); | |
121 | x->text = newText; | |
122 | } | |
123 | ||
124 | /* UReplaceableCallbacks callback */ | |
125 | static void Xextract(UReplaceable* rep, int32_t start, int32_t limit, UChar* dst) { | |
126 | XReplaceable* x = (XReplaceable*)rep; | |
127 | int32_t len = limit - start; | |
128 | u_strncpy(dst, x->text, len); | |
129 | } | |
130 | ||
131 | static void InitXReplaceableCallbacks(UReplaceableCallbacks* callbacks) { | |
132 | callbacks->length = Xlength; | |
133 | callbacks->charAt = XcharAt; | |
134 | callbacks->char32At = Xchar32At; | |
135 | callbacks->replace = Xreplace; | |
136 | callbacks->extract = Xextract; | |
137 | callbacks->copy = Xcopy; | |
138 | } | |
139 | ||
140 | /*------------------------------------------------------------------ | |
141 | * Tests | |
142 | *------------------------------------------------------------------*/ | |
143 | ||
144 | static void TestAPI() { | |
145 | enum { BUF_CAP = 128 }; | |
146 | char buf[BUF_CAP], buf2[BUF_CAP]; | |
147 | UErrorCode status = U_ZERO_ERROR; | |
148 | UTransliterator* trans = NULL; | |
149 | int32_t i, n; | |
150 | ||
151 | /* Test getAvailableIDs */ | |
152 | n = utrans_countAvailableIDs(); | |
153 | if (n < 1) { | |
154 | log_err("FAIL: utrans_countAvailableIDs() returned %d\n", n); | |
155 | } else { | |
156 | log_verbose("System ID count: %d\n", n); | |
157 | } | |
158 | for (i=0; i<n; ++i) { | |
159 | utrans_getAvailableID(i, buf, BUF_CAP); | |
160 | if (*buf == 0) { | |
161 | log_err("FAIL: System transliterator %d: \"\"\n", i); | |
162 | } else { | |
163 | log_verbose("System transliterator %d: \"%s\"\n", i, buf); | |
164 | } | |
165 | } | |
166 | ||
167 | /* Test open */ | |
168 | utrans_getAvailableID(0, buf, BUF_CAP); | |
169 | trans = utrans_open(buf, UTRANS_FORWARD,NULL,0,NULL, &status); | |
170 | if (U_FAILURE(status)) { | |
171 | log_err("FAIL: utrans_open(%s) failed, error=%s\n", | |
172 | buf, u_errorName(status)); | |
173 | } | |
174 | ||
175 | else { | |
176 | /* Test getID */ | |
177 | utrans_getID(trans, buf2, BUF_CAP); | |
178 | if (0 != strcmp(buf, buf2)) { | |
179 | log_err("FAIL: utrans_getID(%s) returned %s\n", | |
180 | buf, buf2); | |
181 | } | |
182 | utrans_close(trans); | |
183 | } | |
184 | } | |
185 | ||
374ca955 A |
186 | static void TestUnicodeIDs() { |
187 | UEnumeration *uenum; | |
188 | UTransliterator *utrans; | |
189 | const UChar *id, *id2; | |
190 | int32_t idLength, id2Length, count, count2; | |
191 | ||
192 | UErrorCode errorCode; | |
193 | ||
194 | errorCode=U_ZERO_ERROR; | |
195 | uenum=utrans_openIDs(&errorCode); | |
196 | if(U_FAILURE(errorCode)) { | |
197 | log_err("utrans_openIDs() failed - %s\n", u_errorName(errorCode)); | |
198 | return; | |
199 | } | |
200 | ||
201 | count=uenum_count(uenum, &errorCode); | |
202 | if(U_FAILURE(errorCode) || count<1) { | |
203 | log_err("uenum_count(transliterator IDs)=%d - %s\n", count, u_errorName(errorCode)); | |
204 | } | |
205 | ||
206 | count=0; | |
207 | for(;;) { | |
208 | id=uenum_unext(uenum, &idLength, &errorCode); | |
209 | if(U_FAILURE(errorCode)) { | |
210 | log_err("uenum_unext(transliterator ID %d) failed - %s\n", count, u_errorName(errorCode)); | |
211 | break; | |
212 | } | |
213 | if(id==NULL) { | |
214 | break; | |
215 | } | |
216 | ||
217 | if(++count>10) { | |
218 | /* try to actually open only a few transliterators */ | |
219 | continue; | |
220 | } | |
221 | ||
222 | utrans=utrans_openU(id, idLength, UTRANS_FORWARD, NULL, 0, NULL, &errorCode); | |
223 | if(U_FAILURE(errorCode)) { | |
224 | log_err("utrans_openU(%s) failed - %s\n", aescstrdup(id, idLength), u_errorName(errorCode)); | |
225 | continue; | |
226 | } | |
227 | ||
228 | id2=utrans_getUnicodeID(utrans, &id2Length); | |
229 | if(idLength!=id2Length || 0!=u_memcmp(id, id2, idLength)) { | |
230 | log_err("utrans_getUnicodeID(%s) does not match the original ID\n", aescstrdup(id, idLength)); | |
231 | } | |
232 | ||
233 | utrans_close(utrans); | |
234 | } | |
235 | ||
236 | uenum_reset(uenum, &errorCode); | |
237 | if(U_FAILURE(errorCode) || count<1) { | |
238 | log_err("uenum_reset(transliterator IDs) failed - %s\n", u_errorName(errorCode)); | |
239 | } else { | |
240 | count2=uenum_count(uenum, &errorCode); | |
241 | if(U_FAILURE(errorCode) || count<1) { | |
242 | log_err("2nd uenum_count(transliterator IDs)=%d - %s\n", count2, u_errorName(errorCode)); | |
243 | } else if(count!=count2) { | |
244 | log_err("uenum_unext(transliterator IDs) returned %d IDs but uenum_count() after uenum_reset() claims there are %d\n", count, count2); | |
245 | } | |
246 | } | |
247 | ||
248 | uenum_close(uenum); | |
249 | } | |
250 | ||
b75a7d8f A |
251 | static void TestOpenInverse(){ |
252 | UErrorCode status=U_ZERO_ERROR; | |
253 | UTransliterator* t1=NULL; | |
254 | UTransliterator* inverse1=NULL; | |
255 | enum { BUF_CAP = 128 }; | |
256 | char buf1[BUF_CAP]; | |
257 | int32_t i=0; | |
258 | ||
259 | const char TransID[][25]={ | |
260 | "Halfwidth-Fullwidth", | |
261 | "Fullwidth-Halfwidth", | |
262 | "Greek-Latin" , | |
263 | "Latin-Greek", | |
264 | /*"Arabic-Latin", // Removed in 2.0*/ | |
265 | /*"Latin-Arabic", // Removed in 2.0*/ | |
266 | "Katakana-Latin", | |
267 | "Latin-Katakana", | |
268 | /*"Hebrew-Latin", // Removed in 2.0*/ | |
269 | /*"Latin-Hebrew", // Removed in 2.0*/ | |
270 | "Cyrillic-Latin", | |
271 | "Latin-Cyrillic", | |
272 | "Devanagari-Latin", | |
273 | "Latin-Devanagari", | |
274 | "Any-Hex", | |
275 | "Hex-Any" | |
276 | }; | |
277 | ||
2ca993e8 | 278 | for(i=0; i<UPRV_LENGTHOF(TransID); i=i+2){ |
b75a7d8f A |
279 | status = U_ZERO_ERROR; |
280 | t1=utrans_open(TransID[i], UTRANS_FORWARD,NULL,0,NULL, &status); | |
281 | if(t1 == NULL || U_FAILURE(status)){ | |
729e4ab9 | 282 | log_data_err("FAIL: in instantiation for id=%s -> %s (Are you missing data?)\n", TransID[i], u_errorName(status)); |
b75a7d8f A |
283 | continue; |
284 | } | |
285 | inverse1=utrans_openInverse(t1, &status); | |
286 | if(U_FAILURE(status)){ | |
287 | log_err("FAIL: utrans_openInverse() failed for id=%s. Error=%s\n", TransID[i], myErrorName(status)); | |
288 | continue; | |
289 | } | |
290 | utrans_getID(inverse1, buf1, BUF_CAP); | |
291 | if(strcmp(buf1, TransID[i+1]) != 0){ | |
292 | log_err("FAIL :openInverse() for %s returned %s instead of %s\n", TransID[i], buf1, TransID[i+1]); | |
293 | } | |
294 | utrans_close(t1); | |
295 | utrans_close(inverse1); | |
296 | } | |
297 | } | |
298 | ||
299 | static void TestClone(){ | |
300 | UErrorCode status=U_ZERO_ERROR; | |
301 | UTransliterator* t1=NULL; | |
302 | UTransliterator* t2=NULL; | |
303 | UTransliterator* t3=NULL; | |
304 | UTransliterator* t4=NULL; | |
305 | enum { BUF_CAP = 128 }; | |
306 | char buf1[BUF_CAP], buf2[BUF_CAP], buf3[BUF_CAP]; | |
307 | ||
308 | t1=utrans_open("Latin-Devanagari", UTRANS_FORWARD, NULL,0,NULL,&status); | |
309 | if(U_FAILURE(status)){ | |
729e4ab9 | 310 | log_data_err("FAIL: construction -> %s (Are you missing data?)\n", u_errorName(status)); |
b75a7d8f A |
311 | return; |
312 | } | |
313 | t2=utrans_open("Latin-Greek", UTRANS_FORWARD, NULL,0,NULL,&status); | |
314 | if(U_FAILURE(status)){ | |
315 | log_err("FAIL: construction\n"); | |
316 | utrans_close(t1); | |
317 | return; | |
318 | } | |
319 | ||
320 | t3=utrans_clone(t1, &status); | |
321 | t4=utrans_clone(t2, &status); | |
322 | ||
323 | utrans_getID(t1, buf1, BUF_CAP); | |
324 | utrans_getID(t2, buf2, BUF_CAP); | |
325 | utrans_getID(t3, buf3, BUF_CAP); | |
326 | ||
327 | if(strcmp(buf1, buf3) != 0 || | |
328 | strcmp(buf1, buf2) == 0) { | |
329 | log_err("FAIL: utrans_clone() failed\n"); | |
330 | } | |
331 | ||
332 | utrans_getID(t4, buf3, BUF_CAP); | |
333 | ||
334 | if(strcmp(buf2, buf3) != 0 || | |
335 | strcmp(buf1, buf3) == 0) { | |
336 | log_err("FAIL: utrans_clone() failed\n"); | |
337 | } | |
338 | ||
339 | utrans_close(t1); | |
340 | utrans_close(t2); | |
341 | utrans_close(t3); | |
342 | utrans_close(t4); | |
343 | ||
344 | } | |
345 | ||
346 | static void TestRegisterUnregister(){ | |
347 | UErrorCode status=U_ZERO_ERROR; | |
348 | UTransliterator* t1=NULL; | |
374ca955 | 349 | UTransliterator* rules=NULL, *rules2; |
b75a7d8f A |
350 | UTransliterator* inverse1=NULL; |
351 | UChar rule[]={ 0x0061, 0x003c, 0x003e, 0x0063}; /*a<>b*/ | |
352 | ||
374ca955 A |
353 | U_STRING_DECL(ID, "TestA-TestB", 11); |
354 | U_STRING_INIT(ID, "TestA-TestB", 11); | |
355 | ||
b75a7d8f A |
356 | /* Make sure it doesn't exist */ |
357 | t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status); | |
358 | if(t1 != NULL || U_SUCCESS(status)) { | |
359 | log_err("FAIL: TestA-TestB already registered\n"); | |
360 | return; | |
361 | } | |
362 | status=U_ZERO_ERROR; | |
363 | /* Check inverse too */ | |
364 | inverse1=utrans_open("TestA-TestB", UTRANS_REVERSE, NULL,0,NULL,&status); | |
365 | if(inverse1 != NULL || U_SUCCESS(status)) { | |
366 | log_err("FAIL: TestA-TestB already registered\n"); | |
367 | return; | |
368 | } | |
369 | status=U_ZERO_ERROR; | |
370 | /* Create it */ | |
371 | rules=utrans_open("TestA-TestB",UTRANS_FORWARD, rule, 4, NULL, &status); | |
372 | if(U_FAILURE(status)){ | |
373 | log_err("FAIL: utrans_openRules(a<>B) failed with error=%s\n", myErrorName(status)); | |
374 | return; | |
375 | } | |
374ca955 A |
376 | |
377 | /* clone it so we can register it a second time */ | |
378 | rules2=utrans_clone(rules, &status); | |
379 | if(U_FAILURE(status)) { | |
380 | log_err("FAIL: utrans_clone(a<>B) failed with error=%s\n", myErrorName(status)); | |
381 | return; | |
382 | } | |
383 | ||
b75a7d8f A |
384 | status=U_ZERO_ERROR; |
385 | /* Register it */ | |
386 | utrans_register(rules, &status); | |
387 | if(U_FAILURE(status)){ | |
388 | log_err("FAIL: utrans_register failed with error=%s\n", myErrorName(status)); | |
389 | return; | |
390 | } | |
391 | status=U_ZERO_ERROR; | |
392 | /* Now check again -- should exist now*/ | |
393 | t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status); | |
394 | if(U_FAILURE(status) || t1 == NULL){ | |
395 | log_err("FAIL: TestA-TestB not registered\n"); | |
396 | return; | |
397 | } | |
398 | utrans_close(t1); | |
399 | ||
400 | /*unregister the instance*/ | |
401 | status=U_ZERO_ERROR; | |
402 | utrans_unregister("TestA-TestB"); | |
403 | /* now Make sure it doesn't exist */ | |
404 | t1=utrans_open("TestA-TestB", UTRANS_FORWARD,NULL,0,NULL, &status); | |
405 | if(U_SUCCESS(status) || t1 != NULL) { | |
406 | log_err("FAIL: TestA-TestB isn't unregistered\n"); | |
407 | return; | |
408 | } | |
374ca955 A |
409 | utrans_close(t1); |
410 | ||
411 | /* now with utrans_unregisterID(const UChar *) */ | |
412 | status=U_ZERO_ERROR; | |
413 | utrans_register(rules2, &status); | |
414 | if(U_FAILURE(status)){ | |
415 | log_err("FAIL: 2nd utrans_register failed with error=%s\n", myErrorName(status)); | |
416 | return; | |
417 | } | |
418 | status=U_ZERO_ERROR; | |
419 | /* Now check again -- should exist now*/ | |
420 | t1= utrans_open("TestA-TestB", UTRANS_FORWARD, NULL,0,NULL,&status); | |
421 | if(U_FAILURE(status) || t1 == NULL){ | |
422 | log_err("FAIL: 2nd TestA-TestB not registered\n"); | |
423 | return; | |
424 | } | |
425 | utrans_close(t1); | |
426 | ||
427 | /*unregister the instance*/ | |
428 | status=U_ZERO_ERROR; | |
429 | utrans_unregisterID(ID, -1); | |
430 | /* now Make sure it doesn't exist */ | |
431 | t1=utrans_openU(ID, -1, UTRANS_FORWARD,NULL,0,NULL, &status); | |
432 | if(U_SUCCESS(status) || t1 != NULL) { | |
433 | log_err("FAIL: 2nd TestA-TestB isn't unregistered\n"); | |
434 | return; | |
435 | } | |
436 | ||
b75a7d8f A |
437 | utrans_close(t1); |
438 | utrans_close(inverse1); | |
439 | } | |
440 | ||
441 | static void TestSimpleRules() { | |
442 | /* Test rules */ | |
443 | /* Example: rules 1. ab>x|y | |
444 | * 2. yc>z | |
445 | * | |
446 | * []|eabcd start - no match, copy e to tranlated buffer | |
447 | * [e]|abcd match rule 1 - copy output & adjust cursor | |
448 | * [ex|y]cd match rule 2 - copy output & adjust cursor | |
449 | * [exz]|d no match, copy d to transliterated buffer | |
450 | * [exzd]| done | |
451 | */ | |
452 | _expectRules("ab>x|y;" | |
453 | "yc>z", | |
454 | "eabcd", "exzd"); | |
455 | ||
456 | /* Another set of rules: | |
457 | * 1. ab>x|yzacw | |
458 | * 2. za>q | |
459 | * 3. qc>r | |
460 | * 4. cw>n | |
461 | * | |
462 | * []|ab Rule 1 | |
463 | * [x|yzacw] No match | |
464 | * [xy|zacw] Rule 2 | |
465 | * [xyq|cw] Rule 4 | |
466 | * [xyqn]| Done | |
467 | */ | |
468 | _expectRules("ab>x|yzacw;" | |
469 | "za>q;" | |
470 | "qc>r;" | |
471 | "cw>n", | |
472 | "ab", "xyqn"); | |
473 | ||
474 | /* Test categories | |
475 | */ | |
476 | _expectRules("$dummy=" "\\uE100" ";" /* careful here with E100 */ | |
477 | "$vowel=[aeiouAEIOU];" | |
478 | "$lu=[:Lu:];" | |
479 | "$vowel } $lu > '!';" | |
480 | "$vowel > '&';" | |
481 | "'!' { $lu > '^';" | |
482 | "$lu > '*';" | |
483 | "a > ERROR", | |
484 | "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); | |
729e4ab9 A |
485 | |
486 | /* Test multiple passes | |
487 | */ | |
488 | _expectRules("abc > xy;" | |
489 | "::Null;" | |
490 | "aba > z;", | |
491 | "abc ababc aba", "xy abxy z"); | |
b75a7d8f A |
492 | } |
493 | ||
494 | static void TestFilter() { | |
495 | UErrorCode status = U_ZERO_ERROR; | |
496 | UChar filt[128]; | |
497 | UChar buf[128]; | |
498 | UChar exp[128]; | |
374ca955 | 499 | char *cbuf; |
b75a7d8f A |
500 | int32_t limit; |
501 | const char* DATA[] = { | |
502 | "[^c]", /* Filter out 'c' */ | |
503 | "abcde", | |
504 | "\\u0061\\u0062c\\u0064\\u0065", | |
505 | ||
506 | "", /* No filter */ | |
507 | "abcde", | |
508 | "\\u0061\\u0062\\u0063\\u0064\\u0065" | |
509 | }; | |
2ca993e8 | 510 | int32_t DATA_length = UPRV_LENGTHOF(DATA); |
b75a7d8f A |
511 | int32_t i; |
512 | ||
513 | UTransliterator* hex = utrans_open("Any-Hex", UTRANS_FORWARD, NULL,0,NULL,&status); | |
514 | ||
515 | if (hex == 0 || U_FAILURE(status)) { | |
516 | log_err("FAIL: utrans_open(Unicode-Hex) failed, error=%s\n", | |
517 | u_errorName(status)); | |
518 | goto exit; | |
519 | } | |
520 | ||
521 | for (i=0; i<DATA_length; i+=3) { | |
522 | /*u_uastrcpy(filt, DATA[i]);*/ | |
374ca955 | 523 | u_charsToUChars(DATA[i], filt, (int32_t)strlen(DATA[i])+1); |
b75a7d8f A |
524 | utrans_setFilter(hex, filt, -1, &status); |
525 | ||
526 | if (U_FAILURE(status)) { | |
527 | log_err("FAIL: utrans_setFilter() failed, error=%s\n", | |
528 | u_errorName(status)); | |
529 | goto exit; | |
530 | } | |
531 | ||
532 | /*u_uastrcpy(buf, DATA[i+1]);*/ | |
374ca955 | 533 | u_charsToUChars(DATA[i+1], buf, (int32_t)strlen(DATA[i+1])+1); |
b75a7d8f A |
534 | limit = 5; |
535 | utrans_transUChars(hex, buf, NULL, 128, 0, &limit, &status); | |
536 | ||
537 | if (U_FAILURE(status)) { | |
538 | log_err("FAIL: utrans_transUChars() failed, error=%s\n", | |
539 | u_errorName(status)); | |
540 | goto exit; | |
541 | } | |
542 | ||
374ca955 A |
543 | cbuf=aescstrdup(buf, -1); |
544 | u_charsToUChars(DATA[i+2], exp, (int32_t)strlen(DATA[i+2])+1); | |
b75a7d8f A |
545 | if (0 == u_strcmp(buf, exp)) { |
546 | log_verbose("Ok: %s | %s -> %s\n", DATA[i+1], DATA[i], cbuf); | |
547 | } else { | |
548 | log_err("FAIL: %s | %s -> %s, expected %s\n", DATA[i+1], DATA[i], cbuf, DATA[i+2]); | |
549 | } | |
550 | } | |
551 | ||
552 | exit: | |
553 | utrans_close(hex); | |
554 | } | |
555 | ||
556 | /** | |
557 | * Test the UReplaceableCallback extractBetween support. We use a | |
558 | * transliterator known to rely on this call. | |
559 | */ | |
560 | static void TestExtractBetween() { | |
561 | ||
562 | UTransliterator *trans; | |
563 | UErrorCode status = U_ZERO_ERROR; | |
564 | UParseError parseErr; | |
565 | ||
566 | trans = utrans_open("Lower", UTRANS_FORWARD, NULL, -1, | |
567 | &parseErr, &status); | |
568 | ||
569 | if (U_FAILURE(status)) { | |
570 | log_err("FAIL: utrans_open(Lower) failed, error=%s\n", | |
571 | u_errorName(status)); | |
572 | } else { | |
573 | _expect(trans, "ABC", "abc"); | |
574 | ||
575 | utrans_close(trans); | |
576 | } | |
577 | } | |
578 | ||
57a6839d A |
579 | /** |
580 | * Test utrans_toRules, utrans_getSourceSet | |
581 | */ | |
582 | ||
583 | /* A simple transform with a small filter & source set: rules 50-100 chars unescaped, 100-200 chars escaped, | |
584 | filter & source set 4-20 chars */ | |
585 | static const UChar transSimpleID[] = { 0x79,0x6F,0x2D,0x79,0x6F,0x5F,0x42,0x4A,0 }; /* "yo-yo_BJ" */ | |
586 | static const char* transSimpleCName = "yo-yo_BJ"; | |
587 | ||
588 | enum { kUBufMax = 256 }; | |
589 | static void TestGetRulesAndSourceSet() { | |
590 | UErrorCode status = U_ZERO_ERROR; | |
591 | UTransliterator *utrans = utrans_openU(transSimpleID, -1, UTRANS_FORWARD, NULL, 0, NULL, &status); | |
592 | if ( U_SUCCESS(status) ) { | |
593 | USet* uset; | |
594 | UChar ubuf[kUBufMax]; | |
595 | int32_t ulen; | |
596 | ||
597 | status = U_ZERO_ERROR; | |
598 | ulen = utrans_toRules(utrans, FALSE, ubuf, kUBufMax, &status); | |
599 | if ( U_FAILURE(status) || ulen <= 50 || ulen >= 100) { | |
600 | log_err("FAIL: utrans_toRules unescaped, expected noErr and len 50-100, got error=%s and len=%d\n", | |
601 | u_errorName(status), ulen); | |
602 | } | |
603 | ||
604 | status = U_ZERO_ERROR; | |
605 | ulen = utrans_toRules(utrans, FALSE, NULL, 0, &status); | |
606 | if ( status != U_BUFFER_OVERFLOW_ERROR || ulen <= 50 || ulen >= 100) { | |
607 | log_err("FAIL: utrans_toRules unescaped, expected U_BUFFER_OVERFLOW_ERROR and len 50-100, got error=%s and len=%d\n", | |
608 | u_errorName(status), ulen); | |
609 | } | |
610 | ||
611 | status = U_ZERO_ERROR; | |
612 | ulen = utrans_toRules(utrans, TRUE, ubuf, kUBufMax, &status); | |
613 | if ( U_FAILURE(status) || ulen <= 100 || ulen >= 200) { | |
614 | log_err("FAIL: utrans_toRules escaped, expected noErr and len 100-200, got error=%s and len=%d\n", | |
615 | u_errorName(status), ulen); | |
616 | } | |
617 | ||
618 | status = U_ZERO_ERROR; | |
619 | uset = utrans_getSourceSet(utrans, FALSE, NULL, &status); | |
620 | ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status); | |
621 | uset_close(uset); | |
622 | if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) { | |
623 | log_err("FAIL: utrans_getSourceSet useFilter, expected noErr and len 4-20, got error=%s and len=%d\n", | |
624 | u_errorName(status), ulen); | |
625 | } | |
626 | ||
627 | status = U_ZERO_ERROR; | |
628 | uset = utrans_getSourceSet(utrans, TRUE, NULL, &status); | |
629 | ulen = uset_toPattern(uset, ubuf, kUBufMax, FALSE, &status); | |
630 | uset_close(uset); | |
631 | if ( U_FAILURE(status) || ulen <= 4 || ulen >= 20) { | |
632 | log_err("FAIL: utrans_getSourceSet ignoreFilter, expected noErr and len 4-20, got error=%s and len=%d\n", | |
633 | u_errorName(status), ulen); | |
634 | } | |
635 | ||
636 | utrans_close(utrans); | |
637 | } else { | |
638 | log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", | |
639 | transSimpleCName, u_errorName(status)); | |
640 | } | |
641 | } | |
642 | ||
2ca993e8 A |
/* One row of test data for TestDataVariantsCompounds(). The source and
 * target strings are passed through u_unescape() before use. */
typedef struct {
    const char * transID;    /* transliterator ID to open */
    const char * sourceText; /* input text, with \uXXXX escapes */
    const char * targetText; /* expected output, with \uXXXX escapes */
} TransIDSourceTarg;

/* Table terminated by an entry whose transID is NULL. */
static const TransIDSourceTarg dataVarCompItems[] = {
    {
        "Simplified-Traditional",
        "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u4ECE\\u7B80\\u4F53\\u8F6C\\u6362\\u4E3A\\u7E41\\u4F53\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002",
        "\\u4E0B\\u9762\\u662F\\u4E00\\u4E9B\\u5F9E\\u7C21\\u9AD4\\u8F49\\u63DB\\u70BA\\u7E41\\u9AD4\\u5B57\\u793A\\u4F8B\\u6587\\u672C\\u3002"
    },
    {
        "Halfwidth-Fullwidth",
        "Sample text, \\uFF7B\\uFF9D\\uFF8C\\uFF9F\\uFF99\\uFF83\\uFF77\\uFF7D\\uFF84.",
        "\\uFF33\\uFF41\\uFF4D\\uFF50\\uFF4C\\uFF45\\u3000\\uFF54\\uFF45\\uFF58\\uFF54\\uFF0C\\u3000\\u30B5\\u30F3\\u30D7\\u30EB\\u30C6\\u30AD\\u30B9\\u30C8\\uFF0E"
    },
    {
        "Han-Latin/Names; Latin-Bopomofo",
        "\\u4E07\\u4FDF\\u919C\\u5974\\u3001\\u533A\\u695A\\u826F\\u3001\\u4EFB\\u70E8\\u3001\\u5CB3\\u98DB",
        "\\u3107\\u311B\\u02CB \\u3111\\u3127\\u02CA \\u3114\\u3121\\u02C7 \\u310B\\u3128\\u02CA\\u3001 \\u3121 \\u3114\\u3128\\u02C7 \\u310C\\u3127\\u3124\\u02CA\\u3001 \\u3116\\u3123\\u02CA \\u3127\\u311D\\u02CB\\u3001 \\u3129\\u311D\\u02CB \\u3108\\u311F"
    },
    {
        "Greek-Latin",
        "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
        "A \\u0100I H\\u0100I RH"
    },
    {
        "Greek-Latin/BGN",
        "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
        "A\\u0313 A\\u0345 A\\u0314\\u0345 \\u1FEC"
    },
    {
        "Greek-Latin/UNGEGN",
        "\\u1F08 \\u1FBC \\u1F89 \\u1FEC",
        "A A A R"
    },
    { NULL, NULL, NULL }
};
670 | ||
671 | enum { kBBufMax = 384 }; | |
672 | static void TestDataVariantsCompounds() { | |
673 | const TransIDSourceTarg* itemsPtr; | |
674 | for (itemsPtr = dataVarCompItems; itemsPtr->transID != NULL; itemsPtr++) { | |
675 | UErrorCode status = U_ZERO_ERROR; | |
676 | UChar utrid[kUBufMax]; | |
677 | int32_t utridlen = u_unescape(itemsPtr->transID, utrid, kUBufMax); | |
678 | UTransliterator* utrans = utrans_openU(utrid, utridlen, UTRANS_FORWARD, NULL, 0, NULL, &status); | |
679 | if (U_FAILURE(status)) { | |
680 | log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", itemsPtr->transID, u_errorName(status)); | |
681 | continue; | |
682 | } | |
683 | UChar text[kUBufMax]; | |
684 | int32_t textLen = u_unescape(itemsPtr->sourceText, text, kUBufMax); | |
685 | int32_t textLim = textLen; | |
686 | utrans_transUChars(utrans, text, &textLen, kUBufMax, 0, &textLim, &status); | |
687 | if (U_FAILURE(status)) { | |
688 | log_err("FAIL: utrans_transUChars(%s) failed, error=%s\n", itemsPtr->transID, u_errorName(status)); | |
689 | } else { | |
690 | UChar expect[kUBufMax]; | |
691 | int32_t expectLen = u_unescape(itemsPtr->targetText, expect, kUBufMax); | |
692 | if (textLen != expectLen || u_strncmp(text, expect, textLen) != 0) { | |
693 | char btext[kBBufMax], bexpect[kBBufMax]; | |
694 | u_austrncpy(btext, text, textLen); | |
695 | u_austrncpy(bexpect, expect, expectLen); | |
696 | log_err("FAIL: utrans_transUChars(%s),\n expect %s\n get %s\n", itemsPtr->transID, bexpect, btext); | |
697 | } | |
698 | } | |
699 | utrans_close(utrans); | |
700 | } | |
701 | } | |
57a6839d | 702 | |
b75a7d8f A |
703 | static void _expectRules(const char* crules, |
704 | const char* cfrom, | |
705 | const char* cto) { | |
706 | /* u_uastrcpy has no capacity param for the buffer -- so just | |
707 | * make all buffers way too big */ | |
708 | enum { CAP = 256 }; | |
709 | UChar rules[CAP]; | |
710 | UTransliterator *trans; | |
711 | UErrorCode status = U_ZERO_ERROR; | |
712 | UParseError parseErr; | |
713 | ||
714 | u_uastrcpy(rules, crules); | |
715 | ||
716 | trans = utrans_open(crules /*use rules as ID*/, UTRANS_FORWARD, rules, -1, | |
717 | &parseErr, &status); | |
718 | if (U_FAILURE(status)) { | |
719 | utrans_close(trans); | |
729e4ab9 | 720 | log_data_err("FAIL: utrans_openRules(%s) failed, error=%s (Are you missing data?)\n", |
b75a7d8f A |
721 | crules, u_errorName(status)); |
722 | return; | |
723 | } | |
724 | ||
725 | _expect(trans, cfrom, cto); | |
726 | ||
727 | utrans_close(trans); | |
728 | } | |
729 | ||
730 | static void _expect(const UTransliterator* trans, | |
731 | const char* cfrom, | |
732 | const char* cto) { | |
733 | /* u_uastrcpy has no capacity param for the buffer -- so just | |
734 | * make all buffers way too big */ | |
735 | enum { CAP = 256 }; | |
736 | UChar from[CAP]; | |
737 | UChar to[CAP]; | |
738 | UChar buf[CAP]; | |
374ca955 A |
739 | const UChar *ID; |
740 | int32_t IDLength; | |
741 | const char *id; | |
742 | ||
b75a7d8f A |
743 | UErrorCode status = U_ZERO_ERROR; |
744 | int32_t limit; | |
745 | UTransPosition pos; | |
746 | XReplaceable xrep; | |
73c04bcf | 747 | XReplaceable *xrepPtr = &xrep; |
b75a7d8f A |
748 | UReplaceableCallbacks xrepVtable; |
749 | ||
750 | u_uastrcpy(from, cfrom); | |
751 | u_uastrcpy(to, cto); | |
752 | ||
374ca955 A |
753 | ID = utrans_getUnicodeID(trans, &IDLength); |
754 | id = aescstrdup(ID, IDLength); | |
b75a7d8f A |
755 | |
756 | /* utrans_transUChars() */ | |
757 | u_strcpy(buf, from); | |
758 | limit = u_strlen(buf); | |
759 | utrans_transUChars(trans, buf, NULL, CAP, 0, &limit, &status); | |
760 | if (U_FAILURE(status)) { | |
761 | log_err("FAIL: utrans_transUChars() failed, error=%s\n", | |
762 | u_errorName(status)); | |
763 | return; | |
764 | } | |
765 | ||
766 | if (0 == u_strcmp(buf, to)) { | |
767 | log_verbose("Ok: utrans_transUChars(%s) x %s -> %s\n", | |
768 | id, cfrom, cto); | |
769 | } else { | |
770 | char actual[CAP]; | |
771 | u_austrcpy(actual, buf); | |
772 | log_err("FAIL: utrans_transUChars(%s) x %s -> %s, expected %s\n", | |
773 | id, cfrom, actual, cto); | |
774 | } | |
775 | ||
776 | /* utrans_transIncrementalUChars() */ | |
777 | u_strcpy(buf, from); | |
778 | pos.start = pos.contextStart = 0; | |
779 | pos.limit = pos.contextLimit = u_strlen(buf); | |
780 | utrans_transIncrementalUChars(trans, buf, NULL, CAP, &pos, &status); | |
781 | utrans_transUChars(trans, buf, NULL, CAP, pos.start, &pos.limit, &status); | |
782 | if (U_FAILURE(status)) { | |
783 | log_err("FAIL: utrans_transIncrementalUChars() failed, error=%s\n", | |
784 | u_errorName(status)); | |
785 | return; | |
786 | } | |
787 | ||
788 | if (0 == u_strcmp(buf, to)) { | |
789 | log_verbose("Ok: utrans_transIncrementalUChars(%s) x %s -> %s\n", | |
790 | id, cfrom, cto); | |
791 | } else { | |
792 | char actual[CAP]; | |
793 | u_austrcpy(actual, buf); | |
794 | log_err("FAIL: utrans_transIncrementalUChars(%s) x %s -> %s, expected %s\n", | |
795 | id, cfrom, actual, cto); | |
796 | } | |
797 | ||
798 | /* utrans_trans() */ | |
799 | InitXReplaceableCallbacks(&xrepVtable); | |
800 | InitXReplaceable(&xrep, cfrom); | |
801 | limit = u_strlen(from); | |
73c04bcf | 802 | utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, 0, &limit, &status); |
b75a7d8f A |
803 | if (U_FAILURE(status)) { |
804 | log_err("FAIL: utrans_trans() failed, error=%s\n", | |
805 | u_errorName(status)); | |
806 | FreeXReplaceable(&xrep); | |
807 | return; | |
808 | } | |
809 | ||
810 | if (0 == u_strcmp(xrep.text, to)) { | |
811 | log_verbose("Ok: utrans_trans(%s) x %s -> %s\n", | |
812 | id, cfrom, cto); | |
813 | } else { | |
814 | char actual[CAP]; | |
815 | u_austrcpy(actual, xrep.text); | |
816 | log_err("FAIL: utrans_trans(%s) x %s -> %s, expected %s\n", | |
817 | id, cfrom, actual, cto); | |
818 | } | |
819 | FreeXReplaceable(&xrep); | |
820 | ||
821 | /* utrans_transIncremental() */ | |
822 | InitXReplaceable(&xrep, cfrom); | |
823 | pos.start = pos.contextStart = 0; | |
824 | pos.limit = pos.contextLimit = u_strlen(from); | |
73c04bcf A |
825 | utrans_transIncremental(trans, (UReplaceable*)xrepPtr, &xrepVtable, &pos, &status); |
826 | utrans_trans(trans, (UReplaceable*)xrepPtr, &xrepVtable, pos.start, &pos.limit, &status); | |
b75a7d8f A |
827 | if (U_FAILURE(status)) { |
828 | log_err("FAIL: utrans_transIncremental() failed, error=%s\n", | |
829 | u_errorName(status)); | |
830 | FreeXReplaceable(&xrep); | |
831 | return; | |
832 | } | |
833 | ||
834 | if (0 == u_strcmp(xrep.text, to)) { | |
835 | log_verbose("Ok: utrans_transIncremental(%s) x %s -> %s\n", | |
836 | id, cfrom, cto); | |
837 | } else { | |
838 | char actual[CAP]; | |
839 | u_austrcpy(actual, xrep.text); | |
840 | log_err("FAIL: utrans_transIncremental(%s) x %s -> %s, expected %s\n", | |
841 | id, cfrom, actual, cto); | |
842 | } | |
843 | FreeXReplaceable(&xrep); | |
844 | } | |
845 | ||
846 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |