]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/transtst.cpp
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / transtst.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/10/99 aliu Creation.
10 **********************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_TRANSLITERATION
16
17 #include "transtst.h"
18 #include "unicode/locid.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/translit.h"
22 #include "unicode/uchar.h"
23 #include "unicode/unifilt.h"
24 #include "unicode/uniset.h"
25 #include "unicode/ustring.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/uscript.h"
28 #include "unicode/utf16.h"
29 #include "cpdtrans.h"
30 #include "nultrans.h"
31 #include "rbt.h"
32 #include "rbt_pars.h"
33 #include "anytrans.h"
34 #include "esctrn.h"
35 #include "name2uni.h"
36 #include "nortrans.h"
37 #include "remtrans.h"
38 #include "titletrn.h"
39 #include "tolowtrn.h"
40 #include "toupptrn.h"
41 #include "unesctrn.h"
42 #include "uni2name.h"
43 #include "cstring.h"
44 #include "cmemory.h"
45 #include <stdio.h>
46
47 /***********************************************************************
48
49 HOW TO USE THIS TEST FILE
50 -or-
51 How I developed on two platforms
52 without losing (too much of) my mind
53
54
55 1. Add new tests by copying/pasting/changing existing tests. On Java,
56 any public void method named Test...() taking no parameters becomes
57 a test. On C++, you need to modify the header and add a line to
58 the runIndexedTest() dispatch method.
59
60 2. Make liberal use of the expect() method; it is your friend.
61
62 3. The tests in this file exactly match those in a sister file on the
63 other side. The two files are:
64
65 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
66 icu4c: source/test/intltest/transtst.cpp
67
68 ==> THIS IS THE IMPORTANT PART <==
69
70 When you add a test in this file, add it in TransliteratorTest.java
71 too. Give it the same name and put it in the same relative place.
72 This makes maintenance a lot simpler for any poor soul who ends up
73 trying to synchronize the tests between icu4j and icu4c.
74
75 4. If you MUST enter a test that is NOT paralleled in the sister file,
76 then add it in the special non-mirrored section. These are
77 labeled
78
79 "icu4j ONLY"
80
81 or
82
83 "icu4c ONLY"
84
85 Make sure you document the reason the test is here and not there.
86
87
88 Thank you.
89 The Management
90 ***********************************************************************/
91
92 // Define character constants thusly to be EBCDIC-friendly
93 enum {
94 LEFT_BRACE=((UChar)0x007B), /*{*/
95 PIPE =((UChar)0x007C), /*|*/
96 ZERO =((UChar)0x0030), /*0*/
97 UPPER_A =((UChar)0x0041) /*A*/
98 };
99
100 TransliteratorTest::TransliteratorTest()
101 : DESERET_DEE((UChar32)0x10414),
102 DESERET_dee((UChar32)0x1043C)
103 {
104 }
105
106 TransliteratorTest::~TransliteratorTest() {}
107
108 void
109 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
110 const char* &name, char* /*par*/) {
111 switch (index) {
112 TESTCASE(0,TestInstantiation);
113 TESTCASE(1,TestSimpleRules);
114 TESTCASE(2,TestRuleBasedInverse);
115 TESTCASE(3,TestKeyboard);
116 TESTCASE(4,TestKeyboard2);
117 TESTCASE(5,TestKeyboard3);
118 TESTCASE(6,TestArabic);
119 TESTCASE(7,TestCompoundKana);
120 TESTCASE(8,TestCompoundHex);
121 TESTCASE(9,TestFiltering);
122 TESTCASE(10,TestInlineSet);
123 TESTCASE(11,TestPatternQuoting);
124 TESTCASE(12,TestJ277);
125 TESTCASE(13,TestJ243);
126 TESTCASE(14,TestJ329);
127 TESTCASE(15,TestSegments);
128 TESTCASE(16,TestCursorOffset);
129 TESTCASE(17,TestArbitraryVariableValues);
130 TESTCASE(18,TestPositionHandling);
131 TESTCASE(19,TestHiraganaKatakana);
132 TESTCASE(20,TestCopyJ476);
133 TESTCASE(21,TestAnchors);
134 TESTCASE(22,TestInterIndic);
135 TESTCASE(23,TestFilterIDs);
136 TESTCASE(24,TestCaseMap);
137 TESTCASE(25,TestNameMap);
138 TESTCASE(26,TestLiberalizedID);
139 TESTCASE(27,TestCreateInstance);
140 TESTCASE(28,TestNormalizationTransliterator);
141 TESTCASE(29,TestCompoundRBT);
142 TESTCASE(30,TestCompoundFilter);
143 TESTCASE(31,TestRemove);
144 TESTCASE(32,TestToRules);
145 TESTCASE(33,TestContext);
146 TESTCASE(34,TestSupplemental);
147 TESTCASE(35,TestQuantifier);
148 TESTCASE(36,TestSTV);
149 TESTCASE(37,TestCompoundInverse);
150 TESTCASE(38,TestNFDChainRBT);
151 TESTCASE(39,TestNullInverse);
152 TESTCASE(40,TestAliasInverseID);
153 TESTCASE(41,TestCompoundInverseID);
154 TESTCASE(42,TestUndefinedVariable);
155 TESTCASE(43,TestEmptyContext);
156 TESTCASE(44,TestCompoundFilterID);
157 TESTCASE(45,TestPropertySet);
158 TESTCASE(46,TestNewEngine);
159 TESTCASE(47,TestQuantifiedSegment);
160 TESTCASE(48,TestDevanagariLatinRT);
161 TESTCASE(49,TestTeluguLatinRT);
162 TESTCASE(50,TestCompoundLatinRT);
163 TESTCASE(51,TestSanskritLatinRT);
164 TESTCASE(52,TestLocaleInstantiation);
165 TESTCASE(53,TestTitleAccents);
166 TESTCASE(54,TestLocaleResource);
167 TESTCASE(55,TestParseError);
168 TESTCASE(56,TestOutputSet);
169 TESTCASE(57,TestVariableRange);
170 TESTCASE(58,TestInvalidPostContext);
171 TESTCASE(59,TestIDForms);
172 TESTCASE(60,TestToRulesMark);
173 TESTCASE(61,TestEscape);
174 TESTCASE(62,TestAnchorMasking);
175 TESTCASE(63,TestDisplayName);
176 TESTCASE(64,TestSpecialCases);
177 #if !UCONFIG_NO_FILE_IO
178 TESTCASE(65,TestIncrementalProgress);
179 #endif
180 TESTCASE(66,TestSurrogateCasing);
181 TESTCASE(67,TestFunction);
182 TESTCASE(68,TestInvalidBackRef);
183 TESTCASE(69,TestMulticharStringSet);
184 TESTCASE(70,TestUserFunction);
185 TESTCASE(71,TestAnyX);
186 TESTCASE(72,TestSourceTargetSet);
187 TESTCASE(73,TestGurmukhiDevanagari);
188 TESTCASE(74,TestPatternWhiteSpace);
189 TESTCASE(75,TestAllCodepoints);
190 TESTCASE(76,TestBoilerplate);
191 TESTCASE(77,TestAlternateSyntax);
192 TESTCASE(78,TestBeginEnd);
193 TESTCASE(79,TestBeginEndToRules);
194 TESTCASE(80,TestRegisterAlias);
195 TESTCASE(81,TestRuleStripping);
196 TESTCASE(82,TestHalfwidthFullwidth);
197 TESTCASE(83,TestThai);
198 TESTCASE(84,TestAny);
199 default: name = ""; break;
200 }
201 }
202
203 /**
204 * Make sure every system transliterator can be instantiated.
205 *
206 * ALSO test that the result of toRules() for each rule is a valid
207 * rule. Do this here so we don't have to have another test that
208 * instantiates everything as well.
209 */
210 void TransliteratorTest::TestInstantiation() {
211 UErrorCode ec = U_ZERO_ERROR;
212 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
213 assertSuccess("getAvailableIDs()", ec);
214 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
215 int32_t n = Transliterator::countAvailableIDs();
216 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
217 avail->count(ec) == n);
218 assertSuccess("count()", ec);
219 UnicodeString name;
220 for (int32_t i=0; i<n; ++i) {
221 const UnicodeString& id = *avail->snext(ec);
222 if (!assertSuccess("snext()", ec) ||
223 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
224 break;
225 }
226 UnicodeString id2 = Transliterator::getAvailableID(i);
227 if (id.length() < 1) {
228 errln(UnicodeString("FAIL: getAvailableID(") +
229 i + ") returned empty string");
230 continue;
231 }
232 if (id != id2) {
233 errln(UnicodeString("FAIL: getAvailableID(") +
234 i + ") != getAvailableIDs().snext()");
235 continue;
236 }
237 UParseError parseError;
238 UErrorCode status = U_ZERO_ERROR;
239 Transliterator* t = Transliterator::createInstance(id,
240 UTRANS_FORWARD, parseError,status);
241 name.truncate(0);
242 Transliterator::getDisplayName(id, name);
243 if (t == 0) {
244 #if UCONFIG_NO_BREAK_ITERATION
245 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
246 if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
247 id.compare((UnicodeString)"Thai-Latin") != 0)
248 #endif
249 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
250 /*", parse error " + parseError.code +*/
251 ", line " + parseError.line +
252 ", offset " + parseError.offset +
253 ", pre-context " + prettify(parseError.preContext, TRUE) +
254 ", post-context " +prettify(parseError.postContext,TRUE) +
255 ", Error: " + u_errorName(status));
256 // When createInstance fails, it deletes the failing
257 // entry from the available ID list. We detect this
258 // here by looking for a change in countAvailableIDs.
259 int32_t nn = Transliterator::countAvailableIDs();
260 if (nn == (n - 1)) {
261 n = nn;
262 --i; // Compensate for deleted entry
263 }
264 } else {
265 logln(UnicodeString("OK: ") + name + " (" + id + ")");
266
267 // Now test toRules
268 UnicodeString rules;
269 t->toRules(rules, TRUE);
270 Transliterator *u = Transliterator::createFromRules("x",
271 rules, UTRANS_FORWARD, parseError,status);
272 if (u == 0) {
273 errln(UnicodeString("FAIL: ") + id +
274 ".createFromRules() => bad rules" +
275 /*", parse error " + parseError.code +*/
276 ", line " + parseError.line +
277 ", offset " + parseError.offset +
278 ", context " + prettify(parseError.preContext, TRUE) +
279 ", rules: " + prettify(rules, TRUE));
280 } else {
281 delete u;
282 }
283 delete t;
284 }
285 }
286 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
287 assertSuccess("snext()", ec);
288 delete avail;
289
290 // Now test the failure path
291 UParseError parseError;
292 UErrorCode status = U_ZERO_ERROR;
293 UnicodeString id("<Not a valid Transliterator ID>");
294 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
295 if (t != 0) {
296 errln("FAIL: " + id + " returned a transliterator");
297 delete t;
298 } else {
299 logln("OK: Bogus ID handled properly");
300 }
301 }
302
303 void TransliteratorTest::TestSimpleRules(void) {
304 /* Example: rules 1. ab>x|y
305 * 2. yc>z
306 *
307 * []|eabcd start - no match, copy e to tranlated buffer
308 * [e]|abcd match rule 1 - copy output & adjust cursor
309 * [ex|y]cd match rule 2 - copy output & adjust cursor
310 * [exz]|d no match, copy d to transliterated buffer
311 * [exzd]| done
312 */
313 expect(UnicodeString("ab>x|y;", "") +
314 "yc>z",
315 "eabcd", "exzd");
316
317 /* Another set of rules:
318 * 1. ab>x|yzacw
319 * 2. za>q
320 * 3. qc>r
321 * 4. cw>n
322 *
323 * []|ab Rule 1
324 * [x|yzacw] No match
325 * [xy|zacw] Rule 2
326 * [xyq|cw] Rule 4
327 * [xyqn]| Done
328 */
329 expect(UnicodeString("ab>x|yzacw;") +
330 "za>q;" +
331 "qc>r;" +
332 "cw>n",
333 "ab", "xyqn");
334
335 /* Test categories
336 */
337 UErrorCode status = U_ZERO_ERROR;
338 UParseError parseError;
339 Transliterator *t = Transliterator::createFromRules(
340 "<ID>",
341 UnicodeString("$dummy=").append((UChar)0xE100) +
342 UnicodeString(";"
343 "$vowel=[aeiouAEIOU];"
344 "$lu=[:Lu:];"
345 "$vowel } $lu > '!';"
346 "$vowel > '&';"
347 "'!' { $lu > '^';"
348 "$lu > '*';"
349 "a > ERROR", ""),
350 UTRANS_FORWARD, parseError,
351 status);
352 if (U_FAILURE(status)) {
353 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
354 return;
355 }
356 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
357 delete t;
358 }
359
360 /**
361 * Test inline set syntax and set variable syntax.
362 */
363 void TransliteratorTest::TestInlineSet(void) {
364 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
365 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
366
367 expect(UnicodeString(
368 "$digit = [0-9];"
369 "$alpha = [a-zA-Z];"
370 "$alphanumeric = [$digit $alpha];" // ***
371 "$special = [^$alphanumeric];" // ***
372 "$alphanumeric > '-';"
373 "$special > '*';", ""),
374
375 "thx-1138", "---*----");
376 }
377
378 /**
379 * Create some inverses and confirm that they work. We have to be
380 * careful how we do this, since the inverses will not be true
381 * inverses -- we can't throw any random string at the composition
382 * of the transliterators and expect the identity function. F x
383 * F' != I. However, if we are careful about the input, we will
384 * get the expected results.
385 */
386 void TransliteratorTest::TestRuleBasedInverse(void) {
387 UnicodeString RULES =
388 UnicodeString("abc>zyx;") +
389 "ab>yz;" +
390 "bc>zx;" +
391 "ca>xy;" +
392 "a>x;" +
393 "b>y;" +
394 "c>z;" +
395
396 "abc<zyx;" +
397 "ab<yz;" +
398 "bc<zx;" +
399 "ca<xy;" +
400 "a<x;" +
401 "b<y;" +
402 "c<z;" +
403
404 "";
405
406 const char* DATA[] = {
407 // Careful here -- random strings will not work. If we keep
408 // the left side to the domain and the right side to the range
409 // we will be okay though (left, abc; right xyz).
410 "a", "x",
411 "abcacab", "zyxxxyy",
412 "caccb", "xyzzy",
413 };
414
415 int32_t DATA_length = UPRV_LENGTHOF(DATA);
416
417 UErrorCode status = U_ZERO_ERROR;
418 UParseError parseError;
419 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
420 UTRANS_FORWARD, parseError, status);
421 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
422 UTRANS_REVERSE, parseError, status);
423 if (U_FAILURE(status)) {
424 errln("FAIL: RBT constructor failed");
425 return;
426 }
427 for (int32_t i=0; i<DATA_length; i+=2) {
428 expect(*fwd, DATA[i], DATA[i+1]);
429 expect(*rev, DATA[i+1], DATA[i]);
430 }
431 delete fwd;
432 delete rev;
433 }
434
435 /**
436 * Basic test of keyboard.
437 */
438 void TransliteratorTest::TestKeyboard(void) {
439 UParseError parseError;
440 UErrorCode status = U_ZERO_ERROR;
441 Transliterator *t = Transliterator::createFromRules("<ID>",
442 UnicodeString("psch>Y;")
443 +"ps>y;"
444 +"ch>x;"
445 +"a>A;",
446 UTRANS_FORWARD, parseError,
447 status);
448 if (U_FAILURE(status)) {
449 errln("FAIL: RBT constructor failed");
450 return;
451 }
452 const char* DATA[] = {
453 // insertion, buffer
454 "a", "A",
455 "p", "Ap",
456 "s", "Aps",
457 "c", "Apsc",
458 "a", "AycA",
459 "psch", "AycAY",
460 0, "AycAY", // null means finishKeyboardTransliteration
461 };
462
463 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
464 delete t;
465 }
466
467 /**
468 * Basic test of keyboard with cursor.
469 */
470 void TransliteratorTest::TestKeyboard2(void) {
471 UParseError parseError;
472 UErrorCode status = U_ZERO_ERROR;
473 Transliterator *t = Transliterator::createFromRules("<ID>",
474 UnicodeString("ych>Y;")
475 +"ps>|y;"
476 +"ch>x;"
477 +"a>A;",
478 UTRANS_FORWARD, parseError,
479 status);
480 if (U_FAILURE(status)) {
481 errln("FAIL: RBT constructor failed");
482 return;
483 }
484 const char* DATA[] = {
485 // insertion, buffer
486 "a", "A",
487 "p", "Ap",
488 "s", "Aps", // modified for rollback - "Ay",
489 "c", "Apsc", // modified for rollback - "Ayc",
490 "a", "AycA",
491 "p", "AycAp",
492 "s", "AycAps", // modified for rollback - "AycAy",
493 "c", "AycApsc", // modified for rollback - "AycAyc",
494 "h", "AycAY",
495 0, "AycAY", // null means finishKeyboardTransliteration
496 };
497
498 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
499 delete t;
500 }
501
502 /**
503 * Test keyboard transliteration with back-replacement.
504 */
505 void TransliteratorTest::TestKeyboard3(void) {
506 // We want th>z but t>y. Furthermore, during keyboard
507 // transliteration we want t>y then yh>z if t, then h are
508 // typed.
509 UnicodeString RULES("t>|y;"
510 "yh>z;");
511
512 const char* DATA[] = {
513 // Column 1: characters to add to buffer (as if typed)
514 // Column 2: expected appearance of buffer after
515 // keyboard xliteration.
516 "a", "a",
517 "b", "ab",
518 "t", "abt", // modified for rollback - "aby",
519 "c", "abyc",
520 "t", "abyct", // modified for rollback - "abycy",
521 "h", "abycz",
522 0, "abycz", // null means finishKeyboardTransliteration
523 };
524
525 UParseError parseError;
526 UErrorCode status = U_ZERO_ERROR;
527 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
528 if (U_FAILURE(status)) {
529 errln("FAIL: RBT constructor failed");
530 return;
531 }
532 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
533 delete t;
534 }
535
536 void TransliteratorTest::keyboardAux(const Transliterator& t,
537 const char* DATA[], int32_t DATA_length) {
538 UErrorCode status = U_ZERO_ERROR;
539 UTransPosition index={0, 0, 0, 0};
540 UnicodeString s;
541 for (int32_t i=0; i<DATA_length; i+=2) {
542 UnicodeString log;
543 if (DATA[i] != 0) {
544 log = s + " + "
545 + DATA[i]
546 + " -> ";
547 t.transliterate(s, index, DATA[i], status);
548 } else {
549 log = s + " => ";
550 t.finishTransliteration(s, index);
551 }
552 // Show the start index '{' and the cursor '|'
553 UnicodeString a, b, c;
554 s.extractBetween(0, index.contextStart, a);
555 s.extractBetween(index.contextStart, index.start, b);
556 s.extractBetween(index.start, s.length(), c);
557 log.append(a).
558 append((UChar)LEFT_BRACE).
559 append(b).
560 append((UChar)PIPE).
561 append(c);
562 if (s == DATA[i+1] && U_SUCCESS(status)) {
563 logln(log);
564 } else {
565 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
566 }
567 }
568 }
569
570 void TransliteratorTest::TestArabic(void) {
571 // Test disabled for 2.0 until new Arabic transliterator can be written.
572 // /*
573 // const char* DATA[] = {
574 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
575 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
576 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
577 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
578 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
579 // "\u062c\u0645\u064a\u0644\u0629",
580 // };
581 // */
582 //
583 // UChar ar_raw[] = {
584 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
585 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
586 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
588 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
589 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
590 // };
591 // UnicodeString ar(ar_raw);
592 // UErrorCode status=U_ZERO_ERROR;
593 // UParseError parseError;
594 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
595 // if (t == 0) {
596 // errln("FAIL: createInstance failed");
597 // return;
598 // }
599 // expect(*t, "Arabic", ar);
600 // delete t;
601 }
602
603 /**
604 * Compose the Kana transliterator forward and reverse and try
605 * some strings that should come out unchanged.
606 */
607 void TransliteratorTest::TestCompoundKana(void) {
608 UParseError parseError;
609 UErrorCode status = U_ZERO_ERROR;
610 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
611 if (t == 0) {
612 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
613 } else {
614 expect(*t, "aaaaa", "aaaaa");
615 delete t;
616 }
617 }
618
619 /**
620 * Compose the hex transliterators forward and reverse.
621 */
622 void TransliteratorTest::TestCompoundHex(void) {
623 UParseError parseError;
624 UErrorCode status = U_ZERO_ERROR;
625 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
626 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
627 Transliterator* transab[] = { a, b };
628 Transliterator* transba[] = { b, a };
629 if (a == 0 || b == 0) {
630 errln("FAIL: construction failed");
631 delete a;
632 delete b;
633 return;
634 }
635 // Do some basic tests of a
636 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
637 // Do some basic tests of b
638 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
639
640 Transliterator* ab = new CompoundTransliterator(transab, 2);
641 UnicodeString s("abcde", "");
642 expect(*ab, s, s);
643
644 UnicodeString str(s);
645 a->transliterate(str);
646 Transliterator* ba = new CompoundTransliterator(transba, 2);
647 expect(*ba, str, str);
648
649 delete ab;
650 delete ba;
651 delete a;
652 delete b;
653 }
654
655 int gTestFilterClassID = 0;
656 /**
657 * Used by TestFiltering().
658 */
659 class TestFilter : public UnicodeFilter {
660 virtual UnicodeFunctor* clone() const {
661 return new TestFilter(*this);
662 }
663 virtual UBool contains(UChar32 c) const {
664 return c != (UChar)0x0063 /*c*/;
665 }
666 // Stubs
667 virtual UnicodeString& toPattern(UnicodeString& result,
668 UBool /*escapeUnprintable*/) const {
669 return result;
670 }
671 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
672 return FALSE;
673 }
674 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
675 public:
676 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
677 };
678
679 /**
680 * Do some basic tests of filtering.
681 */
682 void TransliteratorTest::TestFiltering(void) {
683 UParseError parseError;
684 UErrorCode status = U_ZERO_ERROR;
685 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
686 if (hex == 0) {
687 errln("FAIL: createInstance(Any-Hex) failed");
688 return;
689 }
690 hex->adoptFilter(new TestFilter());
691 UnicodeString s("abcde");
692 hex->transliterate(s);
693 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
694 if (s == exp) {
695 logln(UnicodeString("Ok: \"") + exp + "\"");
696 } else {
697 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
698 }
699
700 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
701 UnicodeFilter *f = hex->orphanFilter();
702 if (f == NULL){
703 errln("FAIL: orphanFilter() should get a UnicodeFilter");
704 } else {
705 delete f;
706 }
707 delete hex;
708 }
709
710 /**
711 * Test anchors
712 */
713 void TransliteratorTest::TestAnchors(void) {
714 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
715 "aaa",
716 "012");
717 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
718 "aaa",
719 "012");
720 expect(UnicodeString("^ab > 01 ;"
721 " ab > |8 ;"
722 " b > k ;"
723 " 8x$ > 45 ;"
724 " 8x > 77 ;", ""),
725
726 "ababbabxabx",
727 "018k7745");
728 expect(UnicodeString("$s = [z$] ;"
729 "$s{ab > 01 ;"
730 " ab > |8 ;"
731 " b > k ;"
732 " 8x}$s > 45 ;"
733 " 8x > 77 ;", ""),
734
735 "abzababbabxzabxabx",
736 "01z018k45z01x45");
737 }
738
739 /**
740 * Test pattern quoting and escape mechanisms.
741 */
742 void TransliteratorTest::TestPatternQuoting(void) {
743 // Array of 3n items
744 // Each item is <rules>, <input>, <expected output>
745 const UnicodeString DATA[] = {
746 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
747 UnicodeString(UChar(0x4E01)),
748 "[male adult]"
749 };
750
751 for (int32_t i=0; i<3; i+=3) {
752 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
753 UParseError parseError;
754 UErrorCode status = U_ZERO_ERROR;
755 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
756 if (U_FAILURE(status)) {
757 errln("RBT constructor failed");
758 } else {
759 expect(*t, DATA[i+1], DATA[i+2]);
760 }
761 delete t;
762 }
763 }
764
765 /**
766 * Regression test for bugs found in Greek transliteration.
767 */
768 void TransliteratorTest::TestJ277(void) {
769 UErrorCode status = U_ZERO_ERROR;
770 UParseError parseError;
771 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
772 if (gl == NULL) {
773 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
774 return;
775 }
776
777 UChar sigma = 0x3C3;
778 UChar upsilon = 0x3C5;
779 UChar nu = 0x3BD;
780 // UChar PHI = 0x3A6;
781 UChar alpha = 0x3B1;
782 // UChar omega = 0x3C9;
783 // UChar omicron = 0x3BF;
784 // UChar epsilon = 0x3B5;
785
786 // sigma upsilon nu -> syn
787 UnicodeString syn;
788 syn.append(sigma).append(upsilon).append(nu);
789 expect(*gl, syn, "syn");
790
791 // sigma alpha upsilon nu -> saun
792 UnicodeString sayn;
793 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
794 expect(*gl, sayn, "saun");
795
796 // Again, using a smaller rule set
797 UnicodeString rules(
798 "$alpha = \\u03B1;"
799 "$nu = \\u03BD;"
800 "$sigma = \\u03C3;"
801 "$ypsilon = \\u03C5;"
802 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
803 "s <> $sigma;"
804 "a <> $alpha;"
805 "u <> $vowel { $ypsilon;"
806 "y <> $ypsilon;"
807 "n <> $nu;",
808 "");
809 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
810 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
811 expect(*mini, syn, "syn");
812 expect(*mini, sayn, "saun");
813 delete mini;
814 mini = NULL;
815
816 #if !UCONFIG_NO_FORMATTING
817 // Transliterate the Greek locale data
818 Locale el("el");
819 DateFormatSymbols syms(el, status);
820 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
821 int32_t i, count;
822 const UnicodeString* data = syms.getMonths(count);
823 for (i=0; i<count; ++i) {
824 if (data[i].length() == 0) {
825 continue;
826 }
827 UnicodeString out(data[i]);
828 gl->transliterate(out);
829 UBool ok = TRUE;
830 if (data[i].length() >= 2 && out.length() >= 2 &&
831 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
832 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
833 ok = FALSE;
834 }
835 }
836 if (ok) {
837 logln(prettify(data[i] + " -> " + out));
838 } else {
839 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
840 }
841 }
842 #endif
843
844 delete gl;
845 }
846
847 /**
848 * Prefix, suffix support in hex transliterators
849 */
850 void TransliteratorTest::TestJ243(void) {
851 UErrorCode ec = U_ZERO_ERROR;
852
853 // Test default Hex-Any, which should handle
854 // \u, \U, u+, and U+
855 Transliterator *hex =
856 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
857 if (assertSuccess("getInstance", ec)) {
858 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
859 }
860 delete hex;
861
862 // // Try a custom Hex-Unicode
863 // // \uXXXX and &#xXXXX;
864 // ec = U_ZERO_ERROR;
865 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
866 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
867 // "abcd5fx012&#x00033;");
868 // // Try custom Any-Hex (default is tested elsewhere)
869 // ec = U_ZERO_ERROR;
870 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
871 // expect(hex3, "012", "&#x30;&#x31;&#x32;");
872 }
873
874 /**
875 * Parsers need better syntax error messages.
876 */
877 void TransliteratorTest::TestJ329(void) {
878
879 struct { UBool containsErrors; const char* rule; } DATA[] = {
880 { FALSE, "a > b; c > d" },
881 { TRUE, "a > b; no operator; c > d" },
882 };
883 int32_t DATA_length = UPRV_LENGTHOF(DATA);
884
885 for (int32_t i=0; i<DATA_length; ++i) {
886 UErrorCode status = U_ZERO_ERROR;
887 UParseError parseError;
888 Transliterator *rbt = Transliterator::createFromRules("<ID>",
889 DATA[i].rule,
890 UTRANS_FORWARD,
891 parseError,
892 status);
893 UBool gotError = U_FAILURE(status);
894 UnicodeString desc(DATA[i].rule);
895 desc.append(gotError ? " -> error" : " -> no error");
896 if (gotError) {
897 desc = desc + ", ParseError code=" + u_errorName(status) +
898 " line=" + parseError.line +
899 " offset=" + parseError.offset +
900 " context=" + parseError.preContext;
901 }
902 if (gotError == DATA[i].containsErrors) {
903 logln(UnicodeString("Ok: ") + desc);
904 } else {
905 errln(UnicodeString("FAIL: ") + desc);
906 }
907 delete rbt;
908 }
909 }
910
911 /**
912 * Test segments and segment references.
913 */
914 void TransliteratorTest::TestSegments(void) {
915 // Array of 3n items
916 // Each item is <rules>, <input>, <expected output>
917 UnicodeString DATA[] = {
918 "([a-z]) '.' ([0-9]) > $2 '-' $1",
919 "abc.123.xyz.456",
920 "ab1-c23.xy4-z56",
921
922 // nested
923 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
924 "a1 b2",
925 "a1.a.1 b2.b.2",
926 };
927 int32_t DATA_length = UPRV_LENGTHOF(DATA);
928
929 for (int32_t i=0; i<DATA_length; i+=3) {
930 logln("Pattern: " + prettify(DATA[i]));
931 UParseError parseError;
932 UErrorCode status = U_ZERO_ERROR;
933 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
934 if (U_FAILURE(status)) {
935 errln("FAIL: RBT constructor");
936 } else {
937 expect(*t, DATA[i+1], DATA[i+2]);
938 }
939 delete t;
940 }
941 }
942
943 /**
944 * Test cursor positioning outside of the key
945 */
946 void TransliteratorTest::TestCursorOffset(void) {
947 // Array of 3n items
948 // Each item is <rules>, <input>, <expected output>
949 UnicodeString DATA[] = {
950 "pre {alpha} post > | @ ALPHA ;"
951 "eALPHA > beta ;"
952 "pre {beta} post > BETA @@ | ;"
953 "post > xyz",
954
955 "prealphapost prebetapost",
956
957 "prbetaxyz preBETApost",
958 };
959 int32_t DATA_length = UPRV_LENGTHOF(DATA);
960
961 for (int32_t i=0; i<DATA_length; i+=3) {
962 logln("Pattern: " + prettify(DATA[i]));
963 UParseError parseError;
964 UErrorCode status = U_ZERO_ERROR;
965 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
966 if (U_FAILURE(status)) {
967 errln("FAIL: RBT constructor");
968 } else {
969 expect(*t, DATA[i+1], DATA[i+2]);
970 }
971 delete t;
972 }
973 }
974
975 /**
976 * Test zero length and > 1 char length variable values. Test
977 * use of variable refs in UnicodeSets.
978 */
979 void TransliteratorTest::TestArbitraryVariableValues(void) {
980 // Array of 3n items
981 // Each item is <rules>, <input>, <expected output>
982 UnicodeString DATA[] = {
983 "$abe = ab;"
984 "$pat = x[yY]z;"
985 "$ll = 'a-z';"
986 "$llZ = [$ll];"
987 "$llY = [$ll$pat];"
988 "$emp = ;"
989
990 "$abe > ABE;"
991 "$pat > END;"
992 "$llZ > 1;"
993 "$llY > 2;"
994 "7$emp 8 > 9;"
995 "",
996
997 "ab xYzxyz stY78",
998 "ABE ENDEND 1129",
999 };
1000 int32_t DATA_length = UPRV_LENGTHOF(DATA);
1001
1002 for (int32_t i=0; i<DATA_length; i+=3) {
1003 logln("Pattern: " + prettify(DATA[i]));
1004 UParseError parseError;
1005 UErrorCode status = U_ZERO_ERROR;
1006 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1007 if (U_FAILURE(status)) {
1008 errln("FAIL: RBT constructor");
1009 } else {
1010 expect(*t, DATA[i+1], DATA[i+2]);
1011 }
1012 delete t;
1013 }
1014 }
1015
1016 /**
1017 * Confirm that the contextStart, contextLimit, start, and limit
1018 * behave correctly. J474.
1019 */
1020 void TransliteratorTest::TestPositionHandling(void) {
1021 // Array of 3n items
1022 // Each item is <rules>, <input>, <expected output>
1023 const char* DATA[] = {
1024 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1025 "xtat txtb", // pos 0,9,0,9
1026 "xTTaSS TTxUUb",
1027
1028 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1029 "xtat txtb", // pos 2,9,3,8
1030 "xtaSS TTxUUb",
1031
1032 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1033 "xtat txtb", // pos 3,8,3,8
1034 "xtaTT TTxTTb",
1035 };
1036
1037 // Array of 4n positions -- these go with the DATA array
1038 // They are: contextStart, contextLimit, start, limit
1039 int32_t POS[] = {
1040 0, 9, 0, 9,
1041 2, 9, 3, 8,
1042 3, 8, 3, 8,
1043 };
1044
1045 int32_t n = UPRV_LENGTHOF(DATA) / 3;
1046 for (int32_t i=0; i<n; i++) {
1047 UErrorCode status = U_ZERO_ERROR;
1048 UParseError parseError;
1049 Transliterator *t = Transliterator::createFromRules("<ID>",
1050 DATA[3*i], UTRANS_FORWARD, parseError, status);
1051 if (U_FAILURE(status)) {
1052 delete t;
1053 errln("FAIL: RBT constructor");
1054 return;
1055 }
1056 UTransPosition pos;
1057 pos.contextStart= POS[4*i];
1058 pos.contextLimit = POS[4*i+1];
1059 pos.start = POS[4*i+2];
1060 pos.limit = POS[4*i+3];
1061 UnicodeString rsource(DATA[3*i+1]);
1062 t->transliterate(rsource, pos, status);
1063 if (U_FAILURE(status)) {
1064 delete t;
1065 errln("FAIL: transliterate");
1066 return;
1067 }
1068 t->finishTransliteration(rsource, pos);
1069 expectAux(DATA[3*i],
1070 DATA[3*i+1],
1071 rsource,
1072 DATA[3*i+2]);
1073 delete t;
1074 }
1075 }
1076
1077 /**
1078 * Test the Hiragana-Katakana transliterator.
1079 */
1080 void TransliteratorTest::TestHiraganaKatakana(void) {
1081 UParseError parseError;
1082 UErrorCode status = U_ZERO_ERROR;
1083 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1084 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1085 if (hk == 0 || kh == 0) {
1086 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1087 delete hk;
1088 delete kh;
1089 return;
1090 }
1091
1092 // Array of 3n items
1093 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1094 const char* DATA[] = {
1095 "both",
1096 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1097 "\\u30A2\\u30F8\\u30F2\\u30B0",
1098
1099 "kh",
1100 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1101 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1102 };
1103 int32_t DATA_length = UPRV_LENGTHOF(DATA);
1104
1105 for (int32_t i=0; i<DATA_length; i+=3) {
1106 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1107 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1108 switch (*DATA[i]) {
1109 case 0x68: //'h': // Hiragana-Katakana
1110 expect(*hk, h, k);
1111 break;
1112 case 0x6B: //'k': // Katakana-Hiragana
1113 expect(*kh, k, h);
1114 break;
1115 case 0x62: //'b': // both
1116 expect(*hk, h, k);
1117 expect(*kh, k, h);
1118 break;
1119 }
1120 }
1121 delete hk;
1122 delete kh;
1123 }
1124
1125 /**
1126 * Test cloning / copy constructor of RBT.
1127 */
1128 void TransliteratorTest::TestCopyJ476(void) {
1129 // The real test here is what happens when the destructors are
1130 // called. So we let one object get destructed, and check to
1131 // see that its copy still works.
1132 Transliterator *t2 = 0;
1133 {
1134 UParseError parseError;
1135 UErrorCode status = U_ZERO_ERROR;
1136 Transliterator *t1 = Transliterator::createFromRules("t1",
1137 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1138 if (U_FAILURE(status)) {
1139 errln("FAIL: RBT constructor");
1140 return;
1141 }
1142 t2 = t1->clone(); // Call copy constructor under the covers.
1143 expect(*t1, "abcfoofoo", "ABcbar");
1144 delete t1;
1145 }
1146 expect(*t2, "abcfoofoo", "ABcbar");
1147 delete t2;
1148 }
1149
1150 /**
1151 * Test inter-Indic transliterators. These are composed.
1152 * ICU4C Jitterbug 483.
1153 */
1154 void TransliteratorTest::TestInterIndic(void) {
1155 UnicodeString ID("Devanagari-Gujarati", "");
1156 UErrorCode status = U_ZERO_ERROR;
1157 UParseError parseError;
1158 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1159 if (dg == 0) {
1160 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1161 return;
1162 }
1163 UnicodeString id = dg->getID();
1164 if (id != ID) {
1165 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1166 }
1167 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1168 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1169 expect(*dg, dev, guj);
1170 delete dg;
1171 }
1172
1173 /**
1174 * Test filter syntax in IDs. (J918)
1175 */
1176 void TransliteratorTest::TestFilterIDs(void) {
1177 // Array of 3n strings:
1178 // <id>, <inverse id>, <input>, <expected output>
1179 const char* DATA[] = {
1180 "[aeiou]Any-Hex", // ID
1181 "[aeiou]Hex-Any", // expected inverse ID
1182 "quizzical", // src
1183 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1184
1185 "[aeiou]Any-Hex;[^5]Hex-Any",
1186 "[^5]Any-Hex;[aeiou]Hex-Any",
1187 "quizzical",
1188 "q\\u0075izzical",
1189
1190 "[abc]Null",
1191 "[abc]Null",
1192 "xyz",
1193 "xyz",
1194 };
1195 enum { DATA_length = UPRV_LENGTHOF(DATA) };
1196
1197 for (int i=0; i<DATA_length; i+=4) {
1198 UnicodeString ID(DATA[i], "");
1199 UnicodeString uID(DATA[i+1], "");
1200 UnicodeString data2(DATA[i+2], "");
1201 UnicodeString data3(DATA[i+3], "");
1202 UParseError parseError;
1203 UErrorCode status = U_ZERO_ERROR;
1204 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1205 if (t == 0) {
1206 errln("FAIL: createInstance(" + ID + ") returned NULL");
1207 return;
1208 }
1209 expect(*t, data2, data3);
1210
1211 // Check the ID
1212 if (ID != t->getID()) {
1213 errln("FAIL: createInstance(" + ID + ").getID() => " +
1214 t->getID());
1215 }
1216
1217 // Check the inverse
1218 Transliterator *u = t->createInverse(status);
1219 if (u == 0) {
1220 errln("FAIL: " + ID + ".createInverse() returned NULL");
1221 } else if (u->getID() != uID) {
1222 errln("FAIL: " + ID + ".createInverse().getID() => " +
1223 u->getID() + ", expected " + uID);
1224 }
1225
1226 delete t;
1227 delete u;
1228 }
1229 }
1230
1231 /**
1232 * Test the case mapping transliterators.
1233 */
1234 void TransliteratorTest::TestCaseMap(void) {
1235 UParseError parseError;
1236 UErrorCode status = U_ZERO_ERROR;
1237 Transliterator* toUpper =
1238 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239 Transliterator* toLower =
1240 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241 Transliterator* toTitle =
1242 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1243 if (toUpper==0 || toLower==0 || toTitle==0) {
1244 errln("FAIL: createInstance returned NULL");
1245 delete toUpper;
1246 delete toLower;
1247 delete toTitle;
1248 return;
1249 }
1250
1251 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1252 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1253 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1254 "the quick brown foX jumped over the lazY dogs.");
1255 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1256 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1257
1258 delete toUpper;
1259 delete toLower;
1260 delete toTitle;
1261 }
1262
1263 /**
1264 * Test the name mapping transliterators.
1265 */
1266 void TransliteratorTest::TestNameMap(void) {
1267 UParseError parseError;
1268 UErrorCode status = U_ZERO_ERROR;
1269 Transliterator* uni2name =
1270 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1271 Transliterator* name2uni =
1272 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1273 if (uni2name==0 || name2uni==0) {
1274 errln("FAIL: createInstance returned NULL");
1275 delete uni2name;
1276 delete name2uni;
1277 return;
1278 }
1279
1280 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1281 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1282 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1283 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1284 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1285
1286 delete uni2name;
1287 delete name2uni;
1288
1289 // round trip
1290 Transliterator* t =
1291 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1292 if (t==0) {
1293 errln("FAIL: createInstance returned NULL");
1294 delete t;
1295 return;
1296 }
1297
1298 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1299 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1300 expect(*t, s, s);
1301 delete t;
1302 }
1303
1304 /**
1305 * Test liberalized ID syntax. 1006c
1306 */
1307 void TransliteratorTest::TestLiberalizedID(void) {
1308 // Some test cases have an expected getID() value of NULL. This
1309 // means I have disabled the test case for now. This stuff is
1310 // still under development, and I haven't decided whether to make
1311 // getID() return canonical case yet. It will all get rewritten
1312 // with the move to Source-Target/Variant IDs anyway. [aliu]
1313 const char* DATA[] = {
1314 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1315 " Null ", "Null", "whitespace",
1316 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1317 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1318 };
1319 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1320 UParseError parseError;
1321 UErrorCode status= U_ZERO_ERROR;
1322 for (int32_t i=0; i<DATA_length; i+=3) {
1323 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1324 if (t == 0) {
1325 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1326 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1327 } else {
1328 UnicodeString exp;
1329 if (DATA[i+1]) {
1330 exp = UnicodeString(DATA[i+1], "");
1331 }
1332 // Don't worry about getID() if the expected char*
1333 // is NULL -- see above.
1334 if (exp.length() == 0 || exp == t->getID()) {
1335 logln(UnicodeString("Ok: ") + DATA[i+2] +
1336 " create ID \"" + DATA[i] + "\" => \"" +
1337 exp + "\"");
1338 } else {
1339 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1340 " create ID \"" + DATA[i] + "\" => \"" +
1341 t->getID() + "\", exp \"" + exp + "\"");
1342 }
1343 delete t;
1344 }
1345 }
1346 }
1347
1348 /* test for Jitterbug 912 */
1349 void TransliteratorTest::TestCreateInstance(){
1350 const char* FORWARD = "F";
1351 const char* REVERSE = "R";
1352 const char* DATA[] = {
1353 // Column 1: id
1354 // Column 2: direction
1355 // Column 3: expected ID, or "" if expect failure
1356 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1357
1358 // JB#2689: bad compound causes crash
1359 "InvalidSource-InvalidTarget", FORWARD, "",
1360 "InvalidSource-InvalidTarget", REVERSE, "",
1361 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1362 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1363 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1364 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1365
1366 NULL
1367 };
1368
1369 for (int32_t i=0; DATA[i]; i+=3) {
1370 UParseError err;
1371 UErrorCode ec = U_ZERO_ERROR;
1372 UnicodeString id(DATA[i]);
1373 UTransDirection dir = (DATA[i+1]==FORWARD)?
1374 UTRANS_FORWARD:UTRANS_REVERSE;
1375 UnicodeString expID(DATA[i+2]);
1376 Transliterator* t =
1377 Transliterator::createInstance(id,dir,err,ec);
1378 UnicodeString newID;
1379 if (t) {
1380 newID = t->getID();
1381 }
1382 UBool ok = (newID == expID);
1383 if (!t) {
1384 newID = u_errorName(ec);
1385 }
1386 if (ok) {
1387 logln((UnicodeString)"Ok: createInstance(" +
1388 id + "," + DATA[i+1] + ") => " + newID);
1389 } else {
1390 dataerrln((UnicodeString)"FAIL: createInstance(" +
1391 id + "," + DATA[i+1] + ") => " + newID +
1392 ", expected " + expID);
1393 }
1394 delete t;
1395 }
1396 }
1397
1398 /**
1399 * Test the normalization transliterator.
1400 */
1401 void TransliteratorTest::TestNormalizationTransliterator() {
1402 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1403 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1404 const char* CANON[] = {
1405 // Input Decomposed Composed
1406 "cat", "cat", "cat" ,
1407 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1408
1409 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1410 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1411
1412 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1413 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1414 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1415
1416 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1417 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1418
1419 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1420 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1421 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1422
1423 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1424 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1425
1426 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1427 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1428
1429 "Henry IV", "Henry IV", "Henry IV" ,
1430 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1431
1432 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1433 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1434 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1435 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1436 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1437
1438 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1439 0 // end
1440 };
1441
1442 const char* COMPAT[] = {
1443 // Input Decomposed Composed
1444 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1445
1446 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1447 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1448
1449 "Henry IV", "Henry IV", "Henry IV" ,
1450 "Henry \\u2163", "Henry IV", "Henry IV" ,
1451
1452 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1453 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1454
1455 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1456 0 // end
1457 };
1458
1459 int32_t i;
1460 UParseError parseError;
1461 UErrorCode status = U_ZERO_ERROR;
1462 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1463 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1464 if (!NFD || !NFC) {
1465 dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1466 delete NFD;
1467 delete NFC;
1468 return;
1469 }
1470 for (i=0; CANON[i]; i+=3) {
1471 UnicodeString in = CharsToUnicodeString(CANON[i]);
1472 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1473 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1474 expect(*NFD, in, expd);
1475 expect(*NFC, in, expc);
1476 }
1477 delete NFD;
1478 delete NFC;
1479
1480 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1481 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1482 if (!NFKD || !NFKC) {
1483 dataerrln("FAIL: createInstance failed");
1484 delete NFKD;
1485 delete NFKC;
1486 return;
1487 }
1488 for (i=0; COMPAT[i]; i+=3) {
1489 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1490 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1491 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1492 expect(*NFKD, in, expkd);
1493 expect(*NFKC, in, expkc);
1494 }
1495 delete NFKD;
1496 delete NFKC;
1497
1498 UParseError pe;
1499 status = U_ZERO_ERROR;
1500 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1501 UTRANS_FORWARD,
1502 pe, status);
1503 if (t == 0) {
1504 errln("FAIL: createInstance failed");
1505 }
1506 expect(*t, CharsToUnicodeString("\\u010dx"),
1507 CharsToUnicodeString("c\\u030C"));
1508 delete t;
1509 }
1510
1511 /**
1512 * Test compound RBT rules.
1513 */
1514 void TransliteratorTest::TestCompoundRBT(void) {
1515 // Careful with spacing and ';' here: Phrase this exactly
1516 // as toRules() is going to return it. If toRules() changes
1517 // with regard to spacing or ';', then adjust this string.
1518 UnicodeString rule("::Hex-Any;\n"
1519 "::Any-Lower;\n"
1520 "a > '.A.';\n"
1521 "b > '.B.';\n"
1522 "::[^t]Any-Upper;", "");
1523 UParseError parseError;
1524 UErrorCode status = U_ZERO_ERROR;
1525 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1526 if (t == 0) {
1527 errln("FAIL: createFromRules failed");
1528 return;
1529 }
1530 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1531 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1532 UnicodeString r;
1533 t->toRules(r, TRUE);
1534 if (r == rule) {
1535 logln((UnicodeString)"OK: toRules() => " + r);
1536 } else {
1537 errln((UnicodeString)"FAIL: toRules() => " + r +
1538 ", expected " + rule);
1539 }
1540 delete t;
1541
1542 // Now test toRules
1543 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1544 if (t == 0) {
1545 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1546 return;
1547 }
1548 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1549 t->toRules(r, TRUE);
1550 if (r != exp) {
1551 errln((UnicodeString)"FAIL: toRules() => " + r +
1552 ", expected " + exp);
1553 } else {
1554 logln((UnicodeString)"OK: toRules() => " + r);
1555 }
1556 delete t;
1557
1558 // Round trip the result of toRules
1559 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1560 if (t == 0) {
1561 errln("FAIL: createFromRules #2 failed");
1562 return;
1563 } else {
1564 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1565 }
1566
1567 // Test toRules again
1568 t->toRules(r, TRUE);
1569 if (r != exp) {
1570 errln((UnicodeString)"FAIL: toRules() => " + r +
1571 ", expected " + exp);
1572 } else {
1573 logln((UnicodeString)"OK: toRules() => " + r);
1574 }
1575
1576 delete t;
1577
1578 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1579 // to what the regenerated ID will look like.
1580 UnicodeString id("Upper(Lower);(NFKC)", "");
1581 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1582 if (t == 0) {
1583 errln("FAIL: createInstance #2 failed");
1584 return;
1585 }
1586 if (t->getID() == id) {
1587 logln((UnicodeString)"OK: created " + id);
1588 } else {
1589 errln((UnicodeString)"FAIL: createInstance(" + id +
1590 ").getID() => " + t->getID());
1591 }
1592
1593 Transliterator *u = t->createInverse(status);
1594 if (u == 0) {
1595 errln("FAIL: createInverse failed");
1596 delete t;
1597 return;
1598 }
1599 exp = "NFKC();Lower(Upper)";
1600 if (u->getID() == exp) {
1601 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1602 u->getID());
1603 } else {
1604 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1605 u->getID());
1606 }
1607 delete t;
1608 delete u;
1609 }
1610
1611 /**
1612 * Compound filter semantics were orginially not implemented
1613 * correctly. Originally, each component filter f(i) is replaced by
1614 * f'(i) = f(i) && g, where g is the filter for the compound
1615 * transliterator.
1616 *
1617 * From Mark:
1618 *
1619 * Suppose and I have a transliterator X. Internally X is
1620 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1621 *
1622 * The compound should convert all greek characters (through latin) to
1623 * cyrillic, then lowercase the result. The filter should say "don't
1624 * touch 'A' in the original". But because an intermediate result
1625 * happens to go through "A", the Greek Alpha gets hung up.
1626 */
1627 void TransliteratorTest::TestCompoundFilter(void) {
1628 UParseError parseError;
1629 UErrorCode status = U_ZERO_ERROR;
1630 Transliterator *t = Transliterator::createInstance
1631 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1632 if (t == 0) {
1633 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1634 return;
1635 }
1636 t->adoptFilter(new UnicodeSet("[^A]", status));
1637 if (U_FAILURE(status)) {
1638 errln("FAIL: UnicodeSet ct failed");
1639 delete t;
1640 return;
1641 }
1642
1643 // Only the 'A' at index 1 should remain unchanged
1644 expect(*t,
1645 CharsToUnicodeString("BA\\u039A\\u0391"),
1646 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1647 delete t;
1648 }
1649
1650 void TransliteratorTest::TestRemove(void) {
1651 UParseError parseError;
1652 UErrorCode status = U_ZERO_ERROR;
1653 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1654 if (t == 0) {
1655 errln("FAIL: createInstance failed");
1656 return;
1657 }
1658
1659 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1660
1661 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1662 // duplicating the filter
1663 Transliterator* t2 = t->clone();
1664 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1665
1666 delete t;
1667 delete t2;
1668 }
1669
1670 void TransliteratorTest::TestToRules(void) {
1671 const char* RBT = "rbt";
1672 const char* SET = "set";
1673 static const char* DATA[] = {
1674 RBT,
1675 "$a=\\u4E61; [$a] > A;",
1676 "[\\u4E61] > A;",
1677
1678 RBT,
1679 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1680 "[[:Zs:][:Zl:]]{a} > A;",
1681
1682 SET,
1683 "[[:Zs:][:Zl:]]",
1684 "[[:Zs:][:Zl:]]",
1685
1686 SET,
1687 "[:Ps:]",
1688 "[:Ps:]",
1689
1690 SET,
1691 "[:L:]",
1692 "[:L:]",
1693
1694 SET,
1695 "[[:L:]-[A]]",
1696 "[[:L:]-[A]]",
1697
1698 SET,
1699 "[~[:Lu:][:Ll:]]",
1700 "[~[:Lu:][:Ll:]]",
1701
1702 SET,
1703 "[~[a-z]]",
1704 "[~[a-z]]",
1705
1706 RBT,
1707 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1708 "[^[:Zs:]]{a} > A;",
1709
1710 RBT,
1711 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1712 "[[a-z]-[:Zs:]]{a} > A;",
1713
1714 RBT,
1715 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1716 "[[:Zs:]&[a-z]]{a} > A;",
1717
1718 RBT,
1719 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1720 "[x[:Zs:]]{a} > A;",
1721
1722 RBT,
1723 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1724 "$macron = \\u0304 ;"
1725 "$evowel = [aeiouyAEIOUY] ;"
1726 "$iotasub = \\u0345 ;"
1727 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1728 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1729
1730 RBT,
1731 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733 };
1734 static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1735
1736 for (int32_t d=0; d < DATA_length; d+=3) {
1737 if (DATA[d] == RBT) {
1738 // Transliterator test
1739 UParseError parseError;
1740 UErrorCode status = U_ZERO_ERROR;
1741 Transliterator *t = Transliterator::createFromRules("ID",
1742 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1743 if (t == 0) {
1744 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1745 return;
1746 }
1747 UnicodeString rules, escapedRules;
1748 t->toRules(rules, FALSE);
1749 t->toRules(escapedRules, TRUE);
1750 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1751 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1752 if (rules == expRules) {
1753 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754 " => " + rules);
1755 } else {
1756 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1757 " => " + rules + ", exp " + expRules);
1758 }
1759 if (escapedRules == expEscapedRules) {
1760 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761 " => " + escapedRules);
1762 } else {
1763 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1764 " => " + escapedRules + ", exp " + expEscapedRules);
1765 }
1766 delete t;
1767
1768 } else {
1769 // UnicodeSet test
1770 UErrorCode status = U_ZERO_ERROR;
1771 UnicodeString pat(DATA[d+1], -1, US_INV);
1772 UnicodeString expToPat(DATA[d+2], -1, US_INV);
1773 UnicodeSet set(pat, status);
1774 if (U_FAILURE(status)) {
1775 errln("FAIL: UnicodeSet ct failed");
1776 return;
1777 }
1778 // Adjust spacing etc. as necessary.
1779 UnicodeString toPat;
1780 set.toPattern(toPat);
1781 if (expToPat == toPat) {
1782 logln((UnicodeString)"Ok: " + pat +
1783 " => " + toPat);
1784 } else {
1785 errln((UnicodeString)"FAIL: " + pat +
1786 " => " + prettify(toPat, TRUE) +
1787 ", exp " + prettify(pat, TRUE));
1788 }
1789 }
1790 }
1791 }
1792
1793 void TransliteratorTest::TestContext() {
1794 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1795 expect("de > x; {d}e > y;",
1796 "de",
1797 "ye",
1798 &pos);
1799
1800 expect("ab{c} > z;",
1801 "xadabdabcy",
1802 "xadabdabzy");
1803 }
1804
1805 void TransliteratorTest::TestSupplemental() {
1806
1807 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1808 "a > $a; $s > i;"),
1809 CharsToUnicodeString("ab\\U0001030Fx"),
1810 CharsToUnicodeString("\\U00010300bix"));
1811
1812 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1813 "$b=[A-Z\\U00010400-\\U0001044D];"
1814 "($a)($b) > $2 $1;"),
1815 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1816 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1817
1818 // k|ax\\U00010300xm
1819
1820 // k|a\\U00010400\\U00010300xm
1821 // ky|\\U00010400\\U00010300xm
1822 // ky\\U00010400|\\U00010300xm
1823
1824 // ky\\U00010400|\\U00010300\\U00010400m
1825 // ky\\U00010400y|\\U00010400m
1826 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1827 "$a {x} > | @ \\U00010400;"
1828 "{$a} [^\\u0000-\\uFFFF] > y;"),
1829 CharsToUnicodeString("kax\\U00010300xm"),
1830 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1831
1832 expectT("Any-Name",
1833 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1834 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1835
1836 expectT("Any-Hex/Unicode",
1837 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1839
1840 expectT("Any-Hex/C",
1841 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1843
1844 expectT("Any-Hex/Perl",
1845 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1847
1848 expectT("Any-Hex/Java",
1849 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1851
1852 expectT("Any-Hex/XML",
1853 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854 "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1855
1856 expectT("Any-Hex/XML10",
1857 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1858 "&#66352;&#1113856;&#917601;&#160;");
1859
1860 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1861 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1862 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1863 }
1864
1865 void TransliteratorTest::TestQuantifier() {
1866
1867 // Make sure @ in a quantified anteContext works
1868 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1869 "AAAAAb",
1870 "aaa(aac)");
1871
1872 // Make sure @ in a quantified postContext works
1873 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1874 "baaaaa",
1875 "caa(aaa)");
1876
1877 // Make sure @ in a quantified postContext with seg ref works
1878 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1879 "baaaaa",
1880 "baa(aaa)");
1881
1882 // Make sure @ past ante context doesn't enter ante context
1883 UTransPosition pos = {0, 5, 3, 5};
1884 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1885 "xxxab",
1886 "xxx(ac)",
1887 &pos);
1888
1889 // Make sure @ past post context doesn't pass limit
1890 UTransPosition pos2 = {0, 4, 0, 2};
1891 expect("{b} a+ > c @@ |; x > y; a > A;",
1892 "baxx",
1893 "caxx",
1894 &pos2);
1895
1896 // Make sure @ past post context doesn't enter post context
1897 expect("{b} a+ > c @@ |; x > y; a > A;",
1898 "baxx",
1899 "cayy");
1900
1901 expect("(ab)? c > d;",
1902 "c abc ababc",
1903 "d d abd");
1904
1905 // NOTE: The (ab)+ when referenced just yields a single "ab",
1906 // not the full sequence of them. This accords with perl behavior.
1907 expect("(ab)+ {x} > '(' $1 ')';",
1908 "x abx ababxy",
1909 "x ab(ab) abab(ab)y");
1910
1911 expect("b+ > x;",
1912 "ac abc abbc abbbc",
1913 "ac axc axc axc");
1914
1915 expect("[abc]+ > x;",
1916 "qac abrc abbcs abtbbc",
1917 "qx xrx xs xtx");
1918
1919 expect("q{(ab)+} > x;",
1920 "qa qab qaba qababc qaba",
1921 "qa qx qxa qxc qxa");
1922
1923 expect("q(ab)* > x;",
1924 "qa qab qaba qababc",
1925 "xa x xa xc");
1926
1927 // NOTE: The (ab)+ when referenced just yields a single "ab",
1928 // not the full sequence of them. This accords with perl behavior.
1929 expect("q(ab)* > '(' $1 ')';",
1930 "qa qab qaba qababc",
1931 "()a (ab) (ab)a (ab)c");
1932
1933 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1934 // quoted string
1935 expect("'ab'+ > x;",
1936 "bb ab ababb",
1937 "bb x xb");
1938
1939 // $foo+ and $foo* -- the quantifier should apply to the entire
1940 // variable reference
1941 expect("$var = ab; $var+ > x;",
1942 "bb ab ababb",
1943 "bb x xb");
1944 }
1945
1946 class TestTrans : public Transliterator {
1947 public:
1948 TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1949 }
1950 virtual Transliterator* clone(void) const {
1951 return new TestTrans(getID());
1952 }
1953 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1954 UBool /*isIncremental*/) const
1955 {
1956 offsets.start = offsets.limit;
1957 }
1958 virtual UClassID getDynamicClassID() const;
1959 static UClassID U_EXPORT2 getStaticClassID();
1960 };
1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1962
1963 /**
1964 * Test Source-Target/Variant.
1965 */
1966 void TransliteratorTest::TestSTV(void) {
1967 int32_t ns = Transliterator::countAvailableSources();
1968 if (ns < 0 || ns > 255) {
1969 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1970 return;
1971 }
1972 int32_t i, j;
1973 for (i=0; i<ns; ++i) {
1974 UnicodeString source;
1975 Transliterator::getAvailableSource(i, source);
1976 logln((UnicodeString)"" + i + ": " + source);
1977 if (source.length() == 0) {
1978 errln("FAIL: empty source");
1979 continue;
1980 }
1981 int32_t nt = Transliterator::countAvailableTargets(source);
1982 if (nt < 0 || nt > 255) {
1983 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1984 continue;
1985 }
1986 for (int32_t j=0; j<nt; ++j) {
1987 UnicodeString target;
1988 Transliterator::getAvailableTarget(j, source, target);
1989 logln((UnicodeString)" " + j + ": " + target);
1990 if (target.length() == 0) {
1991 errln("FAIL: empty target");
1992 continue;
1993 }
1994 int32_t nv = Transliterator::countAvailableVariants(source, target);
1995 if (nv < 0 || nv > 255) {
1996 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1997 continue;
1998 }
1999 for (int32_t k=0; k<nv; ++k) {
2000 UnicodeString variant;
2001 Transliterator::getAvailableVariant(k, source, target, variant);
2002 if (variant.length() == 0) {
2003 logln((UnicodeString)" " + k + ": <empty>");
2004 } else {
2005 logln((UnicodeString)" " + k + ": " + variant);
2006 }
2007 }
2008 }
2009 }
2010
2011 // Test registration
2012 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2015 for (i=0; i<3; ++i) {
2016 Transliterator *t = new TestTrans(IDS[i]);
2017 if (t == 0) {
2018 errln("FAIL: out of memory");
2019 return;
2020 }
2021 if (t->getID() != IDS[i]) {
2022 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2023 delete t;
2024 return;
2025 }
2026 Transliterator::registerInstance(t);
2027 UErrorCode status = U_ZERO_ERROR;
2028 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2029 if (t == NULL) {
2030 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2031 IDS[i]);
2032 } else {
2033 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2034 IDS[i]);
2035 delete t;
2036 }
2037 Transliterator::unregister(IDS[i]);
2038 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2039 if (t != NULL) {
2040 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2041 IDS[i]);
2042 delete t;
2043 }
2044 }
2045
2046 // Make sure getAvailable API reflects removal
2047 int32_t n = Transliterator::countAvailableIDs();
2048 for (i=0; i<n; ++i) {
2049 UnicodeString id = Transliterator::getAvailableID(i);
2050 for (j=0; j<3; ++j) {
2051 if (id.caseCompare(FULL_IDS[j],0)==0) {
2052 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2053 }
2054 }
2055 }
2056 n = Transliterator::countAvailableTargets("Any");
2057 for (i=0; i<n; ++i) {
2058 UnicodeString t;
2059 Transliterator::getAvailableTarget(i, "Any", t);
2060 if (t.caseCompare(IDS[0],0)==0) {
2061 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2062 }
2063 }
2064 n = Transliterator::countAvailableSources();
2065 for (i=0; i<n; ++i) {
2066 UnicodeString s;
2067 Transliterator::getAvailableSource(i, s);
2068 for (j=0; j<3; ++j) {
2069 if (SOURCES[j] == NULL) continue;
2070 if (s.caseCompare(SOURCES[j],0)==0) {
2071 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2072 }
2073 }
2074 }
2075 }
2076
2077 /**
2078 * Test inverse of Greek-Latin; Title()
2079 */
2080 void TransliteratorTest::TestCompoundInverse(void) {
2081 UParseError parseError;
2082 UErrorCode status = U_ZERO_ERROR;
2083 Transliterator *t = Transliterator::createInstance
2084 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2085 if (t == 0) {
2086 dataerrln("FAIL: createInstance - %s", u_errorName(status));
2087 return;
2088 }
2089 UnicodeString exp("(Title);Latin-Greek");
2090 if (t->getID() == exp) {
2091 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2092 t->getID());
2093 } else {
2094 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2095 t->getID() + "\", expected \"" + exp + "\"");
2096 }
2097 delete t;
2098 }
2099
2100 /**
2101 * Test NFD chaining with RBT
2102 */
2103 void TransliteratorTest::TestNFDChainRBT() {
2104 UParseError pe;
2105 UErrorCode ec = U_ZERO_ERROR;
2106 Transliterator* t = Transliterator::createFromRules(
2107 "TEST", "::NFD; aa > Q; a > q;",
2108 UTRANS_FORWARD, pe, ec);
2109 if (t == NULL || U_FAILURE(ec)) {
2110 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2111 return;
2112 }
2113 expect(*t, "aa", "Q");
2114 delete t;
2115
2116 // TEMPORARY TESTS -- BEING DEBUGGED
2117 //=- UnicodeString s, s2;
2118 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2119 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2121 //=- expect(*t, s, s2);
2122 //=- delete t;
2123 //=-
2124 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2125 //=- expect(*t, s2, s);
2126 //=- delete t;
2127 //=-
2128 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2129 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2130 //=- expect(*t, s, s);
2131 //=- delete t;
2132
2133 // const char* source[] = {
2134 // /*
2135 // "\\u015Br\\u012Bmad",
2136 // "bhagavadg\\u012Bt\\u0101",
2137 // "adhy\\u0101ya",
2138 // "arjuna",
2139 // "vi\\u1E63\\u0101da",
2140 // "y\\u014Dga",
2141 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2142 // "uv\\u0101cr\\u0325",
2143 // */
2144 // "rmk\\u1E63\\u0113t",
2145 // //"dharmak\\u1E63\\u0113tr\\u0113",
2146 // /*
2147 // "kuruk\\u1E63\\u0113tr\\u0113",
2148 // "samav\\u0113t\\u0101",
2149 // "yuyutsava-\\u1E25",
2150 // "m\\u0101mak\\u0101-\\u1E25",
2151 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2152 // "kimakurvata",
2153 // "san\\u0304java",
2154 // */
2155 //
2156 // 0
2157 // };
2158 // const char* expected[] = {
2159 // /*
2160 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2161 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2162 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2163 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2164 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2165 // "\\u092f\\u094b\\u0917",
2166 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2167 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2168 // */
2169 // "\\u0927",
2170 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 // /*
2172 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2174 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2175 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2176 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2177 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2178 // "\\u0938\\u0902\\u091c\\u0935",
2179 // */
2180 // 0
2181 // };
2182 // UErrorCode status = U_ZERO_ERROR;
2183 // UParseError parseError;
2184 // UnicodeString message;
2185 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2186 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2187 // if(U_FAILURE(status)){
2188 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2189 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2190 // delete latinToDevToLatin;
2191 // delete devToLatinToDev;
2192 // return;
2193 // }
2194 // UnicodeString gotResult;
2195 // for(int i= 0; source[i] != 0; i++){
2196 // gotResult = source[i];
2197 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2198 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2199 // }
2200 // delete latinToDevToLatin;
2201 // delete devToLatinToDev;
2202 }
2203
2204 /**
2205 * Inverse of "Null" should be "Null". (J21)
2206 */
2207 void TransliteratorTest::TestNullInverse() {
2208 UParseError pe;
2209 UErrorCode ec = U_ZERO_ERROR;
2210 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2211 if (t == 0 || U_FAILURE(ec)) {
2212 errln("FAIL: createInstance");
2213 return;
2214 }
2215 Transliterator *u = t->createInverse(ec);
2216 if (u == 0 || U_FAILURE(ec)) {
2217 errln("FAIL: createInverse");
2218 delete t;
2219 return;
2220 }
2221 if (u->getID() != "Null") {
2222 errln("FAIL: Inverse of Null should be Null");
2223 }
2224 delete t;
2225 delete u;
2226 }
2227
2228 /**
2229 * Check ID of inverse of alias. (J22)
2230 */
2231 void TransliteratorTest::TestAliasInverseID() {
2232 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2233 UParseError pe;
2234 UErrorCode ec = U_ZERO_ERROR;
2235 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2236 if (t == 0 || U_FAILURE(ec)) {
2237 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2238 return;
2239 }
2240 Transliterator *u = t->createInverse(ec);
2241 if (u == 0 || U_FAILURE(ec)) {
2242 errln("FAIL: createInverse");
2243 delete t;
2244 return;
2245 }
2246 UnicodeString exp = "Hangul-Latin";
2247 UnicodeString got = u->getID();
2248 if (got != exp) {
2249 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2250 ", expected " + exp);
2251 }
2252 delete t;
2253 delete u;
2254 }
2255
2256 /**
2257 * Test IDs of inverses of compound transliterators. (J20)
2258 */
2259 void TransliteratorTest::TestCompoundInverseID() {
2260 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2261 UParseError pe;
2262 UErrorCode ec = U_ZERO_ERROR;
2263 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2264 if (t == 0 || U_FAILURE(ec)) {
2265 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2266 return;
2267 }
2268 Transliterator *u = t->createInverse(ec);
2269 if (u == 0 || U_FAILURE(ec)) {
2270 errln("FAIL: createInverse");
2271 delete t;
2272 return;
2273 }
2274 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2275 UnicodeString got = u->getID();
2276 if (got != exp) {
2277 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2278 ", expected " + exp);
2279 }
2280 delete t;
2281 delete u;
2282 }
2283
2284 /**
2285 * Test undefined variable.
2286
2287 */
2288 void TransliteratorTest::TestUndefinedVariable() {
2289 UnicodeString rule = "$initial } a <> \\u1161;";
2290 UParseError pe;
2291 UErrorCode ec = U_ZERO_ERROR;
2292 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2293 delete t;
2294 if (U_FAILURE(ec)) {
2295 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2296 u_errorName(ec));
2297 return;
2298 }
2299 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2300 u_errorName(ec));
2301 }
2302
2303 /**
2304 * Test empty context.
2305 */
2306 void TransliteratorTest::TestEmptyContext() {
2307 expect(" { a } > b;", "xay a ", "xby b ");
2308 }
2309
2310 /**
2311 * Test compound filter ID syntax
2312 */
2313 void TransliteratorTest::TestCompoundFilterID(void) {
2314 static const char* DATA[] = {
2315 // Col. 1 = ID or rule set (latter must start with #)
2316
2317 // = columns > 1 are null if expect col. 1 to be illegal =
2318
2319 // Col. 2 = direction, "F..." or "R..."
2320 // Col. 3 = source string
2321 // Col. 4 = exp result
2322
2323 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2324 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2325 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2326 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2328 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2329 NULL,
2330 };
2331
2332 for (int32_t i=0; DATA[i]; i+=4) {
2333 UnicodeString id = CharsToUnicodeString(DATA[i]);
2334 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2335 UTRANS_REVERSE : UTRANS_FORWARD;
2336 UnicodeString source;
2337 UnicodeString exp;
2338 if (DATA[i+2] != NULL) {
2339 source = CharsToUnicodeString(DATA[i+2]);
2340 exp = CharsToUnicodeString(DATA[i+3]);
2341 }
2342 UBool expOk = (DATA[i+1] != NULL);
2343 Transliterator* t = NULL;
2344 UParseError pe;
2345 UErrorCode ec = U_ZERO_ERROR;
2346 if (id.charAt(0) == 0x23/*#*/) {
2347 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2348 } else {
2349 t = Transliterator::createInstance(id, direction, pe, ec);
2350 }
2351 UBool ok = (t != NULL && U_SUCCESS(ec));
2352 UnicodeString transID;
2353 if (t!=0) {
2354 transID = t->getID();
2355 }
2356 else {
2357 transID = UnicodeString("NULL", "");
2358 }
2359 if (ok == expOk) {
2360 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2361 u_errorName(ec));
2362 if (source.length() != 0) {
2363 expect(*t, source, exp);
2364 }
2365 delete t;
2366 } else {
2367 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2368 u_errorName(ec));
2369 }
2370 }
2371 }
2372
2373 /**
2374 * Test new property set syntax
2375 */
2376 void TransliteratorTest::TestPropertySet() {
2377 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2380 }
2381
2382 /**
2383 * Test various failure points of the new 2.0 engine.
2384 */
2385 void TransliteratorTest::TestNewEngine() {
2386 UParseError pe;
2387 UErrorCode ec = U_ZERO_ERROR;
2388 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2389 if (t == 0 || U_FAILURE(ec)) {
2390 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2391 return;
2392 }
2393 // Katakana should be untouched
2394 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2395 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2396
2397 delete t;
2398
2399 #if 1
2400 // This test will only work if Transliterator.ROLLBACK is
2401 // true. Otherwise, this test will fail, revealing a
2402 // limitation of global filters in incremental mode.
2403 Transliterator *a =
2404 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2405 Transliterator *A =
2406 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2407 if (U_FAILURE(ec)) {
2408 delete a;
2409 delete A;
2410 return;
2411 }
2412
2413 Transliterator* array[3];
2414 array[0] = a;
2415 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2416 array[2] = A;
2417 if (U_FAILURE(ec)) {
2418 errln("FAIL: createInstance NFD");
2419 delete a;
2420 delete A;
2421 delete array[1];
2422 return;
2423 }
2424
2425 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2426 if (U_FAILURE(ec)) {
2427 errln("FAIL: UnicodeSet constructor");
2428 delete a;
2429 delete A;
2430 delete array[1];
2431 delete t;
2432 return;
2433 }
2434
2435 expect(*t, "aAaA", "bAbA");
2436
2437 assertTrue("countElements", t->countElements() == 3);
2438 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2439 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2440 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2441 assertSuccess("getElement", ec);
2442
2443 delete a;
2444 delete A;
2445 delete array[1];
2446 delete t;
2447 #endif
2448
2449 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450 "a",
2451 "ax");
2452
2453 UnicodeString gr = CharsToUnicodeString(
2454 "$ddot = \\u0308 ;"
2455 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456 "$rough = \\u0314 ;"
2457 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458 "\\u03b1 <> a ;"
2459 "$rough <> h ;");
2460
2461 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462 }
2463
2464 /**
2465 * Test quantified segment behavior. We want:
2466 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2467 */
2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2469 // The normal case
2470 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2471
2472 // The tricky case; the quantifier is around the segment
2473 expect("([abc])+ > x $1 x;", "cba", "xax");
2474
2475 // Tricky case in reverse direction
2476 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2477
2478 // Check post-context segment
2479 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2480
2481 // Test toRule/toPattern for non-quantified segment.
2482 // Careful with spacing here.
2483 UnicodeString r("([a-c]){q} > x $1 x;");
2484 UParseError pe;
2485 UErrorCode ec = U_ZERO_ERROR;
2486 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2487 if (U_FAILURE(ec)) {
2488 errln("FAIL: createFromRules");
2489 delete t;
2490 return;
2491 }
2492 UnicodeString rr;
2493 t->toRules(rr, TRUE);
2494 if (r != rr) {
2495 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496 } else {
2497 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2498 }
2499 delete t;
2500
2501 // Test toRule/toPattern for quantified segment.
2502 // Careful with spacing here.
2503 r = "([a-c])+{q} > x $1 x;";
2504 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2505 if (U_FAILURE(ec)) {
2506 errln("FAIL: createFromRules");
2507 delete t;
2508 return;
2509 }
2510 t->toRules(rr, TRUE);
2511 if (r != rr) {
2512 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513 } else {
2514 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2515 }
2516 delete t;
2517 }
2518
2519 //======================================================================
2520 // Ram's tests
2521 //======================================================================
2522 void TransliteratorTest::TestDevanagariLatinRT(){
2523 const int MAX_LEN= 52;
2524 const char* const source[MAX_LEN] = {
2525 "bh\\u0101rata",
2526 "kra",
2527 "k\\u1E63a",
2528 "khra",
2529 "gra",
2530 "\\u1E45ra",
2531 "cra",
2532 "chra",
2533 "j\\u00F1a",
2534 "jhra",
2535 "\\u00F1ra",
2536 "\\u1E6Dya",
2537 "\\u1E6Dhra",
2538 "\\u1E0Dya",
2539 //"r\\u0323ya", // \u095c is not valid in Devanagari
2540 "\\u1E0Dhya",
2541 "\\u1E5Bhra",
2542 "\\u1E47ra",
2543 "tta",
2544 "thra",
2545 "dda",
2546 "dhra",
2547 "nna",
2548 "pra",
2549 "phra",
2550 "bra",
2551 "bhra",
2552 "mra",
2553 "\\u1E49ra",
2554 //"l\\u0331ra",
2555 "yra",
2556 "\\u1E8Fra",
2557 //"l-",
2558 "vra",
2559 "\\u015Bra",
2560 "\\u1E63ra",
2561 "sra",
2562 "hma",
2563 "\\u1E6D\\u1E6Da",
2564 "\\u1E6D\\u1E6Dha",
2565 "\\u1E6Dh\\u1E6Dha",
2566 "\\u1E0D\\u1E0Da",
2567 "\\u1E0D\\u1E0Dha",
2568 "\\u1E6Dya",
2569 "\\u1E6Dhya",
2570 "\\u1E0Dya",
2571 "\\u1E0Dhya",
2572 // Not roundtrippable --
2573 // \\u0939\\u094d\\u094d\\u092E - hma
2574 // \\u0939\\u094d\\u092E - hma
2575 // CharsToUnicodeString("hma"),
2576 "hya",
2577 "\\u015Br\\u0325",
2578 "\\u015Bca",
2579 "\\u0115",
2580 "san\\u0304j\\u012Bb s\\u0113nagupta",
2581 "\\u0101nand vaddir\\u0101ju",
2582 "\\u0101",
2583 "a"
2584 };
2585 const char* const expected[MAX_LEN] = {
2586 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2587 "\\u0915\\u094D\\u0930", /* kra */
2588 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2589 "\\u0916\\u094D\\u0930", /* khra */
2590 "\\u0917\\u094D\\u0930", /* gra */
2591 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2592 "\\u091A\\u094D\\u0930", /* cra */
2593 "\\u091B\\u094D\\u0930", /* chra */
2594 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2595 "\\u091D\\u094D\\u0930", /* jhra */
2596 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2597 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2598 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2599 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2600 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2601 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2602 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2603 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2604 "\\u0924\\u094D\\u0924", /* tta */
2605 "\\u0925\\u094D\\u0930", /* thra */
2606 "\\u0926\\u094D\\u0926", /* dda */
2607 "\\u0927\\u094D\\u0930", /* dhra */
2608 "\\u0928\\u094D\\u0928", /* nna */
2609 "\\u092A\\u094D\\u0930", /* pra */
2610 "\\u092B\\u094D\\u0930", /* phra */
2611 "\\u092C\\u094D\\u0930", /* bra */
2612 "\\u092D\\u094D\\u0930", /* bhra */
2613 "\\u092E\\u094D\\u0930", /* mra */
2614 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2615 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2616 "\\u092F\\u094D\\u0930", /* yra */
2617 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2618 //"l-",
2619 "\\u0935\\u094D\\u0930", /* vra */
2620 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2621 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2622 "\\u0938\\u094D\\u0930", /* sra */
2623 "\\u0939\\u094d\\u092E", /* hma */
2624 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2625 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2626 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2627 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2628 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2629 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2630 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2631 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2632 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2633 // "hma", /* hma */
2634 "\\u0939\\u094D\\u092F", /* hya */
2635 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2636 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2637 "\\u090d", /* e\\u0306 */
2638 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640 "\\u0906",
2641 "\\u0905",
2642 };
2643 UErrorCode status = U_ZERO_ERROR;
2644 UParseError parseError;
2645 UnicodeString message;
2646 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2647 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2648 if(U_FAILURE(status)){
2649 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2650 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2651 return;
2652 }
2653 UnicodeString gotResult;
2654 for(int i= 0; i<MAX_LEN; i++){
2655 gotResult = source[i];
2656 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2657 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2658 }
2659 delete latinToDev;
2660 delete devToLatin;
2661 }
2662
2663 void TransliteratorTest::TestTeluguLatinRT(){
2664 const int MAX_LEN=10;
2665 const char* const source[MAX_LEN] = {
2666 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2667 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2668 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2669 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2670 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2671 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2672 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2673 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2674 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2675 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2676 };
2677
2678 const char* const expected[MAX_LEN] = {
2679 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689 };
2690
2691 UErrorCode status = U_ZERO_ERROR;
2692 UParseError parseError;
2693 UnicodeString message;
2694 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2695 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2696 if(U_FAILURE(status)){
2697 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2698 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2699 return;
2700 }
2701 UnicodeString gotResult;
2702 for(int i= 0; i<MAX_LEN; i++){
2703 gotResult = source[i];
2704 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2705 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2706 }
2707 delete latinToDev;
2708 delete devToLatin;
2709 }
2710
2711 void TransliteratorTest::TestSanskritLatinRT(){
2712 const int MAX_LEN =16;
2713 const char* const source[MAX_LEN] = {
2714 "rmk\\u1E63\\u0113t",
2715 "\\u015Br\\u012Bmad",
2716 "bhagavadg\\u012Bt\\u0101",
2717 "adhy\\u0101ya",
2718 "arjuna",
2719 "vi\\u1E63\\u0101da",
2720 "y\\u014Dga",
2721 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722 "uv\\u0101cr\\u0325",
2723 "dharmak\\u1E63\\u0113tr\\u0113",
2724 "kuruk\\u1E63\\u0113tr\\u0113",
2725 "samav\\u0113t\\u0101",
2726 "yuyutsava\\u1E25",
2727 "m\\u0101mak\\u0101\\u1E25",
2728 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729 "kimakurvata",
2730 "san\\u0304java",
2731 };
2732 const char* const expected[MAX_LEN] = {
2733 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739 "\\u092f\\u094b\\u0917",
2740 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749 "\\u0938\\u0902\\u091c\\u0935",
2750 };
2751 UErrorCode status = U_ZERO_ERROR;
2752 UParseError parseError;
2753 UnicodeString message;
2754 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2755 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2756 if(U_FAILURE(status)){
2757 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2758 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2759 return;
2760 }
2761 UnicodeString gotResult;
2762 for(int i= 0; i<MAX_LEN; i++){
2763 gotResult = source[i];
2764 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2765 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2766 }
2767 delete latinToDev;
2768 delete devToLatin;
2769 }
2770
2771
2772 void TransliteratorTest::TestCompoundLatinRT(){
2773 const char* const source[] = {
2774 "rmk\\u1E63\\u0113t",
2775 "\\u015Br\\u012Bmad",
2776 "bhagavadg\\u012Bt\\u0101",
2777 "adhy\\u0101ya",
2778 "arjuna",
2779 "vi\\u1E63\\u0101da",
2780 "y\\u014Dga",
2781 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782 "uv\\u0101cr\\u0325",
2783 "dharmak\\u1E63\\u0113tr\\u0113",
2784 "kuruk\\u1E63\\u0113tr\\u0113",
2785 "samav\\u0113t\\u0101",
2786 "yuyutsava\\u1E25",
2787 "m\\u0101mak\\u0101\\u1E25",
2788 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789 "kimakurvata",
2790 "san\\u0304java"
2791 };
2792 const int MAX_LEN = UPRV_LENGTHOF(source);
2793 const char* const expected[MAX_LEN] = {
2794 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800 "\\u092f\\u094b\\u0917",
2801 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810 "\\u0938\\u0902\\u091c\\u0935"
2811 };
2812 if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2813 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814 return;
2815 }
2816
2817 UErrorCode status = U_ZERO_ERROR;
2818 UParseError parseError;
2819 UnicodeString message;
2820 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2821 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2822 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2823 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2824
2825 if(U_FAILURE(status)){
2826 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2827 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2828 return;
2829 }
2830 UnicodeString gotResult;
2831 for(int i= 0; i<MAX_LEN; i++){
2832 gotResult = source[i];
2833 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2834 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2836
2837 }
2838 delete(latinToDevToLatin);
2839 delete(devToLatinToDev);
2840 delete(devToTelToDev);
2841 delete(latinToTelToLatin);
2842 }
2843
2844 /**
2845 * Test Gurmukhi-Devanagari Tippi and Bindi
2846 */
2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2848 // the rule says:
2849 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2850 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2851 UErrorCode status = U_ZERO_ERROR;
2852 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2853 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2854 UParseError parseError;
2855
2856 UnicodeSetIterator vIter(vowel);
2857 UnicodeSetIterator nvIter(non_vowel);
2858 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2859 if(U_FAILURE(status)) {
2860 dataerrln("Error creating transliterator %s", u_errorName(status));
2861 delete trans;
2862 return;
2863 }
2864 UnicodeString src (" \\u0902", -1, US_INV);
2865 UnicodeString expected(" \\u0A02", -1, US_INV);
2866 src = src.unescape();
2867 expected= expected.unescape();
2868
2869 while(vIter.next()){
2870 src.setCharAt(0,(UChar) vIter.getCodepoint());
2871 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2872 expect(*trans,src,expected);
2873 }
2874
2875 expected.setCharAt(1,0x0A70);
2876 while(nvIter.next()){
2877 //src.setCharAt(0,(char) nvIter.codepoint);
2878 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2879 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2880 expect(*trans,src,expected);
2881 }
2882 delete trans;
2883 }
2884 /**
2885 * Test instantiation from a locale.
2886 */
2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2888 UParseError pe;
2889 UErrorCode ec = U_ZERO_ERROR;
2890 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2891 if (U_FAILURE(ec)) {
2892 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2893 delete t;
2894 return;
2895 }
2896 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2897 delete t;
2898
2899 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2900 if (U_FAILURE(ec)) {
2901 errln("FAIL: createInstance(en-el)");
2902 delete t;
2903 return;
2904 }
2905 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2906 delete t;
2907 }
2908
2909 /**
2910 * Test title case handling of accent (should ignore accents)
2911 */
2912 void TransliteratorTest::TestTitleAccents(void) {
2913 UParseError pe;
2914 UErrorCode ec = U_ZERO_ERROR;
2915 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2916 if (U_FAILURE(ec)) {
2917 errln("FAIL: createInstance(Title)");
2918 delete t;
2919 return;
2920 }
2921 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2922 delete t;
2923 }
2924
2925 /**
2926 * Basic test of a locale resource based rule.
2927 */
2928 void TransliteratorTest::TestLocaleResource() {
2929 const char* DATA[] = {
2930 // id from to
2931 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2932 "Latin-el", "b", "\\u03bc\\u03c0",
2933 "Latin-Greek", "b", "\\u03B2",
2934 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2935 "el-Latin", "\\u03B2", "v",
2936 "Greek-Latin", "\\u03B2", "b",
2937 };
2938 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2939 for (int32_t i=0; i<DATA_length; i+=3) {
2940 UParseError pe;
2941 UErrorCode ec = U_ZERO_ERROR;
2942 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2943 if (U_FAILURE(ec)) {
2944 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2945 delete t;
2946 continue;
2947 }
2948 expect(*t, CharsToUnicodeString(DATA[i+1]),
2949 CharsToUnicodeString(DATA[i+2]));
2950 delete t;
2951 }
2952 }
2953
2954 /**
2955 * Make sure parse errors reference the right line.
2956 */
2957 void TransliteratorTest::TestParseError() {
2958 static const char* rule =
2959 "a > b;\n"
2960 "# more stuff\n"
2961 "d << b;";
2962 UErrorCode ec = U_ZERO_ERROR;
2963 UParseError pe;
2964 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2965 delete t;
2966 if (U_FAILURE(ec)) {
2967 UnicodeString err(pe.preContext);
2968 err.append((UChar)124/*|*/).append(pe.postContext);
2969 if (err.indexOf("d << b") >= 0) {
2970 logln("Ok: " + err);
2971 } else {
2972 errln("FAIL: " + err);
2973 }
2974 }
2975 else {
2976 errln("FAIL: no syntax error");
2977 }
2978 static const char* maskingRule =
2979 "a>x;\n"
2980 "# more stuff\n"
2981 "ab>y;";
2982 ec = U_ZERO_ERROR;
2983 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2984 if (ec != U_RULE_MASK_ERROR) {
2985 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2986 }
2987 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2988 errln("FAIL: did not get expected precontext");
2989 }
2990 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2991 errln("FAIL: did not get expected postcontext");
2992 }
2993 }
2994
2995 /**
2996 * Make sure sets on output are disallowed.
2997 */
2998 void TransliteratorTest::TestOutputSet() {
2999 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
3000 UErrorCode ec = U_ZERO_ERROR;
3001 UParseError pe;
3002 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3003 delete t;
3004 if (U_FAILURE(ec)) {
3005 UnicodeString err(pe.preContext);
3006 err.append((UChar)124/*|*/).append(pe.postContext);
3007 logln("Ok: " + err);
3008 return;
3009 }
3010 errln("FAIL: No syntax error");
3011 }
3012
3013 /**
3014 * Test the use variable range pragma, making sure that use of
3015 * variable range characters is detected and flagged as an error.
3016 */
3017 void TransliteratorTest::TestVariableRange() {
3018 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019 UErrorCode ec = U_ZERO_ERROR;
3020 UParseError pe;
3021 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3022 delete t;
3023 if (U_FAILURE(ec)) {
3024 UnicodeString err(pe.preContext);
3025 err.append((UChar)124/*|*/).append(pe.postContext);
3026 logln("Ok: " + err);
3027 return;
3028 }
3029 errln("FAIL: No syntax error");
3030 }
3031
3032 /**
3033 * Test invalid post context error handling
3034 */
3035 void TransliteratorTest::TestInvalidPostContext() {
3036 UnicodeString rule = "a}b{c>d;";
3037 UErrorCode ec = U_ZERO_ERROR;
3038 UParseError pe;
3039 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3040 delete t;
3041 if (U_FAILURE(ec)) {
3042 UnicodeString err(pe.preContext);
3043 err.append((UChar)124/*|*/).append(pe.postContext);
3044 if (err.indexOf("a}b{c") >= 0) {
3045 logln("Ok: " + err);
3046 } else {
3047 errln("FAIL: " + err);
3048 }
3049 return;
3050 }
3051 errln("FAIL: No syntax error");
3052 }
3053
3054 /**
3055 * Test ID form variants
3056 */
3057 void TransliteratorTest::TestIDForms() {
3058 const char* DATA[] = {
3059 "NFC", NULL, "NFD",
3060 "nfd", NULL, "NFC", // make sure case is ignored
3061 "Any-NFKD", NULL, "Any-NFKC",
3062 "Null", NULL, "Null",
3063 "-nfkc", "nfkc", "NFKD",
3064 "-nfkc/", "nfkc", "NFKD",
3065 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3066 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068 "Source-", NULL, NULL,
3069 "Source/Variant-", NULL, NULL,
3070 "Source-/Variant", NULL, NULL,
3071 "/Variant", NULL, NULL,
3072 "/Variant-", NULL, NULL,
3073 "-/Variant", NULL, NULL,
3074 "-/", NULL, NULL,
3075 "-", NULL, NULL,
3076 "/", NULL, NULL,
3077 };
3078 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3079
3080 for (int32_t i=0; i<DATA_length; i+=3) {
3081 const char* ID = DATA[i];
3082 const char* expID = DATA[i+1];
3083 const char* expInvID = DATA[i+2];
3084 UBool expValid = (expInvID != NULL);
3085 if (expID == NULL) {
3086 expID = ID;
3087 }
3088 UParseError pe;
3089 UErrorCode ec = U_ZERO_ERROR;
3090 Transliterator *t =
3091 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3092 if (U_FAILURE(ec)) {
3093 if (!expValid) {
3094 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3095 } else {
3096 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3097 }
3098 delete t;
3099 continue;
3100 }
3101 Transliterator *u = t->createInverse(ec);
3102 if (U_FAILURE(ec)) {
3103 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3104 delete t;
3105 delete u;
3106 continue;
3107 }
3108 if (t->getID() == expID &&
3109 u->getID() == expInvID) {
3110 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3111 } else {
3112 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3113 t->getID() + " x getInverse() => " + u->getID() +
3114 ", expected " + expInvID);
3115 }
3116 delete t;
3117 delete u;
3118 }
3119 }
3120
3121 static const UChar SPACE[] = {32,0};
3122 static const UChar NEWLINE[] = {10,0};
3123 static const UChar RETURN[] = {13,0};
3124 static const UChar EMPTY[] = {0};
3125
3126 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3127 const UnicodeString& testRulesForward) {
3128 UnicodeString rules2; t2.toRules(rules2, TRUE);
3129 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130 rules2.findAndReplace(SPACE, EMPTY);
3131 rules2.findAndReplace(NEWLINE, EMPTY);
3132 rules2.findAndReplace(RETURN, EMPTY);
3133
3134 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3135
3136 if (rules2 != testRules) {
3137 errln(label);
3138 logln((UnicodeString)"GENERATED RULES: " + rules2);
3139 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3140 }
3141 }
3142
3143 /**
3144 * Mark's toRules test.
3145 */
3146 void TransliteratorTest::TestToRulesMark() {
3147 const char* testRules =
3148 "::[[:Latin:][:Mark:]];"
3149 "::NFKD (NFC);"
3150 "::Lower (Lower);"
3151 "a <> \\u03B1;" // alpha
3152 "::NFKC (NFD);"
3153 "::Upper (Lower);"
3154 "::Lower ();"
3155 "::([[:Greek:][:Mark:]]);"
3156 ;
3157 const char* testRulesForward =
3158 "::[[:Latin:][:Mark:]];"
3159 "::NFKD(NFC);"
3160 "::Lower(Lower);"
3161 "a > \\u03B1;"
3162 "::NFKC(NFD);"
3163 "::Upper (Lower);"
3164 "::Lower ();"
3165 ;
3166 const char* testRulesBackward =
3167 "::[[:Greek:][:Mark:]];"
3168 "::Lower (Upper);"
3169 "::NFD(NFKC);"
3170 "\\u03B1 > a;"
3171 "::Lower(Lower);"
3172 "::NFC(NFKD);"
3173 ;
3174 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3175 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3176
3177 UParseError pe;
3178 UErrorCode ec = U_ZERO_ERROR;
3179 Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3180 Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3181
3182 if (U_FAILURE(ec)) {
3183 delete t2;
3184 delete t3;
3185 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3186 return;
3187 }
3188
3189 expect(*t2, source, target);
3190 expect(*t3, target, source);
3191
3192 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3193 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3194
3195 delete t2;
3196 delete t3;
3197 }
3198
3199 /**
3200 * Test Escape and Unescape transliterators.
3201 */
3202 void TransliteratorTest::TestEscape() {
3203 UParseError pe;
3204 UErrorCode ec;
3205 Transliterator *t;
3206
3207 ec = U_ZERO_ERROR;
3208 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3209 if (U_FAILURE(ec)) {
3210 errln((UnicodeString)"FAIL: createInstance");
3211 } else {
3212 expect(*t,
3213 UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3214 "@12Q");
3215 }
3216 delete t;
3217
3218 ec = U_ZERO_ERROR;
3219 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3220 if (U_FAILURE(ec)) {
3221 errln((UnicodeString)"FAIL: createInstance");
3222 } else {
3223 expect(*t,
3224 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3225 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3226 }
3227 delete t;
3228
3229 ec = U_ZERO_ERROR;
3230 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3231 if (U_FAILURE(ec)) {
3232 errln((UnicodeString)"FAIL: createInstance");
3233 } else {
3234 expect(*t,
3235 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3236 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3237 }
3238 delete t;
3239
3240 ec = U_ZERO_ERROR;
3241 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3242 if (U_FAILURE(ec)) {
3243 errln((UnicodeString)"FAIL: createInstance");
3244 } else {
3245 expect(*t,
3246 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3247 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3248 }
3249 delete t;
3250 }
3251
3252
3253 void TransliteratorTest::TestAnchorMasking(){
3254 UnicodeString rule ("^a > Q; a > q;");
3255 UErrorCode status= U_ZERO_ERROR;
3256 UParseError parseError;
3257
3258 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3259 if(U_FAILURE(status)){
3260 errln(UnicodeString("FAIL: ") + "ID" +
3261 ".createFromRules() => bad rules" +
3262 /*", parse error " + parseError.code +*/
3263 ", line " + parseError.line +
3264 ", offset " + parseError.offset +
3265 ", context " + prettify(parseError.preContext, TRUE) +
3266 ", rules: " + prettify(rule, TRUE));
3267 }
3268 delete t;
3269 }
3270
3271 /**
3272 * Make sure display names of variants look reasonable.
3273 */
3274 void TransliteratorTest::TestDisplayName() {
3275 #if UCONFIG_NO_FORMATTING
3276 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3277 return;
3278 #else
3279 static const char* DATA[] = {
3280 // ID, forward name, reverse name
3281 // Update the text as necessary -- the important thing is
3282 // not the text itself, but how various cases are handled.
3283
3284 // Basic test
3285 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3286
3287 // Variants
3288 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3289
3290 // Target-only IDs
3291 "NFC", "Any to NFC", "Any to NFD",
3292 };
3293
3294 int32_t DATA_length = UPRV_LENGTHOF(DATA);
3295
3296 Locale US("en", "US");
3297
3298 for (int32_t i=0; i<DATA_length; i+=3) {
3299 UnicodeString name;
3300 Transliterator::getDisplayName(DATA[i], US, name);
3301 if (name != DATA[i+1]) {
3302 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3303 name + ", expected " + DATA[i+1]);
3304 } else {
3305 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3306 }
3307 UErrorCode ec = U_ZERO_ERROR;
3308 UParseError pe;
3309 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3310 if (U_FAILURE(ec)) {
3311 delete t;
3312 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3313 continue;
3314 }
3315 name = Transliterator::getDisplayName(t->getID(), US, name);
3316 if (name != DATA[i+2]) {
3317 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3318 name + ", expected " + DATA[i+2]);
3319 } else {
3320 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3321 }
3322 delete t;
3323 }
3324 #endif
3325 }
3326
3327 void TransliteratorTest::TestSpecialCases(void) {
3328 const UnicodeString registerRules[] = {
3329 "Any-Dev1", "x > X; y > Y;",
3330 "Any-Dev2", "XY > Z",
3331 "Greek-Latin/FAKE",
3332 CharsToUnicodeString
3333 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3334 "" // END MARKER
3335 };
3336
3337 const UnicodeString testCases[] = {
3338 // NORMALIZATION
3339 // should add more test cases
3340 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3344
3345 // mp -> b BUG
3346 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3348
3349 // check for devanagari bug
3350 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3351
3352 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3353 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3355
3356 //TODO: enable this test once Titlecase works right
3357 /*
3358 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3360 */
3361 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3363 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3364 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3365
3366 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3367 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3368
3369 // FORMS OF S
3370 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3371 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3372 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3373 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3374 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3376 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3377 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3378 // Tatiana bug
3379 // Upper: TAT\\u02B9\\u00C2NA
3380 // Lower: tat\\u02B9\\u00E2na
3381 // Title: Tat\\u02B9\\u00E2na
3382 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3385 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3387 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3388
3389 "" // END MARKER
3390 };
3391
3392 UParseError pos;
3393 int32_t i;
3394 for (i = 0; registerRules[i].length()!=0; i+=2) {
3395 UErrorCode status = U_ZERO_ERROR;
3396
3397 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3398 registerRules[i+1], UTRANS_FORWARD, pos, status);
3399 if (U_FAILURE(status)) {
3400 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3401 } else {
3402 Transliterator::registerInstance(t);
3403 }
3404 }
3405 for (i = 0; testCases[i].length()!=0; i+=3) {
3406 UErrorCode ec = U_ZERO_ERROR;
3407 UParseError pe;
3408 const UnicodeString& name = testCases[i];
3409 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3410 if (U_FAILURE(ec)) {
3411 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3412 delete t;
3413 continue;
3414 }
3415 const UnicodeString& id = t->getID();
3416 const UnicodeString& source = testCases[i+1];
3417 UnicodeString target;
3418
3419 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3420
3421 if (testCases[i+2].length() > 0) {
3422 target = testCases[i+2];
3423 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3424 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3425 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3426 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3427 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3428 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3429 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3430 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3431 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3432 target = source;
3433 target.toLower(Locale::getUS());
3434 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3435 target = source;
3436 target.toUpper(Locale::getUS());
3437 }
3438 if (U_FAILURE(ec)) {
3439 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3440 continue;
3441 }
3442
3443 expect(*t, source, target);
3444 delete t;
3445 }
3446 for (i = 0; registerRules[i].length()!=0; i+=2) {
3447 Transliterator::unregister(registerRules[i]);
3448 }
3449 }
3450
3451 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3452 if (ch <= 0xFFFF) {
3453 sprintf(buffer, "\\u%04x", (int)ch);
3454 } else {
3455 sprintf(buffer, "\\U%08x", (int)ch);
3456 }
3457 return buffer;
3458 }
3459
3460 void TransliteratorTest::TestSurrogateCasing (void) {
3461 // check that casing handles surrogates
3462 // titlecase is currently defective
3463 char buffer[20];
3464 UChar buffer2[20];
3465 UChar32 dee;
3466 U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3467 UnicodeString DEE(u_totitle(dee));
3468 if (DEE != DESERET_DEE) {
3469 err("Fails titlecase of surrogates");
3470 err(Char32ToEscapedChars(dee, buffer));
3471 err(", ");
3472 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3473 }
3474
3475 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3476 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3477 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3478 UErrorCode status= U_ZERO_ERROR;
3479
3480 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3481 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3482 errln("Fails: Can't uppercase surrogates.");
3483 }
3484
3485 status= U_ZERO_ERROR;
3486 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3487 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3488 errln("Fails: Can't lowercase surrogates.");
3489 }
3490 }
3491
3492 static void _trans(Transliterator& t, const UnicodeString& src,
3493 UnicodeString& result) {
3494 result = src;
3495 t.transliterate(result);
3496 }
3497
3498 static void _trans(const UnicodeString& id, const UnicodeString& src,
3499 UnicodeString& result, UErrorCode ec) {
3500 UParseError pe;
3501 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3502 if (U_SUCCESS(ec)) {
3503 _trans(*t, src, result);
3504 }
3505 delete t;
3506 }
3507
3508 static UnicodeString _findMatch(const UnicodeString& source,
3509 const UnicodeString* pairs) {
3510 UnicodeString empty;
3511 for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3512 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3513 return pairs[i+1];
3514 }
3515 }
3516 return empty;
3517 }
3518
3519 // Check to see that incremental gets at least part way through a reasonable string.
3520
3521 void TransliteratorTest::TestIncrementalProgress(void) {
3522 UErrorCode ec = U_ZERO_ERROR;
3523 UnicodeString latinTest = "The Quick Brown Fox.";
3524 UnicodeString devaTest;
3525 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3526 UnicodeString kataTest;
3527 _trans("Latin-Katakana", latinTest, kataTest, ec);
3528 if (U_FAILURE(ec)) {
3529 errln("FAIL: Internal error");
3530 return;
3531 }
3532 const UnicodeString tests[] = {
3533 "Any", latinTest,
3534 "Latin", latinTest,
3535 "Halfwidth", latinTest,
3536 "Devanagari", devaTest,
3537 "Katakana", kataTest,
3538 "" // END MARKER
3539 };
3540
3541 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3542 int32_t i = 0, j=0, k=0;
3543 int32_t sources = Transliterator::countAvailableSources();
3544 for (i = 0; i < sources; i++) {
3545 UnicodeString source;
3546 Transliterator::getAvailableSource(i, source);
3547 UnicodeString test = _findMatch(source, tests);
3548 if (test.length() == 0) {
3549 logln((UnicodeString)"Skipping " + source + "-X");
3550 continue;
3551 }
3552 int32_t targets = Transliterator::countAvailableTargets(source);
3553 for (j = 0; j < targets; j++) {
3554 UnicodeString target;
3555 Transliterator::getAvailableTarget(j, source, target);
3556 int32_t variants = Transliterator::countAvailableVariants(source, target);
3557 for (k =0; k< variants; k++) {
3558 UnicodeString variant;
3559 UParseError err;
3560 UErrorCode status = U_ZERO_ERROR;
3561
3562 Transliterator::getAvailableVariant(k, source, target, variant);
3563 UnicodeString id = source + "-" + target + "/" + variant;
3564
3565 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3566 if (U_FAILURE(status)) {
3567 dataerrln((UnicodeString)"FAIL: Could not create " + id);
3568 delete t;
3569 continue;
3570 }
3571 status = U_ZERO_ERROR;
3572 CheckIncrementalAux(t, test);
3573
3574 UnicodeString rev;
3575 _trans(*t, test, rev);
3576 Transliterator *inv = t->createInverse(status);
3577 if (U_FAILURE(status)) {
3578 // The following are forward-only, it is OK that creating an inverse will not work:
3579 // 1. Devanagari-Arabic
3580 // 2. Any-*/BGN
3581 // 3. Any-*/UNGEGN
3582 // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3583 if ( id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3584 && !(id.startsWith((UnicodeString)"Any-") &&
3585 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3586 )
3587 #if UCONFIG_NO_BREAK_ITERATION
3588 && id.compare((UnicodeString)"Latin-Thai/") != 0
3589 #endif
3590 )
3591 {
3592 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3593 }
3594 delete t;
3595 delete inv;
3596 continue;
3597 }
3598 CheckIncrementalAux(inv, rev);
3599 delete t;
3600 delete inv;
3601 }
3602 }
3603 }
3604 }
3605
3606 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3607 const UnicodeString& input) {
3608 UErrorCode ec = U_ZERO_ERROR;
3609 UTransPosition pos;
3610 UnicodeString test = input;
3611
3612 pos.contextStart = 0;
3613 pos.contextLimit = input.length();
3614 pos.start = 0;
3615 pos.limit = input.length();
3616
3617 t->transliterate(test, pos, ec);
3618 if (U_FAILURE(ec)) {
3619 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3620 return;
3621 }
3622 UBool gotError = FALSE;
3623 (void)gotError; // Suppress set but not used warning.
3624
3625 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3626
3627 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3628 errln((UnicodeString)"No Progress, " +
3629 t->getID() + ": " + formatInput(test, input, pos));
3630 gotError = TRUE;
3631 } else {
3632 logln((UnicodeString)"PASS Progress, " +
3633 t->getID() + ": " + formatInput(test, input, pos));
3634 }
3635 t->finishTransliteration(test, pos);
3636 if (pos.start != pos.limit) {
3637 errln((UnicodeString)"Incomplete, " +
3638 t->getID() + ": " + formatInput(test, input, pos));
3639 gotError = TRUE;
3640 }
3641 }
3642
3643 void TransliteratorTest::TestFunction() {
3644 // Careful with spacing and ';' here: Phrase this exactly
3645 // as toRules() is going to return it. If toRules() changes
3646 // with regard to spacing or ';', then adjust this string.
3647 UnicodeString rule =
3648 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3649
3650 UParseError pe;
3651 UErrorCode ec = U_ZERO_ERROR;
3652 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3653 if (t == NULL) {
3654 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3655 return;
3656 }
3657
3658 UnicodeString r;
3659 t->toRules(r, TRUE);
3660 if (r == rule) {
3661 logln((UnicodeString)"OK: toRules() => " + r);
3662 } else {
3663 errln((UnicodeString)"FAIL: toRules() => " + r +
3664 ", expected " + rule);
3665 }
3666
3667 expect(*t, "The Quick Brown Fox",
3668 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3669
3670 delete t;
3671 }
3672
3673 void TransliteratorTest::TestInvalidBackRef(void) {
3674 UnicodeString rule = ". > $1;";
3675 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3676 UParseError pe;
3677 UErrorCode ec = U_ZERO_ERROR;
3678 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3679 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3680
3681 if (t != NULL) {
3682 errln("FAIL: createFromRules should have returned NULL");
3683 delete t;
3684 }
3685
3686 if (t2 != NULL) {
3687 errln("FAIL: createFromRules should have returned NULL");
3688 delete t2;
3689 }
3690
3691 if (U_SUCCESS(ec)) {
3692 errln("FAIL: Ok: . > $1; => no error");
3693 } else {
3694 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3695 }
3696 }
3697
3698 void TransliteratorTest::TestMulticharStringSet() {
3699 // Basic testing
3700 const char* rule =
3701 " [{aa}] > x;"
3702 " a > y;"
3703 " [b{bc}] > z;"
3704 "[{gd}] { e > q;"
3705 " e } [{fg}] > r;" ;
3706
3707 UParseError pe;
3708 UErrorCode ec = U_ZERO_ERROR;
3709 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3710 if (t == NULL || U_FAILURE(ec)) {
3711 delete t;
3712 errln("FAIL: createFromRules failed");
3713 return;
3714 }
3715
3716 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3717 "y x yz z d gd de gdq gdqfg ddrfg");
3718 delete t;
3719
3720 // Overlapped string test. Make sure that when multiple
3721 // strings can match that the longest one is matched.
3722 rule =
3723 " [a {ab} {abc}] > x;"
3724 " b > y;"
3725 " c > z;"
3726 " q [t {st} {rst}] { e > p;" ;
3727
3728 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3729 if (t == NULL || U_FAILURE(ec)) {
3730 delete t;
3731 errln("FAIL: createFromRules failed");
3732 return;
3733 }
3734
3735 expect(*t, "a ab abc qte qste qrste",
3736 "x x x qtp qstp qrstp");
3737 delete t;
3738 }
3739
3740 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3741 // BEGIN TestUserFunction support factory
3742
3743 Transliterator* _TUFF[4];
3744 UnicodeString* _TUFID[4];
3745
3746 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3747 Transliterator::Token context) {
3748 return _TUFF[context.integer]->clone();
3749 }
3750
3751 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3752 _TUFF[n] = t;
3753 _TUFID[n] = new UnicodeString(ID);
3754 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3755 }
3756
3757 static void _TUFUnreg(int32_t n) {
3758 if (_TUFF[n] != NULL) {
3759 Transliterator::unregister(*_TUFID[n]);
3760 delete _TUFF[n];
3761 delete _TUFID[n];
3762 }
3763 }
3764
3765 // END TestUserFunction support factory
3766 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3767
3768 /**
3769 * Test that user-registered transliterators can be used under function
3770 * syntax.
3771 */
3772 void TransliteratorTest::TestUserFunction() {
3773
3774 Transliterator* t;
3775 UParseError pe;
3776 UErrorCode ec = U_ZERO_ERROR;
3777
3778 // Setup our factory
3779 int32_t i;
3780 for (i=0; i<4; ++i) {
3781 _TUFF[i] = NULL;
3782 }
3783
3784 // There's no need to register inverses if we don't use them
3785 t = Transliterator::createFromRules("gif",
3786 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3787 UTRANS_FORWARD, pe, ec);
3788 if (t == NULL || U_FAILURE(ec)) {
3789 dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3790 return;
3791 }
3792 _TUFReg("Any-gif", t, 0);
3793
3794 t = Transliterator::createFromRules("RemoveCurly",
3795 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3796 UTRANS_FORWARD, pe, ec);
3797 if (t == NULL || U_FAILURE(ec)) {
3798 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3799 goto FAIL;
3800 }
3801 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3802 _TUFReg("Any-RemoveCurly", t, 1);
3803
3804 logln("Trying &hex");
3805 t = Transliterator::createFromRules("hex2",
3806 "(.) > &hex($1);",
3807 UTRANS_FORWARD, pe, ec);
3808 if (t == NULL || U_FAILURE(ec)) {
3809 errln("FAIL: createFromRules");
3810 goto FAIL;
3811 }
3812 logln("Registering");
3813 _TUFReg("Any-hex2", t, 2);
3814 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3815 if (t == NULL || U_FAILURE(ec)) {
3816 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3817 goto FAIL;
3818 }
3819 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3820 delete t;
3821
3822 logln("Trying &gif");
3823 t = Transliterator::createFromRules("gif2",
3824 "(.) > &Gif(&Hex2($1));",
3825 UTRANS_FORWARD, pe, ec);
3826 if (t == NULL || U_FAILURE(ec)) {
3827 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3828 goto FAIL;
3829 }
3830 logln("Registering");
3831 _TUFReg("Any-gif2", t, 3);
3832 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3833 if (t == NULL || U_FAILURE(ec)) {
3834 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3835 goto FAIL;
3836 }
3837 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3838 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3839 delete t;
3840
3841 // Test that filters are allowed after &
3842 t = Transliterator::createFromRules("test",
3843 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3844 UTRANS_FORWARD, pe, ec);
3845 if (t == NULL || U_FAILURE(ec)) {
3846 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3847 goto FAIL;
3848 }
3849 expect(*t, "abc",
3850 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3851 delete t;
3852
3853 FAIL:
3854 for (i=0; i<4; ++i) {
3855 _TUFUnreg(i);
3856 }
3857 }
3858
3859 /**
3860 * Test the Any-X transliterators.
3861 */
3862 void TransliteratorTest::TestAnyX(void) {
3863 UParseError parseError;
3864 UErrorCode status = U_ZERO_ERROR;
3865 Transliterator* anyLatin =
3866 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3867 if (anyLatin==0) {
3868 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3869 delete anyLatin;
3870 return;
3871 }
3872
3873 expect(*anyLatin,
3874 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3875 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3876
3877 delete anyLatin;
3878
3879 status = U_ZERO_ERROR;
3880 Transliterator* anyASCII =
3881 Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD, parseError, status);
3882 if (U_FAILURE(status) || anyASCII==0) {
3883 dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status));
3884 delete anyASCII;
3885 return;
3886 }
3887
3888 expect(*anyASCII,
3889 CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
3890 CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
3891
3892 delete anyASCII;
3893 }
3894
3895 /**
3896 * Test Any-X transliterators with sample letters from all scripts.
3897 */
3898 void TransliteratorTest::TestAny(void) {
3899 UErrorCode status = U_ZERO_ERROR;
3900 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3901 // function call parameters going on in this test.
3902 UnicodeSet alphabetic("[:alphabetic:]", status);
3903 if (U_FAILURE(status)) {
3904 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3905 return;
3906 }
3907 alphabetic.freeze();
3908
3909 UnicodeString testString;
3910 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3911 const char *scriptName = uscript_getShortName((UScriptCode)i);
3912 if (scriptName == NULL) {
3913 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3914 return;
3915 }
3916
3917 UnicodeSet sample;
3918 sample.applyPropertyAlias("script", scriptName, status);
3919 if (U_FAILURE(status)) {
3920 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3921 return;
3922 }
3923 sample.retainAll(alphabetic);
3924 for (int32_t count=0; count<5; count++) {
3925 UChar32 c = sample.charAt(count);
3926 if (c == -1) {
3927 break;
3928 }
3929 testString.append(c);
3930 }
3931 }
3932
3933 UParseError parseError;
3934 Transliterator* anyLatin =
3935 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3936 if (U_FAILURE(status)) {
3937 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3938 return;
3939 }
3940
3941 logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3942 anyLatin->transliterate(testString);
3943 logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3944 delete anyLatin;
3945 }
3946
3947
3948 /**
3949 * Test the source and target set API. These are only implemented
3950 * for RBT and CompoundTransliterator at this time.
3951 */
3952 void TransliteratorTest::TestSourceTargetSet() {
3953 UErrorCode ec = U_ZERO_ERROR;
3954
3955 // Rules
3956 const char* r =
3957 "a > b; "
3958 "r [x{lu}] > q;";
3959
3960 // Expected source
3961 UnicodeSet expSrc("[arx{lu}]", ec);
3962
3963 // Expected target
3964 UnicodeSet expTrg("[bq]", ec);
3965
3966 UParseError pe;
3967 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3968
3969 if (U_FAILURE(ec)) {
3970 delete t;
3971 errln("FAIL: Couldn't set up test");
3972 return;
3973 }
3974
3975 UnicodeSet src; t->getSourceSet(src);
3976 UnicodeSet trg; t->getTargetSet(trg);
3977
3978 if (src == expSrc && trg == expTrg) {
3979 UnicodeString a, b;
3980 logln((UnicodeString)"Ok: " +
3981 r + " => source = " + src.toPattern(a, TRUE) +
3982 ", target = " + trg.toPattern(b, TRUE));
3983 } else {
3984 UnicodeString a, b, c, d;
3985 errln((UnicodeString)"FAIL: " +
3986 r + " => source = " + src.toPattern(a, TRUE) +
3987 ", expected " + expSrc.toPattern(b, TRUE) +
3988 "; target = " + trg.toPattern(c, TRUE) +
3989 ", expected " + expTrg.toPattern(d, TRUE));
3990 }
3991
3992 delete t;
3993 }
3994
3995 /**
3996 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3997 */
3998 void TransliteratorTest::TestPatternWhiteSpace() {
3999 // Rules
4000 const char* r = "a > \\u200E b;";
4001
4002 UErrorCode ec = U_ZERO_ERROR;
4003 UParseError pe;
4004 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
4005
4006 if (U_FAILURE(ec)) {
4007 errln("FAIL: Couldn't set up test");
4008 } else {
4009 expect(*t, "a", "b");
4010 }
4011 delete t;
4012
4013 // UnicodeSet
4014 ec = U_ZERO_ERROR;
4015 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4016
4017 if (U_FAILURE(ec)) {
4018 errln("FAIL: Couldn't set up test");
4019 } else {
4020 if (set.contains(0x200E)) {
4021 errln("FAIL: U+200E not being ignored by UnicodeSet");
4022 }
4023 }
4024 }
4025 //======================================================================
4026 // this method is in TestUScript.java
4027 //======================================================================
4028 void TransliteratorTest::TestAllCodepoints(){
4029 UScriptCode code= USCRIPT_INVALID_CODE;
4030 char id[256]={'\0'};
4031 char abbr[256]={'\0'};
4032 char newId[256]={'\0'};
4033 char newAbbrId[256]={'\0'};
4034 char oldId[256]={'\0'};
4035 char oldAbbrId[256]={'\0'};
4036
4037 UErrorCode status =U_ZERO_ERROR;
4038 UParseError pe;
4039
4040 for(uint32_t i = 0; i<=0x10ffff; i++){
4041 code = uscript_getScript(i,&status);
4042 if(code == USCRIPT_INVALID_CODE){
4043 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4044 }
4045 const char* myId = uscript_getName(code);
4046 if(!myId) {
4047 dataerrln("Valid script code returned NULL name. Check your data!");
4048 return;
4049 }
4050 uprv_strcpy(id,myId);
4051 uprv_strcpy(abbr,uscript_getShortName(code));
4052
4053 uprv_strcpy(newId,"[:");
4054 uprv_strcat(newId,id);
4055 uprv_strcat(newId,":];NFD");
4056
4057 uprv_strcpy(newAbbrId,"[:");
4058 uprv_strcat(newAbbrId,abbr);
4059 uprv_strcat(newAbbrId,":];NFD");
4060
4061 if(uprv_strcmp(newId,oldId)!=0){
4062 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4063 if(t==NULL || U_FAILURE(status)){
4064 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4065 }
4066 delete t;
4067 }
4068 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4069 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4070 if(t==NULL || U_FAILURE(status)){
4071 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4072 }
4073 delete t;
4074 }
4075 uprv_strcpy(oldId,newId);
4076 uprv_strcpy(oldAbbrId, newAbbrId);
4077
4078 }
4079
4080 }
4081
/* Create a transliterator from the ID string `id` and check that its dynamic */
/* class ID equals the static class ID of class `cls`.                        */
#define TEST_TRANSLIT_ID(id, cls) { \
    UErrorCode idStatus = U_ZERO_ERROR; \
    Transliterator* fromId = Transliterator::createInstance(id, UTRANS_FORWARD, idStatus); \
    if (U_SUCCESS(idStatus)) { \
        if (fromId->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* *fromId = *fromId; */ /*can't do this: coverage test for assignment op*/ \
    } else { \
        dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(idStatus)); \
    } \
    delete fromId; \
}
4095
/* Create a transliterator from the rule text `rule` (must be a string literal, */
/* since it is pasted onto the failure message) and check that its dynamic      */
/* class ID equals the static class ID of class `cls`.                          */
#define TEST_TRANSLIT_RULE(rule, cls) { \
    UErrorCode ruleStatus = U_ZERO_ERROR; \
    UParseError ruleParseError; \
    Transliterator* fromRule = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, ruleParseError, ruleStatus); \
    if (U_SUCCESS(ruleStatus)) { \
        if (fromRule->getDynamicClassID() != cls ::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* *fromRule = *fromRule; */ /*can't do this: coverage test for assignment op*/ \
    } else { \
        errln("FAIL: Couldn't create " rule); \
    } \
    delete fromRule; \
}
4110
4111 void TransliteratorTest::TestBoilerplate() {
4112 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4113 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4114 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4115 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4116 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4117 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4118 TEST_TRANSLIT_ID("Null", NullTransliterator);
4119 TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4120 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4121 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4122 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4123 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4124 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4125 }
4126
4127 void TransliteratorTest::TestAlternateSyntax() {
4128 // U+2206 == &
4129 // U+2190 == <
4130 // U+2192 == >
4131 // U+2194 == <>
4132 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4133 "abc",
4134 "xbz");
4135 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4136 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4137 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4138 }
4139
// Rule sets referenced by index from BEGIN_END_TEST_CASES and by the
// begin/end tests. Entries whose rules use ::BEGIN / ::END are disabled: the
// rule text is kept as a line comment and an empty string occupies the slot
// so that the indexes of the remaining entries do not shift.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1] (disabled)
    //   "::BEGIN;"
    //   "abc > xy;"
    //   "::END;"
    //   "::BEGIN;"
    //   "aba > z;"
    //   "::END;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2] (disabled)
    //   "abc > xy;"
    //   "::BEGIN;"
    //   "aba > z;"
    //   "::END;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3] (disabled)
    //   "::BEGIN;"
    //   "abc > xy;"
    //   "::END;"
    //   "aba > z;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10] (disabled)
    //   "::BEGIN;"
    //   "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //   "$delim = [\\-$ws];"
    //   "::END;"
    //   "$ws $delim* > ' ';"
    //   "'-' $delim* > '-';",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11] (disabled)
    //   "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //   "$delim = [\\-$ws];"
    //   "::BEGIN;"
    //   "$ws $delim* > ' ';"
    //   "'-' $delim* > '-';"
    //   "::END;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12] (disabled)
    //   "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //   "$delim = [\\-$ws];"
    //   "$ab = [ab];"
    //   "::BEGIN;"
    //   "$ws $delim* > ' ';"
    //   "'-' $delim* > '-';"
    //   "::END;"
    //   "::BEGIN;"
    //   "$ab { ' ' } $ab > '-';"
    //   "c { ' ' > ;"
    //   "::END;"
    //   "::BEGIN;"
    //   "'a-a' > a\\%|a;"
    //   "::END;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14] (disabled)
    //   "::[abc];"
    //   "::BEGIN;"
    //   "abc > xy;"
    //   "::END;"
    //   "::BEGIN;"
    //   "aba > yz;"
    //   "::END;"
    //   "::Upper;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16] (disabled)
    //   "::[abc];"
    //   "::BEGIN;"
    //   "abc <> xy;"
    //   "::END;"
    //   "::BEGIN;"
    //   "aba <> yz;"
    //   "::END;"
    //   "::Upper(Lower);"
    //   "::([XYZ]);"
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4313
4314 /*
4315 (This entire test is commented out below and will need some heavy revision when we re-add
4316 the ::BEGIN/::END stuff)
4317 static const char* BOGUS_BEGIN_END_RULES[] = {
4318 // [7]
4319 "::BEGIN;"
4320 "abc > xy;"
4321 "::BEGIN;"
4322 "aba > z;"
4323 "::END;"
4324 "::END;",
4325
4326 // [8]
4327 "abc > xy;"
4328 " aba > z;"
4329 "::END;",
4330
4331 // [9]
4332 "::BEGIN;"
4333 "::Upper;"
4334 "::END;"
4335 };
4336 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4337 */
4338
4339 static const char* BEGIN_END_TEST_CASES[] = {
4340 // rules input expected output
4341 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
4342 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4343 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4344 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4345 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
4346 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
4347
4348 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
4349 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
4350 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
4351 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
4352 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4353 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4354 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4355 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4356 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4357 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
4358 BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
4359 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
4360
4361 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4362 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4363 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4364 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4365 };
4366 static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4367
4368 void TransliteratorTest::TestBeginEnd() {
4369 // run through the list of test cases above
4370 int32_t i = 0;
4371 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4372 expect((UnicodeString)"Test case #" + (i / 3),
4373 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4374 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4375 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4376 }
4377
4378 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4379 UParseError parseError;
4380 UErrorCode status = U_ZERO_ERROR;
4381 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4382 UTRANS_REVERSE, parseError, status);
4383 if (reversed == 0 || U_FAILURE(status)) {
4384 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4385 } else {
4386 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4387 }
4388 delete reversed;
4389
4390 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4391 // that all of them cause errors
4392 /*
4393 (commented out until we have the real ::BEGIN/::END stuff in place
4394 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4395 UParseError parseError;
4396 UErrorCode status = U_ZERO_ERROR;
4397 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4398 UTRANS_FORWARD, parseError, status);
4399 if (!U_FAILURE(status)) {
4400 delete t;
4401 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4402 }
4403 }
4404 */
4405 }
4406
4407 void TransliteratorTest::TestBeginEndToRules() {
4408 // run through the same list of test cases we used above, but this time, instead of just
4409 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4410 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4411 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4412 // to (i.e., does the same thing as) the original rule set
4413 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4414 UParseError parseError;
4415 UErrorCode status = U_ZERO_ERROR;
4416 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4417 UTRANS_FORWARD, parseError, status);
4418 if (U_FAILURE(status)) {
4419 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4420 } else {
4421 UnicodeString rules;
4422 t->toRules(rules, TRUE);
4423 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4424 UTRANS_FORWARD, parseError, status);
4425 if (U_FAILURE(status)) {
4426 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4427 parseError, status);
4428 delete t;
4429 } else {
4430 expect(*t2,
4431 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4432 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4433 delete t;
4434 delete t2;
4435 }
4436 }
4437 }
4438
4439 // do the same thing for the reversible test case
4440 UParseError parseError;
4441 UErrorCode status = U_ZERO_ERROR;
4442 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4443 UTRANS_REVERSE, parseError, status);
4444 if (U_FAILURE(status)) {
4445 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4446 } else {
4447 UnicodeString rules;
4448 reversed->toRules(rules, FALSE);
4449 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4450 parseError, status);
4451 if (U_FAILURE(status)) {
4452 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4453 parseError, status);
4454 delete reversed;
4455 } else {
4456 expect(*reversed2,
4457 UnicodeString("xy XY XYZ yz YZ"),
4458 UnicodeString("xy abc xaba yz aba"));
4459 delete reversed;
4460 delete reversed2;
4461 }
4462 }
4463 }
4464
4465 void TransliteratorTest::TestRegisterAlias() {
4466 UnicodeString longID("Lower;[aeiou]Upper");
4467 UnicodeString shortID("Any-CapVowels");
4468 UnicodeString reallyShortID("CapVowels");
4469
4470 Transliterator::registerAlias(shortID, longID);
4471
4472 UErrorCode err = U_ZERO_ERROR;
4473 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4474 if (U_FAILURE(err)) {
4475 errln("Failed to instantiate transliterator with long ID");
4476 Transliterator::unregister(shortID);
4477 return;
4478 }
4479 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4480 if (U_FAILURE(err)) {
4481 errln("Failed to instantiate transliterator with short ID");
4482 delete t1;
4483 Transliterator::unregister(shortID);
4484 return;
4485 }
4486
4487 if (t1->getID() != longID)
4488 errln("Transliterator instantiated with long ID doesn't have long ID");
4489 if (t2->getID() != reallyShortID)
4490 errln("Transliterator instantiated with short ID doesn't have short ID");
4491
4492 UnicodeString rules1;
4493 UnicodeString rules2;
4494
4495 t1->toRules(rules1, TRUE);
4496 t2->toRules(rules2, TRUE);
4497 if (rules1 != rules2)
4498 errln("Alias transliterators aren't the same");
4499
4500 delete t1;
4501 delete t2;
4502 Transliterator::unregister(shortID);
4503
4504 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4505 if (U_SUCCESS(err)) {
4506 errln("Instantiation with short ID succeeded after short ID was unregistered");
4507 delete t1;
4508 }
4509
4510 // try the same thing again, but this time with something other than
4511 // an instance of CompoundTransliterator
4512 UnicodeString realID("Latin-Greek");
4513 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4514 Transliterator::registerAlias(fakeID, realID);
4515
4516 err = U_ZERO_ERROR;
4517 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4518 if (U_FAILURE(err)) {
4519 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4520 Transliterator::unregister(realID);
4521 return;
4522 }
4523 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4524 if (U_FAILURE(err)) {
4525 errln("Failed to instantiate transliterator with fake ID");
4526 delete t1;
4527 Transliterator::unregister(realID);
4528 return;
4529 }
4530
4531 t1->toRules(rules1, TRUE);
4532 t2->toRules(rules2, TRUE);
4533 if (rules1 != rules2)
4534 errln("Alias transliterators aren't the same");
4535
4536 delete t1;
4537 delete t2;
4538 Transliterator::unregister(fakeID);
4539 }
4540
4541 void TransliteratorTest::TestRuleStripping() {
4542 /*
4543 #
4544 \uE001>\u0C01; # SIGN
4545 */
4546 static const UChar rule[] = {
4547 0x0023,0x0020,0x000D,0x000A,
4548 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4549 };
4550 static const UChar expectedRule[] = {
4551 0xE001,0x003E,0x0C01,0x003B,0
4552 };
4553 UChar result[UPRV_LENGTHOF(rule)];
4554 UErrorCode status = U_ZERO_ERROR;
4555 int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4556 if (len != u_strlen(expectedRule)) {
4557 errln("utrans_stripRules return len = %d", len);
4558 }
4559 if (u_strncmp(expectedRule, result, len) != 0) {
4560 errln("utrans_stripRules did not return expected string");
4561 }
4562 }
4563
4564 /**
4565 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4566 */
4567 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4568 UParseError parseError;
4569 UErrorCode status = U_ZERO_ERROR;
4570 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4571 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4572 if (hf == 0 || fh == 0) {
4573 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4574 delete hf;
4575 delete fh;
4576 return;
4577 }
4578
4579 // Array of 2n items
4580 // Each item is
4581 // "hf"|"fh"|"both",
4582 // <Halfwidth>,
4583 // <Fullwidth>
4584 const char* DATA[] = {
4585 "both",
4586 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4587 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4588 };
4589 int32_t DATA_length = UPRV_LENGTHOF(DATA);
4590
4591 for (int32_t i=0; i<DATA_length; i+=3) {
4592 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4593 UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4594 switch (*DATA[i]) {
4595 case 0x68: //'h': // Halfwidth-Fullwidth only
4596 expect(*hf, h, f);
4597 break;
4598 case 0x66: //'f': // Fullwidth-Halfwidth only
4599 expect(*fh, f, h);
4600 break;
4601 case 0x62: //'b': // both directions
4602 expect(*hf, h, f);
4603 expect(*fh, f, h);
4604 break;
4605 }
4606 }
4607 delete hf;
4608 delete fh;
4609 }
4610
4611
4612 /**
4613 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4614 * TODO: confirm that the expected results are correct.
4615 * For now, test just confirms that C++ and Java give identical results.
4616 */
4617 void TransliteratorTest::TestThai(void) {
4618 #if !UCONFIG_NO_BREAK_ITERATION
4619 UParseError parseError;
4620 UErrorCode status = U_ZERO_ERROR;
4621 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4622 if (tr == 0) {
4623 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4624 return;
4625 }
4626 if (U_FAILURE(status)) {
4627 errln("FAIL: createInstance failed with %s", u_errorName(status));
4628 return;
4629 }
4630 const char *thaiText =
4631 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4632 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4633 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4634 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4635 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4636 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4637 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4638 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4639 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4640 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4641 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4642 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4643 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4644 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4645 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4646 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4647 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4648 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4649 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4650 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4651 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4652 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4653 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4654 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4655 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4656 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4657 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4658 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4659 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4660 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4661
4662 const char *latinText =
4663 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4664 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4665 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4666 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4667 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4668 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4669 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4670 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4671 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4672 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4673 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4674 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4675 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4676 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4677 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4678 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4679 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4680 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4681
4682
4683 UnicodeString xlitText(thaiText);
4684 xlitText = xlitText.unescape();
4685 tr->transliterate(xlitText);
4686
4687 UnicodeString expectedText(latinText);
4688 expectedText = expectedText.unescape();
4689 expect(*tr, xlitText, expectedText);
4690
4691 delete tr;
4692 #endif
4693 }
4694
4695
4696 //======================================================================
4697 // Support methods
4698 //======================================================================
4699 void TransliteratorTest::expectT(const UnicodeString& id,
4700 const UnicodeString& source,
4701 const UnicodeString& expectedResult) {
4702 UErrorCode ec = U_ZERO_ERROR;
4703 UParseError pe;
4704 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4705 if (U_FAILURE(ec)) {
4706 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
4707 delete t;
4708 return;
4709 }
4710 expect(*t, source, expectedResult);
4711 delete t;
4712 }
4713
4714 void TransliteratorTest::reportParseError(const UnicodeString& message,
4715 const UParseError& parseError,
4716 const UErrorCode& status) {
4717 dataerrln(message +
4718 /*", parse error " + parseError.code +*/
4719 ", line " + parseError.line +
4720 ", offset " + parseError.offset +
4721 ", pre-context " + prettify(parseError.preContext, TRUE) +
4722 ", post-context " + prettify(parseError.postContext,TRUE) +
4723 ", Error: " + u_errorName(status));
4724 }
4725
4726 void TransliteratorTest::expect(const UnicodeString& rules,
4727 const UnicodeString& source,
4728 const UnicodeString& expectedResult,
4729 UTransPosition *pos) {
4730 expect("<ID>", rules, source, expectedResult, pos);
4731 }
4732
4733 void TransliteratorTest::expect(const UnicodeString& id,
4734 const UnicodeString& rules,
4735 const UnicodeString& source,
4736 const UnicodeString& expectedResult,
4737 UTransPosition *pos) {
4738 UErrorCode status = U_ZERO_ERROR;
4739 UParseError parseError;
4740 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4741 if (U_FAILURE(status)) {
4742 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4743 } else {
4744 expect(*t, source, expectedResult, pos);
4745 }
4746 delete t;
4747 }
4748
4749 void TransliteratorTest::expect(const Transliterator& t,
4750 const UnicodeString& source,
4751 const UnicodeString& expectedResult,
4752 const Transliterator& reverseTransliterator) {
4753 expect(t, source, expectedResult);
4754 expect(reverseTransliterator, expectedResult, source);
4755 }
4756
4757 void TransliteratorTest::expect(const Transliterator& t,
4758 const UnicodeString& source,
4759 const UnicodeString& expectedResult,
4760 UTransPosition *pos) {
4761 if (pos == 0) {
4762 UnicodeString result(source);
4763 t.transliterate(result);
4764 expectAux(t.getID() + ":String", source, result, expectedResult);
4765 }
4766 UTransPosition index={0, 0, 0, 0};
4767 if (pos != 0) {
4768 index = *pos;
4769 }
4770
4771 UnicodeString rsource(source);
4772 if (pos == 0) {
4773 t.transliterate(rsource);
4774 } else {
4775 // Do it all at once -- below we do it incrementally
4776 t.finishTransliteration(rsource, *pos);
4777 }
4778 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4779
4780 // Test keyboard (incremental) transliteration -- this result
4781 // must be the same after we finalize (see below).
4782 UnicodeString log;
4783 rsource.remove();
4784 if (pos != 0) {
4785 rsource = source;
4786 formatInput(log, rsource, index);
4787 log.append(" -> ");
4788 UErrorCode status = U_ZERO_ERROR;
4789 t.transliterate(rsource, index, status);
4790 formatInput(log, rsource, index);
4791 } else {
4792 for (int32_t i=0; i<source.length(); ++i) {
4793 if (i != 0) {
4794 log.append(" + ");
4795 }
4796 log.append(source.charAt(i)).append(" -> ");
4797 UErrorCode status = U_ZERO_ERROR;
4798 t.transliterate(rsource, index, source.charAt(i), status);
4799 formatInput(log, rsource, index);
4800 }
4801 }
4802
4803 // As a final step in keyboard transliteration, we must call
4804 // transliterate to finish off any pending partial matches that
4805 // were waiting for more input.
4806 t.finishTransliteration(rsource, index);
4807 log.append(" => ").append(rsource);
4808
4809 expectAux(t.getID() + ":Keyboard", log,
4810 rsource == expectedResult,
4811 expectedResult);
4812 }
4813
4814
4815 /**
4816 * @param appendTo result is appended to this param.
4817 * @param input the string being transliterated
4818 * @param pos the index struct
4819 */
4820 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4821 const UnicodeString& input,
4822 const UTransPosition& pos) {
4823 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4824 // the {} indicate the context start and limit, and the ||
4825 // indicate the start and limit.
4826 if (0 <= pos.contextStart &&
4827 pos.contextStart <= pos.start &&
4828 pos.start <= pos.limit &&
4829 pos.limit <= pos.contextLimit &&
4830 pos.contextLimit <= input.length()) {
4831
4832 UnicodeString a, b, c, d, e;
4833 input.extractBetween(0, pos.contextStart, a);
4834 input.extractBetween(pos.contextStart, pos.start, b);
4835 input.extractBetween(pos.start, pos.limit, c);
4836 input.extractBetween(pos.limit, pos.contextLimit, d);
4837 input.extractBetween(pos.contextLimit, input.length(), e);
4838 appendTo.append(a).append((UChar)123/*{*/).append(b).
4839 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4840 append((UChar)125/*}*/).append(e);
4841 } else {
4842 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4843 pos.contextStart + ", s=" + pos.start + ", l=" +
4844 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4845 input);
4846 }
4847 return appendTo;
4848 }
4849
4850 void TransliteratorTest::expectAux(const UnicodeString& tag,
4851 const UnicodeString& source,
4852 const UnicodeString& result,
4853 const UnicodeString& expectedResult) {
4854 expectAux(tag, source + " -> " + result,
4855 result == expectedResult,
4856 expectedResult);
4857 }
4858
4859 void TransliteratorTest::expectAux(const UnicodeString& tag,
4860 const UnicodeString& summary, UBool pass,
4861 const UnicodeString& expectedResult) {
4862 if (pass) {
4863 logln(UnicodeString("(")+tag+") " + prettify(summary));
4864 } else {
4865 dataerrln(UnicodeString("FAIL: (")+tag+") "
4866 + prettify(summary)
4867 + ", expected " + prettify(expectedResult));
4868 }
4869 }
4870
4871 #endif /* #if !UCONFIG_NO_TRANSLITERATION */