]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/transtst.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / test / intltest / transtst.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_TRANSLITERATION
14
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "cpdtrans.h"
27 #include "nultrans.h"
28 #include "rbt.h"
29 #include "anytrans.h"
30 #include "esctrn.h"
31 #include "name2uni.h"
32 #include "nortrans.h"
33 #include "remtrans.h"
34 #include "titletrn.h"
35 #include "tolowtrn.h"
36 #include "toupptrn.h"
37 #include "unesctrn.h"
38 #include "uni2name.h"
39 #include "cstring.h"
40 #include "cmemory.h"
41 #include <stdio.h>
42
43 /***********************************************************************
44
45 HOW TO USE THIS TEST FILE
46 -or-
47 How I developed on two platforms
48 without losing (too much of) my mind
49
50
51 1. Add new tests by copying/pasting/changing existing tests. On Java,
52 any public void method named Test...() taking no parameters becomes
53 a test. On C++, you need to modify the header and add a line to
54 the runIndexedTest() dispatch method.
55
56 2. Make liberal use of the expect() method; it is your friend.
57
58 3. The tests in this file exactly match those in a sister file on the
59 other side. The two files are:
60
61 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
62 icu4c: source/test/intltest/transtst.cpp
63
64 ==> THIS IS THE IMPORTANT PART <==
65
66 When you add a test in this file, add it in TransliteratorTest.java
67 too. Give it the same name and put it in the same relative place.
68 This makes maintenance a lot simpler for any poor soul who ends up
69 trying to synchronize the tests between icu4j and icu4c.
70
71 4. If you MUST enter a test that is NOT paralleled in the sister file,
72 then add it in the special non-mirrored section. These are
73 labeled
74
75 "icu4j ONLY"
76
77 or
78
79 "icu4c ONLY"
80
81 Make sure you document the reason the test is here and not there.
82
83
84 Thank you.
85 The Management
86 ***********************************************************************/
87
88 // Define character constants thusly to be EBCDIC-friendly
89 enum {
90 LEFT_BRACE=((UChar)0x007B), /*{*/
91 PIPE =((UChar)0x007C), /*|*/
92 ZERO =((UChar)0x0030), /*0*/
93 UPPER_A =((UChar)0x0041) /*A*/
94 };
95
96 TransliteratorTest::TransliteratorTest()
97 : DESERET_DEE((UChar32)0x10414),
98 DESERET_dee((UChar32)0x1043C)
99 {
100 }
101
102 TransliteratorTest::~TransliteratorTest() {}
103
104 void
105 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
106 const char* &name, char* /*par*/) {
107 switch (index) {
108 TESTCASE(0,TestInstantiation);
109 TESTCASE(1,TestSimpleRules);
110 TESTCASE(2,TestRuleBasedInverse);
111 TESTCASE(3,TestKeyboard);
112 TESTCASE(4,TestKeyboard2);
113 TESTCASE(5,TestKeyboard3);
114 TESTCASE(6,TestArabic);
115 TESTCASE(7,TestCompoundKana);
116 TESTCASE(8,TestCompoundHex);
117 TESTCASE(9,TestFiltering);
118 TESTCASE(10,TestInlineSet);
119 TESTCASE(11,TestPatternQuoting);
120 TESTCASE(12,TestJ277);
121 TESTCASE(13,TestJ243);
122 TESTCASE(14,TestJ329);
123 TESTCASE(15,TestSegments);
124 TESTCASE(16,TestCursorOffset);
125 TESTCASE(17,TestArbitraryVariableValues);
126 TESTCASE(18,TestPositionHandling);
127 TESTCASE(19,TestHiraganaKatakana);
128 TESTCASE(20,TestCopyJ476);
129 TESTCASE(21,TestAnchors);
130 TESTCASE(22,TestInterIndic);
131 TESTCASE(23,TestFilterIDs);
132 TESTCASE(24,TestCaseMap);
133 TESTCASE(25,TestNameMap);
134 TESTCASE(26,TestLiberalizedID);
135 TESTCASE(27,TestCreateInstance);
136 TESTCASE(28,TestNormalizationTransliterator);
137 TESTCASE(29,TestCompoundRBT);
138 TESTCASE(30,TestCompoundFilter);
139 TESTCASE(31,TestRemove);
140 TESTCASE(32,TestToRules);
141 TESTCASE(33,TestContext);
142 TESTCASE(34,TestSupplemental);
143 TESTCASE(35,TestQuantifier);
144 TESTCASE(36,TestSTV);
145 TESTCASE(37,TestCompoundInverse);
146 TESTCASE(38,TestNFDChainRBT);
147 TESTCASE(39,TestNullInverse);
148 TESTCASE(40,TestAliasInverseID);
149 TESTCASE(41,TestCompoundInverseID);
150 TESTCASE(42,TestUndefinedVariable);
151 TESTCASE(43,TestEmptyContext);
152 TESTCASE(44,TestCompoundFilterID);
153 TESTCASE(45,TestPropertySet);
154 TESTCASE(46,TestNewEngine);
155 TESTCASE(47,TestQuantifiedSegment);
156 TESTCASE(48,TestDevanagariLatinRT);
157 TESTCASE(49,TestTeluguLatinRT);
158 TESTCASE(50,TestCompoundLatinRT);
159 TESTCASE(51,TestSanskritLatinRT);
160 TESTCASE(52,TestLocaleInstantiation);
161 TESTCASE(53,TestTitleAccents);
162 TESTCASE(54,TestLocaleResource);
163 TESTCASE(55,TestParseError);
164 TESTCASE(56,TestOutputSet);
165 TESTCASE(57,TestVariableRange);
166 TESTCASE(58,TestInvalidPostContext);
167 TESTCASE(59,TestIDForms);
168 TESTCASE(60,TestToRulesMark);
169 TESTCASE(61,TestEscape);
170 TESTCASE(62,TestAnchorMasking);
171 TESTCASE(63,TestDisplayName);
172 TESTCASE(64,TestSpecialCases);
173 TESTCASE(65,TestIncrementalProgress);
174 TESTCASE(66,TestSurrogateCasing);
175 TESTCASE(67,TestFunction);
176 TESTCASE(68,TestInvalidBackRef);
177 TESTCASE(69,TestMulticharStringSet);
178 TESTCASE(70,TestUserFunction);
179 TESTCASE(71,TestAnyX);
180 TESTCASE(72,TestSourceTargetSet);
181 TESTCASE(73,TestGurmukhiDevanagari);
182 TESTCASE(74,TestRuleWhitespace);
183 TESTCASE(75,TestAllCodepoints);
184 TESTCASE(76,TestBoilerplate);
185 TESTCASE(77,TestAlternateSyntax);
186 TESTCASE(78,TestBeginEnd);
187 TESTCASE(79,TestBeginEndToRules);
188 TESTCASE(80,TestRegisterAlias);
189 default: name = ""; break;
190 }
191 }
192
193 static const UVersionInfo ICU_37 = {3,7,0,0};
194 /**
195 * Make sure every system transliterator can be instantiated.
196 *
197 * ALSO test that the result of toRules() for each rule is a valid
198 * rule. Do this here so we don't have to have another test that
199 * instantiates everything as well.
200 */
201 void TransliteratorTest::TestInstantiation() {
202 UErrorCode ec = U_ZERO_ERROR;
203 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
204 assertSuccess("getAvailableIDs()", ec);
205 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
206 int32_t n = Transliterator::countAvailableIDs();
207 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
208 avail->count(ec) == n);
209 assertSuccess("count()", ec);
210 UnicodeString name;
211 for (int32_t i=0; i<n; ++i) {
212 const UnicodeString& id = *avail->snext(ec);
213 if (!assertSuccess("snext()", ec) ||
214 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
215 break;
216 }
217 UnicodeString id2 = Transliterator::getAvailableID(i);
218 if (id.length() < 1) {
219 errln(UnicodeString("FAIL: getAvailableID(") +
220 i + ") returned empty string");
221 continue;
222 }
223 if (id != id2) {
224 errln(UnicodeString("FAIL: getAvailableID(") +
225 i + ") != getAvailableIDs().snext()");
226 continue;
227 }
228 if(id2.indexOf("Thai")>-1 && !isICUVersionAtLeast(ICU_37)){
229 /* The Thai-Latin transliterator doesn't exist in ICU4C yet */
230 continue;
231 }
232 UParseError parseError;
233 UErrorCode status = U_ZERO_ERROR;
234 Transliterator* t = Transliterator::createInstance(id,
235 UTRANS_FORWARD, parseError,status);
236 name.truncate(0);
237 Transliterator::getDisplayName(id, name);
238 if (t == 0) {
239 errln(UnicodeString("FAIL: Couldn't create ") + id +
240 /*", parse error " + parseError.code +*/
241 ", line " + parseError.line +
242 ", offset " + parseError.offset +
243 ", pre-context " + prettify(parseError.preContext, TRUE) +
244 ", post-context " +prettify(parseError.postContext,TRUE) +
245 ", Error: " + u_errorName(status));
246 // When createInstance fails, it deletes the failing
247 // entry from the available ID list. We detect this
248 // here by looking for a change in countAvailableIDs.
249 int32_t nn = Transliterator::countAvailableIDs();
250 if (nn == (n - 1)) {
251 n = nn;
252 --i; // Compensate for deleted entry
253 }
254 } else {
255 logln(UnicodeString("OK: ") + name + " (" + id + ")");
256
257 // Now test toRules
258 UnicodeString rules;
259 t->toRules(rules, TRUE);
260 Transliterator *u = Transliterator::createFromRules("x",
261 rules, UTRANS_FORWARD, parseError,status);
262 if (u == 0) {
263 errln(UnicodeString("FAIL: ") + id +
264 ".createFromRules() => bad rules" +
265 /*", parse error " + parseError.code +*/
266 ", line " + parseError.line +
267 ", offset " + parseError.offset +
268 ", context " + prettify(parseError.preContext, TRUE) +
269 ", rules: " + prettify(rules, TRUE));
270 } else {
271 delete u;
272 }
273 delete t;
274 }
275 }
276 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
277 assertSuccess("snext()", ec);
278 delete avail;
279
280 // Now test the failure path
281 UParseError parseError;
282 UErrorCode status = U_ZERO_ERROR;
283 UnicodeString id("<Not a valid Transliterator ID>");
284 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
285 if (t != 0) {
286 errln("FAIL: " + id + " returned a transliterator");
287 delete t;
288 } else {
289 logln("OK: Bogus ID handled properly");
290 }
291 }
292
293 void TransliteratorTest::TestSimpleRules(void) {
294 /* Example: rules 1. ab>x|y
295 * 2. yc>z
296 *
297 * []|eabcd start - no match, copy e to tranlated buffer
298 * [e]|abcd match rule 1 - copy output & adjust cursor
299 * [ex|y]cd match rule 2 - copy output & adjust cursor
300 * [exz]|d no match, copy d to transliterated buffer
301 * [exzd]| done
302 */
303 expect(UnicodeString("ab>x|y;", "") +
304 "yc>z",
305 "eabcd", "exzd");
306
307 /* Another set of rules:
308 * 1. ab>x|yzacw
309 * 2. za>q
310 * 3. qc>r
311 * 4. cw>n
312 *
313 * []|ab Rule 1
314 * [x|yzacw] No match
315 * [xy|zacw] Rule 2
316 * [xyq|cw] Rule 4
317 * [xyqn]| Done
318 */
319 expect(UnicodeString("ab>x|yzacw;") +
320 "za>q;" +
321 "qc>r;" +
322 "cw>n",
323 "ab", "xyqn");
324
325 /* Test categories
326 */
327 UErrorCode status = U_ZERO_ERROR;
328 RuleBasedTransliterator t(
329 "<ID>",
330 UnicodeString("$dummy=").append((UChar)0xE100) +
331 UnicodeString(";"
332 "$vowel=[aeiouAEIOU];"
333 "$lu=[:Lu:];"
334 "$vowel } $lu > '!';"
335 "$vowel > '&';"
336 "'!' { $lu > '^';"
337 "$lu > '*';"
338 "a > ERROR", ""),
339 status);
340 if (U_FAILURE(status)) {
341 errln("FAIL: RBT constructor failed");
342 return;
343 }
344 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
345 }
346
347 /**
348 * Test inline set syntax and set variable syntax.
349 */
350 void TransliteratorTest::TestInlineSet(void) {
351 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
352 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
353
354 expect(UnicodeString(
355 "$digit = [0-9];"
356 "$alpha = [a-zA-Z];"
357 "$alphanumeric = [$digit $alpha];" // ***
358 "$special = [^$alphanumeric];" // ***
359 "$alphanumeric > '-';"
360 "$special > '*';", ""),
361
362 "thx-1138", "---*----");
363 }
364
365 /**
366 * Create some inverses and confirm that they work. We have to be
367 * careful how we do this, since the inverses will not be true
368 * inverses -- we can't throw any random string at the composition
369 * of the transliterators and expect the identity function. F x
370 * F' != I. However, if we are careful about the input, we will
371 * get the expected results.
372 */
373 void TransliteratorTest::TestRuleBasedInverse(void) {
374 UnicodeString RULES =
375 UnicodeString("abc>zyx;") +
376 "ab>yz;" +
377 "bc>zx;" +
378 "ca>xy;" +
379 "a>x;" +
380 "b>y;" +
381 "c>z;" +
382
383 "abc<zyx;" +
384 "ab<yz;" +
385 "bc<zx;" +
386 "ca<xy;" +
387 "a<x;" +
388 "b<y;" +
389 "c<z;" +
390
391 "";
392
393 const char* DATA[] = {
394 // Careful here -- random strings will not work. If we keep
395 // the left side to the domain and the right side to the range
396 // we will be okay though (left, abc; right xyz).
397 "a", "x",
398 "abcacab", "zyxxxyy",
399 "caccb", "xyzzy",
400 };
401
402 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
403
404 UErrorCode status = U_ZERO_ERROR;
405 RuleBasedTransliterator fwd("<ID>", RULES, status);
406 RuleBasedTransliterator rev("<ID>", RULES,
407 UTRANS_REVERSE, status);
408 if (U_FAILURE(status)) {
409 errln("FAIL: RBT constructor failed");
410 return;
411 }
412 for (int32_t i=0; i<DATA_length; i+=2) {
413 expect(fwd, DATA[i], DATA[i+1]);
414 expect(rev, DATA[i+1], DATA[i]);
415 }
416 }
417
418 /**
419 * Basic test of keyboard.
420 */
421 void TransliteratorTest::TestKeyboard(void) {
422 UErrorCode status = U_ZERO_ERROR;
423 RuleBasedTransliterator t("<ID>",
424 UnicodeString("psch>Y;")
425 +"ps>y;"
426 +"ch>x;"
427 +"a>A;",
428 status);
429 if (U_FAILURE(status)) {
430 errln("FAIL: RBT constructor failed");
431 return;
432 }
433 const char* DATA[] = {
434 // insertion, buffer
435 "a", "A",
436 "p", "Ap",
437 "s", "Aps",
438 "c", "Apsc",
439 "a", "AycA",
440 "psch", "AycAY",
441 0, "AycAY", // null means finishKeyboardTransliteration
442 };
443
444 keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
445 }
446
447 /**
448 * Basic test of keyboard with cursor.
449 */
450 void TransliteratorTest::TestKeyboard2(void) {
451 UErrorCode status = U_ZERO_ERROR;
452 RuleBasedTransliterator t("<ID>",
453 UnicodeString("ych>Y;")
454 +"ps>|y;"
455 +"ch>x;"
456 +"a>A;",
457 status);
458 if (U_FAILURE(status)) {
459 errln("FAIL: RBT constructor failed");
460 return;
461 }
462 const char* DATA[] = {
463 // insertion, buffer
464 "a", "A",
465 "p", "Ap",
466 "s", "Aps", // modified for rollback - "Ay",
467 "c", "Apsc", // modified for rollback - "Ayc",
468 "a", "AycA",
469 "p", "AycAp",
470 "s", "AycAps", // modified for rollback - "AycAy",
471 "c", "AycApsc", // modified for rollback - "AycAyc",
472 "h", "AycAY",
473 0, "AycAY", // null means finishKeyboardTransliteration
474 };
475
476 keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
477 }
478
479 /**
480 * Test keyboard transliteration with back-replacement.
481 */
482 void TransliteratorTest::TestKeyboard3(void) {
483 // We want th>z but t>y. Furthermore, during keyboard
484 // transliteration we want t>y then yh>z if t, then h are
485 // typed.
486 UnicodeString RULES("t>|y;"
487 "yh>z;");
488
489 const char* DATA[] = {
490 // Column 1: characters to add to buffer (as if typed)
491 // Column 2: expected appearance of buffer after
492 // keyboard xliteration.
493 "a", "a",
494 "b", "ab",
495 "t", "abt", // modified for rollback - "aby",
496 "c", "abyc",
497 "t", "abyct", // modified for rollback - "abycy",
498 "h", "abycz",
499 0, "abycz", // null means finishKeyboardTransliteration
500 };
501
502 UErrorCode status = U_ZERO_ERROR;
503 RuleBasedTransliterator t("<ID>", RULES, status);
504 if (U_FAILURE(status)) {
505 errln("FAIL: RBT constructor failed");
506 return;
507 }
508 keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
509 }
510
511 void TransliteratorTest::keyboardAux(const Transliterator& t,
512 const char* DATA[], int32_t DATA_length) {
513 UErrorCode status = U_ZERO_ERROR;
514 UTransPosition index={0, 0, 0, 0};
515 UnicodeString s;
516 for (int32_t i=0; i<DATA_length; i+=2) {
517 UnicodeString log;
518 if (DATA[i] != 0) {
519 log = s + " + "
520 + DATA[i]
521 + " -> ";
522 t.transliterate(s, index, DATA[i], status);
523 } else {
524 log = s + " => ";
525 t.finishTransliteration(s, index);
526 }
527 // Show the start index '{' and the cursor '|'
528 UnicodeString a, b, c;
529 s.extractBetween(0, index.contextStart, a);
530 s.extractBetween(index.contextStart, index.start, b);
531 s.extractBetween(index.start, s.length(), c);
532 log.append(a).
533 append((UChar)LEFT_BRACE).
534 append(b).
535 append((UChar)PIPE).
536 append(c);
537 if (s == DATA[i+1] && U_SUCCESS(status)) {
538 logln(log);
539 } else {
540 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
541 }
542 }
543 }
544
545 void TransliteratorTest::TestArabic(void) {
546 // Test disabled for 2.0 until new Arabic transliterator can be written.
547 // /*
548 // const char* DATA[] = {
549 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
550 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
551 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
552 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
553 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
554 // "\u062c\u0645\u064a\u0644\u0629",
555 // };
556 // */
557 //
558 // UChar ar_raw[] = {
559 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
560 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
561 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
562 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
563 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
564 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
565 // };
566 // UnicodeString ar(ar_raw);
567 // UErrorCode status=U_ZERO_ERROR;
568 // UParseError parseError;
569 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
570 // if (t == 0) {
571 // errln("FAIL: createInstance failed");
572 // return;
573 // }
574 // expect(*t, "Arabic", ar);
575 // delete t;
576 }
577
578 /**
579 * Compose the Kana transliterator forward and reverse and try
580 * some strings that should come out unchanged.
581 */
582 void TransliteratorTest::TestCompoundKana(void) {
583 UParseError parseError;
584 UErrorCode status = U_ZERO_ERROR;
585 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
586 if (t == 0) {
587 errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
588 } else {
589 expect(*t, "aaaaa", "aaaaa");
590 delete t;
591 }
592 }
593
594 /**
595 * Compose the hex transliterators forward and reverse.
596 */
597 void TransliteratorTest::TestCompoundHex(void) {
598 UParseError parseError;
599 UErrorCode status = U_ZERO_ERROR;
600 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
601 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
602 Transliterator* transab[] = { a, b };
603 Transliterator* transba[] = { b, a };
604 if (a == 0 || b == 0) {
605 errln("FAIL: construction failed");
606 delete a;
607 delete b;
608 return;
609 }
610 // Do some basic tests of a
611 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
612 // Do some basic tests of b
613 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
614
615 Transliterator* ab = new CompoundTransliterator(transab, 2);
616 UnicodeString s("abcde", "");
617 expect(*ab, s, s);
618
619 UnicodeString str(s);
620 a->transliterate(str);
621 Transliterator* ba = new CompoundTransliterator(transba, 2);
622 expect(*ba, str, str);
623
624 delete ab;
625 delete ba;
626 delete a;
627 delete b;
628 }
629
630 int gTestFilterClassID = 0;
631 /**
632 * Used by TestFiltering().
633 */
634 class TestFilter : public UnicodeFilter {
635 virtual UnicodeFunctor* clone() const {
636 return new TestFilter(*this);
637 }
638 virtual UBool contains(UChar32 c) const {
639 return c != (UChar)0x0063 /*c*/;
640 }
641 // Stubs
642 virtual UnicodeString& toPattern(UnicodeString& result,
643 UBool /*escapeUnprintable*/) const {
644 return result;
645 }
646 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
647 return FALSE;
648 }
649 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
650 public:
651 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
652 };
653
654 /**
655 * Do some basic tests of filtering.
656 */
657 void TransliteratorTest::TestFiltering(void) {
658 UParseError parseError;
659 UErrorCode status = U_ZERO_ERROR;
660 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
661 if (hex == 0) {
662 errln("FAIL: createInstance(Any-Hex) failed");
663 return;
664 }
665 hex->adoptFilter(new TestFilter());
666 UnicodeString s("abcde");
667 hex->transliterate(s);
668 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
669 if (s == exp) {
670 logln(UnicodeString("Ok: \"") + exp + "\"");
671 } else {
672 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
673 }
674
675 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
676 UnicodeFilter *f = hex->orphanFilter();
677 if (f == NULL){
678 errln("FAIL: orphanFilter() should get a UnicodeFilter");
679 } else {
680 delete f;
681 }
682 delete hex;
683 }
684
685 /**
686 * Test anchors
687 */
688 void TransliteratorTest::TestAnchors(void) {
689 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
690 "aaa",
691 "012");
692 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
693 "aaa",
694 "012");
695 expect(UnicodeString("^ab > 01 ;"
696 " ab > |8 ;"
697 " b > k ;"
698 " 8x$ > 45 ;"
699 " 8x > 77 ;", ""),
700
701 "ababbabxabx",
702 "018k7745");
703 expect(UnicodeString("$s = [z$] ;"
704 "$s{ab > 01 ;"
705 " ab > |8 ;"
706 " b > k ;"
707 " 8x}$s > 45 ;"
708 " 8x > 77 ;", ""),
709
710 "abzababbabxzabxabx",
711 "01z018k45z01x45");
712 }
713
714 /**
715 * Test pattern quoting and escape mechanisms.
716 */
717 void TransliteratorTest::TestPatternQuoting(void) {
718 // Array of 3n items
719 // Each item is <rules>, <input>, <expected output>
720 const UnicodeString DATA[] = {
721 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
722 UnicodeString(UChar(0x4E01)),
723 "[male adult]"
724 };
725
726 for (int32_t i=0; i<3; i+=3) {
727 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
728 UErrorCode status = U_ZERO_ERROR;
729 RuleBasedTransliterator t("<ID>", DATA[i], status);
730 if (U_FAILURE(status)) {
731 errln("RBT constructor failed");
732 } else {
733 expect(t, DATA[i+1], DATA[i+2]);
734 }
735 }
736 }
737
738 /**
739 * Regression test for bugs found in Greek transliteration.
740 */
741 void TransliteratorTest::TestJ277(void) {
742 UErrorCode status = U_ZERO_ERROR;
743 UParseError parseError;
744 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
745 if (gl == NULL) {
746 errln("FAIL: createInstance(Greek-Latin) returned NULL");
747 return;
748 }
749
750 UChar sigma = 0x3C3;
751 UChar upsilon = 0x3C5;
752 UChar nu = 0x3BD;
753 // UChar PHI = 0x3A6;
754 UChar alpha = 0x3B1;
755 // UChar omega = 0x3C9;
756 // UChar omicron = 0x3BF;
757 // UChar epsilon = 0x3B5;
758
759 // sigma upsilon nu -> syn
760 UnicodeString syn;
761 syn.append(sigma).append(upsilon).append(nu);
762 expect(*gl, syn, "syn");
763
764 // sigma alpha upsilon nu -> saun
765 UnicodeString sayn;
766 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
767 expect(*gl, sayn, "saun");
768
769 // Again, using a smaller rule set
770 UnicodeString rules(
771 "$alpha = \\u03B1;"
772 "$nu = \\u03BD;"
773 "$sigma = \\u03C3;"
774 "$ypsilon = \\u03C5;"
775 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
776 "s <> $sigma;"
777 "a <> $alpha;"
778 "u <> $vowel { $ypsilon;"
779 "y <> $ypsilon;"
780 "n <> $nu;",
781 "");
782 RuleBasedTransliterator mini("mini", rules, UTRANS_REVERSE, status);
783 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
784 expect(mini, syn, "syn");
785 expect(mini, sayn, "saun");
786
787 #if !UCONFIG_NO_FORMATTING
788 // Transliterate the Greek locale data
789 Locale el("el");
790 DateFormatSymbols syms(el, status);
791 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
792 int32_t i, count;
793 const UnicodeString* data = syms.getMonths(count);
794 for (i=0; i<count; ++i) {
795 if (data[i].length() == 0) {
796 continue;
797 }
798 UnicodeString out(data[i]);
799 gl->transliterate(out);
800 UBool ok = TRUE;
801 if (data[i].length() >= 2 && out.length() >= 2 &&
802 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
803 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
804 ok = FALSE;
805 }
806 }
807 if (ok) {
808 logln(prettify(data[i] + " -> " + out));
809 } else {
810 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
811 }
812 }
813 #endif
814
815 delete gl;
816 }
817
818 /**
819 * Prefix, suffix support in hex transliterators
820 */
821 void TransliteratorTest::TestJ243(void) {
822 UErrorCode ec = U_ZERO_ERROR;
823
824 // Test default Hex-Any, which should handle
825 // \u, \U, u+, and U+
826 Transliterator *hex =
827 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
828 if (assertSuccess("getInstance", ec)) {
829 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
830 }
831 delete hex;
832
833 // // Try a custom Hex-Unicode
834 // // \uXXXX and &#xXXXX;
835 // ec = U_ZERO_ERROR;
836 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
837 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
838 // "abcd5fx012&#x00033;");
839 // // Try custom Any-Hex (default is tested elsewhere)
840 // ec = U_ZERO_ERROR;
841 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
842 // expect(hex3, "012", "&#x30;&#x31;&#x32;");
843 }
844
845 /**
846 * Parsers need better syntax error messages.
847 */
848 void TransliteratorTest::TestJ329(void) {
849
850 struct { UBool containsErrors; const char* rule; } DATA[] = {
851 { FALSE, "a > b; c > d" },
852 { TRUE, "a > b; no operator; c > d" },
853 };
854 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
855
856 for (int32_t i=0; i<DATA_length; ++i) {
857 UErrorCode status = U_ZERO_ERROR;
858 UParseError parseError;
859 RuleBasedTransliterator rbt("<ID>",
860 DATA[i].rule,
861 UTRANS_FORWARD,
862 0,
863 parseError,
864 status);
865 UBool gotError = U_FAILURE(status);
866 UnicodeString desc(DATA[i].rule);
867 desc.append(gotError ? " -> error" : " -> no error");
868 if (gotError) {
869 desc = desc + ", ParseError code=" + u_errorName(status) +
870 " line=" + parseError.line +
871 " offset=" + parseError.offset +
872 " context=" + parseError.preContext;
873 }
874 if (gotError == DATA[i].containsErrors) {
875 logln(UnicodeString("Ok: ") + desc);
876 } else {
877 errln(UnicodeString("FAIL: ") + desc);
878 }
879 }
880 }
881
882 /**
883 * Test segments and segment references.
884 */
885 void TransliteratorTest::TestSegments(void) {
886 // Array of 3n items
887 // Each item is <rules>, <input>, <expected output>
888 UnicodeString DATA[] = {
889 "([a-z]) '.' ([0-9]) > $2 '-' $1",
890 "abc.123.xyz.456",
891 "ab1-c23.xy4-z56",
892
893 // nested
894 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
895 "a1 b2",
896 "a1.a.1 b2.b.2",
897 };
898 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
899
900 for (int32_t i=0; i<DATA_length; i+=3) {
901 logln("Pattern: " + prettify(DATA[i]));
902 UErrorCode status = U_ZERO_ERROR;
903 RuleBasedTransliterator t("ID", DATA[i], status);
904 if (U_FAILURE(status)) {
905 errln("FAIL: RBT constructor");
906 } else {
907 expect(t, DATA[i+1], DATA[i+2]);
908 }
909 }
910 }
911
912 /**
913 * Test cursor positioning outside of the key
914 */
915 void TransliteratorTest::TestCursorOffset(void) {
916 // Array of 3n items
917 // Each item is <rules>, <input>, <expected output>
918 UnicodeString DATA[] = {
919 "pre {alpha} post > | @ ALPHA ;"
920 "eALPHA > beta ;"
921 "pre {beta} post > BETA @@ | ;"
922 "post > xyz",
923
924 "prealphapost prebetapost",
925
926 "prbetaxyz preBETApost",
927 };
928 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
929
930 for (int32_t i=0; i<DATA_length; i+=3) {
931 logln("Pattern: " + prettify(DATA[i]));
932 UErrorCode status = U_ZERO_ERROR;
933 RuleBasedTransliterator t("<ID>", DATA[i], status);
934 if (U_FAILURE(status)) {
935 errln("FAIL: RBT constructor");
936 } else {
937 expect(t, DATA[i+1], DATA[i+2]);
938 }
939 }
940 }
941
942 /**
943 * Test zero length and > 1 char length variable values. Test
944 * use of variable refs in UnicodeSets.
945 */
946 void TransliteratorTest::TestArbitraryVariableValues(void) {
947 // Array of 3n items
948 // Each item is <rules>, <input>, <expected output>
949 UnicodeString DATA[] = {
950 "$abe = ab;"
951 "$pat = x[yY]z;"
952 "$ll = 'a-z';"
953 "$llZ = [$ll];"
954 "$llY = [$ll$pat];"
955 "$emp = ;"
956
957 "$abe > ABE;"
958 "$pat > END;"
959 "$llZ > 1;"
960 "$llY > 2;"
961 "7$emp 8 > 9;"
962 "",
963
964 "ab xYzxyz stY78",
965 "ABE ENDEND 1129",
966 };
967 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
968
969 for (int32_t i=0; i<DATA_length; i+=3) {
970 logln("Pattern: " + prettify(DATA[i]));
971 UErrorCode status = U_ZERO_ERROR;
972 RuleBasedTransliterator t("<ID>", DATA[i], status);
973 if (U_FAILURE(status)) {
974 errln("FAIL: RBT constructor");
975 } else {
976 expect(t, DATA[i+1], DATA[i+2]);
977 }
978 }
979 }
980
981 /**
982 * Confirm that the contextStart, contextLimit, start, and limit
983 * behave correctly. J474.
984 */
985 void TransliteratorTest::TestPositionHandling(void) {
986 // Array of 3n items
987 // Each item is <rules>, <input>, <expected output>
988 const char* DATA[] = {
989 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
990 "xtat txtb", // pos 0,9,0,9
991 "xTTaSS TTxUUb",
992
993 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
994 "xtat txtb", // pos 2,9,3,8
995 "xtaSS TTxUUb",
996
997 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
998 "xtat txtb", // pos 3,8,3,8
999 "xtaTT TTxTTb",
1000 };
1001
1002 // Array of 4n positions -- these go with the DATA array
1003 // They are: contextStart, contextLimit, start, limit
1004 int32_t POS[] = {
1005 0, 9, 0, 9,
1006 2, 9, 3, 8,
1007 3, 8, 3, 8,
1008 };
1009
1010 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1011 for (int32_t i=0; i<n; i++) {
1012 UErrorCode status = U_ZERO_ERROR;
1013 Transliterator *t = new RuleBasedTransliterator("<ID>",
1014 DATA[3*i], status);
1015 if (U_FAILURE(status)) {
1016 delete t;
1017 errln("FAIL: RBT constructor");
1018 return;
1019 }
1020 UTransPosition pos;
1021 pos.contextStart= POS[4*i];
1022 pos.contextLimit = POS[4*i+1];
1023 pos.start = POS[4*i+2];
1024 pos.limit = POS[4*i+3];
1025 UnicodeString rsource(DATA[3*i+1]);
1026 t->transliterate(rsource, pos, status);
1027 if (U_FAILURE(status)) {
1028 delete t;
1029 errln("FAIL: transliterate");
1030 return;
1031 }
1032 t->finishTransliteration(rsource, pos);
1033 expectAux(DATA[3*i],
1034 DATA[3*i+1],
1035 rsource,
1036 DATA[3*i+2]);
1037 delete t;
1038 }
1039 }
1040
1041 /**
1042 * Test the Hiragana-Katakana transliterator.
1043 */
1044 void TransliteratorTest::TestHiraganaKatakana(void) {
1045 UParseError parseError;
1046 UErrorCode status = U_ZERO_ERROR;
1047 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1048 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1049 if (hk == 0 || kh == 0) {
1050 errln("FAIL: createInstance failed");
1051 delete hk;
1052 delete kh;
1053 return;
1054 }
1055
1056 // Array of 3n items
1057 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1058 const char* DATA[] = {
1059 "both",
1060 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1061 "\\u30A2\\u30F8\\u30F2\\u30B0",
1062
1063 "kh",
1064 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1065 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1066 };
1067 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1068
1069 for (int32_t i=0; i<DATA_length; i+=3) {
1070 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1071 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1072 switch (*DATA[i]) {
1073 case 0x68: //'h': // Hiragana-Katakana
1074 expect(*hk, h, k);
1075 break;
1076 case 0x6B: //'k': // Katakana-Hiragana
1077 expect(*kh, k, h);
1078 break;
1079 case 0x62: //'b': // both
1080 expect(*hk, h, k);
1081 expect(*kh, k, h);
1082 break;
1083 }
1084 }
1085 delete hk;
1086 delete kh;
1087 }
1088
1089 /**
1090 * Test cloning / copy constructor of RBT.
1091 */
1092 void TransliteratorTest::TestCopyJ476(void) {
1093 // The real test here is what happens when the destructors are
1094 // called. So we let one object get destructed, and check to
1095 // see that its copy still works.
1096 RuleBasedTransliterator *t2 = 0;
1097 {
1098 UErrorCode status = U_ZERO_ERROR;
1099 RuleBasedTransliterator t1("t1", "a>A;b>B;", status);
1100 if (U_FAILURE(status)) {
1101 errln("FAIL: RBT constructor");
1102 return;
1103 }
1104 t2 = new RuleBasedTransliterator(t1);
1105 expect(t1, "abc", "ABc");
1106 }
1107 expect(*t2, "abc", "ABc");
1108 delete t2;
1109 }
1110
1111 /**
1112 * Test inter-Indic transliterators. These are composed.
1113 * ICU4C Jitterbug 483.
1114 */
1115 void TransliteratorTest::TestInterIndic(void) {
1116 UnicodeString ID("Devanagari-Gujarati", "");
1117 UErrorCode status = U_ZERO_ERROR;
1118 UParseError parseError;
1119 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1120 if (dg == 0) {
1121 errln("FAIL: createInstance(" + ID + ") returned NULL");
1122 return;
1123 }
1124 UnicodeString id = dg->getID();
1125 if (id != ID) {
1126 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1127 }
1128 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1129 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1130 expect(*dg, dev, guj);
1131 delete dg;
1132 }
1133
1134 /**
1135 * Test filter syntax in IDs. (J918)
1136 */
1137 void TransliteratorTest::TestFilterIDs(void) {
1138 // Array of 3n strings:
1139 // <id>, <inverse id>, <input>, <expected output>
1140 const char* DATA[] = {
1141 "[aeiou]Any-Hex", // ID
1142 "[aeiou]Hex-Any", // expected inverse ID
1143 "quizzical", // src
1144 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1145
1146 "[aeiou]Any-Hex;[^5]Hex-Any",
1147 "[^5]Any-Hex;[aeiou]Hex-Any",
1148 "quizzical",
1149 "q\\u0075izzical",
1150
1151 "[abc]Null",
1152 "[abc]Null",
1153 "xyz",
1154 "xyz",
1155 };
1156 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1157
1158 for (int i=0; i<DATA_length; i+=4) {
1159 UnicodeString ID(DATA[i], "");
1160 UnicodeString uID(DATA[i+1], "");
1161 UnicodeString data2(DATA[i+2], "");
1162 UnicodeString data3(DATA[i+3], "");
1163 UParseError parseError;
1164 UErrorCode status = U_ZERO_ERROR;
1165 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1166 if (t == 0) {
1167 errln("FAIL: createInstance(" + ID + ") returned NULL");
1168 return;
1169 }
1170 expect(*t, data2, data3);
1171
1172 // Check the ID
1173 if (ID != t->getID()) {
1174 errln("FAIL: createInstance(" + ID + ").getID() => " +
1175 t->getID());
1176 }
1177
1178 // Check the inverse
1179 Transliterator *u = t->createInverse(status);
1180 if (u == 0) {
1181 errln("FAIL: " + ID + ".createInverse() returned NULL");
1182 } else if (u->getID() != uID) {
1183 errln("FAIL: " + ID + ".createInverse().getID() => " +
1184 u->getID() + ", expected " + uID);
1185 }
1186
1187 delete t;
1188 delete u;
1189 }
1190 }
1191
1192 /**
1193 * Test the case mapping transliterators.
1194 */
1195 void TransliteratorTest::TestCaseMap(void) {
1196 UParseError parseError;
1197 UErrorCode status = U_ZERO_ERROR;
1198 Transliterator* toUpper =
1199 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1200 Transliterator* toLower =
1201 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1202 Transliterator* toTitle =
1203 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1204 if (toUpper==0 || toLower==0 || toTitle==0) {
1205 errln("FAIL: createInstance returned NULL");
1206 delete toUpper;
1207 delete toLower;
1208 delete toTitle;
1209 return;
1210 }
1211
1212 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1213 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1214 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1215 "the quick brown foX jumped over the lazY dogs.");
1216 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1217 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1218
1219 delete toUpper;
1220 delete toLower;
1221 delete toTitle;
1222 }
1223
1224 /**
1225 * Test the name mapping transliterators.
1226 */
1227 void TransliteratorTest::TestNameMap(void) {
1228 UParseError parseError;
1229 UErrorCode status = U_ZERO_ERROR;
1230 Transliterator* uni2name =
1231 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1232 Transliterator* name2uni =
1233 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1234 if (uni2name==0 || name2uni==0) {
1235 errln("FAIL: createInstance returned NULL");
1236 delete uni2name;
1237 delete name2uni;
1238 return;
1239 }
1240
1241 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1242 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1243 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1244 expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
1245 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1246
1247 delete uni2name;
1248 delete name2uni;
1249
1250 // round trip
1251 Transliterator* t =
1252 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1253 if (t==0) {
1254 errln("FAIL: createInstance returned NULL");
1255 delete t;
1256 return;
1257 }
1258
1259 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1260 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1261 expect(*t, s, s);
1262 delete t;
1263 }
1264
1265 /**
1266 * Test liberalized ID syntax. 1006c
1267 */
1268 void TransliteratorTest::TestLiberalizedID(void) {
1269 // Some test cases have an expected getID() value of NULL. This
1270 // means I have disabled the test case for now. This stuff is
1271 // still under development, and I haven't decided whether to make
1272 // getID() return canonical case yet. It will all get rewritten
1273 // with the move to Source-Target/Variant IDs anyway. [aliu]
1274 const char* DATA[] = {
1275 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1276 " Null ", "Null", "whitespace",
1277 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1278 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1279 };
1280 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1281 UParseError parseError;
1282 UErrorCode status= U_ZERO_ERROR;
1283 for (int32_t i=0; i<DATA_length; i+=3) {
1284 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1285 if (t == 0) {
1286 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1287 " cannot create ID \"" + DATA[i] + "\"");
1288 } else {
1289 UnicodeString exp;
1290 if (DATA[i+1]) {
1291 exp = UnicodeString(DATA[i+1], "");
1292 }
1293 // Don't worry about getID() if the expected char*
1294 // is NULL -- see above.
1295 if (exp.length() == 0 || exp == t->getID()) {
1296 logln(UnicodeString("Ok: ") + DATA[i+2] +
1297 " create ID \"" + DATA[i] + "\" => \"" +
1298 exp + "\"");
1299 } else {
1300 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1301 " create ID \"" + DATA[i] + "\" => \"" +
1302 t->getID() + "\", exp \"" + exp + "\"");
1303 }
1304 delete t;
1305 }
1306 }
1307 }
1308
1309 /* test for Jitterbug 912 */
1310 void TransliteratorTest::TestCreateInstance(){
1311 const char* FORWARD = "F";
1312 const char* REVERSE = "R";
1313 const char* DATA[] = {
1314 // Column 1: id
1315 // Column 2: direction
1316 // Column 3: expected ID, or "" if expect failure
1317 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1318
1319 // JB#2689: bad compound causes crash
1320 "InvalidSource-InvalidTarget", FORWARD, "",
1321 "InvalidSource-InvalidTarget", REVERSE, "",
1322 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1323 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1324 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1325 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1326
1327 NULL
1328 };
1329
1330 for (int32_t i=0; DATA[i]; i+=3) {
1331 UParseError err;
1332 UErrorCode ec = U_ZERO_ERROR;
1333 UnicodeString id(DATA[i]);
1334 UTransDirection dir = (DATA[i+1]==FORWARD)?
1335 UTRANS_FORWARD:UTRANS_REVERSE;
1336 UnicodeString expID(DATA[i+2]);
1337 Transliterator* t =
1338 Transliterator::createInstance(id,dir,err,ec);
1339 UnicodeString newID;
1340 if (t) {
1341 newID = t->getID();
1342 }
1343 UBool ok = (newID == expID);
1344 if (!t) {
1345 newID = u_errorName(ec);
1346 }
1347 if (ok) {
1348 logln((UnicodeString)"Ok: createInstance(" +
1349 id + "," + DATA[i+1] + ") => " + newID);
1350 } else {
1351 errln((UnicodeString)"FAIL: createInstance(" +
1352 id + "," + DATA[i+1] + ") => " + newID +
1353 ", expected " + expID);
1354 }
1355 delete t;
1356 }
1357 }
1358
1359 /**
1360 * Test the normalization transliterator.
1361 */
1362 void TransliteratorTest::TestNormalizationTransliterator() {
1363 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1364 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1365 const char* CANON[] = {
1366 // Input Decomposed Composed
1367 "cat", "cat", "cat" ,
1368 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1369
1370 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1371 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1372
1373 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1374 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1375 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1376
1377 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1378 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1379
1380 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1381 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1382 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1383
1384 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1385 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1386
1387 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1388 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1389
1390 "Henry IV", "Henry IV", "Henry IV" ,
1391 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1392
1393 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1394 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1395 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1396 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1397 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1398
1399 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1400 0 // end
1401 };
1402
1403 const char* COMPAT[] = {
1404 // Input Decomposed Composed
1405 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1406
1407 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1408 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1409
1410 "Henry IV", "Henry IV", "Henry IV" ,
1411 "Henry \\u2163", "Henry IV", "Henry IV" ,
1412
1413 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1414 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1415
1416 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1417 0 // end
1418 };
1419
1420 int32_t i;
1421 UParseError parseError;
1422 UErrorCode status = U_ZERO_ERROR;
1423 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1424 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1425 if (!NFD || !NFC) {
1426 errln("FAIL: createInstance failed");
1427 delete NFD;
1428 delete NFC;
1429 return;
1430 }
1431 for (i=0; CANON[i]; i+=3) {
1432 UnicodeString in = CharsToUnicodeString(CANON[i]);
1433 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1434 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1435 expect(*NFD, in, expd);
1436 expect(*NFC, in, expc);
1437 }
1438 delete NFD;
1439 delete NFC;
1440
1441 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1442 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1443 if (!NFKD || !NFKC) {
1444 errln("FAIL: createInstance failed");
1445 delete NFKD;
1446 delete NFKC;
1447 return;
1448 }
1449 for (i=0; COMPAT[i]; i+=3) {
1450 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1451 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1452 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1453 expect(*NFKD, in, expkd);
1454 expect(*NFKC, in, expkc);
1455 }
1456 delete NFKD;
1457 delete NFKC;
1458
1459 UParseError pe;
1460 status = U_ZERO_ERROR;
1461 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1462 UTRANS_FORWARD,
1463 pe, status);
1464 if (t == 0) {
1465 errln("FAIL: createInstance failed");
1466 }
1467 expect(*t, CharsToUnicodeString("\\u010dx"),
1468 CharsToUnicodeString("c\\u030C"));
1469 delete t;
1470 }
1471
1472 /**
1473 * Test compound RBT rules.
1474 */
1475 void TransliteratorTest::TestCompoundRBT(void) {
1476 // Careful with spacing and ';' here: Phrase this exactly
1477 // as toRules() is going to return it. If toRules() changes
1478 // with regard to spacing or ';', then adjust this string.
1479 UnicodeString rule("::Hex-Any;\n"
1480 "::Any-Lower;\n"
1481 "a > '.A.';\n"
1482 "b > '.B.';\n"
1483 "::[^t]Any-Upper;", "");
1484 UParseError parseError;
1485 UErrorCode status = U_ZERO_ERROR;
1486 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1487 if (t == 0) {
1488 errln("FAIL: createFromRules failed");
1489 return;
1490 }
1491 expect(*t, "\\u0043at in the hat, bat on the mat",
1492 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1493 UnicodeString r;
1494 t->toRules(r, TRUE);
1495 if (r == rule) {
1496 logln((UnicodeString)"OK: toRules() => " + r);
1497 } else {
1498 errln((UnicodeString)"FAIL: toRules() => " + r +
1499 ", expected " + rule);
1500 }
1501 delete t;
1502
1503 // Now test toRules
1504 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1505 if (t == 0) {
1506 errln("FAIL: createInstance failed");
1507 return;
1508 }
1509 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1510 t->toRules(r, TRUE);
1511 if (r != exp) {
1512 errln((UnicodeString)"FAIL: toRules() => " + r +
1513 ", expected " + exp);
1514 } else {
1515 logln((UnicodeString)"OK: toRules() => " + r);
1516 }
1517 delete t;
1518
1519 // Round trip the result of toRules
1520 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1521 if (t == 0) {
1522 errln("FAIL: createFromRules #2 failed");
1523 return;
1524 } else {
1525 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1526 }
1527
1528 // Test toRules again
1529 t->toRules(r, TRUE);
1530 if (r != exp) {
1531 errln((UnicodeString)"FAIL: toRules() => " + r +
1532 ", expected " + exp);
1533 } else {
1534 logln((UnicodeString)"OK: toRules() => " + r);
1535 }
1536
1537 delete t;
1538
1539 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1540 // to what the regenerated ID will look like.
1541 UnicodeString id("Upper(Lower);(NFKC)", "");
1542 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1543 if (t == 0) {
1544 errln("FAIL: createInstance #2 failed");
1545 return;
1546 }
1547 if (t->getID() == id) {
1548 logln((UnicodeString)"OK: created " + id);
1549 } else {
1550 errln((UnicodeString)"FAIL: createInstance(" + id +
1551 ").getID() => " + t->getID());
1552 }
1553
1554 Transliterator *u = t->createInverse(status);
1555 if (u == 0) {
1556 errln("FAIL: createInverse failed");
1557 delete t;
1558 return;
1559 }
1560 exp = "NFKC();Lower(Upper)";
1561 if (u->getID() == exp) {
1562 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1563 u->getID());
1564 } else {
1565 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1566 u->getID());
1567 }
1568 delete t;
1569 delete u;
1570 }
1571
1572 /**
 * Compound filter semantics were originally not implemented
1574 * correctly. Originally, each component filter f(i) is replaced by
1575 * f'(i) = f(i) && g, where g is the filter for the compound
1576 * transliterator.
1577 *
1578 * From Mark:
1579 *
 * Suppose I have a transliterator X. Internally X is
1581 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1582 *
1583 * The compound should convert all greek characters (through latin) to
1584 * cyrillic, then lowercase the result. The filter should say "don't
1585 * touch 'A' in the original". But because an intermediate result
1586 * happens to go through "A", the Greek Alpha gets hung up.
1587 */
1588 void TransliteratorTest::TestCompoundFilter(void) {
1589 UParseError parseError;
1590 UErrorCode status = U_ZERO_ERROR;
1591 Transliterator *t = Transliterator::createInstance
1592 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1593 if (t == 0) {
1594 errln("FAIL: createInstance failed");
1595 return;
1596 }
1597 t->adoptFilter(new UnicodeSet("[^A]", status));
1598 if (U_FAILURE(status)) {
1599 errln("FAIL: UnicodeSet ct failed");
1600 delete t;
1601 return;
1602 }
1603
1604 // Only the 'A' at index 1 should remain unchanged
1605 expect(*t,
1606 CharsToUnicodeString("BA\\u039A\\u0391"),
1607 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1608 delete t;
1609 }
1610
1611 void TransliteratorTest::TestRemove(void) {
1612 UParseError parseError;
1613 UErrorCode status = U_ZERO_ERROR;
1614 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1615 if (t == 0) {
1616 errln("FAIL: createInstance failed");
1617 return;
1618 }
1619
1620 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1621
1622 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1623 // duplicating the filter
1624 Transliterator* t2 = t->clone();
1625 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1626
1627 delete t;
1628 delete t2;
1629 }
1630
1631 void TransliteratorTest::TestToRules(void) {
1632 const char* RBT = "rbt";
1633 const char* SET = "set";
1634 static const char* DATA[] = {
1635 RBT,
1636 "$a=\\u4E61; [$a] > A;",
1637 "[\\u4E61] > A;",
1638
1639 RBT,
1640 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1641 "[[:Zs:][:Zl:]]{a} > A;",
1642
1643 SET,
1644 "[[:Zs:][:Zl:]]",
1645 "[[:Zs:][:Zl:]]",
1646
1647 SET,
1648 "[:Ps:]",
1649 "[:Ps:]",
1650
1651 SET,
1652 "[:L:]",
1653 "[:L:]",
1654
1655 SET,
1656 "[[:L:]-[A]]",
1657 "[[:L:]-[A]]",
1658
1659 SET,
1660 "[~[:Lu:][:Ll:]]",
1661 "[~[:Lu:][:Ll:]]",
1662
1663 SET,
1664 "[~[a-z]]",
1665 "[~[a-z]]",
1666
1667 RBT,
1668 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1669 "[^[:Zs:]]{a} > A;",
1670
1671 RBT,
1672 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1673 "[[a-z]-[:Zs:]]{a} > A;",
1674
1675 RBT,
1676 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1677 "[[:Zs:]&[a-z]]{a} > A;",
1678
1679 RBT,
1680 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1681 "[x[:Zs:]]{a} > A;",
1682
1683 RBT,
1684 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1685 "$macron = \\u0304 ;"
1686 "$evowel = [aeiouyAEIOUY] ;"
1687 "$iotasub = \\u0345 ;"
1688 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1689 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1690
1691 RBT,
1692 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1693 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1694 };
1695 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1696
1697 for (int32_t d=0; d < DATA_length; d+=3) {
1698 if (DATA[d] == RBT) {
1699 // Transliterator test
1700 UParseError parseError;
1701 UErrorCode status = U_ZERO_ERROR;
1702 Transliterator *t = Transliterator::createFromRules("ID",
1703 DATA[d+1], UTRANS_FORWARD, parseError, status);
1704 if (t == 0) {
1705 errln("FAIL: createFromRules failed");
1706 return;
1707 }
1708 UnicodeString rules, escapedRules;
1709 t->toRules(rules, FALSE);
1710 t->toRules(escapedRules, TRUE);
1711 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1712 UnicodeString expEscapedRules(DATA[d+2]);
1713 if (rules == expRules) {
1714 logln((UnicodeString)"Ok: " + DATA[d+1] +
1715 " => " + rules);
1716 } else {
1717 errln((UnicodeString)"FAIL: " + DATA[d+1] +
1718 " => " + rules + ", exp " + expRules);
1719 }
1720 if (escapedRules == expEscapedRules) {
1721 logln((UnicodeString)"Ok: " + DATA[d+1] +
1722 " => " + escapedRules);
1723 } else {
1724 errln((UnicodeString)"FAIL: " + DATA[d+1] +
1725 " => " + escapedRules + ", exp " + expEscapedRules);
1726 }
1727 delete t;
1728
1729 } else {
1730 // UnicodeSet test
1731 UErrorCode status = U_ZERO_ERROR;
1732 UnicodeString pat(DATA[d+1]);
1733 UnicodeString expToPat(DATA[d+2]);
1734 UnicodeSet set(pat, status);
1735 if (U_FAILURE(status)) {
1736 errln("FAIL: UnicodeSet ct failed");
1737 return;
1738 }
1739 // Adjust spacing etc. as necessary.
1740 UnicodeString toPat;
1741 set.toPattern(toPat);
1742 if (expToPat == toPat) {
1743 logln((UnicodeString)"Ok: " + pat +
1744 " => " + toPat);
1745 } else {
1746 errln((UnicodeString)"FAIL: " + pat +
1747 " => " + prettify(toPat, TRUE) +
1748 ", exp " + prettify(pat, TRUE));
1749 }
1750 }
1751 }
1752 }
1753
1754 void TransliteratorTest::TestContext() {
1755 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1756 expect("de > x; {d}e > y;",
1757 "de",
1758 "ye",
1759 &pos);
1760
1761 expect("ab{c} > z;",
1762 "xadabdabcy",
1763 "xadabdabzy");
1764 }
1765
1766 void TransliteratorTest::TestSupplemental() {
1767
1768 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1769 "a > $a; $s > i;"),
1770 CharsToUnicodeString("ab\\U0001030Fx"),
1771 CharsToUnicodeString("\\U00010300bix"));
1772
1773 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1774 "$b=[A-Z\\U00010400-\\U0001044D];"
1775 "($a)($b) > $2 $1;"),
1776 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1777 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1778
1779 // k|ax\\U00010300xm
1780
1781 // k|a\\U00010400\\U00010300xm
1782 // ky|\\U00010400\\U00010300xm
1783 // ky\\U00010400|\\U00010300xm
1784
1785 // ky\\U00010400|\\U00010300\\U00010400m
1786 // ky\\U00010400y|\\U00010400m
1787 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1788 "$a {x} > | @ \\U00010400;"
1789 "{$a} [^\\u0000-\\uFFFF] > y;"),
1790 CharsToUnicodeString("kax\\U00010300xm"),
1791 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1792
1793 expectT("Any-Name",
1794 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1795 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1796
1797 expectT("Any-Hex/Unicode",
1798 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1799 "U+10330U+10FF00U+E0061U+00A0");
1800
1801 expectT("Any-Hex/C",
1802 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1803 "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1804
1805 expectT("Any-Hex/Perl",
1806 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1807 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1808
1809 expectT("Any-Hex/Java",
1810 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1811 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1812
1813 expectT("Any-Hex/XML",
1814 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1815 "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1816
1817 expectT("Any-Hex/XML10",
1818 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1819 "&#66352;&#1113856;&#917601;&#160;");
1820
1821 expectT("[\\U000E0000-\\U000E0FFF] Remove",
1822 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1823 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1824 }
1825
1826 void TransliteratorTest::TestQuantifier() {
1827
1828 // Make sure @ in a quantified anteContext works
1829 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1830 "AAAAAb",
1831 "aaa(aac)");
1832
1833 // Make sure @ in a quantified postContext works
1834 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1835 "baaaaa",
1836 "caa(aaa)");
1837
1838 // Make sure @ in a quantified postContext with seg ref works
1839 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1840 "baaaaa",
1841 "baa(aaa)");
1842
1843 // Make sure @ past ante context doesn't enter ante context
1844 UTransPosition pos = {0, 5, 3, 5};
1845 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1846 "xxxab",
1847 "xxx(ac)",
1848 &pos);
1849
1850 // Make sure @ past post context doesn't pass limit
1851 UTransPosition pos2 = {0, 4, 0, 2};
1852 expect("{b} a+ > c @@ |; x > y; a > A;",
1853 "baxx",
1854 "caxx",
1855 &pos2);
1856
1857 // Make sure @ past post context doesn't enter post context
1858 expect("{b} a+ > c @@ |; x > y; a > A;",
1859 "baxx",
1860 "cayy");
1861
1862 expect("(ab)? c > d;",
1863 "c abc ababc",
1864 "d d abd");
1865
1866 // NOTE: The (ab)+ when referenced just yields a single "ab",
1867 // not the full sequence of them. This accords with perl behavior.
1868 expect("(ab)+ {x} > '(' $1 ')';",
1869 "x abx ababxy",
1870 "x ab(ab) abab(ab)y");
1871
1872 expect("b+ > x;",
1873 "ac abc abbc abbbc",
1874 "ac axc axc axc");
1875
1876 expect("[abc]+ > x;",
1877 "qac abrc abbcs abtbbc",
1878 "qx xrx xs xtx");
1879
1880 expect("q{(ab)+} > x;",
1881 "qa qab qaba qababc qaba",
1882 "qa qx qxa qxc qxa");
1883
1884 expect("q(ab)* > x;",
1885 "qa qab qaba qababc",
1886 "xa x xa xc");
1887
1888 // NOTE: The (ab)+ when referenced just yields a single "ab",
1889 // not the full sequence of them. This accords with perl behavior.
1890 expect("q(ab)* > '(' $1 ')';",
1891 "qa qab qaba qababc",
1892 "()a (ab) (ab)a (ab)c");
1893
1894 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1895 // quoted string
1896 expect("'ab'+ > x;",
1897 "bb ab ababb",
1898 "bb x xb");
1899
1900 // $foo+ and $foo* -- the quantifier should apply to the entire
1901 // variable reference
1902 expect("$var = ab; $var+ > x;",
1903 "bb ab ababb",
1904 "bb x xb");
1905 }
1906
1907 class TestTrans : public NullTransliterator {
1908 public:
1909 TestTrans(const UnicodeString& id) {
1910 setID(id);
1911 }
1912 };
1913
1914 /**
1915 * Test Source-Target/Variant.
1916 */
1917 void TransliteratorTest::TestSTV(void) {
1918 int32_t ns = Transliterator::countAvailableSources();
1919 if (ns < 0 || ns > 255) {
1920 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1921 return;
1922 }
1923 int32_t i, j;
1924 for (i=0; i<ns; ++i) {
1925 UnicodeString source;
1926 Transliterator::getAvailableSource(i, source);
1927 logln((UnicodeString)"" + i + ": " + source);
1928 if (source.length() == 0) {
1929 errln("FAIL: empty source");
1930 continue;
1931 }
1932 int32_t nt = Transliterator::countAvailableTargets(source);
1933 if (nt < 0 || nt > 255) {
1934 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1935 continue;
1936 }
1937 for (int32_t j=0; j<nt; ++j) {
1938 UnicodeString target;
1939 Transliterator::getAvailableTarget(j, source, target);
1940 logln((UnicodeString)" " + j + ": " + target);
1941 if (target.length() == 0) {
1942 errln("FAIL: empty target");
1943 continue;
1944 }
1945 int32_t nv = Transliterator::countAvailableVariants(source, target);
1946 if (nv < 0 || nv > 255) {
1947 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1948 continue;
1949 }
1950 for (int32_t k=0; k<nv; ++k) {
1951 UnicodeString variant;
1952 Transliterator::getAvailableVariant(k, source, target, variant);
1953 if (variant.length() == 0) {
1954 logln((UnicodeString)" " + k + ": <empty>");
1955 } else {
1956 logln((UnicodeString)" " + k + ": " + variant);
1957 }
1958 }
1959 }
1960 }
1961
1962 // Test registration
1963 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1964 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1965 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
1966 for (i=0; i<3; ++i) {
1967 Transliterator *t = new TestTrans(IDS[i]);
1968 if (t == 0) {
1969 errln("FAIL: out of memory");
1970 return;
1971 }
1972 if (t->getID() != IDS[i]) {
1973 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
1974 delete t;
1975 return;
1976 }
1977 Transliterator::registerInstance(t);
1978 UErrorCode status = U_ZERO_ERROR;
1979 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
1980 if (t == NULL) {
1981 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
1982 IDS[i]);
1983 } else {
1984 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
1985 IDS[i]);
1986 delete t;
1987 }
1988 Transliterator::unregister(IDS[i]);
1989 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
1990 if (t != NULL) {
1991 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
1992 IDS[i]);
1993 delete t;
1994 }
1995 }
1996
1997 // Make sure getAvailable API reflects removal
1998 int32_t n = Transliterator::countAvailableIDs();
1999 for (i=0; i<n; ++i) {
2000 UnicodeString id = Transliterator::getAvailableID(i);
2001 for (j=0; j<3; ++j) {
2002 if (id.caseCompare(FULL_IDS[j],0)==0) {
2003 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2004 }
2005 }
2006 }
2007 n = Transliterator::countAvailableTargets("Any");
2008 for (i=0; i<n; ++i) {
2009 UnicodeString t;
2010 Transliterator::getAvailableTarget(i, "Any", t);
2011 if (t.caseCompare(IDS[0],0)==0) {
2012 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2013 }
2014 }
2015 n = Transliterator::countAvailableSources();
2016 for (i=0; i<n; ++i) {
2017 UnicodeString s;
2018 Transliterator::getAvailableSource(i, s);
2019 for (j=0; j<3; ++j) {
2020 if (SOURCES[j] == NULL) continue;
2021 if (s.caseCompare(SOURCES[j],0)==0) {
2022 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2023 }
2024 }
2025 }
2026 }
2027
2028 /**
2029 * Test inverse of Greek-Latin; Title()
2030 */
2031 void TransliteratorTest::TestCompoundInverse(void) {
2032 UParseError parseError;
2033 UErrorCode status = U_ZERO_ERROR;
2034 Transliterator *t = Transliterator::createInstance
2035 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2036 if (t == 0) {
2037 errln("FAIL: createInstance");
2038 return;
2039 }
2040 UnicodeString exp("(Title);Latin-Greek");
2041 if (t->getID() == exp) {
2042 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2043 t->getID());
2044 } else {
2045 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2046 t->getID() + "\", expected \"" + exp + "\"");
2047 }
2048 delete t;
2049 }
2050
2051 /**
2052 * Test NFD chaining with RBT
2053 */
2054 void TransliteratorTest::TestNFDChainRBT() {
2055 UParseError pe;
2056 UErrorCode ec = U_ZERO_ERROR;
2057 Transliterator* t = Transliterator::createFromRules(
2058 "TEST", "::NFD; aa > Q; a > q;",
2059 UTRANS_FORWARD, pe, ec);
2060 if (t == NULL || U_FAILURE(ec)) {
2061 errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2062 return;
2063 }
2064 expect(*t, "aa", "Q");
2065 delete t;
2066
2067 // TEMPORARY TESTS -- BEING DEBUGGED
2068 //=- UnicodeString s, s2;
2069 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2070 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2071 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2072 //=- expect(*t, s, s2);
2073 //=- delete t;
2074 //=-
2075 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2076 //=- expect(*t, s2, s);
2077 //=- delete t;
2078 //=-
2079 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2080 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2081 //=- expect(*t, s, s);
2082 //=- delete t;
2083
2084 // const char* source[] = {
2085 // /*
2086 // "\\u015Br\\u012Bmad",
2087 // "bhagavadg\\u012Bt\\u0101",
2088 // "adhy\\u0101ya",
2089 // "arjuna",
2090 // "vi\\u1E63\\u0101da",
2091 // "y\\u014Dga",
2092 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2093 // "uv\\u0101cr\\u0325",
2094 // */
2095 // "rmk\\u1E63\\u0113t",
2096 // //"dharmak\\u1E63\\u0113tr\\u0113",
2097 // /*
2098 // "kuruk\\u1E63\\u0113tr\\u0113",
2099 // "samav\\u0113t\\u0101",
2100 // "yuyutsava-\\u1E25",
2101 // "m\\u0101mak\\u0101-\\u1E25",
2102 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2103 // "kimakurvata",
2104 // "san\\u0304java",
2105 // */
2106 //
2107 // 0
2108 // };
2109 // const char* expected[] = {
2110 // /*
2111 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2112 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2113 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2114 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2115 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2116 // "\\u092f\\u094b\\u0917",
2117 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2118 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2119 // */
2120 // "\\u0927",
2121 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2122 // /*
2123 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2124 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2125 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2126 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2127 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2128 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2129 // "\\u0938\\u0902\\u091c\\u0935",
2130 // */
2131 // 0
2132 // };
2133 // UErrorCode status = U_ZERO_ERROR;
2134 // UParseError parseError;
2135 // UnicodeString message;
2136 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2137 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2138 // if(U_FAILURE(status)){
2139 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2140 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2141 // delete latinToDevToLatin;
2142 // delete devToLatinToDev;
2143 // return;
2144 // }
2145 // UnicodeString gotResult;
2146 // for(int i= 0; source[i] != 0; i++){
2147 // gotResult = source[i];
2148 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2149 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2150 // }
2151 // delete latinToDevToLatin;
2152 // delete devToLatinToDev;
2153 }
2154
2155 /**
2156 * Inverse of "Null" should be "Null". (J21)
2157 */
2158 void TransliteratorTest::TestNullInverse() {
2159 UParseError pe;
2160 UErrorCode ec = U_ZERO_ERROR;
2161 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2162 if (t == 0 || U_FAILURE(ec)) {
2163 errln("FAIL: createInstance");
2164 return;
2165 }
2166 Transliterator *u = t->createInverse(ec);
2167 if (u == 0 || U_FAILURE(ec)) {
2168 errln("FAIL: createInverse");
2169 delete t;
2170 return;
2171 }
2172 if (u->getID() != "Null") {
2173 errln("FAIL: Inverse of Null should be Null");
2174 }
2175 delete t;
2176 delete u;
2177 }
2178
2179 /**
2180 * Check ID of inverse of alias. (J22)
2181 */
2182 void TransliteratorTest::TestAliasInverseID() {
2183 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2184 UParseError pe;
2185 UErrorCode ec = U_ZERO_ERROR;
2186 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2187 if (t == 0 || U_FAILURE(ec)) {
2188 errln("FAIL: createInstance");
2189 return;
2190 }
2191 Transliterator *u = t->createInverse(ec);
2192 if (u == 0 || U_FAILURE(ec)) {
2193 errln("FAIL: createInverse");
2194 delete t;
2195 return;
2196 }
2197 UnicodeString exp = "Hangul-Latin";
2198 UnicodeString got = u->getID();
2199 if (got != exp) {
2200 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2201 ", expected " + exp);
2202 }
2203 delete t;
2204 delete u;
2205 }
2206
2207 /**
2208 * Test IDs of inverses of compound transliterators. (J20)
2209 */
2210 void TransliteratorTest::TestCompoundInverseID() {
2211 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2212 UParseError pe;
2213 UErrorCode ec = U_ZERO_ERROR;
2214 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2215 if (t == 0 || U_FAILURE(ec)) {
2216 errln("FAIL: createInstance");
2217 return;
2218 }
2219 Transliterator *u = t->createInverse(ec);
2220 if (u == 0 || U_FAILURE(ec)) {
2221 errln("FAIL: createInverse");
2222 delete t;
2223 return;
2224 }
2225 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2226 UnicodeString got = u->getID();
2227 if (got != exp) {
2228 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2229 ", expected " + exp);
2230 }
2231 delete t;
2232 delete u;
2233 }
2234
2235 /**
2236 * Test undefined variable.
2237
2238 */
2239 void TransliteratorTest::TestUndefinedVariable() {
2240 UnicodeString rule = "$initial } a <> \\u1161;";
2241 UParseError pe;
2242 UErrorCode ec = U_ZERO_ERROR;
2243 Transliterator *t = new RuleBasedTransliterator("<ID>", rule, UTRANS_FORWARD, 0, pe, ec);
2244 delete t;
2245 if (U_FAILURE(ec)) {
2246 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2247 u_errorName(ec));
2248 return;
2249 }
2250 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2251 u_errorName(ec));
2252 }
2253
2254 /**
2255 * Test empty context.
2256 */
2257 void TransliteratorTest::TestEmptyContext() {
2258 expect(" { a } > b;", "xay a ", "xby b ");
2259 }
2260
2261 /**
2262 * Test compound filter ID syntax
2263 */
2264 void TransliteratorTest::TestCompoundFilterID(void) {
2265 static const char* DATA[] = {
2266 // Col. 1 = ID or rule set (latter must start with #)
2267
2268 // = columns > 1 are null if expect col. 1 to be illegal =
2269
2270 // Col. 2 = direction, "F..." or "R..."
2271 // Col. 3 = source string
2272 // Col. 4 = exp result
2273
2274 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2275 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2276 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2277 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2278 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2279 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2280 NULL,
2281 };
2282
2283 for (int32_t i=0; DATA[i]; i+=4) {
2284 UnicodeString id = CharsToUnicodeString(DATA[i]);
2285 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2286 UTRANS_REVERSE : UTRANS_FORWARD;
2287 UnicodeString source;
2288 UnicodeString exp;
2289 if (DATA[i+2] != NULL) {
2290 source = CharsToUnicodeString(DATA[i+2]);
2291 exp = CharsToUnicodeString(DATA[i+3]);
2292 }
2293 UBool expOk = (DATA[i+1] != NULL);
2294 Transliterator* t = NULL;
2295 UParseError pe;
2296 UErrorCode ec = U_ZERO_ERROR;
2297 if (id.charAt(0) == 0x23/*#*/) {
2298 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2299 } else {
2300 t = Transliterator::createInstance(id, direction, pe, ec);
2301 }
2302 UBool ok = (t != NULL && U_SUCCESS(ec));
2303 UnicodeString transID;
2304 if (t!=0) {
2305 transID = t->getID();
2306 }
2307 else {
2308 transID = UnicodeString("NULL", "");
2309 }
2310 if (ok == expOk) {
2311 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2312 u_errorName(ec));
2313 if (source.length() != 0) {
2314 expect(*t, source, exp);
2315 }
2316 delete t;
2317 } else {
2318 errln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2319 u_errorName(ec));
2320 }
2321 }
2322 }
2323
2324 /**
2325 * Test new property set syntax
2326 */
2327 void TransliteratorTest::TestPropertySet() {
2328 expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
2329 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2330 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2331 }
2332
2333 /**
2334 * Test various failure points of the new 2.0 engine.
2335 */
2336 void TransliteratorTest::TestNewEngine() {
2337 UParseError pe;
2338 UErrorCode ec = U_ZERO_ERROR;
2339 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2340 if (t == 0 || U_FAILURE(ec)) {
2341 errln("FAIL: createInstance Latin-Hiragana");
2342 return;
2343 }
2344 // Katakana should be untouched
2345 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2346 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2347
2348 delete t;
2349
2350 #if 1
2351 // This test will only work if Transliterator.ROLLBACK is
2352 // true. Otherwise, this test will fail, revealing a
2353 // limitation of global filters in incremental mode.
2354 Transliterator *a =
2355 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2356 Transliterator *A =
2357 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2358 if (U_FAILURE(ec)) {
2359 delete a;
2360 delete A;
2361 return;
2362 }
2363
2364 Transliterator* array[3];
2365 array[0] = a;
2366 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2367 array[2] = A;
2368 if (U_FAILURE(ec)) {
2369 errln("FAIL: createInstance NFD");
2370 delete a;
2371 delete A;
2372 delete array[1];
2373 return;
2374 }
2375
2376 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2377 if (U_FAILURE(ec)) {
2378 errln("FAIL: UnicodeSet constructor");
2379 delete a;
2380 delete A;
2381 delete array[1];
2382 delete t;
2383 return;
2384 }
2385
2386 expect(*t, "aAaA", "bAbA");
2387
2388 assertTrue("countElements", t->countElements() == 3);
2389 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2390 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2391 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2392 assertSuccess("getElement", ec);
2393
2394 delete a;
2395 delete A;
2396 delete array[1];
2397 delete t;
2398 #endif
2399
2400 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2401 "a",
2402 "ax");
2403
2404 UnicodeString gr = CharsToUnicodeString(
2405 "$ddot = \\u0308 ;"
2406 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2407 "$rough = \\u0314 ;"
2408 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2409 "\\u03b1 <> a ;"
2410 "$rough <> h ;");
2411
2412 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2413 }
2414
2415 /**
2416 * Test quantified segment behavior. We want:
2417 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2418 */
2419 void TransliteratorTest::TestQuantifiedSegment(void) {
2420 // The normal case
2421 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2422
2423 // The tricky case; the quantifier is around the segment
2424 expect("([abc])+ > x $1 x;", "cba", "xax");
2425
2426 // Tricky case in reverse direction
2427 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2428
2429 // Check post-context segment
2430 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2431
2432 // Test toRule/toPattern for non-quantified segment.
2433 // Careful with spacing here.
2434 UnicodeString r("([a-c]){q} > x $1 x;");
2435 UParseError pe;
2436 UErrorCode ec = U_ZERO_ERROR;
2437 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2438 if (U_FAILURE(ec)) {
2439 errln("FAIL: createFromRules");
2440 delete t;
2441 return;
2442 }
2443 UnicodeString rr;
2444 t->toRules(rr, TRUE);
2445 if (r != rr) {
2446 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2447 } else {
2448 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2449 }
2450 delete t;
2451
2452 // Test toRule/toPattern for quantified segment.
2453 // Careful with spacing here.
2454 r = "([a-c])+{q} > x $1 x;";
2455 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2456 if (U_FAILURE(ec)) {
2457 errln("FAIL: createFromRules");
2458 delete t;
2459 return;
2460 }
2461 t->toRules(rr, TRUE);
2462 if (r != rr) {
2463 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2464 } else {
2465 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2466 }
2467 delete t;
2468 }
2469
2470 //======================================================================
2471 // Ram's tests
2472 //======================================================================
2473 void TransliteratorTest::TestDevanagariLatinRT(){
2474 const int MAX_LEN= 52;
2475 const char* const source[MAX_LEN] = {
2476 "bh\\u0101rata",
2477 "kra",
2478 "k\\u1E63a",
2479 "khra",
2480 "gra",
2481 "\\u1E45ra",
2482 "cra",
2483 "chra",
2484 "j\\u00F1a",
2485 "jhra",
2486 "\\u00F1ra",
2487 "\\u1E6Dya",
2488 "\\u1E6Dhra",
2489 "\\u1E0Dya",
2490 //"r\\u0323ya", // \u095c is not valid in Devanagari
2491 "\\u1E0Dhya",
2492 "\\u1E5Bhra",
2493 "\\u1E47ra",
2494 "tta",
2495 "thra",
2496 "dda",
2497 "dhra",
2498 "nna",
2499 "pra",
2500 "phra",
2501 "bra",
2502 "bhra",
2503 "mra",
2504 "\\u1E49ra",
2505 //"l\\u0331ra",
2506 "yra",
2507 "\\u1E8Fra",
2508 //"l-",
2509 "vra",
2510 "\\u015Bra",
2511 "\\u1E63ra",
2512 "sra",
2513 "hma",
2514 "\\u1E6D\\u1E6Da",
2515 "\\u1E6D\\u1E6Dha",
2516 "\\u1E6Dh\\u1E6Dha",
2517 "\\u1E0D\\u1E0Da",
2518 "\\u1E0D\\u1E0Dha",
2519 "\\u1E6Dya",
2520 "\\u1E6Dhya",
2521 "\\u1E0Dya",
2522 "\\u1E0Dhya",
2523 // Not roundtrippable --
2524 // \\u0939\\u094d\\u094d\\u092E - hma
2525 // \\u0939\\u094d\\u092E - hma
2526 // CharsToUnicodeString("hma"),
2527 "hya",
2528 "\\u015Br\\u0325",
2529 "\\u015Bca",
2530 "\\u0115",
2531 "san\\u0304j\\u012Bb s\\u0113nagupta",
2532 "\\u0101nand vaddir\\u0101ju",
2533 "\\u0101",
2534 "a"
2535 };
2536 const char* const expected[MAX_LEN] = {
2537 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2538 "\\u0915\\u094D\\u0930", /* kra */
2539 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2540 "\\u0916\\u094D\\u0930", /* khra */
2541 "\\u0917\\u094D\\u0930", /* gra */
2542 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2543 "\\u091A\\u094D\\u0930", /* cra */
2544 "\\u091B\\u094D\\u0930", /* chra */
2545 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2546 "\\u091D\\u094D\\u0930", /* jhra */
2547 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2548 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2549 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2550 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2551 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2552 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2553 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2554 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2555 "\\u0924\\u094D\\u0924", /* tta */
2556 "\\u0925\\u094D\\u0930", /* thra */
2557 "\\u0926\\u094D\\u0926", /* dda */
2558 "\\u0927\\u094D\\u0930", /* dhra */
2559 "\\u0928\\u094D\\u0928", /* nna */
2560 "\\u092A\\u094D\\u0930", /* pra */
2561 "\\u092B\\u094D\\u0930", /* phra */
2562 "\\u092C\\u094D\\u0930", /* bra */
2563 "\\u092D\\u094D\\u0930", /* bhra */
2564 "\\u092E\\u094D\\u0930", /* mra */
2565 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2566 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2567 "\\u092F\\u094D\\u0930", /* yra */
2568 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2569 //"l-",
2570 "\\u0935\\u094D\\u0930", /* vra */
2571 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2572 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2573 "\\u0938\\u094D\\u0930", /* sra */
2574 "\\u0939\\u094d\\u092E", /* hma */
2575 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2576 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2577 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2578 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2579 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2580 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2581 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2582 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2583 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2584 // "hma", /* hma */
2585 "\\u0939\\u094D\\u092F", /* hya */
2586 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2587 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2588 "\\u090d", /* e\\u0306 */
2589 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2590 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2591 "\\u0906",
2592 "\\u0905",
2593 };
2594 UErrorCode status = U_ZERO_ERROR;
2595 UParseError parseError;
2596 UnicodeString message;
2597 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2598 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2599 if(U_FAILURE(status)){
2600 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2601 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2602 return;
2603 }
2604 UnicodeString gotResult;
2605 for(int i= 0; i<MAX_LEN; i++){
2606 gotResult = source[i];
2607 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2608 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2609 }
2610 delete latinToDev;
2611 delete devToLatin;
2612 }
2613
2614 void TransliteratorTest::TestTeluguLatinRT(){
2615 const int MAX_LEN=10;
2616 const char* const source[MAX_LEN] = {
2617 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2618 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2619 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2620 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2621 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2622 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2623 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2624 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2625 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2626 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2627 };
2628
2629 const char* const expected[MAX_LEN] = {
2630 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2631 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2632 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2633 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2634 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2635 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2636 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2637 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2638 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2639 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2640 };
2641
2642 UErrorCode status = U_ZERO_ERROR;
2643 UParseError parseError;
2644 UnicodeString message;
2645 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2646 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2647 if(U_FAILURE(status)){
2648 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2649 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2650 return;
2651 }
2652 UnicodeString gotResult;
2653 for(int i= 0; i<MAX_LEN; i++){
2654 gotResult = source[i];
2655 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2656 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2657 }
2658 delete latinToDev;
2659 delete devToLatin;
2660 }
2661
2662 void TransliteratorTest::TestSanskritLatinRT(){
2663 const int MAX_LEN =16;
2664 const char* const source[MAX_LEN] = {
2665 "rmk\\u1E63\\u0113t",
2666 "\\u015Br\\u012Bmad",
2667 "bhagavadg\\u012Bt\\u0101",
2668 "adhy\\u0101ya",
2669 "arjuna",
2670 "vi\\u1E63\\u0101da",
2671 "y\\u014Dga",
2672 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2673 "uv\\u0101cr\\u0325",
2674 "dharmak\\u1E63\\u0113tr\\u0113",
2675 "kuruk\\u1E63\\u0113tr\\u0113",
2676 "samav\\u0113t\\u0101",
2677 "yuyutsava\\u1E25",
2678 "m\\u0101mak\\u0101\\u1E25",
2679 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2680 "kimakurvata",
2681 "san\\u0304java",
2682 };
2683 const char* const expected[MAX_LEN] = {
2684 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2685 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2686 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2687 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2688 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2689 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2690 "\\u092f\\u094b\\u0917",
2691 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2692 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2693 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2694 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2695 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2696 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2697 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2698 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2699 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2700 "\\u0938\\u0902\\u091c\\u0935",
2701 };
2702 UErrorCode status = U_ZERO_ERROR;
2703 UParseError parseError;
2704 UnicodeString message;
2705 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2706 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2707 if(U_FAILURE(status)){
2708 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2709 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2710 return;
2711 }
2712 UnicodeString gotResult;
2713 for(int i= 0; i<MAX_LEN; i++){
2714 gotResult = source[i];
2715 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2716 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2717 }
2718 delete latinToDev;
2719 delete devToLatin;
2720 }
2721
2722
2723 void TransliteratorTest::TestCompoundLatinRT(){
2724 const char* const source[] = {
2725 "rmk\\u1E63\\u0113t",
2726 "\\u015Br\\u012Bmad",
2727 "bhagavadg\\u012Bt\\u0101",
2728 "adhy\\u0101ya",
2729 "arjuna",
2730 "vi\\u1E63\\u0101da",
2731 "y\\u014Dga",
2732 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2733 "uv\\u0101cr\\u0325",
2734 "dharmak\\u1E63\\u0113tr\\u0113",
2735 "kuruk\\u1E63\\u0113tr\\u0113",
2736 "samav\\u0113t\\u0101",
2737 "yuyutsava\\u1E25",
2738 "m\\u0101mak\\u0101\\u1E25",
2739 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2740 "kimakurvata",
2741 "san\\u0304java"
2742 };
2743 const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2744 const char* const expected[MAX_LEN] = {
2745 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2746 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2747 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2748 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2749 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2750 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2751 "\\u092f\\u094b\\u0917",
2752 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2753 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2754 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2755 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2756 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2757 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2758 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2759 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2760 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2761 "\\u0938\\u0902\\u091c\\u0935"
2762 };
2763 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2764 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2765 return;
2766 }
2767
2768 UErrorCode status = U_ZERO_ERROR;
2769 UParseError parseError;
2770 UnicodeString message;
2771 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2772 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2773 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2774 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2775
2776 if(U_FAILURE(status)){
2777 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2778 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2779 return;
2780 }
2781 UnicodeString gotResult;
2782 for(int i= 0; i<MAX_LEN; i++){
2783 gotResult = source[i];
2784 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2785 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2786 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2787
2788 }
2789 delete(latinToDevToLatin);
2790 delete(devToLatinToDev);
2791 delete(devToTelToDev);
2792 delete(latinToTelToLatin);
2793 }
2794
2795 /**
2796 * Test Gurmukhi-Devanagari Tippi and Bindi
2797 */
2798 void TransliteratorTest::TestGurmukhiDevanagari(){
2799 // the rule says:
2800 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2801 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2802 UErrorCode status = U_ZERO_ERROR;
2803 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status);
2804 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status);
2805 UParseError parseError;
2806
2807 UnicodeSetIterator vIter(vowel);
2808 UnicodeSetIterator nvIter(non_vowel);
2809 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2810 if(U_FAILURE(status)) {
2811 errln("Error creating transliterator %s", u_errorName(status));
2812 delete trans;
2813 return;
2814 }
2815 UnicodeString src (" \\u0902");
2816 UnicodeString expected(" \\u0A02");
2817 src = src.unescape();
2818 expected= expected.unescape();
2819
2820 while(vIter.next()){
2821 src.setCharAt(0,(UChar) vIter.getCodepoint());
2822 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2823 expect(*trans,src,expected);
2824 }
2825
2826 expected.setCharAt(1,0x0A70);
2827 while(nvIter.next()){
2828 //src.setCharAt(0,(char) nvIter.codepoint);
2829 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2830 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2831 expect(*trans,src,expected);
2832 }
2833 delete trans;
2834 }
2835 /**
2836 * Test instantiation from a locale.
2837 */
2838 void TransliteratorTest::TestLocaleInstantiation(void) {
2839 UParseError pe;
2840 UErrorCode ec = U_ZERO_ERROR;
2841 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2842 if (U_FAILURE(ec)) {
2843 errln("FAIL: createInstance(ru_RU-Latin)");
2844 delete t;
2845 return;
2846 }
2847 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2848 delete t;
2849
2850 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2851 if (U_FAILURE(ec)) {
2852 errln("FAIL: createInstance(en-el)");
2853 delete t;
2854 return;
2855 }
2856 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2857 delete t;
2858 }
2859
2860 /**
2861 * Test title case handling of accent (should ignore accents)
2862 */
2863 void TransliteratorTest::TestTitleAccents(void) {
2864 UParseError pe;
2865 UErrorCode ec = U_ZERO_ERROR;
2866 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2867 if (U_FAILURE(ec)) {
2868 errln("FAIL: createInstance(Title)");
2869 delete t;
2870 return;
2871 }
2872 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2873 delete t;
2874 }
2875
2876 /**
2877 * Basic test of a locale resource based rule.
2878 */
2879 void TransliteratorTest::TestLocaleResource() {
2880 const char* DATA[] = {
2881 // id from to
2882 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2883 "Latin-el", "b", "\\u03bc\\u03c0",
2884 "Latin-Greek", "b", "\\u03B2",
2885 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2886 "el-Latin", "\\u03B2", "v",
2887 "Greek-Latin", "\\u03B2", "b",
2888 };
2889 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2890 for (int32_t i=0; i<DATA_length; i+=3) {
2891 UParseError pe;
2892 UErrorCode ec = U_ZERO_ERROR;
2893 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2894 if (U_FAILURE(ec)) {
2895 errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");
2896 delete t;
2897 continue;
2898 }
2899 expect(*t, CharsToUnicodeString(DATA[i+1]),
2900 CharsToUnicodeString(DATA[i+2]));
2901 delete t;
2902 }
2903 }
2904
2905 /**
2906 * Make sure parse errors reference the right line.
2907 */
2908 void TransliteratorTest::TestParseError() {
2909 const char* rule =
2910 "a > b;\n"
2911 "# more stuff\n"
2912 "d << b;";
2913 UErrorCode ec = U_ZERO_ERROR;
2914 UParseError pe;
2915 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2916 delete t;
2917 if (U_FAILURE(ec)) {
2918 UnicodeString err(pe.preContext);
2919 err.append((UChar)124/*|*/).append(pe.postContext);
2920 if (err.indexOf("d << b") >= 0) {
2921 logln("Ok: " + err);
2922 } else {
2923 errln("FAIL: " + err);
2924 }
2925 return;
2926 }
2927 errln("FAIL: no syntax error");
2928 }
2929
2930 /**
2931 * Make sure sets on output are disallowed.
2932 */
2933 void TransliteratorTest::TestOutputSet() {
2934 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2935 UErrorCode ec = U_ZERO_ERROR;
2936 UParseError pe;
2937 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2938 delete t;
2939 if (U_FAILURE(ec)) {
2940 UnicodeString err(pe.preContext);
2941 err.append((UChar)124/*|*/).append(pe.postContext);
2942 logln("Ok: " + err);
2943 return;
2944 }
2945 errln("FAIL: No syntax error");
2946 }
2947
2948 /**
2949 * Test the use variable range pragma, making sure that use of
2950 * variable range characters is detected and flagged as an error.
2951 */
2952 void TransliteratorTest::TestVariableRange() {
2953 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2954 UErrorCode ec = U_ZERO_ERROR;
2955 UParseError pe;
2956 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2957 delete t;
2958 if (U_FAILURE(ec)) {
2959 UnicodeString err(pe.preContext);
2960 err.append((UChar)124/*|*/).append(pe.postContext);
2961 logln("Ok: " + err);
2962 return;
2963 }
2964 errln("FAIL: No syntax error");
2965 }
2966
2967 /**
2968 * Test invalid post context error handling
2969 */
2970 void TransliteratorTest::TestInvalidPostContext() {
2971 UnicodeString rule = "a}b{c>d;";
2972 UErrorCode ec = U_ZERO_ERROR;
2973 UParseError pe;
2974 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2975 delete t;
2976 if (U_FAILURE(ec)) {
2977 UnicodeString err(pe.preContext);
2978 err.append((UChar)124/*|*/).append(pe.postContext);
2979 if (err.indexOf("a}b{c") >= 0) {
2980 logln("Ok: " + err);
2981 } else {
2982 errln("FAIL: " + err);
2983 }
2984 return;
2985 }
2986 errln("FAIL: No syntax error");
2987 }
2988
2989 /**
2990 * Test ID form variants
2991 */
2992 void TransliteratorTest::TestIDForms() {
2993 const char* DATA[] = {
2994 "NFC", NULL, "NFD",
2995 "nfd", NULL, "NFC", // make sure case is ignored
2996 "Any-NFKD", NULL, "Any-NFKC",
2997 "Null", NULL, "Null",
2998 "-nfkc", "nfkc", "NFKD",
2999 "-nfkc/", "nfkc", "NFKD",
3000 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3001 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3002 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3003 "Source-", NULL, NULL,
3004 "Source/Variant-", NULL, NULL,
3005 "Source-/Variant", NULL, NULL,
3006 "/Variant", NULL, NULL,
3007 "/Variant-", NULL, NULL,
3008 "-/Variant", NULL, NULL,
3009 "-/", NULL, NULL,
3010 "-", NULL, NULL,
3011 "/", NULL, NULL,
3012 };
3013 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3014
3015 for (int32_t i=0; i<DATA_length; i+=3) {
3016 const char* ID = DATA[i];
3017 const char* expID = DATA[i+1];
3018 const char* expInvID = DATA[i+2];
3019 UBool expValid = (expInvID != NULL);
3020 if (expID == NULL) {
3021 expID = ID;
3022 }
3023 UParseError pe;
3024 UErrorCode ec = U_ZERO_ERROR;
3025 Transliterator *t =
3026 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3027 if (U_FAILURE(ec)) {
3028 if (!expValid) {
3029 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3030 } else {
3031 errln((UnicodeString)"FAIL: Couldn't create " + ID);
3032 }
3033 delete t;
3034 continue;
3035 }
3036 Transliterator *u = t->createInverse(ec);
3037 if (U_FAILURE(ec)) {
3038 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3039 delete t;
3040 delete u;
3041 continue;
3042 }
3043 if (t->getID() == expID &&
3044 u->getID() == expInvID) {
3045 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3046 } else {
3047 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3048 t->getID() + " x getInverse() => " + u->getID() +
3049 ", expected " + expInvID);
3050 }
3051 delete t;
3052 delete u;
3053 }
3054 }
3055
3056 static const UChar SPACE[] = {32,0};
3057 static const UChar NEWLINE[] = {10,0};
3058 static const UChar RETURN[] = {13,0};
3059 static const UChar EMPTY[] = {0};
3060
3061 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3062 const UnicodeString& testRulesForward) {
3063 UnicodeString rules2; t2.toRules(rules2, TRUE);
3064 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3065 rules2.findAndReplace(SPACE, EMPTY);
3066 rules2.findAndReplace(NEWLINE, EMPTY);
3067 rules2.findAndReplace(RETURN, EMPTY);
3068
3069 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3070
3071 if (rules2 != testRules) {
3072 errln(label);
3073 logln((UnicodeString)"GENERATED RULES: " + rules2);
3074 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3075 }
3076 }
3077
3078 /**
3079 * Mark's toRules test.
3080 */
3081 void TransliteratorTest::TestToRulesMark() {
3082 const char* testRules =
3083 "::[[:Latin:][:Mark:]];"
3084 "::NFKD (NFC);"
3085 "::Lower (Lower);"
3086 "a <> \\u03B1;" // alpha
3087 "::NFKC (NFD);"
3088 "::Upper (Lower);"
3089 "::Lower ();"
3090 "::([[:Greek:][:Mark:]]);"
3091 ;
3092 const char* testRulesForward =
3093 "::[[:Latin:][:Mark:]];"
3094 "::NFKD(NFC);"
3095 "::Lower(Lower);"
3096 "a > \\u03B1;"
3097 "::NFKC(NFD);"
3098 "::Upper (Lower);"
3099 "::Lower ();"
3100 ;
3101 const char* testRulesBackward =
3102 "::[[:Greek:][:Mark:]];"
3103 "::Lower (Upper);"
3104 "::NFD(NFKC);"
3105 "\\u03B1 > a;"
3106 "::Lower(Lower);"
3107 "::NFC(NFKD);"
3108 ;
3109 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3110 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3111
3112 UParseError pe;
3113 UErrorCode ec = U_ZERO_ERROR;
3114 Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
3115 Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
3116
3117 if (U_FAILURE(ec)) {
3118 delete t2;
3119 delete t3;
3120 errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3121 return;
3122 }
3123
3124 expect(*t2, source, target);
3125 expect(*t3, target, source);
3126
3127 checkRules("Failed toRules FORWARD", *t2, testRulesForward);
3128 checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
3129
3130 delete t2;
3131 delete t3;
3132 }
3133
3134 /**
3135 * Test Escape and Unescape transliterators.
3136 */
3137 void TransliteratorTest::TestEscape() {
3138 UParseError pe;
3139 UErrorCode ec;
3140 Transliterator *t;
3141
3142 ec = U_ZERO_ERROR;
3143 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3144 if (U_FAILURE(ec)) {
3145 errln((UnicodeString)"FAIL: createInstance");
3146 } else {
3147 expect(*t,
3148 "\\x{40}\\U00000031&#x32;&#81;",
3149 "@12Q");
3150 }
3151 delete t;
3152
3153 ec = U_ZERO_ERROR;
3154 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3155 if (U_FAILURE(ec)) {
3156 errln((UnicodeString)"FAIL: createInstance");
3157 } else {
3158 expect(*t,
3159 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3160 "\\u0041\\U0010BEEF\\uFEED");
3161 }
3162 delete t;
3163
3164 ec = U_ZERO_ERROR;
3165 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3166 if (U_FAILURE(ec)) {
3167 errln((UnicodeString)"FAIL: createInstance");
3168 } else {
3169 expect(*t,
3170 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3171 "\\u0041\\uDBEF\\uDEEF\\uFEED");
3172 }
3173 delete t;
3174
3175 ec = U_ZERO_ERROR;
3176 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3177 if (U_FAILURE(ec)) {
3178 errln((UnicodeString)"FAIL: createInstance");
3179 } else {
3180 expect(*t,
3181 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3182 "\\x{41}\\x{10BEEF}\\x{FEED}");
3183 }
3184 delete t;
3185 }
3186
3187
3188 void TransliteratorTest::TestAnchorMasking(){
3189 UnicodeString rule ("^a > Q; a > q;");
3190 UErrorCode status= U_ZERO_ERROR;
3191 UParseError parseError;
3192
3193 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3194 if(U_FAILURE(status)){
3195 errln(UnicodeString("FAIL: ") + "ID" +
3196 ".createFromRules() => bad rules" +
3197 /*", parse error " + parseError.code +*/
3198 ", line " + parseError.line +
3199 ", offset " + parseError.offset +
3200 ", context " + prettify(parseError.preContext, TRUE) +
3201 ", rules: " + prettify(rule, TRUE));
3202 }
3203 delete t;
3204 }
3205
3206 /**
3207 * Make sure display names of variants look reasonable.
3208 */
3209 void TransliteratorTest::TestDisplayName() {
3210 #if UCONFIG_NO_FORMATTING
3211 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3212 return;
3213 #else
3214 static const char* DATA[] = {
3215 // ID, forward name, reverse name
3216 // Update the text as necessary -- the important thing is
3217 // not the text itself, but how various cases are handled.
3218
3219 // Basic test
3220 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3221
3222 // Variants
3223 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3224
3225 // Target-only IDs
3226 "NFC", "Any to NFC", "Any to NFD",
3227 };
3228
3229 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3230
3231 Locale US("en", "US");
3232
3233 for (int32_t i=0; i<DATA_length; i+=3) {
3234 UnicodeString name;
3235 Transliterator::getDisplayName(DATA[i], US, name);
3236 if (name != DATA[i+1]) {
3237 errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3238 name + ", expected " + DATA[i+1]);
3239 } else {
3240 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3241 }
3242 UErrorCode ec = U_ZERO_ERROR;
3243 UParseError pe;
3244 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3245 if (U_FAILURE(ec)) {
3246 delete t;
3247 errln("FAIL: createInstance failed");
3248 continue;
3249 }
3250 name = Transliterator::getDisplayName(t->getID(), US, name);
3251 if (name != DATA[i+2]) {
3252 errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3253 name + ", expected " + DATA[i+2]);
3254 } else {
3255 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3256 }
3257 delete t;
3258 }
3259 #endif
3260 }
3261
3262 void TransliteratorTest::TestSpecialCases(void) {
3263 const UnicodeString registerRules[] = {
3264 "Any-Dev1", "x > X; y > Y;",
3265 "Any-Dev2", "XY > Z",
3266 "Greek-Latin/FAKE",
3267 CharsToUnicodeString
3268 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3269 "" // END MARKER
3270 };
3271
3272 const UnicodeString testCases[] = {
3273 // NORMALIZATION
3274 // should add more test cases
3275 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3276 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3277 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3278 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3279
3280 // mp -> b BUG
3281 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3282 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3283
3284 // check for devanagari bug
3285 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3286
3287 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3288 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3289 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3290
3291 //TODO: enable this test once Titlecase works right
3292 /*
3293 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3294 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3295 */
3296 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3297 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3298 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3299 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3300
3301 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3302 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3303
3304 // FORMS OF S
3305 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3306 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3307 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3308 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3309 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3310 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3311 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3312 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3313 // Tatiana bug
3314 // Upper: TAT\\u02B9\\u00C2NA
3315 // Lower: tat\\u02B9\\u00E2na
3316 // Title: Tat\\u02B9\\u00E2na
3317 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3318 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3319 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3320 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3321 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3322 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3323
3324 "" // END MARKER
3325 };
3326
3327 UParseError pos;
3328 int32_t i;
3329 for (i = 0; registerRules[i].length()!=0; i+=2) {
3330 UErrorCode status = U_ZERO_ERROR;
3331
3332 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3333 registerRules[i+1], UTRANS_FORWARD, pos, status);
3334 if (U_FAILURE(status)) {
3335 errln("Fails: Unable to create the transliterator from rules.");
3336 } else {
3337 Transliterator::registerInstance(t);
3338 }
3339 }
3340 for (i = 0; testCases[i].length()!=0; i+=3) {
3341 UErrorCode ec = U_ZERO_ERROR;
3342 UParseError pe;
3343 const UnicodeString& name = testCases[i];
3344 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3345 if (U_FAILURE(ec)) {
3346 errln((UnicodeString)"FAIL: Couldn't create " + name);
3347 delete t;
3348 continue;
3349 }
3350 const UnicodeString& id = t->getID();
3351 const UnicodeString& source = testCases[i+1];
3352 UnicodeString target;
3353
3354 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3355
3356 if (testCases[i+2].length() > 0) {
3357 target = testCases[i+2];
3358 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3359 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3360 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3361 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3362 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3363 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3364 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3365 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3366 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3367 target = source;
3368 target.toLower(Locale::getUS());
3369 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3370 target = source;
3371 target.toUpper(Locale::getUS());
3372 }
3373 if (U_FAILURE(ec)) {
3374 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3375 continue;
3376 }
3377
3378 expect(*t, source, target);
3379 delete t;
3380 }
3381 for (i = 0; registerRules[i].length()!=0; i+=2) {
3382 Transliterator::unregister(registerRules[i]);
3383 }
3384 }
3385
/**
 * Format a code point as an escape sequence into buffer and return
 * buffer: "\uXXXX" (four lowercase hex digits) for values up to 0xFFFF,
 * "\UXXXXXXXX" (eight lowercase hex digits) otherwise.  The caller must
 * supply a buffer large enough for the text plus the terminating NUL.
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    const char* format = (ch > 0xFFFF) ? "\\U%08x" : "\\u%04x";
    sprintf(buffer, format, (int)ch);
    return buffer;
}
3394
3395 void TransliteratorTest::TestSurrogateCasing (void) {
3396 // check that casing handles surrogates
3397 // titlecase is currently defective
3398 char buffer[20];
3399 UChar buffer2[20];
3400 UChar32 dee;
3401 UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3402 UnicodeString DEE(u_totitle(dee));
3403 if (DEE != DESERET_DEE) {
3404 err("Fails titlecase of surrogates");
3405 err(Char32ToEscapedChars(dee, buffer));
3406 err(", ");
3407 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3408 }
3409
3410 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3411 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3412 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3413 UErrorCode status= U_ZERO_ERROR;
3414
3415 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3416 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3417 errln("Fails: Can't uppercase surrogates.");
3418 }
3419
3420 status= U_ZERO_ERROR;
3421 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3422 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3423 errln("Fails: Can't lowercase surrogates.");
3424 }
3425 }
3426
3427 static void _trans(Transliterator& t, const UnicodeString& src,
3428 UnicodeString& result) {
3429 result = src;
3430 t.transliterate(result);
3431 }
3432
3433 static void _trans(const UnicodeString& id, const UnicodeString& src,
3434 UnicodeString& result, UErrorCode ec) {
3435 UParseError pe;
3436 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3437 if (U_SUCCESS(ec)) {
3438 _trans(*t, src, result);
3439 }
3440 delete t;
3441 }
3442
3443 static UnicodeString _findMatch(const UnicodeString& source,
3444 const UnicodeString* pairs) {
3445 UnicodeString empty;
3446 for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3447 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3448 return pairs[i+1];
3449 }
3450 }
3451 return empty;
3452 }
3453
3454 // Check to see that incremental gets at least part way through a reasonable string.
3455
3456 void TransliteratorTest::TestIncrementalProgress(void) {
3457 UErrorCode ec = U_ZERO_ERROR;
3458 UnicodeString latinTest = "The Quick Brown Fox.";
3459 UnicodeString devaTest;
3460 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3461 UnicodeString kataTest;
3462 _trans("Latin-Katakana", latinTest, kataTest, ec);
3463 if (U_FAILURE(ec)) {
3464 errln("FAIL: Internal error");
3465 return;
3466 }
3467 const UnicodeString tests[] = {
3468 "Any", latinTest,
3469 "Latin", latinTest,
3470 "Halfwidth", latinTest,
3471 "Devanagari", devaTest,
3472 "Katakana", kataTest,
3473 "" // END MARKER
3474 };
3475
3476 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3477 int32_t i = 0, j=0, k=0;
3478 int32_t sources = Transliterator::countAvailableSources();
3479 for (i = 0; i < sources; i++) {
3480 UnicodeString source;
3481 Transliterator::getAvailableSource(i, source);
3482 UnicodeString test = _findMatch(source, tests);
3483 if (test.length() == 0) {
3484 logln((UnicodeString)"Skipping " + source + "-X");
3485 continue;
3486 }
3487 int32_t targets = Transliterator::countAvailableTargets(source);
3488 for (j = 0; j < targets; j++) {
3489 UnicodeString target;
3490 Transliterator::getAvailableTarget(j, source, target);
3491 int32_t variants = Transliterator::countAvailableVariants(source, target);
3492 for (k =0; k< variants; k++) {
3493 UnicodeString variant;
3494 UParseError err;
3495 UErrorCode status = U_ZERO_ERROR;
3496
3497 Transliterator::getAvailableVariant(k, source, target, variant);
3498 UnicodeString id = source + "-" + target + "/" + variant;
3499
3500 if(id.indexOf("Thai")>-1 && !isICUVersionAtLeast(ICU_37)){
3501 /* The Thai-Latin transliterator doesn't exist in ICU4C yet */
3502 continue;
3503 }
3504 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3505 if (U_FAILURE(status)) {
3506 errln((UnicodeString)"FAIL: Could not create " + id);
3507 delete t;
3508 continue;
3509 }
3510 status = U_ZERO_ERROR;
3511 CheckIncrementalAux(t, test);
3512
3513 UnicodeString rev;
3514 _trans(*t, test, rev);
3515 Transliterator *inv = t->createInverse(status);
3516 if (U_FAILURE(status)) {
3517 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3518 delete t;
3519 delete inv;
3520 continue;
3521 }
3522 CheckIncrementalAux(inv, rev);
3523 delete t;
3524 delete inv;
3525 }
3526 }
3527 }
3528 }
3529
3530 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3531 const UnicodeString& input) {
3532 UErrorCode ec = U_ZERO_ERROR;
3533 UTransPosition pos;
3534 UnicodeString test = input;
3535
3536 pos.contextStart = 0;
3537 pos.contextLimit = input.length();
3538 pos.start = 0;
3539 pos.limit = input.length();
3540
3541 t->transliterate(test, pos, ec);
3542 if (U_FAILURE(ec)) {
3543 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3544 return;
3545 }
3546 UBool gotError = FALSE;
3547
3548 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3549
3550 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3551 errln((UnicodeString)"No Progress, " +
3552 t->getID() + ": " + formatInput(test, input, pos));
3553 gotError = TRUE;
3554 } else {
3555 logln((UnicodeString)"PASS Progress, " +
3556 t->getID() + ": " + formatInput(test, input, pos));
3557 }
3558 t->finishTransliteration(test, pos);
3559 if (pos.start != pos.limit) {
3560 errln((UnicodeString)"Incomplete, " +
3561 t->getID() + ": " + formatInput(test, input, pos));
3562 gotError = TRUE;
3563 }
3564 }
3565
3566 void TransliteratorTest::TestFunction() {
3567 // Careful with spacing and ';' here: Phrase this exactly
3568 // as toRules() is going to return it. If toRules() changes
3569 // with regard to spacing or ';', then adjust this string.
3570 UnicodeString rule =
3571 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3572
3573 UParseError pe;
3574 UErrorCode ec = U_ZERO_ERROR;
3575 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3576 if (t == NULL) {
3577 errln("FAIL: createFromRules failed");
3578 return;
3579 }
3580
3581 UnicodeString r;
3582 t->toRules(r, TRUE);
3583 if (r == rule) {
3584 logln((UnicodeString)"OK: toRules() => " + r);
3585 } else {
3586 errln((UnicodeString)"FAIL: toRules() => " + r +
3587 ", expected " + rule);
3588 }
3589
3590 expect(*t, "The Quick Brown Fox",
3591 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
3592
3593 delete t;
3594 }
3595
3596 void TransliteratorTest::TestInvalidBackRef(void) {
3597 UnicodeString rule = ". > $1;";
3598 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3599 UParseError pe;
3600 UErrorCode ec = U_ZERO_ERROR;
3601 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3602 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3603
3604 if (t != NULL) {
3605 errln("FAIL: createFromRules should have returned NULL");
3606 delete t;
3607 }
3608
3609 if (t2 != NULL) {
3610 errln("FAIL: createFromRules should have returned NULL");
3611 delete t2;
3612 }
3613
3614 if (U_SUCCESS(ec)) {
3615 errln("FAIL: Ok: . > $1; => no error");
3616 } else {
3617 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3618 }
3619 }
3620
3621 void TransliteratorTest::TestMulticharStringSet() {
3622 // Basic testing
3623 const char* rule =
3624 " [{aa}] > x;"
3625 " a > y;"
3626 " [b{bc}] > z;"
3627 "[{gd}] { e > q;"
3628 " e } [{fg}] > r;" ;
3629
3630 UParseError pe;
3631 UErrorCode ec = U_ZERO_ERROR;
3632 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3633 if (t == NULL || U_FAILURE(ec)) {
3634 delete t;
3635 errln("FAIL: createFromRules failed");
3636 return;
3637 }
3638
3639 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3640 "y x yz z d gd de gdq gdqfg ddrfg");
3641 delete t;
3642
3643 // Overlapped string test. Make sure that when multiple
3644 // strings can match that the longest one is matched.
3645 rule =
3646 " [a {ab} {abc}] > x;"
3647 " b > y;"
3648 " c > z;"
3649 " q [t {st} {rst}] { e > p;" ;
3650
3651 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3652 if (t == NULL || U_FAILURE(ec)) {
3653 delete t;
3654 errln("FAIL: createFromRules failed");
3655 return;
3656 }
3657
3658 expect(*t, "a ab abc qte qste qrste",
3659 "x x x qtp qstp qrstp");
3660 delete t;
3661 }
3662
3663 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3664 // BEGIN TestUserFunction support factory
3665
3666 Transliterator* _TUFF[4];
3667 UnicodeString* _TUFID[4];
3668
3669 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3670 Transliterator::Token context) {
3671 return _TUFF[context.integer]->clone();
3672 }
3673
3674 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3675 _TUFF[n] = t;
3676 _TUFID[n] = new UnicodeString(ID);
3677 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3678 }
3679
3680 static void _TUFUnreg(int32_t n) {
3681 if (_TUFF[n] != NULL) {
3682 Transliterator::unregister(*_TUFID[n]);
3683 delete _TUFF[n];
3684 delete _TUFID[n];
3685 }
3686 }
3687
3688 // END TestUserFunction support factory
3689 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3690
3691 /**
3692 * Test that user-registered transliterators can be used under function
3693 * syntax.
3694 */
3695 void TransliteratorTest::TestUserFunction() {
3696
3697 Transliterator* t;
3698 UParseError pe;
3699 UErrorCode ec = U_ZERO_ERROR;
3700
3701 // Setup our factory
3702 int32_t i;
3703 for (i=0; i<4; ++i) {
3704 _TUFF[i] = NULL;
3705 }
3706
3707 // There's no need to register inverses if we don't use them
3708 t = Transliterator::createFromRules("gif",
3709 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
3710 UTRANS_FORWARD, pe, ec);
3711 if (t == NULL || U_FAILURE(ec)) {
3712 errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3713 return;
3714 }
3715 _TUFReg("Any-gif", t, 0);
3716
3717 t = Transliterator::createFromRules("RemoveCurly",
3718 "[\\{\\}] > ; '\\N' > ;",
3719 UTRANS_FORWARD, pe, ec);
3720 if (t == NULL || U_FAILURE(ec)) {
3721 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3722 goto FAIL;
3723 }
3724 expect(*t, "\\N{name}", "name");
3725 _TUFReg("Any-RemoveCurly", t, 1);
3726
3727 logln("Trying &hex");
3728 t = Transliterator::createFromRules("hex2",
3729 "(.) > &hex($1);",
3730 UTRANS_FORWARD, pe, ec);
3731 if (t == NULL || U_FAILURE(ec)) {
3732 errln("FAIL: createFromRules");
3733 goto FAIL;
3734 }
3735 logln("Registering");
3736 _TUFReg("Any-hex2", t, 2);
3737 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3738 if (t == NULL || U_FAILURE(ec)) {
3739 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3740 goto FAIL;
3741 }
3742 expect(*t, "abc", "\\u0061\\u0062\\u0063");
3743 delete t;
3744
3745 logln("Trying &gif");
3746 t = Transliterator::createFromRules("gif2",
3747 "(.) > &Gif(&Hex2($1));",
3748 UTRANS_FORWARD, pe, ec);
3749 if (t == NULL || U_FAILURE(ec)) {
3750 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3751 goto FAIL;
3752 }
3753 logln("Registering");
3754 _TUFReg("Any-gif2", t, 3);
3755 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3756 if (t == NULL || U_FAILURE(ec)) {
3757 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3758 goto FAIL;
3759 }
3760 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3761 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3762 delete t;
3763
3764 // Test that filters are allowed after &
3765 t = Transliterator::createFromRules("test",
3766 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3767 UTRANS_FORWARD, pe, ec);
3768 if (t == NULL || U_FAILURE(ec)) {
3769 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3770 goto FAIL;
3771 }
3772 expect(*t, "abc",
3773 "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
3774 delete t;
3775
3776 FAIL:
3777 for (i=0; i<4; ++i) {
3778 _TUFUnreg(i);
3779 }
3780 }
3781
3782 /**
3783 * Test the Any-X transliterators.
3784 */
3785 void TransliteratorTest::TestAnyX(void) {
3786 UParseError parseError;
3787 UErrorCode status = U_ZERO_ERROR;
3788 Transliterator* anyLatin =
3789 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3790 if (anyLatin==0) {
3791 errln("FAIL: createInstance returned NULL");
3792 delete anyLatin;
3793 return;
3794 }
3795
3796 expect(*anyLatin,
3797 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3798 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3799
3800 delete anyLatin;
3801 }
3802
3803 /**
3804 * Test the source and target set API. These are only implemented
3805 * for RBT and CompoundTransliterator at this time.
3806 */
3807 void TransliteratorTest::TestSourceTargetSet() {
3808 UErrorCode ec = U_ZERO_ERROR;
3809
3810 // Rules
3811 const char* r =
3812 "a > b; "
3813 "r [x{lu}] > q;";
3814
3815 // Expected source
3816 UnicodeSet expSrc("[arx{lu}]", ec);
3817
3818 // Expected target
3819 UnicodeSet expTrg("[bq]", ec);
3820
3821 UParseError pe;
3822 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3823
3824 if (U_FAILURE(ec)) {
3825 delete t;
3826 errln("FAIL: Couldn't set up test");
3827 return;
3828 }
3829
3830 UnicodeSet src; t->getSourceSet(src);
3831 UnicodeSet trg; t->getTargetSet(trg);
3832
3833 if (src == expSrc && trg == expTrg) {
3834 UnicodeString a, b;
3835 logln((UnicodeString)"Ok: " +
3836 r + " => source = " + src.toPattern(a, TRUE) +
3837 ", target = " + trg.toPattern(b, TRUE));
3838 } else {
3839 UnicodeString a, b, c, d;
3840 errln((UnicodeString)"FAIL: " +
3841 r + " => source = " + src.toPattern(a, TRUE) +
3842 ", expected " + expSrc.toPattern(b, TRUE) +
3843 "; target = " + trg.toPattern(c, TRUE) +
3844 ", expected " + expTrg.toPattern(d, TRUE));
3845 }
3846
3847 delete t;
3848 }
3849
3850 /**
3851 * Test handling of rule whitespace, for both RBT and UnicodeSet.
3852 */
3853 void TransliteratorTest::TestRuleWhitespace() {
3854 // Rules
3855 const char* r = "a > \\u200E b;";
3856
3857 UErrorCode ec = U_ZERO_ERROR;
3858 UParseError pe;
3859 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3860
3861 if (U_FAILURE(ec)) {
3862 errln("FAIL: Couldn't set up test");
3863 } else {
3864 expect(*t, "a", "b");
3865 }
3866 delete t;
3867
3868 // UnicodeSet
3869 ec = U_ZERO_ERROR;
3870 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3871
3872 if (U_FAILURE(ec)) {
3873 errln("FAIL: Couldn't set up test");
3874 } else {
3875 if (set.contains(0x200E)) {
3876 errln("FAIL: U+200E not being ignored by UnicodeSet");
3877 }
3878 }
3879 }
3880 //======================================================================
3881 // this method is in TestUScript.java
3882 //======================================================================
3883 void TransliteratorTest::TestAllCodepoints(){
3884 UScriptCode code= USCRIPT_INVALID_CODE;
3885 char id[256]={'\0'};
3886 char abbr[256]={'\0'};
3887 char newId[256]={'\0'};
3888 char newAbbrId[256]={'\0'};
3889 char oldId[256]={'\0'};
3890 char oldAbbrId[256]={'\0'};
3891
3892 UErrorCode status =U_ZERO_ERROR;
3893 UParseError pe;
3894
3895 for(uint32_t i = 0; i<=0x10ffff; i++){
3896 code = uscript_getScript(i,&status);
3897 if(code == USCRIPT_INVALID_CODE){
3898 errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
3899 }
3900 const char* myId = uscript_getName(code);
3901 if(!myId) {
3902 errln("Valid script code returned NULL name. Check your data!");
3903 return;
3904 }
3905 uprv_strcpy(id,myId);
3906 uprv_strcpy(abbr,uscript_getShortName(code));
3907
3908 uprv_strcpy(newId,"[:");
3909 uprv_strcat(newId,id);
3910 uprv_strcat(newId,":];NFD");
3911
3912 uprv_strcpy(newAbbrId,"[:");
3913 uprv_strcat(newAbbrId,abbr);
3914 uprv_strcat(newAbbrId,":];NFD");
3915
3916 if(uprv_strcmp(newId,oldId)!=0){
3917 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
3918 if(t==NULL || U_FAILURE(status)){
3919 errln((UnicodeString)"FAIL: Could not create " + id);
3920 }
3921 delete t;
3922 }
3923 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
3924 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
3925 if(t==NULL || U_FAILURE(status)){
3926 errln((UnicodeString)"FAIL: Could not create " + id);
3927 }
3928 delete t;
3929 }
3930 uprv_strcpy(oldId,newId);
3931 uprv_strcpy(oldAbbrId, newAbbrId);
3932
3933 }
3934
3935 }
3936
// Instantiate the transliterator with the given ID and report a failure
// unless its dynamic class ID equals cls::getStaticClassID().
// `id` must be a string literal because it is concatenated into the
// failure message at compile time.
#define TEST_TRANSLIT_ID(id, cls) { \
    UErrorCode errorCode = U_ZERO_ERROR; \
    Transliterator* translit = \
        Transliterator::createInstance(id, UTRANS_FORWARD, errorCode); \
    if (U_SUCCESS(errorCode)) { \
        if (translit->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* A self-assignment (*translit = *translit) cannot be used here */ \
        /* as a coverage test for the assignment operator. */ \
    } else { \
        errln("FAIL: Couldn't create " id); \
    } \
    delete translit; \
}
3950
// Build a transliterator named "_" from the given rules and report a
// failure unless its dynamic class ID equals cls::getStaticClassID().
// `rule` must be a string literal because it is concatenated into the
// failure message at compile time.
#define TEST_TRANSLIT_RULE(rule, cls) { \
    UErrorCode errorCode = U_ZERO_ERROR; \
    UParseError parseError; \
    Transliterator* translit = \
        Transliterator::createFromRules("_", rule, UTRANS_FORWARD, parseError, errorCode); \
    if (U_SUCCESS(errorCode)) { \
        if (translit->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* A self-assignment (*translit = *translit) cannot be used here */ \
        /* as a coverage test for the assignment operator. */ \
    } else { \
        errln("FAIL: Couldn't create " rule); \
    } \
    delete translit; \
}
3965
3966 void TransliteratorTest::TestBoilerplate() {
3967 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
3968 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
3969 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
3970 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
3971 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
3972 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
3973 TEST_TRANSLIT_ID("Null", NullTransliterator);
3974 TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
3975 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
3976 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
3977 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
3978 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
3979 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
3980 }
3981
3982 void TransliteratorTest::TestAlternateSyntax() {
3983 // U+2206 == &
3984 // U+2190 == <
3985 // U+2192 == >
3986 // U+2194 == <>
3987 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
3988 "abc",
3989 "xbz");
3990 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
3991 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
3992 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3993 }
3994
// Rule sets for the ::BEGIN/::END tests.  Entries are referenced by index,
// so every disabled ::BEGIN/::END rule set keeps an empty-string placeholder
// in its slot; the disabled text is preserved in the comment next to it.
static const char* BEGIN_END_RULES[] = {
    /* [0] */
    "abc > xy;"
    "aba > z;",

    /* [1] -- disabled:
    //     "::BEGIN;"
    //     "abc > xy;"
    //     "::END;"
    //     "::BEGIN;"
    //     "aba > z;"
    //     "::END;",
    */
    "", // placeholder so that later indexes do not shift

    /* [2] -- disabled:
    //     "abc > xy;"
    //     "::BEGIN;"
    //     "aba > z;"
    //     "::END;",
    */
    "", // placeholder so that later indexes do not shift

    /* [3] -- disabled:
    //     "::BEGIN;"
    //     "abc > xy;"
    //     "::END;"
    //     "aba > z;",
    */
    "", // placeholder so that later indexes do not shift

    /* [4] */
    "abc > xy;"
    "::Null;"
    "aba > z;",

    /* [5] */
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    /* [6] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    /* [7] */
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    /* [8] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    /* [9] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    /* [10] -- disabled:
    //     "::BEGIN;"
    //     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //     "$delim = [\\-$ws];"
    //     "::END;"
    //     "$ws $delim* > ' ';"
    //     "'-' $delim* > '-';",
    */
    "", // placeholder so that later indexes do not shift

    /* [11] -- disabled:
    //     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //     "$delim = [\\-$ws];"
    //     "::BEGIN;"
    //     "$ws $delim* > ' ';"
    //     "'-' $delim* > '-';"
    //     "::END;",
    */
    "", // placeholder so that later indexes do not shift

    /* [12] -- disabled:
    //     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //     "$delim = [\\-$ws];"
    //     "$ab = [ab];"
    //     "::BEGIN;"
    //     "$ws $delim* > ' ';"
    //     "'-' $delim* > '-';"
    //     "::END;"
    //     "::BEGIN;"
    //     "$ab { ' ' } $ab > '-';"
    //     "c { ' ' > ;"
    //     "::END;"
    //     "::BEGIN;"
    //     "'a-a' > a\\%|a;"
    //     "::END;",
    */
    "", // placeholder so that later indexes do not shift

    /* [13] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    /* [14] -- disabled:
    //     "::[abc];"
    //     "::BEGIN;"
    //     "abc > xy;"
    //     "::END;"
    //     "::BEGIN;"
    //     "aba > yz;"
    //     "::END;"
    //     "::Upper;",
    */
    "", // placeholder so that later indexes do not shift

    /* [15] */
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    /* [16] -- disabled:
    //     "::[abc];"
    //     "::BEGIN;"
    //     "abc <> xy;"
    //     "::END;"
    //     "::BEGIN;"
    //     "aba <> yz;"
    //     "::END;"
    //     "::Upper(Lower);"
    //     "::([XYZ]);"
    */
    "", // placeholder so that later indexes do not shift

    /* [17] */
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};

// Number of entries (including placeholders) in BEGIN_END_RULES.
static const int32_t BEGIN_END_RULES_length =
    (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4169
4170 /*
4171 (This entire test is commented out below and will need some heavy revision when we re-add
4172 the ::BEGIN/::END stuff)
4173 static const char* BOGUS_BEGIN_END_RULES[] = {
4174 // [7]
4175 "::BEGIN;"
4176 "abc > xy;"
4177 "::BEGIN;"
4178 "aba > z;"
4179 "::END;"
4180 "::END;",
4181
4182 // [8]
4183 "abc > xy;"
4184 " aba > z;"
4185 "::END;",
4186
4187 // [9]
4188 "::BEGIN;"
4189 "::Upper;"
4190 "::END;"
4191 };
4192 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4193 */
4194
4195 static const char* BEGIN_END_TEST_CASES[] = {
4196 // rules input expected output
4197 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
4198 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4199 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4200 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4201 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
4202 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
4203
4204 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
4205 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
4206 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
4207 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
4208 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4209 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4210 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4211 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4212 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4213 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
4214 BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
4215 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
4216
4217 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4218 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4219 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4220 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4221 };
4222 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4223
4224 void TransliteratorTest::TestBeginEnd() {
4225 // run through the list of test cases above
4226 int32_t i = 0;
4227 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4228 expect((UnicodeString)"Test case #" + (i / 3),
4229 UnicodeString(BEGIN_END_TEST_CASES[i]),
4230 UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
4231 UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
4232 }
4233
4234 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4235 UParseError parseError;
4236 UErrorCode status = U_ZERO_ERROR;
4237 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4238 UTRANS_REVERSE, parseError, status);
4239 if (reversed == 0 || U_FAILURE(status)) {
4240 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4241 } else {
4242 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4243 }
4244 delete reversed;
4245
4246 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4247 // that all of them cause errors
4248 /*
4249 (commented out until we have the real ::BEGIN/::END stuff in place
4250 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4251 UParseError parseError;
4252 UErrorCode status = U_ZERO_ERROR;
4253 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4254 UTRANS_FORWARD, parseError, status);
4255 if (!U_FAILURE(status)) {
4256 delete t;
4257 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4258 }
4259 }
4260 */
4261 }
4262
4263 void TransliteratorTest::TestBeginEndToRules() {
4264 // run through the same list of test cases we used above, but this time, instead of just
4265 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4266 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4267 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4268 // to (i.e., does the same thing as) the original rule set
4269 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4270 UParseError parseError;
4271 UErrorCode status = U_ZERO_ERROR;
4272 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i]),
4273 UTRANS_FORWARD, parseError, status);
4274 if (U_FAILURE(status)) {
4275 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4276 } else {
4277 UnicodeString rules;
4278 t->toRules(rules, TRUE);
4279 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4280 UTRANS_FORWARD, parseError, status);
4281 if (U_FAILURE(status)) {
4282 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4283 parseError, status);
4284 delete t;
4285 } else {
4286 expect(*t2,
4287 UnicodeString(BEGIN_END_TEST_CASES[i + 1]),
4288 UnicodeString(BEGIN_END_TEST_CASES[i + 2]));
4289 delete t;
4290 delete t2;
4291 }
4292 }
4293 }
4294
4295 // do the same thing for the reversible test case
4296 UParseError parseError;
4297 UErrorCode status = U_ZERO_ERROR;
4298 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4299 UTRANS_REVERSE, parseError, status);
4300 if (U_FAILURE(status)) {
4301 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4302 } else {
4303 UnicodeString rules;
4304 reversed->toRules(rules, FALSE);
4305 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4306 parseError, status);
4307 if (U_FAILURE(status)) {
4308 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4309 parseError, status);
4310 delete reversed;
4311 } else {
4312 expect(*reversed2,
4313 UnicodeString("xy XY XYZ yz YZ"),
4314 UnicodeString("xy abc xaba yz aba"));
4315 delete reversed;
4316 delete reversed2;
4317 }
4318 }
4319 }
4320
4321 void TransliteratorTest::TestRegisterAlias() {
4322 UnicodeString longID("Lower;[aeiou]Upper");
4323 UnicodeString shortID("Any-CapVowels");
4324 UnicodeString reallyShortID("CapVowels");
4325
4326 Transliterator::registerAlias(shortID, longID);
4327
4328 UErrorCode err = U_ZERO_ERROR;
4329 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4330 if (U_FAILURE(err)) {
4331 errln("Failed to instantiate transliterator with long ID");
4332 Transliterator::unregister(shortID);
4333 return;
4334 }
4335 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4336 if (U_FAILURE(err)) {
4337 errln("Failed to instantiate transliterator with short ID");
4338 delete t1;
4339 Transliterator::unregister(shortID);
4340 return;
4341 }
4342
4343 if (t1->getID() != longID)
4344 errln("Transliterator instantiated with long ID doesn't have long ID");
4345 if (t2->getID() != reallyShortID)
4346 errln("Transliterator instantiated with short ID doesn't have short ID");
4347
4348 UnicodeString rules1;
4349 UnicodeString rules2;
4350
4351 t1->toRules(rules1, TRUE);
4352 t2->toRules(rules2, TRUE);
4353 if (rules1 != rules2)
4354 errln("Alias transliterators aren't the same");
4355
4356 delete t1;
4357 delete t2;
4358 Transliterator::unregister(shortID);
4359
4360 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4361 if (U_SUCCESS(err)) {
4362 errln("Instantiation with short ID succeeded after short ID was unregistered");
4363 delete t1;
4364 }
4365
4366 // try the same thing again, but this time with something other than
4367 // an instance of CompoundTransliterator
4368 UnicodeString realID("Latin-Greek");
4369 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4370 Transliterator::registerAlias(fakeID, realID);
4371
4372 err = U_ZERO_ERROR;
4373 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4374 if (U_FAILURE(err)) {
4375 errln("Failed to instantiate transliterator with real ID");
4376 Transliterator::unregister(realID);
4377 return;
4378 }
4379 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4380 if (U_FAILURE(err)) {
4381 errln("Failed to instantiate transliterator with fake ID");
4382 delete t1;
4383 Transliterator::unregister(realID);
4384 return;
4385 }
4386
4387 t1->toRules(rules1, TRUE);
4388 t2->toRules(rules2, TRUE);
4389 if (rules1 != rules2)
4390 errln("Alias transliterators aren't the same");
4391
4392 delete t1;
4393 delete t2;
4394 Transliterator::unregister(fakeID);
4395 }
4396
4397 //======================================================================
4398 // Support methods
4399 //======================================================================
4400 void TransliteratorTest::expectT(const UnicodeString& id,
4401 const UnicodeString& source,
4402 const UnicodeString& expectedResult) {
4403 UErrorCode ec = U_ZERO_ERROR;
4404 UParseError pe;
4405 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4406 if (U_FAILURE(ec)) {
4407 errln((UnicodeString)"FAIL: Could not create " + id);
4408 delete t;
4409 return;
4410 }
4411 expect(*t, source, expectedResult);
4412 delete t;
4413 }
4414
4415 void TransliteratorTest::reportParseError(const UnicodeString& message,
4416 const UParseError& parseError,
4417 const UErrorCode& status) {
4418 errln(message +
4419 /*", parse error " + parseError.code +*/
4420 ", line " + parseError.line +
4421 ", offset " + parseError.offset +
4422 ", pre-context " + prettify(parseError.preContext, TRUE) +
4423 ", post-context " + prettify(parseError.postContext,TRUE) +
4424 ", Error: " + u_errorName(status));
4425 }
4426
4427 void TransliteratorTest::expect(const UnicodeString& rules,
4428 const UnicodeString& source,
4429 const UnicodeString& expectedResult,
4430 UTransPosition *pos) {
4431 expect("<ID>", rules, source, expectedResult, pos);
4432 }
4433
4434 void TransliteratorTest::expect(const UnicodeString& id,
4435 const UnicodeString& rules,
4436 const UnicodeString& source,
4437 const UnicodeString& expectedResult,
4438 UTransPosition *pos) {
4439 UErrorCode status = U_ZERO_ERROR;
4440 UParseError parseError;
4441 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4442 if (U_FAILURE(status)) {
4443 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4444 } else {
4445 expect(*t, source, expectedResult, pos);
4446 }
4447 delete t;
4448 }
4449
4450 void TransliteratorTest::expect(const Transliterator& t,
4451 const UnicodeString& source,
4452 const UnicodeString& expectedResult,
4453 const Transliterator& reverseTransliterator) {
4454 expect(t, source, expectedResult);
4455 expect(reverseTransliterator, expectedResult, source);
4456 }
4457
4458 void TransliteratorTest::expect(const Transliterator& t,
4459 const UnicodeString& source,
4460 const UnicodeString& expectedResult,
4461 UTransPosition *pos) {
4462 if (pos == 0) {
4463 UnicodeString result(source);
4464 t.transliterate(result);
4465 expectAux(t.getID() + ":String", source, result, expectedResult);
4466 }
4467 UTransPosition index={0, 0, 0, 0};
4468 if (pos != 0) {
4469 index = *pos;
4470 }
4471
4472 UnicodeString rsource(source);
4473 if (pos == 0) {
4474 t.transliterate(rsource);
4475 } else {
4476 // Do it all at once -- below we do it incrementally
4477 t.finishTransliteration(rsource, *pos);
4478 }
4479 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4480
4481 // Test keyboard (incremental) transliteration -- this result
4482 // must be the same after we finalize (see below).
4483 UnicodeString log;
4484 rsource.remove();
4485 if (pos != 0) {
4486 rsource = source;
4487 formatInput(log, rsource, index);
4488 log.append(" -> ");
4489 UErrorCode status = U_ZERO_ERROR;
4490 t.transliterate(rsource, index, status);
4491 formatInput(log, rsource, index);
4492 } else {
4493 for (int32_t i=0; i<source.length(); ++i) {
4494 if (i != 0) {
4495 log.append(" + ");
4496 }
4497 log.append(source.charAt(i)).append(" -> ");
4498 UErrorCode status = U_ZERO_ERROR;
4499 t.transliterate(rsource, index, source.charAt(i), status);
4500 formatInput(log, rsource, index);
4501 }
4502 }
4503
4504 // As a final step in keyboard transliteration, we must call
4505 // transliterate to finish off any pending partial matches that
4506 // were waiting for more input.
4507 t.finishTransliteration(rsource, index);
4508 log.append(" => ").append(rsource);
4509
4510 expectAux(t.getID() + ":Keyboard", log,
4511 rsource == expectedResult,
4512 expectedResult);
4513 }
4514
4515
4516 /**
4517 * @param appendTo result is appended to this param.
4518 * @param input the string being transliterated
4519 * @param pos the index struct
4520 */
4521 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4522 const UnicodeString& input,
4523 const UTransPosition& pos) {
4524 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4525 // the {} indicate the context start and limit, and the ||
4526 // indicate the start and limit.
4527 if (0 <= pos.contextStart &&
4528 pos.contextStart <= pos.start &&
4529 pos.start <= pos.limit &&
4530 pos.limit <= pos.contextLimit &&
4531 pos.contextLimit <= input.length()) {
4532
4533 UnicodeString a, b, c, d, e;
4534 input.extractBetween(0, pos.contextStart, a);
4535 input.extractBetween(pos.contextStart, pos.start, b);
4536 input.extractBetween(pos.start, pos.limit, c);
4537 input.extractBetween(pos.limit, pos.contextLimit, d);
4538 input.extractBetween(pos.contextLimit, input.length(), e);
4539 appendTo.append(a).append((UChar)123/*{*/).append(b).
4540 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4541 append((UChar)125/*}*/).append(e);
4542 } else {
4543 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4544 pos.contextStart + ", s=" + pos.start + ", l=" +
4545 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4546 input);
4547 }
4548 return appendTo;
4549 }
4550
4551 void TransliteratorTest::expectAux(const UnicodeString& tag,
4552 const UnicodeString& source,
4553 const UnicodeString& result,
4554 const UnicodeString& expectedResult) {
4555 expectAux(tag, source + " -> " + result,
4556 result == expectedResult,
4557 expectedResult);
4558 }
4559
4560 void TransliteratorTest::expectAux(const UnicodeString& tag,
4561 const UnicodeString& summary, UBool pass,
4562 const UnicodeString& expectedResult) {
4563 if (pass) {
4564 logln(UnicodeString("(")+tag+") " + prettify(summary));
4565 } else {
4566 errln(UnicodeString("FAIL: (")+tag+") "
4567 + prettify(summary)
4568 + ", expected " + prettify(expectedResult));
4569 }
4570 }
4571
4572 #endif /* #if !UCONFIG_NO_TRANSLITERATION */