2 **********************************************************************
3 * Copyright (C) 1999-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
44 /***********************************************************************
46 HOW TO USE THIS TEST FILE
48 How I developed on two platforms
49 without losing (too much of) my mind
52 1. Add new tests by copying/pasting/changing existing tests. On Java,
53 any public void method named Test...() taking no parameters becomes
54 a test. On C++, you need to modify the header and add a line to
55 the runIndexedTest() dispatch method.
57 2. Make liberal use of the expect() method; it is your friend.
59 3. The tests in this file exactly match those in a sister file on the
60 other side. The two files are:
62 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
63 icu4c: source/test/intltest/transtst.cpp
65 ==> THIS IS THE IMPORTANT PART <==
67 When you add a test in this file, add it in TransliteratorTest.java
68 too. Give it the same name and put it in the same relative place.
69 This makes maintenance a lot simpler for any poor soul who ends up
70 trying to synchronize the tests between icu4j and icu4c.
72 4. If you MUST enter a test that is NOT paralleled in the sister file,
73 then add it in the special non-mirrored section. These are
82 Make sure you document the reason the test is here and not there.
87 ***********************************************************************/
89 // Define character constants thusly to be EBCDIC-friendly
91 LEFT_BRACE
=((UChar
)0x007B), /*{*/
92 PIPE
=((UChar
)0x007C), /*|*/
93 ZERO
=((UChar
)0x0030), /*0*/
94 UPPER_A
=((UChar
)0x0041) /*A*/
97 TransliteratorTest::TransliteratorTest()
98 : DESERET_DEE((UChar32
)0x10414),
99 DESERET_dee((UChar32
)0x1043C)
103 TransliteratorTest::~TransliteratorTest() {}
106 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
,
107 const char* &name
, char* /*par*/) {
109 TESTCASE(0,TestInstantiation
);
110 TESTCASE(1,TestSimpleRules
);
111 TESTCASE(2,TestRuleBasedInverse
);
112 TESTCASE(3,TestKeyboard
);
113 TESTCASE(4,TestKeyboard2
);
114 TESTCASE(5,TestKeyboard3
);
115 TESTCASE(6,TestArabic
);
116 TESTCASE(7,TestCompoundKana
);
117 TESTCASE(8,TestCompoundHex
);
118 TESTCASE(9,TestFiltering
);
119 TESTCASE(10,TestInlineSet
);
120 TESTCASE(11,TestPatternQuoting
);
121 TESTCASE(12,TestJ277
);
122 TESTCASE(13,TestJ243
);
123 TESTCASE(14,TestJ329
);
124 TESTCASE(15,TestSegments
);
125 TESTCASE(16,TestCursorOffset
);
126 TESTCASE(17,TestArbitraryVariableValues
);
127 TESTCASE(18,TestPositionHandling
);
128 TESTCASE(19,TestHiraganaKatakana
);
129 TESTCASE(20,TestCopyJ476
);
130 TESTCASE(21,TestAnchors
);
131 TESTCASE(22,TestInterIndic
);
132 TESTCASE(23,TestFilterIDs
);
133 TESTCASE(24,TestCaseMap
);
134 TESTCASE(25,TestNameMap
);
135 TESTCASE(26,TestLiberalizedID
);
136 TESTCASE(27,TestCreateInstance
);
137 TESTCASE(28,TestNormalizationTransliterator
);
138 TESTCASE(29,TestCompoundRBT
);
139 TESTCASE(30,TestCompoundFilter
);
140 TESTCASE(31,TestRemove
);
141 TESTCASE(32,TestToRules
);
142 TESTCASE(33,TestContext
);
143 TESTCASE(34,TestSupplemental
);
144 TESTCASE(35,TestQuantifier
);
145 TESTCASE(36,TestSTV
);
146 TESTCASE(37,TestCompoundInverse
);
147 TESTCASE(38,TestNFDChainRBT
);
148 TESTCASE(39,TestNullInverse
);
149 TESTCASE(40,TestAliasInverseID
);
150 TESTCASE(41,TestCompoundInverseID
);
151 TESTCASE(42,TestUndefinedVariable
);
152 TESTCASE(43,TestEmptyContext
);
153 TESTCASE(44,TestCompoundFilterID
);
154 TESTCASE(45,TestPropertySet
);
155 TESTCASE(46,TestNewEngine
);
156 TESTCASE(47,TestQuantifiedSegment
);
157 TESTCASE(48,TestDevanagariLatinRT
);
158 TESTCASE(49,TestTeluguLatinRT
);
159 TESTCASE(50,TestCompoundLatinRT
);
160 TESTCASE(51,TestSanskritLatinRT
);
161 TESTCASE(52,TestLocaleInstantiation
);
162 TESTCASE(53,TestTitleAccents
);
163 TESTCASE(54,TestLocaleResource
);
164 TESTCASE(55,TestParseError
);
165 TESTCASE(56,TestOutputSet
);
166 TESTCASE(57,TestVariableRange
);
167 TESTCASE(58,TestInvalidPostContext
);
168 TESTCASE(59,TestIDForms
);
169 TESTCASE(60,TestToRulesMark
);
170 TESTCASE(61,TestEscape
);
171 TESTCASE(62,TestAnchorMasking
);
172 TESTCASE(63,TestDisplayName
);
173 TESTCASE(64,TestSpecialCases
);
174 TESTCASE(65,TestIncrementalProgress
);
175 TESTCASE(66,TestSurrogateCasing
);
176 TESTCASE(67,TestFunction
);
177 TESTCASE(68,TestInvalidBackRef
);
178 TESTCASE(69,TestMulticharStringSet
);
179 TESTCASE(70,TestUserFunction
);
180 TESTCASE(71,TestAnyX
);
181 TESTCASE(72,TestSourceTargetSet
);
182 TESTCASE(73,TestGurmukhiDevanagari
);
183 TESTCASE(74,TestRuleWhitespace
);
184 TESTCASE(75,TestAllCodepoints
);
185 TESTCASE(76,TestBoilerplate
);
186 TESTCASE(77,TestAlternateSyntax
);
187 TESTCASE(78,TestBeginEnd
);
188 TESTCASE(79,TestBeginEndToRules
);
189 TESTCASE(80,TestRegisterAlias
);
190 TESTCASE(81,TestRuleStripping
);
191 TESTCASE(82,TestHalfwidthFullwidth
);
192 TESTCASE(83,TestThai
);
193 default: name
= ""; break;
197 static const UVersionInfo ICU_39
= {3,9,4,0};
199 * Make sure every system transliterator can be instantiated.
201 * ALSO test that the rules produced by toRules() for each
202 * transliterator are valid. Do this here so we don't have to have another test that
203 * instantiates everything as well.
205 void TransliteratorTest::TestInstantiation() {
206 UErrorCode ec
= U_ZERO_ERROR
;
207 StringEnumeration
* avail
= Transliterator::getAvailableIDs(ec
);
208 assertSuccess("getAvailableIDs()", ec
);
209 assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
);
210 int32_t n
= Transliterator::countAvailableIDs();
211 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
212 avail
->count(ec
) == n
);
213 assertSuccess("count()", ec
);
215 for (int32_t i
=0; i
<n
; ++i
) {
216 const UnicodeString
& id
= *avail
->snext(ec
);
217 if (!assertSuccess("snext()", ec
) ||
218 !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) {
221 UnicodeString id2
= Transliterator::getAvailableID(i
);
222 if (id
.length() < 1) {
223 errln(UnicodeString("FAIL: getAvailableID(") +
224 i
+ ") returned empty string");
228 errln(UnicodeString("FAIL: getAvailableID(") +
229 i
+ ") != getAvailableIDs().snext()");
232 UParseError parseError
;
233 UErrorCode status
= U_ZERO_ERROR
;
234 Transliterator
* t
= Transliterator::createInstance(id
,
235 UTRANS_FORWARD
, parseError
,status
);
237 Transliterator::getDisplayName(id
, name
);
239 errln(UnicodeString("FAIL: Couldn't create ") + id
+
240 /*", parse error " + parseError.code +*/
241 ", line " + parseError
.line
+
242 ", offset " + parseError
.offset
+
243 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
244 ", post-context " +prettify(parseError
.postContext
,TRUE
) +
245 ", Error: " + u_errorName(status
));
246 // When createInstance fails, it deletes the failing
247 // entry from the available ID list. We detect this
248 // here by looking for a change in countAvailableIDs.
249 int32_t nn
= Transliterator::countAvailableIDs();
252 --i
; // Compensate for deleted entry
255 logln(UnicodeString("OK: ") + name
+ " (" + id
+ ")");
259 t
->toRules(rules
, TRUE
);
260 Transliterator
*u
= Transliterator::createFromRules("x",
261 rules
, UTRANS_FORWARD
, parseError
,status
);
263 errln(UnicodeString("FAIL: ") + id
+
264 ".createFromRules() => bad rules" +
265 /*", parse error " + parseError.code +*/
266 ", line " + parseError
.line
+
267 ", offset " + parseError
.offset
+
268 ", context " + prettify(parseError
.preContext
, TRUE
) +
269 ", rules: " + prettify(rules
, TRUE
));
276 assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
);
277 assertSuccess("snext()", ec
);
280 // Now test the failure path
281 UParseError parseError
;
282 UErrorCode status
= U_ZERO_ERROR
;
283 UnicodeString
id("<Not a valid Transliterator ID>");
284 Transliterator
* t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
286 errln("FAIL: " + id
+ " returned a transliterator");
289 logln("OK: Bogus ID handled properly");
293 void TransliteratorTest::TestSimpleRules(void) {
294 /* Example: rules 1. ab>x|y
297 * []|eabcd start - no match, copy e to transliterated buffer
298 * [e]|abcd match rule 1 - copy output & adjust cursor
299 * [ex|y]cd match rule 2 - copy output & adjust cursor
300 * [exz]|d no match, copy d to transliterated buffer
303 expect(UnicodeString("ab>x|y;", "") +
307 /* Another set of rules:
319 expect(UnicodeString("ab>x|yzacw;") +
327 UErrorCode status
= U_ZERO_ERROR
;
328 UParseError parseError
;
329 Transliterator
*t
= Transliterator::createFromRules(
331 UnicodeString("$dummy=").append((UChar
)0xE100) +
333 "$vowel=[aeiouAEIOU];"
335 "$vowel } $lu > '!';"
340 UTRANS_FORWARD
, parseError
,
342 if (U_FAILURE(status
)) {
343 errln("FAIL: RBT constructor failed");
346 expect(*t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
351 * Test inline set syntax and set variable syntax.
353 void TransliteratorTest::TestInlineSet(void) {
354 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
355 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
357 expect(UnicodeString(
360 "$alphanumeric = [$digit $alpha];" // ***
361 "$special = [^$alphanumeric];" // ***
362 "$alphanumeric > '-';"
363 "$special > '*';", ""),
365 "thx-1138", "---*----");
369 * Create some inverses and confirm that they work. We have to be
370 * careful how we do this, since the inverses will not be true
371 * inverses -- we can't throw any random string at the composition
372 * of the transliterators and expect the identity function. F x
373 * F' != I. However, if we are careful about the input, we will
374 * get the expected results.
376 void TransliteratorTest::TestRuleBasedInverse(void) {
377 UnicodeString RULES
=
378 UnicodeString("abc>zyx;") +
396 const char* DATA
[] = {
397 // Careful here -- random strings will not work. If we keep
398 // the left side to the domain and the right side to the range
399 // we will be okay though (left, abc; right xyz).
401 "abcacab", "zyxxxyy",
405 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
407 UErrorCode status
= U_ZERO_ERROR
;
408 UParseError parseError
;
409 Transliterator
*fwd
= Transliterator::createFromRules("<ID>", RULES
,
410 UTRANS_FORWARD
, parseError
, status
);
411 Transliterator
*rev
= Transliterator::createFromRules("<ID>", RULES
,
412 UTRANS_REVERSE
, parseError
, status
);
413 if (U_FAILURE(status
)) {
414 errln("FAIL: RBT constructor failed");
417 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
418 expect(*fwd
, DATA
[i
], DATA
[i
+1]);
419 expect(*rev
, DATA
[i
+1], DATA
[i
]);
426 * Basic test of keyboard.
428 void TransliteratorTest::TestKeyboard(void) {
429 UParseError parseError
;
430 UErrorCode status
= U_ZERO_ERROR
;
431 Transliterator
*t
= Transliterator::createFromRules("<ID>",
432 UnicodeString("psch>Y;")
436 UTRANS_FORWARD
, parseError
,
438 if (U_FAILURE(status
)) {
439 errln("FAIL: RBT constructor failed");
442 const char* DATA
[] = {
450 0, "AycAY", // null means finishKeyboardTransliteration
453 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
458 * Basic test of keyboard with cursor.
460 void TransliteratorTest::TestKeyboard2(void) {
461 UParseError parseError
;
462 UErrorCode status
= U_ZERO_ERROR
;
463 Transliterator
*t
= Transliterator::createFromRules("<ID>",
464 UnicodeString("ych>Y;")
468 UTRANS_FORWARD
, parseError
,
470 if (U_FAILURE(status
)) {
471 errln("FAIL: RBT constructor failed");
474 const char* DATA
[] = {
478 "s", "Aps", // modified for rollback - "Ay",
479 "c", "Apsc", // modified for rollback - "Ayc",
482 "s", "AycAps", // modified for rollback - "AycAy",
483 "c", "AycApsc", // modified for rollback - "AycAyc",
485 0, "AycAY", // null means finishKeyboardTransliteration
488 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
493 * Test keyboard transliteration with back-replacement.
495 void TransliteratorTest::TestKeyboard3(void) {
496 // We want th>z but t>y. Furthermore, during keyboard
497 // transliteration we want t>y then yh>z if t, then h are
499 UnicodeString
RULES("t>|y;"
502 const char* DATA
[] = {
503 // Column 1: characters to add to buffer (as if typed)
504 // Column 2: expected appearance of buffer after
505 // keyboard transliteration.
508 "t", "abt", // modified for rollback - "aby",
510 "t", "abyct", // modified for rollback - "abycy",
512 0, "abycz", // null means finishKeyboardTransliteration
515 UParseError parseError
;
516 UErrorCode status
= U_ZERO_ERROR
;
517 Transliterator
*t
= Transliterator::createFromRules("<ID>", RULES
, UTRANS_FORWARD
, parseError
, status
);
518 if (U_FAILURE(status
)) {
519 errln("FAIL: RBT constructor failed");
522 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
526 void TransliteratorTest::keyboardAux(const Transliterator
& t
,
527 const char* DATA
[], int32_t DATA_length
) {
528 UErrorCode status
= U_ZERO_ERROR
;
529 UTransPosition index
={0, 0, 0, 0};
531 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
537 t
.transliterate(s
, index
, DATA
[i
], status
);
540 t
.finishTransliteration(s
, index
);
542 // Show the start index '{' and the cursor '|'
543 UnicodeString a
, b
, c
;
544 s
.extractBetween(0, index
.contextStart
, a
);
545 s
.extractBetween(index
.contextStart
, index
.start
, b
);
546 s
.extractBetween(index
.start
, s
.length(), c
);
548 append((UChar
)LEFT_BRACE
).
552 if (s
== DATA
[i
+1] && U_SUCCESS(status
)) {
555 errln(UnicodeString("FAIL: ") + log
+ ", expected " + DATA
[i
+1]);
560 void TransliteratorTest::TestArabic(void) {
561 // Test disabled for 2.0 until new Arabic transliterator can be written.
563 // const char* DATA[] = {
564 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
565 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
566 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
567 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
568 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
569 // "\u062c\u0645\u064a\u0644\u0629",
573 // UChar ar_raw[] = {
574 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
575 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
576 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
577 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
578 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
579 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
581 // UnicodeString ar(ar_raw);
582 // UErrorCode status=U_ZERO_ERROR;
583 // UParseError parseError;
584 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
586 // errln("FAIL: createInstance failed");
589 // expect(*t, "Arabic", ar);
594 * Compose the Kana transliterator forward and reverse and try
595 * some strings that should come out unchanged.
597 void TransliteratorTest::TestCompoundKana(void) {
598 UParseError parseError
;
599 UErrorCode status
= U_ZERO_ERROR
;
600 Transliterator
* t
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
);
602 errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
604 expect(*t
, "aaaaa", "aaaaa");
610 * Compose the hex transliterators forward and reverse.
612 void TransliteratorTest::TestCompoundHex(void) {
613 UParseError parseError
;
614 UErrorCode status
= U_ZERO_ERROR
;
615 Transliterator
* a
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
616 Transliterator
* b
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
);
617 Transliterator
* transab
[] = { a
, b
};
618 Transliterator
* transba
[] = { b
, a
};
619 if (a
== 0 || b
== 0) {
620 errln("FAIL: construction failed");
625 // Do some basic tests of a
626 expect(*a
, "01", UnicodeString("\\u0030\\u0031", ""));
627 // Do some basic tests of b
628 expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01");
630 Transliterator
* ab
= new CompoundTransliterator(transab
, 2);
631 UnicodeString
s("abcde", "");
634 UnicodeString
str(s
);
635 a
->transliterate(str
);
636 Transliterator
* ba
= new CompoundTransliterator(transba
, 2);
637 expect(*ba
, str
, str
);
645 int gTestFilterClassID
= 0;
647 * Used by TestFiltering().
649 class TestFilter
: public UnicodeFilter
{
650 virtual UnicodeFunctor
* clone() const {
651 return new TestFilter(*this);
653 virtual UBool
contains(UChar32 c
) const {
654 return c
!= (UChar
)0x0063 /*c*/;
657 virtual UnicodeString
& toPattern(UnicodeString
& result
,
658 UBool
/*escapeUnprintable*/) const {
661 virtual UBool
matchesIndexValue(uint8_t /*v*/) const {
664 virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {}
666 UClassID
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; }
670 * Do some basic tests of filtering.
672 void TransliteratorTest::TestFiltering(void) {
673 UParseError parseError
;
674 UErrorCode status
= U_ZERO_ERROR
;
675 Transliterator
* hex
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
677 errln("FAIL: createInstance(Any-Hex) failed");
680 hex
->adoptFilter(new TestFilter());
681 UnicodeString
s("abcde");
682 hex
->transliterate(s
);
683 UnicodeString
exp("\\u0061\\u0062c\\u0064\\u0065", "");
685 logln(UnicodeString("Ok: \"") + exp
+ "\"");
687 logln(UnicodeString("FAIL: \"") + s
+ "\", wanted \"" + exp
+ "\"");
690 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
691 UnicodeFilter
*f
= hex
->orphanFilter();
693 errln("FAIL: orphanFilter() should get a UnicodeFilter");
703 void TransliteratorTest::TestAnchors(void) {
704 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
707 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
710 expect(UnicodeString("^ab > 01 ;"
718 expect(UnicodeString("$s = [z$] ;"
725 "abzababbabxzabxabx",
730 * Test pattern quoting and escape mechanisms.
732 void TransliteratorTest::TestPatternQuoting(void) {
734 // Each item is <rules>, <input>, <expected output>
735 const UnicodeString DATA
[] = {
736 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
737 UnicodeString(UChar(0x4E01)),
741 for (int32_t i
=0; i
<3; i
+=3) {
742 logln(UnicodeString("Pattern: ") + prettify(DATA
[i
]));
743 UParseError parseError
;
744 UErrorCode status
= U_ZERO_ERROR
;
745 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
746 if (U_FAILURE(status
)) {
747 errln("RBT constructor failed");
749 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
756 * Regression test for bugs found in Greek transliteration.
758 void TransliteratorTest::TestJ277(void) {
759 UErrorCode status
= U_ZERO_ERROR
;
760 UParseError parseError
;
761 Transliterator
*gl
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
);
763 errln("FAIL: createInstance(Greek-Latin) returned NULL");
768 UChar upsilon
= 0x3C5;
770 // UChar PHI = 0x3A6;
772 // UChar omega = 0x3C9;
773 // UChar omicron = 0x3BF;
774 // UChar epsilon = 0x3B5;
776 // sigma upsilon nu -> syn
778 syn
.append(sigma
).append(upsilon
).append(nu
);
779 expect(*gl
, syn
, "syn");
781 // sigma alpha upsilon nu -> saun
783 sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
);
784 expect(*gl
, sayn
, "saun");
786 // Again, using a smaller rule set
791 "$ypsilon = \\u03C5;"
792 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
795 "u <> $vowel { $ypsilon;"
799 Transliterator
*mini
= Transliterator::createFromRules("mini", rules
, UTRANS_REVERSE
, parseError
, status
);
800 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
801 expect(*mini
, syn
, "syn");
802 expect(*mini
, sayn
, "saun");
806 #if !UCONFIG_NO_FORMATTING
807 // Transliterate the Greek locale data
809 DateFormatSymbols
syms(el
, status
);
810 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
812 const UnicodeString
* data
= syms
.getMonths(count
);
813 for (i
=0; i
<count
; ++i
) {
814 if (data
[i
].length() == 0) {
817 UnicodeString
out(data
[i
]);
818 gl
->transliterate(out
);
820 if (data
[i
].length() >= 2 && out
.length() >= 2 &&
821 u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) {
822 if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) {
827 logln(prettify(data
[i
] + " -> " + out
));
829 errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
));
838 * Prefix, suffix support in hex transliterators
840 void TransliteratorTest::TestJ243(void) {
841 UErrorCode ec
= U_ZERO_ERROR
;
843 // Test default Hex-Any, which should handle
844 // \u, \U, u+, and U+
845 Transliterator
*hex
=
846 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
);
847 if (assertSuccess("getInstance", ec
)) {
848 expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
852 // // Try a custom Hex-Unicode
853 // // \uXXXX and &#xXXXX;
854 // ec = U_ZERO_ERROR;
855 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
856 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
857 // "abcd5fx0123");
858 // // Try custom Any-Hex (default is tested elsewhere)
859 // ec = U_ZERO_ERROR;
860 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
861 // expect(hex3, "012", "012");
865 * Parsers need better syntax error messages.
867 void TransliteratorTest::TestJ329(void) {
869 struct { UBool containsErrors
; const char* rule
; } DATA
[] = {
870 { FALSE
, "a > b; c > d" },
871 { TRUE
, "a > b; no operator; c > d" },
873 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
875 for (int32_t i
=0; i
<DATA_length
; ++i
) {
876 UErrorCode status
= U_ZERO_ERROR
;
877 UParseError parseError
;
878 Transliterator
*rbt
= Transliterator::createFromRules("<ID>",
883 UBool gotError
= U_FAILURE(status
);
884 UnicodeString
desc(DATA
[i
].rule
);
885 desc
.append(gotError
? " -> error" : " -> no error");
887 desc
= desc
+ ", ParseError code=" + u_errorName(status
) +
888 " line=" + parseError
.line
+
889 " offset=" + parseError
.offset
+
890 " context=" + parseError
.preContext
;
892 if (gotError
== DATA
[i
].containsErrors
) {
893 logln(UnicodeString("Ok: ") + desc
);
895 errln(UnicodeString("FAIL: ") + desc
);
902 * Test segments and segment references.
904 void TransliteratorTest::TestSegments(void) {
906 // Each item is <rules>, <input>, <expected output>
907 UnicodeString DATA
[] = {
908 "([a-z]) '.' ([0-9]) > $2 '-' $1",
913 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
917 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
919 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
920 logln("Pattern: " + prettify(DATA
[i
]));
921 UParseError parseError
;
922 UErrorCode status
= U_ZERO_ERROR
;
923 Transliterator
*t
= Transliterator::createFromRules("ID", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
924 if (U_FAILURE(status
)) {
925 errln("FAIL: RBT constructor");
927 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
934 * Test cursor positioning outside of the key
936 void TransliteratorTest::TestCursorOffset(void) {
938 // Each item is <rules>, <input>, <expected output>
939 UnicodeString DATA
[] = {
940 "pre {alpha} post > | @ ALPHA ;"
942 "pre {beta} post > BETA @@ | ;"
945 "prealphapost prebetapost",
947 "prbetaxyz preBETApost",
949 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
951 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
952 logln("Pattern: " + prettify(DATA
[i
]));
953 UParseError parseError
;
954 UErrorCode status
= U_ZERO_ERROR
;
955 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
956 if (U_FAILURE(status
)) {
957 errln("FAIL: RBT constructor");
959 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
966 * Test zero length and > 1 char length variable values. Test
967 * use of variable refs in UnicodeSets.
969 void TransliteratorTest::TestArbitraryVariableValues(void) {
971 // Each item is <rules>, <input>, <expected output>
972 UnicodeString DATA
[] = {
990 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
992 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
993 logln("Pattern: " + prettify(DATA
[i
]));
994 UParseError parseError
;
995 UErrorCode status
= U_ZERO_ERROR
;
996 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
997 if (U_FAILURE(status
)) {
998 errln("FAIL: RBT constructor");
1000 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
1007 * Confirm that the contextStart, contextLimit, start, and limit
1008 * behave correctly. J474.
1010 void TransliteratorTest::TestPositionHandling(void) {
1011 // Array of 3n items
1012 // Each item is <rules>, <input>, <expected output>
1013 const char* DATA
[] = {
1014 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1015 "xtat txtb", // pos 0,9,0,9
1018 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1019 "xtat txtb", // pos 2,9,3,8
1022 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1023 "xtat txtb", // pos 3,8,3,8
1027 // Array of 4n positions -- these go with the DATA array
1028 // They are: contextStart, contextLimit, start, limit
1035 int32_t n
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0])) / 3;
1036 for (int32_t i
=0; i
<n
; i
++) {
1037 UErrorCode status
= U_ZERO_ERROR
;
1038 UParseError parseError
;
1039 Transliterator
*t
= Transliterator::createFromRules("<ID>",
1040 DATA
[3*i
], UTRANS_FORWARD
, parseError
, status
);
1041 if (U_FAILURE(status
)) {
1043 errln("FAIL: RBT constructor");
1047 pos
.contextStart
= POS
[4*i
];
1048 pos
.contextLimit
= POS
[4*i
+1];
1049 pos
.start
= POS
[4*i
+2];
1050 pos
.limit
= POS
[4*i
+3];
1051 UnicodeString
rsource(DATA
[3*i
+1]);
1052 t
->transliterate(rsource
, pos
, status
);
1053 if (U_FAILURE(status
)) {
1055 errln("FAIL: transliterate");
1058 t
->finishTransliteration(rsource
, pos
);
1059 expectAux(DATA
[3*i
],
1068 * Test the Hiragana-Katakana transliterator.
1070 void TransliteratorTest::TestHiraganaKatakana(void) {
1071 UParseError parseError
;
1072 UErrorCode status
= U_ZERO_ERROR
;
1073 Transliterator
* hk
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
);
1074 Transliterator
* kh
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
);
1075 if (hk
== 0 || kh
== 0) {
1076 errln("FAIL: createInstance failed");
1082 // Array of 3n items
1083 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1084 const char* DATA
[] = {
1086 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1087 "\\u30A2\\u30F8\\u30F2\\u30B0",
1090 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1091 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1093 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1095 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1096 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
1097 UnicodeString k
= CharsToUnicodeString(DATA
[i
+2]);
1099 case 0x68: //'h': // Hiragana-Katakana
1102 case 0x6B: //'k': // Katakana-Hiragana
1105 case 0x62: //'b': // both
1116 * Test cloning / copy constructor of RBT.
1118 void TransliteratorTest::TestCopyJ476(void) {
1119 // The real test here is what happens when the destructors are
1120 // called. So we let one object get destructed, and check to
1121 // see that its copy still works.
1122 Transliterator
*t2
= 0;
1124 UParseError parseError
;
1125 UErrorCode status
= U_ZERO_ERROR
;
1126 Transliterator
*t1
= Transliterator::createFromRules("t1",
1127 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD
, parseError
, status
);
1128 if (U_FAILURE(status
)) {
1129 errln("FAIL: RBT constructor");
1132 t2
= t1
->clone(); // Call copy constructor under the covers.
1133 expect(*t1
, "abcfoofoo", "ABcbar");
1136 expect(*t2
, "abcfoofoo", "ABcbar");
1141 * Test inter-Indic transliterators. These are composed.
1142 * ICU4C Jitterbug 483.
1144 void TransliteratorTest::TestInterIndic(void) {
1145 UnicodeString
ID("Devanagari-Gujarati", "");
1146 UErrorCode status
= U_ZERO_ERROR
;
1147 UParseError parseError
;
1148 Transliterator
* dg
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1150 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1153 UnicodeString id
= dg
->getID();
1155 errln("FAIL: createInstance(" + ID
+ ")->getID() => " + id
);
1157 UnicodeString dev
= CharsToUnicodeString("\\u0901\\u090B\\u0925");
1158 UnicodeString guj
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1159 expect(*dg
, dev
, guj
);
1164 * Test filter syntax in IDs. (J918)
1166 void TransliteratorTest::TestFilterIDs(void) {
1167 // Array of 4n strings:
1168 // <id>, <inverse id>, <input>, <expected output>
1169 const char* DATA
[] = {
1170 "[aeiou]Any-Hex", // ID
1171 "[aeiou]Hex-Any", // expected inverse ID
1173 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1175 "[aeiou]Any-Hex;[^5]Hex-Any",
1176 "[^5]Any-Hex;[aeiou]Hex-Any",
1185 enum { DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]) };
1187 for (int i
=0; i
<DATA_length
; i
+=4) {
1188 UnicodeString
ID(DATA
[i
], "");
1189 UnicodeString
uID(DATA
[i
+1], "");
1190 UnicodeString
data2(DATA
[i
+2], "");
1191 UnicodeString
data3(DATA
[i
+3], "");
1192 UParseError parseError
;
1193 UErrorCode status
= U_ZERO_ERROR
;
1194 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1196 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1199 expect(*t
, data2
, data3
);
1202 if (ID
!= t
->getID()) {
1203 errln("FAIL: createInstance(" + ID
+ ").getID() => " +
1207 // Check the inverse
1208 Transliterator
*u
= t
->createInverse(status
);
1210 errln("FAIL: " + ID
+ ".createInverse() returned NULL");
1211 } else if (u
->getID() != uID
) {
1212 errln("FAIL: " + ID
+ ".createInverse().getID() => " +
1213 u
->getID() + ", expected " + uID
);
1222 * Test the case mapping transliterators.
1224 void TransliteratorTest::TestCaseMap(void) {
1225 UParseError parseError
;
1226 UErrorCode status
= U_ZERO_ERROR
;
1227 Transliterator
* toUpper
=
1228 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1229 Transliterator
* toLower
=
1230 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1231 Transliterator
* toTitle
=
1232 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1233 if (toUpper
==0 || toLower
==0 || toTitle
==0) {
1234 errln("FAIL: createInstance returned NULL");
1241 expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.",
1242 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1243 expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1244 "the quick brown foX jumped over the lazY dogs.");
1245 expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.",
1246 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1254 * Test the name mapping transliterators.
1256 void TransliteratorTest::TestNameMap(void) {
1257 UParseError parseError
;
1258 UErrorCode status
= U_ZERO_ERROR
;
1259 Transliterator
* uni2name
=
1260 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
);
1261 Transliterator
* name2uni
=
1262 Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
);
1263 if (uni2name
==0 || name2uni
==0) {
1264 errln("FAIL: createInstance returned NULL");
1270 // Careful: CharsToUnicodeString will convert "\\N" => "N"; use "\\\\N" for \N
1271 expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1272 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1273 expect(*name2uni
, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1274 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1281 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
);
1283 errln("FAIL: createInstance returned NULL");
1288 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1289 UnicodeString s
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1295 * Test liberalized ID syntax. 1006c
1297 void TransliteratorTest::TestLiberalizedID(void) {
// Table-driven check that createInstance() accepts loosely written IDs
// (mixed case, surrounding whitespace, inline filters, compound IDs).
1298 // Some test cases have an expected getID() value of NULL. This
1299 // means I have disabled the test case for now. This stuff is
1300 // still under development, and I haven't decided whether to make
1301 // getID() return canonical case yet. It will all get rewritten
1302 // with the move to Source-Target/Variant IDs anyway. [aliu]
// DATA is read in triples: [i] ID passed to createInstance, [i+1] expected
// getID() (or NULL), [i+2] description used in log/error messages.
1303 const char* DATA
[] = {
1304 "latin-greek", NULL
/*"Latin-Greek"*/, "case insensitivity",
1305 " Null ", "Null", "whitespace",
1306 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1307 " null ; latin-greek ", NULL
/*"Null;Latin-Greek"*/, "compound whitespace",
1309 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
1310 UParseError parseError
;
1311 UErrorCode status
= U_ZERO_ERROR
;
// Step by 3 so that i always addresses the first column of a row.
1312 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1313 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1315 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1316 " cannot create ID \"" + DATA
[i
] + "\"");
1320 exp
= UnicodeString(DATA
[i
+1], "");
1322 // Don't worry about getID() if the expected char*
1323 // is NULL -- see above.
// An empty exp therefore counts as a pass regardless of what getID() returns.
1324 if (exp
.length() == 0 || exp
== t
->getID()) {
1325 logln(UnicodeString("Ok: ") + DATA
[i
+2] +
1326 " create ID \"" + DATA
[i
] + "\" => \"" +
1329 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1330 " create ID \"" + DATA
[i
] + "\" => \"" +
1331 t
->getID() + "\", exp \"" + exp
+ "\"");
1338 /* test for Jitterbug 912 */
1339 void TransliteratorTest::TestCreateInstance(){
// Table-driven check of createInstance(id, direction): each row gives an ID,
// a direction marker and the ID expected from the created object, or "" when
// creation is expected to fail.
// FORWARD / REVERSE are used as markers: rows are matched against them by
// pointer comparison (DATA[i+1]==FORWARD), not by string content.
1340 const char* FORWARD
= "F";
1341 const char* REVERSE
= "R";
1342 const char* DATA
[] = {
1344 // Column 2: direction
1345 // Column 3: expected ID, or "" if expect failure
1346 "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912
1348 // JB#2689: bad compound causes crash
1349 "InvalidSource-InvalidTarget", FORWARD
, "",
1350 "InvalidSource-InvalidTarget", REVERSE
, "",
1351 "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "",
1352 "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "",
1353 "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "",
1354 "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "",
// The loop stops at the first null entry in column 1 and advances one row
// (three entries) at a time.
1359 for (int32_t i
=0; DATA
[i
]; i
+=3) {
1361 UErrorCode ec
= U_ZERO_ERROR
;
1362 UnicodeString
id(DATA
[i
]);
1363 UTransDirection dir
= (DATA
[i
+1]==FORWARD
)?
1364 UTRANS_FORWARD
:UTRANS_REVERSE
;
1365 UnicodeString
expID(DATA
[i
+2]);
1367 Transliterator::createInstance(id
,dir
,err
,ec
);
1368 UnicodeString newID
;
// The row passes when the resulting ID string equals the expected one.
1372 UBool ok
= (newID
== expID
);
// newID is overwritten with the error name so the message below shows the
// error code.
1374 newID
= u_errorName(ec
);
1377 logln((UnicodeString
)"Ok: createInstance(" +
1378 id
+ "," + DATA
[i
+1] + ") => " + newID
);
1380 errln((UnicodeString
)"FAIL: createInstance(" +
1381 id
+ "," + DATA
[i
+1] + ") => " + newID
+
1382 ", expected " + expID
);
1389 * Test the normalization transliterator.
1391 void TransliteratorTest::TestNormalizationTransliterator() {
// Runs the NFD/NFC and NFKD/NFKC transliterators over two tables of
// (input, decomposed, composed) triples, then checks a compound
// "NFD; [x]Remove" transliterator.
1392 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1393 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
// CANON: column 2 is the expected NFD result, column 3 the expected NFC result.
1394 const char* CANON
[] = {
1395 // Input Decomposed Composed
1396 "cat", "cat", "cat" ,
1397 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1399 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1400 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1402 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1403 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1404 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1406 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1407 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1409 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1410 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1411 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1413 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1414 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1416 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1417 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1419 "Henry IV", "Henry IV", "Henry IV" ,
1420 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1422 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1423 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1424 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1425 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1426 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1428 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
// COMPAT: column 2 is the expected NFKD result, column 3 the expected NFKC result.
1432 const char* COMPAT
[] = {
1433 // Input Decomposed Composed
1434 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1436 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1437 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1439 "Henry IV", "Henry IV", "Henry IV" ,
1440 "Henry \\u2163", "Henry IV", "Henry IV" ,
1442 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1443 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1445 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1450 UParseError parseError
;
1451 UErrorCode status
= U_ZERO_ERROR
;
1452 Transliterator
* NFD
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
);
1453 Transliterator
* NFC
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
);
1455 errln("FAIL: createInstance failed");
// Canonical pass: rows are walked three entries at a time until a null entry.
1460 for (i
=0; CANON
[i
]; i
+=3) {
1461 UnicodeString in
= CharsToUnicodeString(CANON
[i
]);
1462 UnicodeString expd
= CharsToUnicodeString(CANON
[i
+1]);
1463 UnicodeString expc
= CharsToUnicodeString(CANON
[i
+2]);
1464 expect(*NFD
, in
, expd
);
1465 expect(*NFC
, in
, expc
);
1470 Transliterator
* NFKD
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
);
1471 Transliterator
* NFKC
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
);
1472 if (!NFKD
|| !NFKC
) {
1473 errln("FAIL: createInstance failed");
// Compatibility pass: same shape as the canonical loop, against COMPAT.
1478 for (i
=0; COMPAT
[i
]; i
+=3) {
1479 UnicodeString in
= CharsToUnicodeString(COMPAT
[i
]);
1480 UnicodeString expkd
= CharsToUnicodeString(COMPAT
[i
+1]);
1481 UnicodeString expkc
= CharsToUnicodeString(COMPAT
[i
+2]);
1482 expect(*NFKD
, in
, expkd
);
1483 expect(*NFKC
, in
, expkc
);
// status is reset before the compound transliterator is created.
1489 status
= U_ZERO_ERROR
;
1490 Transliterator
*t
= Transliterator::createInstance("NFD; [x]Remove",
1494 errln("FAIL: createInstance failed");
// Expected: U+010D is decomposed to c + U+030C and the 'x' is removed.
1496 expect(*t
, CharsToUnicodeString("\\u010dx"),
1497 CharsToUnicodeString("c\\u030C"));
1502 * Test compound RBT rules.
1504 void TransliteratorTest::TestCompoundRBT(void) {
// Covers compound rule-based transliterators: creation from "::ID" rules,
// toRules() output, re-creating a transliterator from that output, and
// Foo(Bar) style IDs together with their inverse.
1505 // Careful with spacing and ';' here: Phrase this exactly
1506 // as toRules() is going to return it. If toRules() changes
1507 // with regard to spacing or ';', then adjust this string.
1508 UnicodeString
rule("::Hex-Any;\n"
1512 "::[^t]Any-Upper;", "");
1513 UParseError parseError
;
1514 UErrorCode status
= U_ZERO_ERROR
;
1515 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
);
1517 errln("FAIL: createFromRules failed");
// The input is written with the escape \u0043 for its first letter, and every
// 't' stays lowercase in the expected output.
1520 expect(*t
, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1521 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
// toRules() with escaping enabled; the result r is reported against rule.
1523 t
->toRules(r
, TRUE
);
1525 logln((UnicodeString
)"OK: toRules() => " + r
);
1527 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1528 ", expected " + rule
);
// Second scenario: a compound created by ID, reported against the "::ID;"
// form in exp.
1533 t
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
);
1535 errln("FAIL: createInstance failed");
1538 UnicodeString
exp("::Greek-Latin;\n::Latin-Cyrillic;");
1539 t
->toRules(r
, TRUE
);
1541 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1542 ", expected " + exp
);
1544 logln((UnicodeString
)"OK: toRules() => " + r
);
1548 // Round trip the result of toRules
1549 t
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
);
1551 errln("FAIL: createFromRules #2 failed");
1554 logln((UnicodeString
)"OK: createFromRules(" + r
+ ") succeeded");
1557 // Test toRules again
1558 t
->toRules(r
, TRUE
);
1560 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1561 ", expected " + exp
);
1563 logln((UnicodeString
)"OK: toRules() => " + r
);
1568 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1569 // to what the regenerated ID will look like.
1570 UnicodeString
id("Upper(Lower);(NFKC)", "");
1571 t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
1573 errln("FAIL: createInstance #2 failed");
1576 if (t
->getID() == id
) {
1577 logln((UnicodeString
)"OK: created " + id
);
1579 errln((UnicodeString
)"FAIL: createInstance(" + id
+
1580 ").getID() => " + t
->getID());
// The inverse of "Upper(Lower);(NFKC)" is expected to have the ID
// "NFKC();Lower(Upper)".
1583 Transliterator
*u
= t
->createInverse(status
);
1585 errln("FAIL: createInverse failed");
1589 exp
= "NFKC();Lower(Upper)";
1590 if (u
->getID() == exp
) {
1591 logln((UnicodeString
)"OK: createInverse(" + id
+ ") => " +
1594 errln((UnicodeString
)"FAIL: createInverse(" + id
+ ") => " +
1602 * Compound filter semantics were originally not implemented
1603 * correctly. Originally, each component filter f(i) is replaced by
1604 * f'(i) = f(i) && g, where g is the filter for the compound
1609 * Suppose I have a transliterator X. Internally X is
1610 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1612 * The compound should convert all greek characters (through latin) to
1613 * cyrillic, then lowercase the result. The filter should say "don't
1614 * touch 'A' in the original". But because an intermediate result
1615 * happens to go through "A", the Greek Alpha gets hung up.
1617 void TransliteratorTest::TestCompoundFilter(void) {
// Creates the compound "Greek-Latin; Latin-Greek; Lower", installs the filter
// [^A] on it, and checks the result on a string containing a Latin 'A'.
1618 UParseError parseError
;
1619 UErrorCode status
= U_ZERO_ERROR
;
1620 Transliterator
*t
= Transliterator::createInstance
1621 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
);
1623 errln("FAIL: createInstance failed");
// The newly allocated UnicodeSet is handed to adoptFilter(); status reports
// whether constructing the set succeeded.
1626 t
->adoptFilter(new UnicodeSet("[^A]", status
));
1627 if (U_FAILURE(status
)) {
1628 errln("FAIL: UnicodeSet ct failed");
1633 // Only the 'A' at index 1 should remain unchanged
1635 CharsToUnicodeString("BA\\u039A\\u0391"),
1636 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1640 void TransliteratorTest::TestRemove(void) {
// Checks the "Remove" transliterator with the inline filter [abc], both
// directly and through a clone.
1641 UParseError parseError
;
1642 UErrorCode status
= U_ZERO_ERROR
;
1643 Transliterator
*t
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
);
1645 errln("FAIL: createInstance failed");
// Lowercase a, b and c are dropped from the expected string; the uppercase
// 'A' is kept.
1649 expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts");
1651 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1652 // duplicating the filter
1653 Transliterator
* t2
= t
->clone();
1654 expect(*t2
, "Able bodied baker's cats", "Ale odied ker's ts");
1660 void TransliteratorTest::TestToRules(void) {
// Table-driven test of Transliterator::toRules() and UnicodeSet::toPattern().
// DATA is read in triples: [d] the row kind (compared by pointer against RBT),
// [d+1] the input rules or set pattern, [d+2] the expected output.
1661 const char* RBT
= "rbt";
1662 const char* SET
= "set";
1663 static const char* DATA
[] = {
1665 "$a=\\u4E61; [$a] > A;",
1669 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1670 "[[:Zs:][:Zl:]]{a} > A;",
1697 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1698 "[^[:Zs:]]{a} > A;",
1701 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1702 "[[a-z]-[:Zs:]]{a} > A;",
1705 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1706 "[[:Zs:]&[a-z]]{a} > A;",
1709 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1710 "[x[:Zs:]]{a} > A;",
1713 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1714 "$macron = \\u0304 ;"
1715 "$evowel = [aeiouyAEIOUY] ;"
1716 "$iotasub = \\u0345 ;"
1717 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1718 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1721 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1722 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1724 static const int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1726 for (int32_t d
=0; d
< DATA_length
; d
+=3) {
1727 if (DATA
[d
] == RBT
) {
1728 // Transliterator test
1729 UParseError parseError
;
1730 UErrorCode status
= U_ZERO_ERROR
;
1731 Transliterator
*t
= Transliterator::createFromRules("ID",
1732 UnicodeString(DATA
[d
+1], -1, US_INV
), UTRANS_FORWARD
, parseError
, status
);
1734 errln("FAIL: createFromRules failed");
// toRules() is called twice: unescaped (FALSE) and escaped (TRUE). The
// unescaped form is compared with the table entry after CharsToUnicodeString()
// has processed its escapes; the escaped form is compared with the raw entry.
1737 UnicodeString rules
, escapedRules
;
1738 t
->toRules(rules
, FALSE
);
1739 t
->toRules(escapedRules
, TRUE
);
1740 UnicodeString expRules
= CharsToUnicodeString(DATA
[d
+2]);
1741 UnicodeString
expEscapedRules(DATA
[d
+2], -1, US_INV
);
1742 if (rules
== expRules
) {
1743 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1746 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1747 " => " + rules
+ ", exp " + expRules
);
1749 if (escapedRules
== expEscapedRules
) {
1750 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1751 " => " + escapedRules
);
1753 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1754 " => " + escapedRules
+ ", exp " + expEscapedRules
);
// UnicodeSet rows: build a set from the pattern in [d+1] and compare its
// toPattern() output with [d+2].
1760 UErrorCode status
= U_ZERO_ERROR
;
1761 UnicodeString
pat(DATA
[d
+1], -1, US_INV
);
1762 UnicodeString
expToPat(DATA
[d
+2], -1, US_INV
);
1763 UnicodeSet
set(pat
, status
);
1764 if (U_FAILURE(status
)) {
1765 errln("FAIL: UnicodeSet ct failed");
1768 // Adjust spacing etc. as necessary.
1769 UnicodeString toPat
;
1770 set
.toPattern(toPat
);
1771 if (expToPat
== toPat
) {
1772 logln((UnicodeString
)"Ok: " + pat
+
// Report the value that was actually compared (expToPat). Printing the input
// pattern here would mislabel it as the expected result.
1775 errln((UnicodeString
)"FAIL: " + pat
+
1776 " => " + prettify(toPat
, TRUE
) +
1777 ", exp " + prettify(expToPat
, TRUE
));
1783 void TransliteratorTest::TestContext() {
// Rule tests that use context markers ({...}).
// The trailing "cs cl s l" note labels the four initializer values of pos.
1784 UTransPosition pos
= {0, 2, 0, 1}; // cs cl s l
1785 expect("de > x; {d}e > y;",
1790 expect("ab{c} > z;",
1795 void TransliteratorTest::TestSupplemental() {
// Checks that rules, sets and the built-in Any-Hex / Remove transliterators
// handle supplementary code points (above U+FFFF).
1797 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1799 CharsToUnicodeString("ab\\U0001030Fx"),
1800 CharsToUnicodeString("\\U00010300bix"));
// Segment references ($1, $2) whose sets contain supplementary ranges.
1802 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1803 "$b=[A-Z\\U00010400-\\U0001044D];"
1804 "($a)($b) > $2 $1;"),
1805 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1806 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
// The trace below shows the text after successive steps of the next rule set;
// '|' marks the cursor.
1808 // k|ax\\U00010300xm
1810 // k|a\\U00010400\\U00010300xm
1811 // ky|\\U00010400\\U00010300xm
1812 // ky\\U00010400|\\U00010300xm
1814 // ky\\U00010400|\\U00010300\\U00010400m
1815 // ky\\U00010400y|\\U00010400m
1816 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1817 "$a {x} > | @ \\U00010400;"
1818 "{$a} [^\\u0000-\\uFFFF] > y;"),
1819 CharsToUnicodeString("kax\\U00010300xm"),
1820 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1823 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1824 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
// Any-Hex variants: the same four code points rendered in each escape syntax.
1826 expectT("Any-Hex/Unicode",
1827 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1828 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1830 expectT("Any-Hex/C",
1831 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1832 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1834 expectT("Any-Hex/Perl",
1835 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1838 expectT("Any-Hex/Java",
1839 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
// The XML variants produce numeric character references, so the expected text
// is the literal reference string (hexadecimal for XML, decimal for XML10),
// not the characters those references denote.
1842 expectT("Any-Hex/XML",
1843 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844 "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1846 expectT("Any-Hex/XML10",
1847 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848 "&#66352;&#1113856;&#917601;&#160;");
// Remove restricted to U+E0000..U+E0FFF: only the tag character U+E0061 is
// missing from the expected output.
1850 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1851 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1855 void TransliteratorTest::TestQuantifier() {
// Rule tests for the quantifiers +, * and ?, including how they interact with
// cursor offsets (@), segments and the start/limit values in a UTransPosition.
1857 // Make sure @ in a quantified anteContext works
1858 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1862 // Make sure @ in a quantified postContext works
1863 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1867 // Make sure @ in a quantified postContext with seg ref works
1868 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1872 // Make sure @ past ante context doesn't enter ante context
1873 UTransPosition pos
= {0, 5, 3, 5};
1874 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1879 // Make sure @ past post context doesn't pass limit
1880 UTransPosition pos2
= {0, 4, 0, 2};
1881 expect("{b} a+ > c @@ |; x > y; a > A;",
1886 // Make sure @ past post context doesn't enter post context
1887 expect("{b} a+ > c @@ |; x > y; a > A;",
1891 expect("(ab)? c > d;",
1895 // NOTE: The (ab)+ when referenced just yields a single "ab",
1896 // not the full sequence of them. This accords with perl behavior.
1897 expect("(ab)+ {x} > '(' $1 ')';",
1899 "x ab(ab) abab(ab)y");
1902 "ac abc abbc abbbc",
1905 expect("[abc]+ > x;",
1906 "qac abrc abbcs abtbbc",
1909 expect("q{(ab)+} > x;",
1910 "qa qab qaba qababc qaba",
1911 "qa qx qxa qxc qxa");
1913 expect("q(ab)* > x;",
1914 "qa qab qaba qababc",
1917 // NOTE: The (ab)* when referenced just yields a single "ab" (or nothing
1918 // when it matched zero times), not the full sequence of them. This accords with perl behavior.
1919 expect("q(ab)* > '(' $1 ')';",
1920 "qa qab qaba qababc",
1921 "()a (ab) (ab)a (ab)c");
1923 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1925 expect("'ab'+ > x;",
1929 // $foo+ and $foo* -- the quantifier should apply to the entire
1930 // variable reference
1931 expect("$var = ab; $var+ > x;",
1936 class TestTrans
: public Transliterator
{
// Minimal Transliterator subclass, used by TestSTV below as the object that
// is registered and unregistered.
// The constructor forwards the ID to the base class and passes 0 as the
// second argument.
1938 TestTrans(const UnicodeString
& id
) : Transliterator(id
, 0) {
// clone() returns a new TestTrans carrying the same ID.
1940 virtual Transliterator
* clone(void) const {
1941 return new TestTrans(getID());
// The text parameter is unnamed and unused; the only visible effect is to
// move offsets.start up to offsets.limit.
1943 virtual void handleTransliterate(Replaceable
& /*text*/, UTransPosition
& offsets
,
1944 UBool
/*isIncremental*/) const
1946 offsets
.start
= offsets
.limit
;
1948 virtual UClassID
getDynamicClassID() const;
1949 static UClassID U_EXPORT2
getStaticClassID();
// NOTE(review): this macro presumably supplies the definitions of the two
// class-ID functions declared above -- confirm against the ICU headers.
1951 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans
)
1954 * Test Source-Target/Variant.
1956 void TransliteratorTest::TestSTV(void) {
// Part 1: enumerates the available sources, targets and variants and checks
// each count and name. Part 2: registers TestTrans instances under new IDs,
// then unregisters them and checks that the listing APIs no longer report them.
// Counts outside 0..255 are reported as failures at each level.
1957 int32_t ns
= Transliterator::countAvailableSources();
1958 if (ns
< 0 || ns
> 255) {
1959 errln((UnicodeString
)"FAIL: Bad source count: " + ns
);
1963 for (i
=0; i
<ns
; ++i
) {
1964 UnicodeString source
;
1965 Transliterator::getAvailableSource(i
, source
);
1966 logln((UnicodeString
)"" + i
+ ": " + source
);
1967 if (source
.length() == 0) {
1968 errln("FAIL: empty source");
1971 int32_t nt
= Transliterator::countAvailableTargets(source
);
1972 if (nt
< 0 || nt
> 255) {
1973 errln((UnicodeString
)"FAIL: Bad target count: " + nt
);
1976 for (int32_t j
=0; j
<nt
; ++j
) {
1977 UnicodeString target
;
1978 Transliterator::getAvailableTarget(j
, source
, target
);
1979 logln((UnicodeString
)" " + j
+ ": " + target
);
1980 if (target
.length() == 0) {
1981 errln("FAIL: empty target");
1984 int32_t nv
= Transliterator::countAvailableVariants(source
, target
);
1985 if (nv
< 0 || nv
> 255) {
1986 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
);
// An empty variant name is only logged as "<empty>", not reported as an
// error, unlike an empty source or target.
1989 for (int32_t k
=0; k
<nv
; ++k
) {
1990 UnicodeString variant
;
1991 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
1992 if (variant
.length() == 0) {
1993 logln((UnicodeString
)" " + k
+ ": <empty>");
1995 logln((UnicodeString
)" " + k
+ ": " + variant
);
2001 // Test registration
// IDS holds the IDs as registered; FULL_IDS holds the strings compared with
// getAvailableID() below; SOURCES holds the source names compared with
// getAvailableSource() (a NULL entry is skipped).
2002 const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2003 const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2004 const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" };
2005 for (i
=0; i
<3; ++i
) {
2006 Transliterator
*t
= new TestTrans(IDS
[i
]);
2008 errln("FAIL: out of memory");
2011 if (t
->getID() != IDS
[i
]) {
2012 errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]);
// Register the instance, then create by the same ID.
2016 Transliterator::registerInstance(t
);
2017 UErrorCode status
= U_ZERO_ERROR
;
2018 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2020 errln((UnicodeString
)"FAIL: Registration/creation failed for ID " +
2023 logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " +
// Unregister the ID, then try to create it again.
2027 Transliterator::unregister(IDS
[i
]);
2028 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2030 errln((UnicodeString
)"FAIL: Unregistration failed for ID " +
2036 // Make sure getAvailable API reflects removal
2037 int32_t n
= Transliterator::countAvailableIDs();
2038 for (i
=0; i
<n
; ++i
) {
2039 UnicodeString id
= Transliterator::getAvailableID(i
);
2040 for (j
=0; j
<3; ++j
) {
2041 if (id
.caseCompare(FULL_IDS
[j
],0)==0) {
2042 errln((UnicodeString
)"FAIL: unregister(" + id
+ ") failed");
// The first test ID ("Fieruwer") must not be listed as a target of "Any".
2046 n
= Transliterator::countAvailableTargets("Any");
2047 for (i
=0; i
<n
; ++i
) {
2049 Transliterator::getAvailableTarget(i
, "Any", t
);
2050 if (t
.caseCompare(IDS
[0],0)==0) {
2051 errln((UnicodeString
)"FAIL: unregister(Any-" + t
+ ") failed");
// None of the non-NULL SOURCES entries may be listed as an available source.
2054 n
= Transliterator::countAvailableSources();
2055 for (i
=0; i
<n
; ++i
) {
2057 Transliterator::getAvailableSource(i
, s
);
2058 for (j
=0; j
<3; ++j
) {
2059 if (SOURCES
[j
] == NULL
) continue;
2060 if (s
.caseCompare(SOURCES
[j
],0)==0) {
2061 errln((UnicodeString
)"FAIL: unregister(" + s
+ "-*) failed");
2068 * Test inverse of Greek-Latin; Title()
2070 void TransliteratorTest::TestCompoundInverse(void) {
// Creates "Greek-Latin; Title()" in the REVERSE direction and checks that the
// resulting ID is "(Title);Latin-Greek".
2071 UParseError parseError
;
2072 UErrorCode status
= U_ZERO_ERROR
;
2073 Transliterator
*t
= Transliterator::createInstance
2074 ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
);
2076 errln("FAIL: createInstance");
2079 UnicodeString
exp("(Title);Latin-Greek");
2080 if (t
->getID() == exp
) {
2081 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2084 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2085 t
->getID() + "\", expected \"" + exp
+ "\"");
2091 * Test NFD chaining with RBT
2093 void TransliteratorTest::TestNFDChainRBT() {
// Builds a rule set that starts with "::NFD;" followed by two conversion
// rules, and checks that "aa" becomes "Q".
2095 UErrorCode ec
= U_ZERO_ERROR
;
2096 Transliterator
* t
= Transliterator::createFromRules(
2097 "TEST", "::NFD; aa > Q; a > q;",
2098 UTRANS_FORWARD
, pe
, ec
);
// Either a null result or a failing error code counts as a creation failure.
2099 if (t
== NULL
|| U_FAILURE(ec
)) {
2100 errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
));
2103 expect(*t
, "aa", "Q");
2106 // TEMPORARY TESTS -- BEING DEBUGGED
2107 //=- UnicodeString s, s2;
2108 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2109 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2110 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2111 //=- expect(*t, s, s2);
2114 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2115 //=- expect(*t, s2, s);
2118 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2119 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=- expect(*t, s, s);
2123 // const char* source[] = {
2125 // "\\u015Br\\u012Bmad",
2126 // "bhagavadg\\u012Bt\\u0101",
2129 // "vi\\u1E63\\u0101da",
2131 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2132 // "uv\\u0101cr\\u0325",
2134 // "rmk\\u1E63\\u0113t",
2135 // //"dharmak\\u1E63\\u0113tr\\u0113",
2137 // "kuruk\\u1E63\\u0113tr\\u0113",
2138 // "samav\\u0113t\\u0101",
2139 // "yuyutsava-\\u1E25",
2140 // "m\\u0101mak\\u0101-\\u1E25",
2141 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2143 // "san\\u0304java",
2148 // const char* expected[] = {
2150 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2151 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2152 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2153 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2154 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2155 // "\\u092f\\u094b\\u0917",
2156 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2157 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2160 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2162 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2163 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2164 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2165 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2166 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2167 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2168 // "\\u0938\\u0902\\u091c\\u0935",
2172 // UErrorCode status = U_ZERO_ERROR;
2173 // UParseError parseError;
2174 // UnicodeString message;
2175 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2176 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2177 // if(U_FAILURE(status)){
2178 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2179 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2180 // delete latinToDevToLatin;
2181 // delete devToLatinToDev;
2184 // UnicodeString gotResult;
2185 // for(int i= 0; source[i] != 0; i++){
2186 // gotResult = source[i];
2187 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2188 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2190 // delete latinToDevToLatin;
2191 // delete devToLatinToDev;
2195 * Inverse of "Null" should be "Null". (J21)
2197 void TransliteratorTest::TestNullInverse() {
// Creates the "Null" transliterator, takes its inverse and checks that the
// inverse's ID is also "Null".
2199 UErrorCode ec
= U_ZERO_ERROR
;
2200 Transliterator
*t
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
);
2201 if (t
== 0 || U_FAILURE(ec
)) {
2202 errln("FAIL: createInstance");
// The same error code is reused for createInverse().
2205 Transliterator
*u
= t
->createInverse(ec
);
2206 if (u
== 0 || U_FAILURE(ec
)) {
2207 errln("FAIL: createInverse");
2211 if (u
->getID() != "Null") {
2212 errln("FAIL: Inverse of Null should be Null");
2219 * Check ID of inverse of alias. (J22)
2221 void TransliteratorTest::TestAliasInverseID() {
// Creates "Latin-Hangul", takes its inverse and expects the inverse's ID to
// be "Hangul-Latin".
2222 UnicodeString
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2224 UErrorCode ec
= U_ZERO_ERROR
;
2225 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2226 if (t
== 0 || U_FAILURE(ec
)) {
2227 errln("FAIL: createInstance");
2230 Transliterator
*u
= t
->createInverse(ec
);
2231 if (u
== 0 || U_FAILURE(ec
)) {
2232 errln("FAIL: createInverse");
// exp is the expected ID; got is the ID actually reported by the inverse.
2236 UnicodeString exp
= "Hangul-Latin";
2237 UnicodeString got
= u
->getID();
2239 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2240 ", expected " + exp
);
2247 * Test IDs of inverses of compound transliterators. (J20)
2249 void TransliteratorTest::TestCompoundInverseID() {
// Creates the compound "Latin-Jamo;NFC(NFD)", takes its inverse and expects
// the inverse's ID to be "NFD(NFC);Jamo-Latin".
2250 UnicodeString ID
= "Latin-Jamo;NFC(NFD)";
2252 UErrorCode ec
= U_ZERO_ERROR
;
2253 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2254 if (t
== 0 || U_FAILURE(ec
)) {
2255 errln("FAIL: createInstance");
2258 Transliterator
*u
= t
->createInverse(ec
);
2259 if (u
== 0 || U_FAILURE(ec
)) {
2260 errln("FAIL: createInverse");
// exp is the expected ID; got is the ID actually reported by the inverse.
2264 UnicodeString exp
= "NFD(NFC);Jamo-Latin";
2265 UnicodeString got
= u
->getID();
2267 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2268 ", expected " + exp
);
2275 * Test undefined variable.
2278 void TransliteratorTest::TestUndefinedVariable() {
// Negative test: the rule refers to $initial, which the rule string never
// defines, so createFromRules() is expected to fail.
2279 UnicodeString rule
= "$initial } a <> \\u1161;";
2281 UErrorCode ec
= U_ZERO_ERROR
;
2282 Transliterator
*t
= Transliterator::createFromRules("<ID>", rule
, UTRANS_FORWARD
, pe
, ec
);
// A failing error code is the passing outcome here.
2284 if (U_FAILURE(ec
)) {
2285 logln((UnicodeString
)"OK: Got exception for " + rule
+ ", as expected: " +
2289 errln((UnicodeString
)"Fail: bogus rule " + rule
+ " compiled with error " +
2294 * Test empty context.
2296 void TransliteratorTest::TestEmptyContext() {
// The rule puts only the key 'a' inside the braces, leaving both contexts
// empty; every 'a' in the input is expected to become 'b'.
2297 expect(" { a } > b;", "xay a ", "xby b ");
2301 * Test compound filter ID syntax
2303 void TransliteratorTest::TestCompoundFilterID(void) {
// Table-driven test of compound IDs and "::" rule sets that carry global
// filters. Rows have four columns; rows whose later columns are NULL are
// expected to be rejected.
2304 static const char* DATA
[] = {
2305 // Col. 1 = ID or rule set (latter must start with #)
2307 // = columns > 1 are null if expect col. 1 to be illegal =
2309 // Col. 2 = direction, "F..." or "R..."
2310 // Col. 3 = source string
2311 // Col. 4 = exp result
2313 "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters
2314 "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter
2315 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2316 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2317 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2318 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
// The loop stops at the first null entry in column 1 and advances one row
// (four entries) at a time.
2322 for (int32_t i
=0; DATA
[i
]; i
+=4) {
2323 UnicodeString id
= CharsToUnicodeString(DATA
[i
]);
// Only a leading 'R' in column 2 selects the reverse direction; a NULL
// column falls back to forward.
2324 UTransDirection direction
= (DATA
[i
+1] != NULL
&& DATA
[i
+1][0] == 'R') ?
2325 UTRANS_REVERSE
: UTRANS_FORWARD
;
2326 UnicodeString source
;
2328 if (DATA
[i
+2] != NULL
) {
2329 source
= CharsToUnicodeString(DATA
[i
+2]);
2330 exp
= CharsToUnicodeString(DATA
[i
+3]);
// expOk records whether this row is expected to be creatable at all.
2332 UBool expOk
= (DATA
[i
+1] != NULL
);
2333 Transliterator
* t
= NULL
;
2335 UErrorCode ec
= U_ZERO_ERROR
;
// 0x23 is '#': such entries are rule sets and go through createFromRules();
// everything else is treated as an ID.
2336 if (id
.charAt(0) == 0x23/*#*/) {
2337 t
= Transliterator::createFromRules("ID", id
, direction
, pe
, ec
);
2339 t
= Transliterator::createInstance(id
, direction
, pe
, ec
);
2341 UBool ok
= (t
!= NULL
&& U_SUCCESS(ec
));
2342 UnicodeString transID
;
2344 transID
= t
->getID();
2347 transID
= UnicodeString("NULL", "");
2350 logln((UnicodeString
)"Ok: " + id
+ " => " + transID
+ ", " +
// The transliteration itself is only checked for rows that supply a source
// string.
2352 if (source
.length() != 0) {
2353 expect(*t
, source
, exp
);
2357 errln((UnicodeString
)"FAIL: " + id
+ " => " + transID
+ ", " +
2364 * Test new property set syntax
2366 void TransliteratorTest::TestPropertySet() {
// First case: rules using \p{Lu} and \p{ANY}. In the expected output 'a'
// becomes 'A', the uppercase letters D, E, F become 'x', and 'b', 'c' become 'y'.
2367 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
// Second case: "(.+)" is expected to stop at \n and \r, so each line is
// bracketed separately.
2368 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2369 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2373 * Test various failure points of the new 2.0 engine.
2375 void TransliteratorTest::TestNewEngine() {
// Covers several engine scenarios: Latin-Hiragana leaving Katakana alone, a
// three-element CompoundTransliterator with a global [:Ll:] filter, and two
// rule sets built from variables, quantifiers and segments.
2377 UErrorCode ec
= U_ZERO_ERROR
;
2378 Transliterator
*t
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
);
2379 if (t
== 0 || U_FAILURE(ec
)) {
2380 errln("FAIL: createInstance Latin-Hiragana");
2383 // Katakana should be untouched
2384 expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"),
2385 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2390 // This test will only work if Transliterator.ROLLBACK is
2391 // true. Otherwise, this test will fail, revealing a
2392 // limitation of global filters in incremental mode.
2394 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
);
2396 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
);
2397 if (U_FAILURE(ec
)) {
// The compound is assembled from three elements; per the getElement()
// assertions below they are a_to_A, NFD and A_to_b, in that order.
2403 Transliterator
* array
[3];
2405 array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
);
2407 if (U_FAILURE(ec
)) {
2408 errln("FAIL: createInstance NFD");
// The newly allocated [:Ll:] set is passed as the compound's filter argument.
2415 t
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
));
2416 if (U_FAILURE(ec
)) {
2417 errln("FAIL: UnicodeSet constructor");
// Expected: each lowercase 'a' ends up as 'b', while the 'A's already present
// in the input stay as they are.
2425 expect(*t
, "aAaA", "bAbA");
2427 assertTrue("countElements", t
->countElements() == 3);
2428 assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A");
2429 assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD");
2430 assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b");
2431 assertSuccess("getElement", ec
);
2439 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
// gr is a rule set over Greek vowels; the check below expects U+03B1 followed
// by U+0314 to produce "ha".
2443 UnicodeString gr
= CharsToUnicodeString(
2445 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2446 "$rough = \\u0314 ;"
2447 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2451 expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2455 * Test quantified segment behavior. We want:
2456 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
// Checks how a quantifier interacts with a capture segment.
//  - "([abc]+)"  : quantifier inside the segment, $1 is the whole run.
//  - "([abc])+"  : quantifier around the segment, $1 is only the last match.
// The second form is also checked as ante-context ("... { q") and as
// post-context ("{q} ..."). Finally two rules are turned into transliterators
// and passed through toRules(); the generated text is reported next to the
// original rule.
2458 void TransliteratorTest::TestQuantifiedSegment(void) {
2460 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2462 // The tricky case; the quantifier is around the segment
2463 expect("([abc])+ > x $1 x;", "cba", "xax");
2465 // Tricky case in reverse direction
2466 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2468 // Check post-context segment
2469 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2471 // Test toRule/toPattern for non-quantified segment.
2472 // Careful with spacing here.
2473 UnicodeString
r("([a-c]){q} > x $1 x;");
2475 UErrorCode ec
= U_ZERO_ERROR
;
2476 Transliterator
* t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2477 if (U_FAILURE(ec
)) {
2478 errln("FAIL: createFromRules");
2483 t
->toRules(rr
, TRUE
);
2485 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2487 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2491 // Test toRule/toPattern for quantified segment.
2492 // Careful with spacing here.
2493 r
= "([a-c])+{q} > x $1 x;";
2494 t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2495 if (U_FAILURE(ec
)) {
2496 errln("FAIL: createFromRules");
2500 t
->toRules(rr
, TRUE
);
2502 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2504 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2509 //======================================================================
2511 //======================================================================
// Round-trip test between Latin and Devanagari.
// source[] (Latin, escaped) and expected[] (Devanagari, escaped) are parallel
// tables of MAX_LEN entries: source[i] must map to expected[i] through
// "Latin-Devanagari", and expected[i] must map back to source[i] through
// "Devanagari-Latin". Entries that do not round-trip are kept commented out
// in both tables so the indices stay aligned.
2512 void TransliteratorTest::TestDevanagariLatinRT(){
2513 const int MAX_LEN
= 52;
2514 const char* const source
[MAX_LEN
] = {
2529 //"r\\u0323ya", // \u095c is not valid in Devanagari
2555 "\\u1E6Dh\\u1E6Dha",
2562 // Not roundtrippable --
2563 // \\u0939\\u094d\\u094d\\u092E - hma
2564 // \\u0939\\u094d\\u092E - hma
2565 // CharsToUnicodeString("hma"),
2570 "san\\u0304j\\u012Bb s\\u0113nagupta",
2571 "\\u0101nand vaddir\\u0101ju",
2575 const char* const expected
[MAX_LEN
] = {
2576 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2577 "\\u0915\\u094D\\u0930", /* kra */
2578 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2579 "\\u0916\\u094D\\u0930", /* khra */
2580 "\\u0917\\u094D\\u0930", /* gra */
2581 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2582 "\\u091A\\u094D\\u0930", /* cra */
2583 "\\u091B\\u094D\\u0930", /* chra */
2584 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2585 "\\u091D\\u094D\\u0930", /* jhra */
2586 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2587 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2588 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2589 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2590 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2591 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2592 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2593 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2594 "\\u0924\\u094D\\u0924", /* tta */
2595 "\\u0925\\u094D\\u0930", /* thra */
2596 "\\u0926\\u094D\\u0926", /* dda */
2597 "\\u0927\\u094D\\u0930", /* dhra */
2598 "\\u0928\\u094D\\u0928", /* nna */
2599 "\\u092A\\u094D\\u0930", /* pra */
2600 "\\u092B\\u094D\\u0930", /* phra */
2601 "\\u092C\\u094D\\u0930", /* bra */
2602 "\\u092D\\u094D\\u0930", /* bhra */
2603 "\\u092E\\u094D\\u0930", /* mra */
2604 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2605 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2606 "\\u092F\\u094D\\u0930", /* yra */
2607 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2609 "\\u0935\\u094D\\u0930", /* vra */
2610 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2611 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2612 "\\u0938\\u094D\\u0930", /* sra */
2613 "\\u0939\\u094d\\u092E", /* hma */
2614 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2615 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2616 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2617 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2618 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2619 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2620 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2621 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2622 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2624 "\\u0939\\u094D\\u092F", /* hya */
2625 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2626 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2627 "\\u090d", /* e\\u0306 */
2628 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2629 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2633 UErrorCode status
= U_ZERO_ERROR
;
2634 UParseError parseError
;
2635 UnicodeString message
;
// Both directions are separate registered IDs, each opened in the forward
// direction; `status` accumulates a failure from either call.
2636 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2637 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2638 if(U_FAILURE(status
)){
2639 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2640 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2643 UnicodeString gotResult
;
2644 for(int i
= 0; i
<MAX_LEN
; i
++){
// NOTE(review): gotResult is assigned here but not read in any line visible
// in this listing.
2645 gotResult
= source
[i
];
2646 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2647 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Round-trip test between Latin and Telugu using personal names.
// source[i] (Latin) must map to expected[i] (Telugu) through "Latin-Telugu"
// and back through "Telugu-Latin"; both tables hold MAX_LEN entries.
2653 void TransliteratorTest::TestTeluguLatinRT(){
2654 const int MAX_LEN
=10;
2655 const char* const source
[MAX_LEN
] = {
2656 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2657 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2658 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2659 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2660 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupta */
2661 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2662 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2663 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2664 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2665 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2668 const char* const expected
[MAX_LEN
] = {
2669 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2670 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2671 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2672 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2673 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2674 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2675 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2676 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2677 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2678 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2681 UErrorCode status
= U_ZERO_ERROR
;
2682 UParseError parseError
;
2683 UnicodeString message
;
// Despite the "Dev" in their names, these two locals hold the Latin-Telugu
// and Telugu-Latin transliterators in this test.
2684 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
);
2685 Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2686 if(U_FAILURE(status
)){
2687 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2688 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2691 UnicodeString gotResult
;
2692 for(int i
= 0; i
<MAX_LEN
; i
++){
2693 gotResult
= source
[i
];
2694 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2695 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Round-trip test between Latin and Devanagari using Sanskrit words.
// source[i] (Latin) must map to expected[i] (Devanagari) through
// "Latin-Devanagari" and back through "Devanagari-Latin". One pair is kept
// commented out in both tables so the remaining indices stay aligned.
2701 void TransliteratorTest::TestSanskritLatinRT(){
2702 const int MAX_LEN
=16;
2703 const char* const source
[MAX_LEN
] = {
2704 "rmk\\u1E63\\u0113t",
2705 "\\u015Br\\u012Bmad",
2706 "bhagavadg\\u012Bt\\u0101",
2709 "vi\\u1E63\\u0101da",
2711 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2712 "uv\\u0101cr\\u0325",
2713 "dharmak\\u1E63\\u0113tr\\u0113",
2714 "kuruk\\u1E63\\u0113tr\\u0113",
2715 "samav\\u0113t\\u0101",
2717 "m\\u0101mak\\u0101\\u1E25",
2718 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2722 const char* const expected
[MAX_LEN
] = {
2723 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2724 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2725 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2726 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2727 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2728 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2729 "\\u092f\\u094b\\u0917",
2730 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2731 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2732 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2733 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2734 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2735 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2736 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2737 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2738 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2739 "\\u0938\\u0902\\u091c\\u0935",
2741 UErrorCode status
= U_ZERO_ERROR
;
2742 UParseError parseError
;
2743 UnicodeString message
;
2744 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2745 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2746 if(U_FAILURE(status
)){
2747 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2748 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2751 UnicodeString gotResult
;
2752 for(int i
= 0; i
<MAX_LEN
; i
++){
2753 gotResult
= source
[i
];
2754 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2755 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Checks that compound "there and back" IDs act as the identity.
// Each transliterator chains a script conversion with its inverse, so the
// expected output of every expect() call below is the input itself.
// source[] holds Latin text and expected[] the matching Devanagari text;
// MAX_LEN is derived from source[] and compared against expected[]'s size.
2762 void TransliteratorTest::TestCompoundLatinRT(){
2763 const char* const source
[] = {
2764 "rmk\\u1E63\\u0113t",
2765 "\\u015Br\\u012Bmad",
2766 "bhagavadg\\u012Bt\\u0101",
2769 "vi\\u1E63\\u0101da",
2771 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2772 "uv\\u0101cr\\u0325",
2773 "dharmak\\u1E63\\u0113tr\\u0113",
2774 "kuruk\\u1E63\\u0113tr\\u0113",
2775 "samav\\u0113t\\u0101",
2777 "m\\u0101mak\\u0101\\u1E25",
2778 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2782 const int MAX_LEN
= sizeof(source
)/sizeof(source
[0]);
2783 const char* const expected
[MAX_LEN
] = {
2784 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2785 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2786 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2787 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2788 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2789 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2790 "\\u092f\\u094b\\u0917",
2791 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2792 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2793 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2794 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2795 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2796 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2797 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2798 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2799 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2800 "\\u0938\\u0902\\u091c\\u0935"
// Guard against the two tables drifting apart when entries are added.
2802 if(MAX_LEN
!= sizeof(expected
)/sizeof(expected
[0])) {
2803 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2807 UErrorCode status
= U_ZERO_ERROR
;
2808 UParseError parseError
;
2809 UnicodeString message
;
2810 Transliterator
* devToLatinToDev
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2811 Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2812 Transliterator
* devToTelToDev
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2813 Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2815 if(U_FAILURE(status
)){
2816 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2817 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2820 UnicodeString gotResult
;
2821 for(int i
= 0; i
<MAX_LEN
; i
++){
2822 gotResult
= source
[i
];
// NOTE(review): devToTelToDev is created and deleted, but no expect() call
// using it appears in the lines visible in this listing.
2823 expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
]));
2824 expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2825 expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2828 delete(latinToDevToLatin
);
2829 delete(devToLatinToDev
);
2830 delete(devToTelToDev
);
2831 delete(latinToTelToLatin
);
2835 * Test Gurmukhi-Devanagari Tippi and Bindi
// Checks how "Devanagari-Gurmukhi" converts the sign U+0902 depending on the
// preceding character. Two-character inputs are built as <char, U+0902>:
//  - for every character in `vowel`, the expected second character is U+0A02;
//  - for every character in `non_vowel`, the expected second character is
//    U+0A70.
// In both cases the expected first character is the input code point plus
// 0x0100, i.e. the same offset within the U+0Axx range.
2837 void TransliteratorTest::TestGurmukhiDevanagari(){
2839 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2840 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2841 UErrorCode status
= U_ZERO_ERROR
;
2842 UnicodeSet
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV
).unescape(), status
);
2843 UnicodeSet
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV
).unescape(), status
);
2844 UParseError parseError
;
2846 UnicodeSetIterator
vIter(vowel
);
2847 UnicodeSetIterator
nvIter(non_vowel
);
2848 Transliterator
* trans
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
);
2849 if(U_FAILURE(status
)) {
2850 errln("Error creating transliterator %s", u_errorName(status
));
// Templates: index 0 is a placeholder (space) overwritten on each iteration,
// index 1 is the sign under test.
2854 UnicodeString
src (" \\u0902", -1, US_INV
);
2855 UnicodeString
expected(" \\u0A02", -1, US_INV
);
2856 src
= src
.unescape();
2857 expected
= expected
.unescape();
2859 while(vIter
.next()){
2860 src
.setCharAt(0,(UChar
) vIter
.getCodepoint());
2861 expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100));
2862 expect(*trans
,src
,expected
);
// Switch the expected sign to U+0A70 for the non-vowel pass.
2865 expected
.setCharAt(1,0x0A70);
2866 while(nvIter
.next()){
2867 //src.setCharAt(0,(char) nvIter.codepoint);
2868 src
.setCharAt(0,(UChar
)nvIter
.getCodepoint());
2869 expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100));
2870 expect(*trans
,src
,expected
);
2875 * Test instantiation from a locale.
// Checks that transliterator IDs may name locales instead of scripts:
// "ru_RU-Latin" must map Cyrillic U+0430 to "a", and "en-el" must map "a"
// to Greek U+03B1.
2877 void TransliteratorTest::TestLocaleInstantiation(void) {
2879 UErrorCode ec
= U_ZERO_ERROR
;
2880 Transliterator
*t
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
);
2881 if (U_FAILURE(ec
)) {
2882 errln("FAIL: createInstance(ru_RU-Latin)");
2886 expect(*t
, CharsToUnicodeString("\\u0430"), "a");
2889 t
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
);
2890 if (U_FAILURE(ec
)) {
2891 errln("FAIL: createInstance(en-el)");
2895 expect(*t
, "a", CharsToUnicodeString("\\u03B1"));
2900 * Test title case handling of accent (should ignore accents)
// Checks the "Title" transliterator on text containing a combining accent
// (U+0300) and an apostrophe: only the first letter of each word is
// capitalized, so "a\u0300b can't abe" becomes "A\u0300b Can't Abe".
2902 void TransliteratorTest::TestTitleAccents(void) {
2904 UErrorCode ec
= U_ZERO_ERROR
;
2905 Transliterator
*t
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
);
2906 if (U_FAILURE(ec
)) {
2907 errln("FAIL: createInstance(Title)");
2911 expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2916 * Basic test of a locale resource based rule.
// Data-driven check of locale- and variant-specific transliterator IDs.
// DATA is a flat table of triples: { ID, input, expected output }. Each ID
// is instantiated in the forward direction and applied to its input.
2918 void TransliteratorTest::TestLocaleResource() {
2919 const char* DATA
[] = {
2921 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2922 "Latin-el", "b", "\\u03bc\\u03c0",
2923 "Latin-Greek", "b", "\\u03B2",
2924 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2925 "el-Latin", "\\u03B2", "v",
2926 "Greek-Latin", "\\u03B2", "b",
2928 const int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
// Step by 3: one triple per iteration.
2929 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
2931 UErrorCode ec
= U_ZERO_ERROR
;
2932 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
);
2933 if (U_FAILURE(ec
)) {
2934 errln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ")");
2938 expect(*t
, CharsToUnicodeString(DATA
[i
+1]),
2939 CharsToUnicodeString(DATA
[i
+2]));
2945 * Make sure parse errors reference the right line.
// Checks the UParseError context reported for bad rules.
//  1. `rule` must fail to compile, and preContext + '|' + postContext must
//     contain the offending text "d << b".
//  2. `maskingRule` must fail with U_RULE_MASK_ERROR, with preContext
//     "a > x;" and postContext "ab > y;".
// NOTE(review): the literal text of `rule` and `maskingRule` is not included
// in this listing.
2947 void TransliteratorTest::TestParseError() {
2948 static const char* rule
=
2952 UErrorCode ec
= U_ZERO_ERROR
;
2954 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2956 if (U_FAILURE(ec
)) {
// Join both context halves with '|' (124) so one search covers the text on
// either side of the error position.
2957 UnicodeString
err(pe
.preContext
);
2958 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2959 if (err
.indexOf("d << b") >= 0) {
2960 logln("Ok: " + err
);
2962 errln("FAIL: " + err
);
2966 errln("FAIL: no syntax error");
2968 static const char* maskingRule
=
2973 delete Transliterator::createFromRules("ID", maskingRule
, UTRANS_FORWARD
, pe
, ec
);
2974 if (ec
!= U_RULE_MASK_ERROR
) {
2975 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec
));
2977 else if (UnicodeString("a > x;") != UnicodeString(pe
.preContext
)) {
2978 errln("FAIL: did not get expected precontext");
2980 else if (UnicodeString("ab > y;") != UnicodeString(pe
.postContext
)) {
2981 errln("FAIL: did not get expected postcontext");
2986 * Make sure sets on output are disallowed.
// Checks that a set variable used on the output side of a rule
// ("b > $set;") is rejected: createFromRules() is expected to fail, and the
// reported parse context is logged. Success is reported as a test failure.
2988 void TransliteratorTest::TestOutputSet() {
2989 UnicodeString rule
= "$set = [a-cm-n]; b > $set;";
2990 UErrorCode ec
= U_ZERO_ERROR
;
2992 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2994 if (U_FAILURE(ec
)) {
2995 UnicodeString
err(pe
.preContext
);
2996 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2997 logln("Ok: " + err
);
3000 errln("FAIL: No syntax error");
3004 * Test the use variable range pragma, making sure that use of
3005 * variable range characters is detected and flagged as an error.
// Checks the "use variable range" pragma. The rule reserves 0x70..0x72
// ('p'..'r') and then uses 'q' (0x71) literally, so createFromRules() is
// expected to fail; the parse context is logged. Success is a test failure.
3007 void TransliteratorTest::TestVariableRange() {
3008 UnicodeString rule
= "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3009 UErrorCode ec
= U_ZERO_ERROR
;
3011 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3013 if (U_FAILURE(ec
)) {
3014 UnicodeString
err(pe
.preContext
);
3015 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3016 logln("Ok: " + err
);
3019 errln("FAIL: No syntax error");
3023 * Test invalid post context error handling
// Checks error reporting for a rule with misplaced context markers
// ("a}b{c>d;"). createFromRules() is expected to fail, and the combined
// preContext + '|' + postContext must contain "a}b{c".
3025 void TransliteratorTest::TestInvalidPostContext() {
3026 UnicodeString rule
= "a}b{c>d;";
3027 UErrorCode ec
= U_ZERO_ERROR
;
3029 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3031 if (U_FAILURE(ec
)) {
3032 UnicodeString
err(pe
.preContext
);
3033 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3034 if (err
.indexOf("a}b{c") >= 0) {
3035 logln("Ok: " + err
);
3037 errln("FAIL: " + err
);
3041 errln("FAIL: No syntax error");
3045 * Test ID form variants
// Data-driven check of ID spelling variants and their inverses.
// DATA is a flat table of triples:
//   { ID to instantiate, expected getID() result, expected inverse ID }.
// A NULL third element marks an ID that is expected to be invalid
// (expValid is derived from it). A NULL second element is handled by the
// branch at 3075, whose body is not included in this listing (presumably the
// input ID is expected back unchanged -- confirm against the full source).
3047 void TransliteratorTest::TestIDForms() {
3048 const char* DATA
[] = {
3050 "nfd", NULL
, "NFC", // make sure case is ignored
3051 "Any-NFKD", NULL
, "Any-NFKC",
3052 "Null", NULL
, "Null",
3053 "-nfkc", "nfkc", "NFKD",
3054 "-nfkc/", "nfkc", "NFKD",
3055 "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN",
3056 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3057 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3058 "Source-", NULL
, NULL
,
3059 "Source/Variant-", NULL
, NULL
,
3060 "Source-/Variant", NULL
, NULL
,
3061 "/Variant", NULL
, NULL
,
3062 "/Variant-", NULL
, NULL
,
3063 "-/Variant", NULL
, NULL
,
3068 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
// Step by 3: one triple per iteration.
3070 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3071 const char* ID
= DATA
[i
];
3072 const char* expID
= DATA
[i
+1];
3073 const char* expInvID
= DATA
[i
+2];
3074 UBool expValid
= (expInvID
!= NULL
);
3075 if (expID
== NULL
) {
3079 UErrorCode ec
= U_ZERO_ERROR
;
3081 Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
3082 if (U_FAILURE(ec
)) {
3084 logln((UnicodeString
)"Ok: getInstance(" + ID
+") => " + u_errorName(ec
));
3086 errln((UnicodeString
)"FAIL: Couldn't create " + ID
);
3091 Transliterator
*u
= t
->createInverse(ec
);
3092 if (U_FAILURE(ec
)) {
3093 errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
);
// Both the canonical ID and the inverse's ID must match the table.
3098 if (t
->getID() == expID
&&
3099 u
->getID() == expInvID
) {
3100 logln((UnicodeString
)"Ok: " + ID
+ ".getInverse() => " + expInvID
);
3102 errln((UnicodeString
)"FAIL: getInstance(" + ID
+ ") => " +
3103 t
->getID() + " x getInverse() => " + u
->getID() +
3104 ", expected " + expInvID
);
// NUL-terminated UChar strings used as find/replace operands by
// checkRules(): a single space (32), line feed (10), carriage return (13),
// and the empty string used as the replacement.
3111 static const UChar SPACE
[] = {32,0};
3112 static const UChar NEWLINE
[] = {10,0};
3113 static const UChar RETURN
[] = {13,0};
3114 static const UChar EMPTY
[] = {0};
// Compares the rules generated by t2.toRules() with an expected rule string,
// ignoring whitespace.
//   label            - caller-supplied description of the check; its use is
//                      not visible in this listing (presumably reported on
//                      mismatch -- confirm against the full source).
//   t2               - transliterator whose rules are generated.
//   testRulesForward - expected rules text.
// Spaces, line feeds and carriage returns are removed from the generated
// rules; only spaces are removed from the expected text. On mismatch both
// the generated rules and the original expected text are logged.
3116 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
,
3117 const UnicodeString
& testRulesForward
) {
3118 UnicodeString rules2
; t2
.toRules(rules2
, TRUE
);
3119 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3120 rules2
.findAndReplace(SPACE
, EMPTY
);
3121 rules2
.findAndReplace(NEWLINE
, EMPTY
);
3122 rules2
.findAndReplace(RETURN
, EMPTY
);
3124 UnicodeString
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
);
3126 if (rules2
!= testRules
) {
3128 logln((UnicodeString
)"GENERATED RULES: " + rules2
);
3129 logln((UnicodeString
)"SHOULD BE: " + testRulesForward
);
3134 * Mark's toRules test.
// Builds a forward ("source-target") and a reverse ("target-source")
// transliterator from the same bidirectional rule text, then checks:
//  - forward maps a-acute (U+00E1) to alpha-acute (U+03AC), and reverse maps
//    it back;
//  - toRules() on each matches testRulesForward / testRulesBackward
//    respectively (compared through checkRules()).
// NOTE(review): most lines of the three rule literals are not included in
// this listing.
3136 void TransliteratorTest::TestToRulesMark() {
3137 const char* testRules
=
3138 "::[[:Latin:][:Mark:]];"
3141 "a <> \\u03B1;" // alpha
3145 "::([[:Greek:][:Mark:]]);"
3147 const char* testRulesForward
=
3148 "::[[:Latin:][:Mark:]];"
3156 const char* testRulesBackward
=
3157 "::[[:Greek:][:Mark:]];"
3164 UnicodeString source
= CharsToUnicodeString("\\u00E1"); // a-acute
3165 UnicodeString target
= CharsToUnicodeString("\\u03AC"); // alpha-acute
3168 UErrorCode ec
= U_ZERO_ERROR
;
3169 Transliterator
*t2
= Transliterator::createFromRules("source-target", UnicodeString(testRules
, -1, US_INV
), UTRANS_FORWARD
, pe
, ec
);
3170 Transliterator
*t3
= Transliterator::createFromRules("target-source", UnicodeString(testRules
, -1, US_INV
), UTRANS_REVERSE
, pe
, ec
);
3172 if (U_FAILURE(ec
)) {
3175 errln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
));
3179 expect(*t2
, source
, target
);
3180 expect(*t3
, target
, source
);
3182 checkRules("Failed toRules FORWARD", *t2
, UnicodeString(testRulesForward
, -1, US_INV
));
3183 checkRules("Failed toRules BACKWARD", *t3
, UnicodeString(testRulesBackward
, -1, US_INV
));
3190 * Test Escape and Unescape transliterators.
// Exercises the hex escape/unescape transliterators: "Hex-Any" plus the
// "Any-Hex" variants /C, /Java and /Perl. Per the expected strings below,
// for the input "A", U+10BEEF, U+FEED:
//   /C    writes \uXXXX and \UXXXXXXXX,
//   /Java writes the supplementary code point as a \uDBEF\uDEEF pair,
//   /Perl writes \x{...}.
// NOTE(review): the expect() call lines and local declarations are not
// included in this listing; only their argument lines are visible.
3192 void TransliteratorTest::TestEscape() {
3198 t
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
);
3199 if (U_FAILURE(ec
)) {
3200 errln((UnicodeString
)"FAIL: createInstance");
3203 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
3209 t
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
);
3210 if (U_FAILURE(ec
)) {
3211 errln((UnicodeString
)"FAIL: createInstance");
3214 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3215 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3220 t
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
);
3221 if (U_FAILURE(ec
)) {
3222 errln((UnicodeString
)"FAIL: createInstance");
3225 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3226 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3231 t
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
);
3232 if (U_FAILURE(ec
)) {
3233 errln((UnicodeString
)"FAIL: createInstance");
3236 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3237 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
// Checks that an anchored rule followed by the same unanchored rule
// ("^a > Q; a > q;") compiles. A failure from createFromRules() is reported
// together with the parse error's line, offset and pre-context.
3243 void TransliteratorTest::TestAnchorMasking(){
3244 UnicodeString
rule ("^a > Q; a > q;");
3245 UErrorCode status
= U_ZERO_ERROR
;
3246 UParseError parseError
;
3248 Transliterator
* t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
);
3249 if(U_FAILURE(status
)){
3250 errln(UnicodeString("FAIL: ") + "ID" +
3251 ".createFromRules() => bad rules" +
3252 /*", parse error " + parseError.code +*/
3253 ", line " + parseError
.line
+
3254 ", offset " + parseError
.offset
+
3255 ", context " + prettify(parseError
.preContext
, TRUE
) +
3256 ", rules: " + prettify(rule
, TRUE
));
3262 * Make sure display names of variants look reasonable.
// Data-driven check of Transliterator::getDisplayName() for the en_US locale.
// DATA is a flat table of triples: { ID, forward display name, reverse
// display name }. For each triple the display name of the ID is compared
// with the forward name; then the ID is instantiated with UTRANS_REVERSE and
// the display name of that instance's ID is compared with the reverse name.
// When UCONFIG_NO_FORMATTING is set the test only logs that it is skipped.
3264 void TransliteratorTest::TestDisplayName() {
3265 #if UCONFIG_NO_FORMATTING
3266 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3269 static const char* DATA
[] = {
3270 // ID, forward name, reverse name
3271 // Update the text as necessary -- the important thing is
3272 // not the text itself, but how various cases are handled.
3275 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3278 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3281 "NFC", "Any to NFC", "Any to NFD",
3284 int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
3286 Locale
US("en", "US");
// Step by 3: one triple per iteration.
3288 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3290 Transliterator::getDisplayName(DATA
[i
], US
, name
);
3291 if (name
!= DATA
[i
+1]) {
3292 errln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " +
3293 name
+ ", expected " + DATA
[i
+1]);
3295 logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
);
3297 UErrorCode ec
= U_ZERO_ERROR
;
3299 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
);
3300 if (U_FAILURE(ec
)) {
3302 errln("FAIL: createInstance failed");
3305 name
= Transliterator::getDisplayName(t
->getID(), US
, name
);
3306 if (name
!= DATA
[i
+2]) {
3307 errln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " +
3308 name
+ ", expected " + DATA
[i
+2]);
3310 logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
);
// Data-driven test of assorted special cases (normalization, case mapping,
// Greek sigma handling, registered rule-based transliterators).
//  - registerRules: flat pairs { ID, rules }; each pair is compiled and
//    registered before the test cases run, and unregistered afterwards.
//  - testCases: flat triples { ID, source, target }. An empty target means
//    the expected result is computed here instead: via Normalizer for
//    NFD/NFC/NFKD/NFKC and via toLower()/toUpper() with the US locale for
//    Lower/Upper.
// Both loops stop at the first entry whose ID string is empty, so each table
// relies on an empty-string terminator.
3317 void TransliteratorTest::TestSpecialCases(void) {
3318 const UnicodeString registerRules
[] = {
3319 "Any-Dev1", "x > X; y > Y;",
3320 "Any-Dev2", "XY > Z",
3322 CharsToUnicodeString
3323 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3327 const UnicodeString testCases
[] = {
3329 // should add more test cases
3330 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3331 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3332 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3333 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3336 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3337 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3339 // check for devanagari bug
3340 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3342 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3343 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3344 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE
+ DESERET_dee
,
3346 //TODO: enable this test once Titlecase works right
// NOTE(review): the entry at 3348-3349 follows the TODO above and may be
// disabled in the full source; lines around it are missing from this listing.
3348 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3349 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3351 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3352 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE
+ DESERET_DEE
,
3353 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3354 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee
+ DESERET_dee
,
3356 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3357 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3360 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3361 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3362 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3363 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3364 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3365 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3366 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3367 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369 // Upper: TAT\\u02B9\\u00C2NA
3370 // Lower: tat\\u02B9\\u00E2na
3371 // Title: Tat\\u02B9\\u00E2na
3372 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3373 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3374 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3375 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3376 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3377 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
// Phase 1: compile and register every { ID, rules } pair.
3384 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3385 UErrorCode status
= U_ZERO_ERROR
;
3387 Transliterator
*t
= Transliterator::createFromRules(registerRules
[0+i
],
3388 registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
);
3389 if (U_FAILURE(status
)) {
3390 errln("Fails: Unable to create the transliterator from rules.");
3392 Transliterator::registerInstance(t
);
// Phase 2: run every { ID, source, target } triple.
3395 for (i
= 0; testCases
[i
].length()!=0; i
+=3) {
3396 UErrorCode ec
= U_ZERO_ERROR
;
3398 const UnicodeString
& name
= testCases
[i
];
3399 Transliterator
*t
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
);
3400 if (U_FAILURE(ec
)) {
3401 errln((UnicodeString
)"FAIL: Couldn't create " + name
);
3405 const UnicodeString
& id
= t
->getID();
3406 const UnicodeString
& source
= testCases
[i
+1];
3407 UnicodeString target
;
3409 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3411 if (testCases
[i
+2].length() > 0) {
3412 target
= testCases
[i
+2];
3413 } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) {
3414 Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
);
3415 } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) {
3416 Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
);
3417 } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) {
3418 Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
);
3419 } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) {
3420 Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
);
3421 } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) {
3423 target
.toLower(Locale::getUS());
3424 } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) {
3426 target
.toUpper(Locale::getUS());
3428 if (U_FAILURE(ec
)) {
3429 errln((UnicodeString
)"FAIL: Internal error normalizing " + source
);
3433 expect(*t
, source
, target
);
// Phase 3: remove the transliterators registered in phase 1.
3436 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3437 Transliterator::unregister(registerRules
[i
]);
3441 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) {
3443 sprintf(buffer
, "\\u%04x", (int)ch
);
3445 sprintf(buffer
, "\\U%08x", (int)ch
);
// Checks case mapping of the surrogate-encoded DESERET_dee / DESERET_DEE test
// strings in three ways: u_totitle() on a single code point, then
// u_strToUpper() and u_strToLower() on a two-character mixed-case string.
3450 void TransliteratorTest::TestSurrogateCasing (void) {
3451 // check that casing handles surrogates
3452 // titlecase is currently defective
// Read the first code point of DESERET_dee into dee and titlecase it; on a
// mismatch with DESERET_DEE, report both code points in escaped form.
3456 UTF_GET_CHAR(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
);
3457 UnicodeString
DEE(u_totitle(dee
));
3458 if (DEE
!= DESERET_DEE
) {
3459 err("Fails titlecase of surrogates");
3460 err(Char32ToEscapedChars(dee
, buffer
));
3462 errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
));
// Mixed-case input plus the all-lowercase and all-uppercase expectations.
3465 UnicodeString deeDEETest
=DESERET_dee
+ DESERET_DEE
;
3466 UnicodeString deedeeTest
= DESERET_dee
+ DESERET_dee
;
3467 UnicodeString DEEDEETest
= DESERET_DEE
+ DESERET_DEE
;
3468 UErrorCode status
= U_ZERO_ERROR
;
// Uppercase into buffer2 (capacity 20 passed to the API; NULL locale argument).
3470 u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3471 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) {
3472 errln("Fails: Can't uppercase surrogates.");
// Reset the status before the independent lowercase check.
3475 status
= U_ZERO_ERROR
;
3476 u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3477 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) {
3478 errln("Fails: Can't lowercase surrogates.");
3482 static void _trans(Transliterator
& t
, const UnicodeString
& src
,
3483 UnicodeString
& result
) {
3485 t
.transliterate(result
);
3488 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
,
3489 UnicodeString
& result
, UErrorCode ec
) {
3491 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3492 if (U_SUCCESS(ec
)) {
3493 _trans(*t
, src
, result
);
3498 static UnicodeString
_findMatch(const UnicodeString
& source
,
3499 const UnicodeString
* pairs
) {
3500 UnicodeString empty
;
3501 for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) {
3502 if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) {
3509 // Check to see that incremental gets at least part way through a reasonable string.
// For every registered source/target/variant combination whose source has a
// sample text in the local `tests` table, creates the transliterator and its
// inverse and runs CheckIncrementalAux() on each.
3511 void TransliteratorTest::TestIncrementalProgress(void) {
3512 UErrorCode ec
= U_ZERO_ERROR
;
// Build the sample texts: a Latin sentence and its Devanagari and Katakana
// transliterations.
3513 UnicodeString latinTest
= "The Quick Brown Fox.";
3514 UnicodeString devaTest
;
3515 _trans("Latin-Devanagari", latinTest
, devaTest
, ec
);
3516 UnicodeString kataTest
;
3517 _trans("Latin-Katakana", latinTest
, kataTest
, ec
);
3518 if (U_FAILURE(ec
)) {
3519 errln("FAIL: Internal error");
// (source name, sample text) pairs consumed by _findMatch().
3522 const UnicodeString tests
[] = {
3525 "Halfwidth", latinTest
,
3526 "Devanagari", devaTest
,
3527 "Katakana", kataTest
,
// NOTE(review): this outer `test` is shadowed by the `test` declared inside
// the source loop below; it does not appear to be read in the visible code.
3531 UnicodeString
test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3532 int32_t i
= 0, j
=0, k
=0;
// Enumerate every available source.
3533 int32_t sources
= Transliterator::countAvailableSources();
3534 for (i
= 0; i
< sources
; i
++) {
3535 UnicodeString source
;
3536 Transliterator::getAvailableSource(i
, source
);
// An empty result means there is no sample text for this source.
3537 UnicodeString test
= _findMatch(source
, tests
);
3538 if (test
.length() == 0) {
3539 logln((UnicodeString
)"Skipping " + source
+ "-X");
// Enumerate every target of this source, then every variant of the pair.
3542 int32_t targets
= Transliterator::countAvailableTargets(source
);
3543 for (j
= 0; j
< targets
; j
++) {
3544 UnicodeString target
;
3545 Transliterator::getAvailableTarget(j
, source
, target
);
3546 int32_t variants
= Transliterator::countAvailableVariants(source
, target
);
3547 for (k
=0; k
< variants
; k
++) {
3548 UnicodeString variant
;
3550 UErrorCode status
= U_ZERO_ERROR
;
// Assemble the full ID as "source-target/variant".
3552 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
3553 UnicodeString id
= source
+ "-" + target
+ "/" + variant
;
3555 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
);
3556 if (U_FAILURE(status
)) {
3557 errln((UnicodeString
)"FAIL: Could not create " + id
);
// Check the forward direction on the sample text.
3561 status
= U_ZERO_ERROR
;
3562 CheckIncrementalAux(t
, test
);
// Transliterate the sample into rev, then check the inverse on that output.
3565 _trans(*t
, test
, rev
);
3566 Transliterator
*inv
= t
->createInverse(status
);
3567 if (U_FAILURE(status
)) {
3568 errln((UnicodeString
)"FAIL: Could not create inverse of " + id
);
3573 CheckIncrementalAux(inv
, rev
);
// Runs t incrementally over a copy of input and reports an error if the
// transliterator made no progress, or if finishTransliteration() leaves
// unprocessed text (pos.start != pos.limit).
//   t     - transliterator under test
//   input - text to transliterate; the work is done on a local copy
3581 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,
3582 const UnicodeString
& input
) {
3583 UErrorCode ec
= U_ZERO_ERROR
;
3585 UnicodeString test
= input
;
// Position setup: context and limit cover the whole input.
3587 pos
.contextStart
= 0;
3588 pos
.contextLimit
= input
.length();
3590 pos
.limit
= input
.length();
// Incremental pass.
3592 t
->transliterate(test
, pos
, ec
);
3593 if (U_FAILURE(ec
)) {
3594 errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
));
3597 UBool gotError
= FALSE
;
3599 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
// "No progress" means start is still 0 while text remains (limit != 0);
// the "Hex-Any/Unicode" ID is exempted from this check.
3601 if (pos
.start
== 0 && pos
.limit
!= 0 && t
->getID() != "Hex-Any/Unicode") {
3602 errln((UnicodeString
)"No Progress, " +
3603 t
->getID() + ": " + formatInput(test
, input
, pos
));
3606 logln((UnicodeString
)"PASS Progress, " +
3607 t
->getID() + ": " + formatInput(test
, input
, pos
));
// After finishing, start must have reached limit.
3609 t
->finishTransliteration(test
, pos
);
3610 if (pos
.start
!= pos
.limit
) {
3611 errln((UnicodeString
)"Incomplete, " +
3612 t
->getID() + ": " + formatInput(test
, input
, pos
));
// Builds a transliterator from a rule that uses nested function calls
// (&Lower, &Hex, &Any-Lower), reports what toRules() returns against the
// original rule text, and checks the transliteration of a sample sentence.
3617 void TransliteratorTest::TestFunction() {
3618 // Careful with spacing and ';' here: Phrase this exactly
3619 // as toRules() is going to return it. If toRules() changes
3620 // with regard to spacing or ';', then adjust this string.
3621 UnicodeString rule
=
3622 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3625 UErrorCode ec
= U_ZERO_ERROR
;
3626 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3628 errln("FAIL: createFromRules failed");
// Regenerate the rules (escapeUnprintable = TRUE) into r; it is logged as OK
// or reported as FAIL together with the expected rule text.
3633 t
->toRules(r
, TRUE
);
3635 logln((UnicodeString
)"OK: toRules() => " + r
);
3637 errln((UnicodeString
)"FAIL: toRules() => " + r
+
3638 ", expected " + rule
);
// Each uppercase letter is followed by "(lowercase=hex escape of lowercase)".
3641 expect(*t
, "The Quick Brown Fox",
3642 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
// Verifies that rules referring to $1 without a matching capture group are
// rejected: createFromRules() is expected to return NULL and to set a
// failing error code for both rule strings.
3647 void TransliteratorTest::TestInvalidBackRef(void) {
// rule uses $1 with no parenthesized segment at all; rule2 contains the same
// invalid ". > $1;" rule after a valid rule.
3648 UnicodeString rule
= ". > $1;";
3649 UnicodeString rule2
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3651 UErrorCode ec
= U_ZERO_ERROR
;
3652 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3653 Transliterator
*t2
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
);
3656 errln("FAIL: createFromRules should have returned NULL");
3661 errln("FAIL: createFromRules should have returned NULL");
// Success here is the failure case: an error code is the expected outcome.
3665 if (U_SUCCESS(ec
)) {
3666 errln("FAIL: Ok: . > $1; => no error");
3668 logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
));
// Tests rules whose UnicodeSets contain multi-character strings such as
// {fg} or {abc}, including the case where several strings in a set can match
// at the same position.
3672 void TransliteratorTest::TestMulticharStringSet() {
3679 " e } [{fg}] > r;" ;
3682 UErrorCode ec
= U_ZERO_ERROR
;
3683 Transliterator
* t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3684 if (t
== NULL
|| U_FAILURE(ec
)) {
3686 errln("FAIL: createFromRules failed");
3690 expect(*t
, "a aa ab bc d gd de gde gdefg ddefg",
3691 "y x yz z d gd de gdq gdqfg ddrfg");
3694 // Overlapped string test. Make sure that when multiple
3695 // strings can match that the longest one is matched.
3697 " [a {ab} {abc}] > x;"
3700 " q [t {st} {rst}] { e > p;" ;
// Rebuild the transliterator from the second rule set, reusing t and ec.
3702 t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3703 if (t
== NULL
|| U_FAILURE(ec
)) {
3705 errln("FAIL: createFromRules failed");
3709 expect(*t
, "a ab abc qte qste qrste",
3710 "x x x qtp qstp qrstp");
3714 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3715 // BEGIN TestUserFunction support factory
// Prototype transliterators handed out by _TUFFactory(), indexed by the
// integer token passed at registration time (four slots).
3717 Transliterator
* _TUFF
[4];
// The ID under which the transliterator in the matching _TUFF slot was
// registered; read by _TUFUnreg() to unregister it.
3718 UnicodeString
* _TUFID
[4];
// Factory callback given to Transliterator::registerFactory(): returns a
// clone of the prototype stored in _TUFF at index context.integer. The ID
// argument is ignored.
3720 static Transliterator
* U_EXPORT2
_TUFFactory(const UnicodeString
& /*ID*/,
3721 Transliterator::Token context
) {
3722 return _TUFF
[context
.integer
]->clone();
// Registers _TUFFactory under ID with slot number n as its token, and stores
// a heap-allocated copy of ID in _TUFID[n].
//   ID - transliterator ID to register
//   t  - transliterator associated with slot n
//   n  - slot index into _TUFF / _TUFID
3725 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) {
3727 _TUFID
[n
] = new UnicodeString(ID
);
3728 Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
));
// Undoes _TUFReg() for slot n: if the slot holds a transliterator, the ID
// saved in _TUFID[n] is unregistered.
3731 static void _TUFUnreg(int32_t n
) {
3732 if (_TUFF
[n
] != NULL
) {
3733 Transliterator::unregister(*_TUFID
[n
]);
3739 // END TestUserFunction support factory
3740 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3743 * Test that user-registered transliterators can be used under function
// Creates several rule-based transliterators, registers them through the
// _TUFReg() factory helper under "Any-..." IDs, and then uses them as
// &Function() calls inside other rules. Slots 0..3 are used for Any-gif,
// Any-RemoveCurly, Any-hex2 and Any-gif2 respectively.
3746 void TransliteratorTest::TestUserFunction() {
3750 UErrorCode ec
= U_ZERO_ERROR
;
3752 // Setup our factory
3754 for (i
=0; i
<4; ++i
) {
3758 // There's no need to register inverses if we don't use them
// "gif": turns a \uXXYY escape into an <img> tag pointing at a Unicode glyph image.
3759 t
= Transliterator::createFromRules("gif",
3760 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3761 UTRANS_FORWARD
, pe
, ec
);
3762 if (t
== NULL
|| U_FAILURE(ec
)) {
3763 errln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
));
3766 _TUFReg("Any-gif", t
, 0);
// "RemoveCurly": deletes curly braces and the literal \N prefix.
3768 t
= Transliterator::createFromRules("RemoveCurly",
3769 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3770 UTRANS_FORWARD
, pe
, ec
);
3771 if (t
== NULL
|| U_FAILURE(ec
)) {
3772 errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
));
3775 expect(*t
, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3776 _TUFReg("Any-RemoveCurly", t
, 1);
// "hex2": registered as Any-hex2, then re-created by ID and checked.
3778 logln("Trying &hex");
3779 t
= Transliterator::createFromRules("hex2",
3781 UTRANS_FORWARD
, pe
, ec
);
3782 if (t
== NULL
|| U_FAILURE(ec
)) {
3783 errln("FAIL: createFromRules");
3786 logln("Registering");
3787 _TUFReg("Any-hex2", t
, 2);
3788 t
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
);
3789 if (t
== NULL
|| U_FAILURE(ec
)) {
3790 errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
));
3793 expect(*t
, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
// "gif2": chains the two user functions registered above, &Gif(&Hex2($1)).
3796 logln("Trying &gif");
3797 t
= Transliterator::createFromRules("gif2",
3798 "(.) > &Gif(&Hex2($1));",
3799 UTRANS_FORWARD
, pe
, ec
);
3800 if (t
== NULL
|| U_FAILURE(ec
)) {
3801 errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
));
3804 logln("Registering");
3805 _TUFReg("Any-gif2", t
, 3);
3806 t
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
);
3807 if (t
== NULL
|| U_FAILURE(ec
)) {
3808 errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
));
3811 expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3812 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3815 // Test that filters are allowed after &
3816 t
= Transliterator::createFromRules("test",
3817 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3818 UTRANS_FORWARD
, pe
, ec
);
3819 if (t
== NULL
|| U_FAILURE(ec
)) {
3820 errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
));
3824 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
// Final pass over the same four slots used above.
3828 for (i
=0; i
<4; ++i
) {
3834 * Test the Any-X transliterators.
// Creates the "Any-Latin" transliterator and checks it against one string
// that mixes Greek, Hiragana and Cyrillic runs with ASCII labels.
3836 void TransliteratorTest::TestAnyX(void) {
3837 UParseError parseError
;
3838 UErrorCode status
= U_ZERO_ERROR
;
3839 Transliterator
* anyLatin
=
3840 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3842 errln("FAIL: createInstance returned NULL");
// Input (escaped non-Latin text) followed by the expected Latin output.
3848 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3849 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3855 * Test the source and target set API. These are only implemented
3856 * for RBT and CompoundTransliterator at this time.
// Builds a rule-based transliterator from the rules in r and compares the
// sets returned by getSourceSet() / getTargetSet() with the expected sets
// [arx{lu}] and [bq].
3858 void TransliteratorTest::TestSourceTargetSet() {
3859 UErrorCode ec
= U_ZERO_ERROR
;
// Expected source set.
3867 UnicodeSet
expSrc("[arx{lu}]", ec
);
// Expected target set.
3870 UnicodeSet
expTrg("[bq]", ec
);
3873 Transliterator
* t
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
);
// ec covers both the UnicodeSet constructors and createFromRules().
3875 if (U_FAILURE(ec
)) {
3877 errln("FAIL: Couldn't set up test");
3881 UnicodeSet src
; t
->getSourceSet(src
);
3882 UnicodeSet trg
; t
->getTargetSet(trg
);
// Both sets must match; either way the sets are printed as patterns.
3884 if (src
== expSrc
&& trg
== expTrg
) {
3886 logln((UnicodeString
)"Ok: " +
3887 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3888 ", target = " + trg
.toPattern(b
, TRUE
));
3890 UnicodeString a
, b
, c
, d
;
3891 errln((UnicodeString
)"FAIL: " +
3892 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3893 ", expected " + expSrc
.toPattern(b
, TRUE
) +
3894 "; target = " + trg
.toPattern(c
, TRUE
) +
3895 ", expected " + expTrg
.toPattern(d
, TRUE
));
3902 * Test handling of rule whitespace, for both RBT and UnicodeSet.
// Checks that U+200E is ignored like whitespace, both in transliterator
// rules and in a UnicodeSet pattern.
3904 void TransliteratorTest::TestRuleWhitespace() {
// Rule with U+200E between the operator and the replacement text.
3906 const char* r
= "a > \\u200E b;";
3908 UErrorCode ec
= U_ZERO_ERROR
;
3910 Transliterator
* t
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
);
3912 if (U_FAILURE(ec
)) {
3913 errln("FAIL: Couldn't set up test");
// If U+200E is ignored, "a" maps to exactly "b".
3915 expect(*t
, "a", "b");
// Same check for a set pattern: U+200E must not become a member.
3921 UnicodeSet
set(CharsToUnicodeString("[a \\u200E]"), ec
);
3923 if (U_FAILURE(ec
)) {
3924 errln("FAIL: Couldn't set up test");
3926 if (set
.contains(0x200E)) {
3927 errln("FAIL: U+200E not being ignored by UnicodeSet");
3931 //======================================================================
3932 // this method is in TestUScript.java
3933 //======================================================================
// Walks every code point from 0 to 0x10FFFF, looks up its script, and checks
// that a transliterator can be created from the filter IDs "[:<name>:];NFD"
// and "[:<short name>:];NFD". A transliterator is only created when the ID
// differs from the one built for the previous code point.
3934 void TransliteratorTest::TestAllCodepoints(){
3935 UScriptCode code
= USCRIPT_INVALID_CODE
;
// Fixed-size scratch buffers for the script names and the derived IDs.
3936 char id
[256]={'\0'};
3937 char abbr
[256]={'\0'};
3938 char newId
[256]={'\0'};
3939 char newAbbrId
[256]={'\0'};
3940 char oldId
[256]={'\0'};
3941 char oldAbbrId
[256]={'\0'};
3943 UErrorCode status
=U_ZERO_ERROR
;
3946 for(uint32_t i
= 0; i
<=0x10ffff; i
++){
3947 code
= uscript_getScript(i
,&status
);
3948 if(code
== USCRIPT_INVALID_CODE
){
3949 errln("uscript_getScript for codepoint \\U%08X failed.\n", i
);
3951 const char* myId
= uscript_getName(code
);
3953 errln("Valid script code returned NULL name. Check your data!");
// Long and short script names for this code point.
3956 uprv_strcpy(id
,myId
);
3957 uprv_strcpy(abbr
,uscript_getShortName(code
));
// Build "[:<name>:];NFD".
3959 uprv_strcpy(newId
,"[:");
3960 uprv_strcat(newId
,id
);
3961 uprv_strcat(newId
,":];NFD");
// Build "[:<short name>:];NFD".
3963 uprv_strcpy(newAbbrId
,"[:");
3964 uprv_strcat(newAbbrId
,abbr
);
3965 uprv_strcat(newAbbrId
,":];NFD");
// Only create a transliterator when the ID changed since the last iteration.
3967 if(uprv_strcmp(newId
,oldId
)!=0){
3968 Transliterator
* t
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
);
3969 if(t
==NULL
|| U_FAILURE(status
)){
3970 errln((UnicodeString
)"FAIL: Could not create " + id
);
3974 if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){
3975 Transliterator
* t
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
);
3976 if(t
==NULL
|| U_FAILURE(status
)){
// NOTE(review): this message prints the long name `id` although the ID that
// failed was built from `abbr` — confirm whether that is intended.
3977 errln((UnicodeString
)"FAIL: Could not create " + id
);
// Remember the IDs so the next code point can be compared against them.
3981 uprv_strcpy(oldId
,newId
);
3982 uprv_strcpy(oldAbbrId
, newAbbrId
);
// Boilerplate-coverage macros used by TestBoilerplate().
//   TEST_TRANSLIT_ID(id, cls):     creates a transliterator from the string
//     literal `id` and reports an error unless its getDynamicClassID() equals
//     cls::getStaticClassID().
//   TEST_TRANSLIT_RULE(rule, cls): the same check for a transliterator built
//     from the string literal `rule` with createFromRules().
// Both arguments `id` and `rule` must be string literals because they are
// concatenated with the message literal in errln().
3988 #define TEST_TRANSLIT_ID(id, cls) { \
3989 UErrorCode ec = U_ZERO_ERROR; \
3990 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
3991 if (U_FAILURE(ec)) { \
3992 errln("FAIL: Couldn't create " id); \
3994 if (t->getDynamicClassID() != cls::getStaticClassID()) { \
3995 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
3997 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
4002 #define TEST_TRANSLIT_RULE(rule, cls) { \
4003 UErrorCode ec = U_ZERO_ERROR; \
4005 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
4006 if (U_FAILURE(ec)) { \
4007 errln("FAIL: Couldn't create " rule); \
4009 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
4010 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4012 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
4017 void TransliteratorTest::TestBoilerplate() {
// For each built-in ID below, check that the created object's dynamic class
// ID matches the static class ID of the named implementation class.
4018 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
);
4019 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
);
4020 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
);
4021 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
);
4022 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
);
4023 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
);
4024 TEST_TRANSLIT_ID("Null", NullTransliterator
);
4025 TEST_TRANSLIT_ID("Remove", RemoveTransliterator
);
4026 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
);
4027 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
);
4028 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
);
4029 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
);
// Same check for a transliterator built directly from a rule string.
4030 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
);
// Exercises rules written with the alternate operator characters that appear
// in the strings below: U+2192, U+2190 and U+2194 in place of the ASCII
// arrows, and U+2206 in place of '&' for function calls.
4033 void TransliteratorTest::TestAlternateSyntax() {
4038 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4041 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4042 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4043 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
// Rule sets referenced by index from BEGIN_END_TEST_CASES and from the
// TestBeginEnd* tests. Entries that are just "" are placeholders for
// disabled test cases; they must stay so that the indexes of the remaining
// entries do not shift.
4046 static const char* BEGIN_END_RULES
[] = {
4060 "", // test case commented out below, this is here to keep from messing up the indexes
4069 "", // test case commented out below, this is here to keep from messing up the indexes
4078 "", // test case commented out below, this is here to keep from messing up the indexes
4097 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4098 "$delim = [\\-$ws];"
4099 "$ws $delim* > ' ';"
4100 "'-' $delim* > '-';",
4104 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4105 "$delim = [\\-$ws];"
4106 "$ws $delim* > ' ';"
4107 "'-' $delim* > '-';",
4110 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4111 "$delim = [\\-$ws];"
4112 "$ws $delim* > ' ';"
4113 "'-' $delim* > '-';"
4117 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4118 "$delim = [\\-$ws];"
4120 "$ws $delim* > ' ';"
4121 "'-' $delim* > '-';",
4126 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4127 "$delim = [\\-$ws];"
4129 "$ws $delim* > ' ';"
4130 "'-' $delim* > '-';",
4132 "", // test case commented out below, this is here to keep from messing up the indexes
4136 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4137 "$delim = [\\-$ws];"
4139 "$ws $delim* > ' ';"
4140 "'-' $delim* > '-';"
4143 "", // test case commented out below, this is here to keep from messing up the indexes
4147 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4148 "$delim = [\\-$ws];"
4151 "$ws $delim* > ' ';"
4152 "'-' $delim* > '-';"
4155 "$ab { ' ' } $ab > '-';"
4162 "", // test case commented out below, this is here to keep from messing up the indexes
4165 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4166 "$delim = [\\-$ws];"
4169 "$ws $delim* > ' ';"
4170 "'-' $delim* > '-';"
4172 "$ab { ' ' } $ab > '-';"
4188 "", // test case commented out below, this is here to keep from messing up the indexes
4209 "", // test case commented out below, this is here to keep from messing up the indexes
// Number of entries in BEGIN_END_RULES.
4219 static const int32_t BEGIN_END_RULES_length
= (int32_t)(sizeof(BEGIN_END_RULES
) / sizeof(BEGIN_END_RULES
[0]));
4222 (This entire test is commented out below and will need some heavy revision when we re-add
4223 the ::BEGIN/::END stuff)
4224 static const char* BOGUS_BEGIN_END_RULES[] = {
4243 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
// Flat table of test cases consumed three entries at a time: a rule set from
// BEGIN_END_RULES, the input text, and the expected output. Commented-out
// rows correspond to the "" placeholder entries in BEGIN_END_RULES.
4246 static const char* BEGIN_END_TEST_CASES
[] = {
4247 // rules input expected output
4248 BEGIN_END_RULES
[0], "abc ababc aba", "xy zbc z",
4249 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4250 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4251 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4252 BEGIN_END_RULES
[4], "abc ababc aba", "xy abxy z",
4253 BEGIN_END_RULES
[5], "abccabaacababcbc", "PXAARXQBR",
4255 BEGIN_END_RULES
[6], "e e - e---e- e", "e e e-e-e",
4256 BEGIN_END_RULES
[7], "e e - e---e- e", "e e e-e-e",
4257 BEGIN_END_RULES
[8], "e e - e---e- e", "e e e-e-e",
4258 BEGIN_END_RULES
[9], "e e - e---e- e", "e e e-e-e",
4259 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4260 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4261 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4262 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4263 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4264 BEGIN_END_RULES
[13], "e e - e---e- e", "e e e-e-e",
4265 BEGIN_END_RULES
[13], "a a a a", "a%a%a%a",
4266 BEGIN_END_RULES
[13], "a a-b c b a", "a%a-b cb-a",
4268 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4269 BEGIN_END_RULES
[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4270 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4271 BEGIN_END_RULES
[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
// Number of entries (not triples) in BEGIN_END_TEST_CASES.
4273 static const int32_t BEGIN_END_TEST_CASES_length
= (int32_t)(sizeof(BEGIN_END_TEST_CASES
) / sizeof(BEGIN_END_TEST_CASES
[0]));
// Runs every (rules, input, expected) triple in BEGIN_END_TEST_CASES through
// expect(), then builds BEGIN_END_RULES[17] in the reverse direction and
// checks its output on a fixed string.
4275 void TransliteratorTest::TestBeginEnd() {
4276 // run through the list of test cases above
// i advances by 3; i / 3 is the test case number used in the label.
4278 for (i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4279 expect((UnicodeString
)"Test case #" + (i
/ 3),
4280 UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4281 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4282 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4285 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4286 UParseError parseError
;
4287 UErrorCode status
= U_ZERO_ERROR
;
4288 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4289 UTRANS_REVERSE
, parseError
, status
);
4290 if (reversed
== 0 || U_FAILURE(status
)) {
4291 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4293 expect(*reversed
, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4297 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4298 // that all of them cause errors
4300 (commented out until we have the real ::BEGIN/::END stuff in place
4301 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4302 UParseError parseError;
4303 UErrorCode status = U_ZERO_ERROR;
4304 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4305 UTRANS_FORWARD, parseError, status);
4306 if (!U_FAILURE(status)) {
4308 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
// Round-trip variant of TestBeginEnd(): each rule set is compiled, turned
// back into text with toRules(), recompiled from that text, and the
// recompiled transliterator is checked against the same input/expected pair.
4314 void TransliteratorTest::TestBeginEndToRules() {
4315 // run through the same list of test cases we used above, but this time, instead of just
4316 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4317 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4318 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4319 // to (i.e., does the same thing as) the original rule set
4320 for (int32_t i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4321 UParseError parseError
;
4322 UErrorCode status
= U_ZERO_ERROR
;
// First compile: the original rule text.
4323 Transliterator
* t
= Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4324 UTRANS_FORWARD
, parseError
, status
);
4325 if (U_FAILURE(status
)) {
4326 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError
, status
);
// Second compile: the text produced by toRules().
4328 UnicodeString rules
;
4329 t
->toRules(rules
, TRUE
);
4330 Transliterator
* t2
= Transliterator::createFromRules((UnicodeString
)"Test case #" + (i
/ 3), rules
,
4331 UTRANS_FORWARD
, parseError
, status
);
4332 if (U_FAILURE(status
)) {
4333 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4334 parseError
, status
);
4338 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4339 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4346 // do the same thing for the reversible test case
4347 UParseError parseError
;
4348 UErrorCode status
= U_ZERO_ERROR
;
4349 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4350 UTRANS_REVERSE
, parseError
, status
);
4351 if (U_FAILURE(status
)) {
4352 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
// The regenerated rules are compiled with UTRANS_FORWARD here, whereas the
// original was compiled with UTRANS_REVERSE.
4354 UnicodeString rules
;
4355 reversed
->toRules(rules
, FALSE
);
4356 Transliterator
* reversed2
= Transliterator::createFromRules("Reversed", rules
, UTRANS_FORWARD
,
4357 parseError
, status
);
4358 if (U_FAILURE(status
)) {
4359 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4360 parseError
, status
);
4364 UnicodeString("xy XY XYZ yz YZ"),
4365 UnicodeString("xy abc xaba yz aba"));
4372 void TransliteratorTest::TestRegisterAlias() {
4373 UnicodeString
longID("Lower;[aeiou]Upper");
4374 UnicodeString
shortID("Any-CapVowels");
4375 UnicodeString
reallyShortID("CapVowels");
4377 Transliterator::registerAlias(shortID
, longID
);
4379 UErrorCode err
= U_ZERO_ERROR
;
4380 Transliterator
* t1
= Transliterator::createInstance(longID
, UTRANS_FORWARD
, err
);
4381 if (U_FAILURE(err
)) {
4382 errln("Failed to instantiate transliterator with long ID");
4383 Transliterator::unregister(shortID
);
4386 Transliterator
* t2
= Transliterator::createInstance(reallyShortID
, UTRANS_FORWARD
, err
);
4387 if (U_FAILURE(err
)) {
4388 errln("Failed to instantiate transliterator with short ID");
4390 Transliterator::unregister(shortID
);
4394 if (t1
->getID() != longID
)
4395 errln("Transliterator instantiated with long ID doesn't have long ID");
4396 if (t2
->getID() != reallyShortID
)
4397 errln("Transliterator instantiated with short ID doesn't have short ID");
4399 UnicodeString rules1
;
4400 UnicodeString rules2
;
4402 t1
->toRules(rules1
, TRUE
);
4403 t2
->toRules(rules2
, TRUE
);
4404 if (rules1
!= rules2
)
4405 errln("Alias transliterators aren't the same");
4409 Transliterator::unregister(shortID
);
4411 t1
= Transliterator::createInstance(shortID
, UTRANS_FORWARD
, err
);
4412 if (U_SUCCESS(err
)) {
4413 errln("Instantiation with short ID succeeded after short ID was unregistered");
4417 // try the same thing again, but this time with something other than
4418 // an instance of CompoundTransliterator
4419 UnicodeString
realID("Latin-Greek");
4420 UnicodeString
fakeID("Latin-dlgkjdflkjdl");
4421 Transliterator::registerAlias(fakeID
, realID
);
4424 t1
= Transliterator::createInstance(realID
, UTRANS_FORWARD
, err
);
4425 if (U_FAILURE(err
)) {
4426 errln("Failed to instantiate transliterator with real ID");
4427 Transliterator::unregister(realID
);
4430 t2
= Transliterator::createInstance(fakeID
, UTRANS_FORWARD
, err
);
4431 if (U_FAILURE(err
)) {
4432 errln("Failed to instantiate transliterator with fake ID");
4434 Transliterator::unregister(realID
);
4438 t1
->toRules(rules1
, TRUE
);
4439 t2
->toRules(rules2
, TRUE
);
4440 if (rules1
!= rules2
)
4441 errln("Alias transliterators aren't the same");
4445 Transliterator::unregister(fakeID
);
// Feeds utrans_stripRules() a rule text that contains '#' comments and a
// CR/LF line break, and checks that only the bare rule remains.
4448 void TransliteratorTest::TestRuleStripping() {
4451 \uE001>\u0C01; # SIGN
// Input as UTF-16 code units: "# " CR LF, then U+E001 '>' U+0C01 ';' followed
// by " # SIGN", NUL-terminated.
4453 static const UChar rule
[] = {
4454 0x0023,0x0020,0x000D,0x000A,
4455 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
// Expected output: U+E001 '>' U+0C01 ';'.
4457 static const UChar expectedRule
[] = {
4458 0xE001,0x003E,0x0C01,0x003B,0
// The output buffer is as large as the input.
4460 UChar result
[sizeof(rule
)/sizeof(rule
[0])];
4461 UErrorCode status
= U_ZERO_ERROR
;
4462 int32_t len
= utrans_stripRules(rule
, (int32_t)(sizeof(rule
)/sizeof(rule
[0])), result
, &status
);
// Check the returned length first, then the stripped text itself.
4463 if (len
!= u_strlen(expectedRule
)) {
4464 errln("utrans_stripRules return len = %d", len
);
4466 if (u_strncmp(expectedRule
, result
, len
) != 0) {
4467 errln("utrans_stripRules did not return expected string");
4472 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
// Creates the Halfwidth-Fullwidth and Fullwidth-Halfwidth transliterators
// and runs the DATA table through them; the case labels below show that a
// direction code ('h', 'f' or 'b') selects which direction(s) each row tests.
4474 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4475 UParseError parseError
;
4476 UErrorCode status
= U_ZERO_ERROR
;
4477 Transliterator
* hf
= Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD
, parseError
, status
);
4478 Transliterator
* fh
= Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD
, parseError
, status
);
4479 if (hf
== 0 || fh
== 0) {
4480 errln("FAIL: createInstance failed");
// NOTE(review): the comment below says "2n items", but the loop further down
// advances by 3 and reads DATA[i+1] and DATA[i+2] — confirm the layout.
4486 // Array of 2n items
4488 // "hf"|"fh"|"both",
4491 const char* DATA
[] = {
4493 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4494 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4496 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
// h is the halfwidth form and f the fullwidth form of the same text.
4498 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
4499 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
4500 UnicodeString f
= CharsToUnicodeString(DATA
[i
+2]);
4502 case 0x68: //'h': // Halfwidth-Fullwidth only
4505 case 0x66: //'f': // Fullwidth-Halfwidth only
4508 case 0x62: //'b': // both directions
4520 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4521 * TODO: confirm that the expected results are correct.
4522 * For now, test just confirms that C++ and Java give identical results.
// Creates an "Any-Latin" forward transliterator, runs it over a fixed Thai
// paragraph, and passes the outcome together with a stored Latin string to
// expect().
// NOTE(review): the embedded line numbering skips inside this function, so
// some statements (e.g. the condition guarding the first errln) are not
// visible in this excerpt -- confirm against the full file.
4524 void TransliteratorTest::TestThai(void) {
4525 UParseError parseError
;
4526 UErrorCode status
= U_ZERO_ERROR
;
4527 Transliterator
* tr
= Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
// NOTE(review): the check that leads to this message is not visible here.
4529 errln("FAIL: createInstance failed");
// Report the ICU error name when creation set a failure status.
4532 if (U_FAILURE(status
)) {
4533 errln("FAIL: createInstance failed with %s", u_errorName(status
));
// Thai input. The backslash-u sequences are written with a doubled backslash,
// so they stay as escape text in the C string until unescape() is applied
// further down.
4536 const char *thaiText
=
4537 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4538 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4539 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4540 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4541 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4542 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4543 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4544 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4545 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4546 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4547 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4548 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4549 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4550 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4551 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4552 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4553 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4554 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4555 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4556 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4557 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4558 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4559 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4560 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4561 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4562 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4563 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4564 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4565 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4566 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
// Stored Latin text that the result is compared against (same escaping
// convention as thaiText).
4568 const char *latinText
=
4569 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4570 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4571 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4572 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4573 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4574 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4575 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4576 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4577 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4578 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4579 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4580 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4581 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4582 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4583 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4584 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4585 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4586 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
// Decode the escapes in the Thai text, then run the transliterator over it.
4589 UnicodeString
xlitText(thaiText
);
4590 xlitText
= xlitText
.unescape();
4591 tr
->transliterate(xlitText
);
// Decode the escapes in the stored Latin text.
4593 UnicodeString
expectedText(latinText
);
4594 expectedText
= expectedText
.unescape();
// NOTE(review): xlitText has already been passed through tr->transliterate()
// above, so expect() receives that transliterated text as its source --
// confirm this is intended.
4595 expect(*tr
, xlitText
, expectedText
);
4601 //======================================================================
4603 //======================================================================
// Creates a forward transliterator for the given ID and checks it with
// expect().
// @param id              ID passed to Transliterator::createInstance
// @param source          text passed to expect() as the input
// @param expectedResult  text passed to expect() as the expected output
// If the creation call leaves a failure code in `ec`, an error naming the ID
// is reported through errln().
// NOTE(review): the declaration of `pe` and whatever follows the errln()
// inside the failure branch are not visible in this excerpt.
4604 void TransliteratorTest::expectT(const UnicodeString
& id
,
4605 const UnicodeString
& source
,
4606 const UnicodeString
& expectedResult
) {
4607 UErrorCode ec
= U_ZERO_ERROR
;
4609 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
4610 if (U_FAILURE(ec
)) {
4611 errln((UnicodeString
)"FAIL: Could not create " + id
);
// Delegate the actual comparison to expect() on the created transliterator.
4615 expect(*t
, source
, expectedResult
);
// Assembles a diagnostic from a UParseError and an error code: the line,
// the offset, the prettified pre- and post-context, and the name of `status`.
// @param message     caller-supplied text
// @param parseError  parse error whose fields are formatted below
// @param status      error code rendered with u_errorName()
// NOTE(review): the statement that opens this expression is not visible in
// this excerpt; presumably `message` is prepended and the result is reported
// as an error -- confirm against the full file.
4619 void TransliteratorTest::reportParseError(const UnicodeString
& message
,
4620 const UParseError
& parseError
,
4621 const UErrorCode
& status
) {
// The parse error code is deliberately left out of the message (commented out).
4623 /*", parse error " + parseError.code +*/
4624 ", line " + parseError
.line
+
4625 ", offset " + parseError
.offset
+
4626 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
4627 ", post-context " + prettify(parseError
.postContext
,TRUE
) +
4628 ", Error: " + u_errorName(status
));
// Convenience overload taking rules without an ID: forwards every argument to
// the five-argument expect() and supplies the fixed ID "<ID>".
// @param rules           rule text, forwarded unchanged
// @param source          input text, forwarded unchanged
// @param expectedResult  expected output, forwarded unchanged
// @param pos             position pointer, forwarded unchanged
4631 void TransliteratorTest::expect(const UnicodeString
& rules
,
4632 const UnicodeString
& source
,
4633 const UnicodeString
& expectedResult
,
4634 UTransPosition
*pos
) {
4635 expect("<ID>", rules
, source
, expectedResult
, pos
);
// Creates a forward transliterator from rule text with
// Transliterator::createFromRules() and checks it with expect().
// @param id              ID passed to createFromRules
// @param rules           rule text passed to createFromRules
// @param source          input text for the check
// @param expectedResult  expected output for the check
// @param pos             position pointer forwarded to expect()
// When creation fails, the parse error is reported together with the rules.
// NOTE(review): the branch structure separating the failure report from the
// expect() call, and any cleanup of `t`, are not visible in this excerpt.
4638 void TransliteratorTest::expect(const UnicodeString
& id
,
4639 const UnicodeString
& rules
,
4640 const UnicodeString
& source
,
4641 const UnicodeString
& expectedResult
,
4642 UTransPosition
*pos
) {
4643 UErrorCode status
= U_ZERO_ERROR
;
4644 UParseError parseError
;
4645 Transliterator
* t
= Transliterator::createFromRules(id
, rules
, UTRANS_FORWARD
, parseError
, status
);
4646 if (U_FAILURE(status
)) {
// Include the offending rules in the report so the failure is self-describing.
4647 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules
, parseError
, status
);
4649 expect(*t
, source
, expectedResult
, pos
);
// Checks a forward/reverse pair of transliterators against one text pair:
// `t` is checked on source -> expectedResult, and `reverseTransliterator` is
// checked on expectedResult -> source.
// @param t                      transliterator for the forward check
// @param source                 input of the forward check, expected output of the reverse check
// @param expectedResult         expected output of the forward check, input of the reverse check
// @param reverseTransliterator  transliterator for the reverse check
4654 void TransliteratorTest::expect(const Transliterator
& t
,
4655 const UnicodeString
& source
,
4656 const UnicodeString
& expectedResult
,
4657 const Transliterator
& reverseTransliterator
) {
4658 expect(t
, source
, expectedResult
);
// Same check with the roles of the two strings swapped.
4659 expect(reverseTransliterator
, expectedResult
, source
);
// Core check used by the other expect() overloads. Exercises `t` on `source`
// through several call styles and reports each one via expectAux() under a
// tag built from t.getID():
//   ":String"      - transliterate() applied to a copy of source
//   ":Replaceable" - a second copy processed with transliterate() and/or
//                    finishTransliteration(..., *pos)
//   ":Keyboard"    - incremental transliteration completed by
//                    finishTransliteration(), with the intermediate states
//                    recorded in `log` through formatInput()
// @param t               transliterator under test
// @param source          input text
// @param expectedResult  text each style is expected to produce
// @param pos             optional position; dereferenced below as *pos
// NOTE(review): the conditionals that select between the pos / no-pos paths,
// the declaration of `log`, and the final argument of the last expectAux()
// call are not visible in this excerpt -- confirm against the full file.
4662 void TransliteratorTest::expect(const Transliterator
& t
,
4663 const UnicodeString
& source
,
4664 const UnicodeString
& expectedResult
,
4665 UTransPosition
*pos
) {
// Style 1: whole-string transliteration of a copy of the input.
4667 UnicodeString
result(source
);
4668 t
.transliterate(result
);
4669 expectAux(t
.getID() + ":String", source
, result
, expectedResult
);
// Position used by the incremental calls further down; starts all-zero.
4671 UTransPosition index
={0, 0, 0, 0};
// Style 2: work on a second copy of the input.
4676 UnicodeString
rsource(source
);
4678 t
.transliterate(rsource
);
4680 // Do it all at once -- below we do it incrementally
4681 t
.finishTransliteration(rsource
, *pos
);
4683 expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
);
4685 // Test keyboard (incremental) transliteration -- this result
4686 // must be the same after we finalize (see below).
// Record the state before and after one incremental call on the buffer.
4691 formatInput(log
, rsource
, index
);
4693 UErrorCode status
= U_ZERO_ERROR
;
4694 t
.transliterate(rsource
, index
, status
);
4695 formatInput(log
, rsource
, index
);
// Feed the source one code unit at a time, logging the buffer after each step.
4697 for (int32_t i
=0; i
<source
.length(); ++i
) {
4701 log
.append(source
.charAt(i
)).append(" -> ");
4702 UErrorCode status
= U_ZERO_ERROR
;
4703 t
.transliterate(rsource
, index
, source
.charAt(i
), status
);
4704 formatInput(log
, rsource
, index
);
4708 // As a final step in keyboard transliteration, we must call
4709 // transliterate to finish off any pending partial matches that
4710 // were waiting for more input.
4711 t
.finishTransliteration(rsource
, index
);
4712 log
.append(" => ").append(rsource
);
// Style 3 verdict: the incrementally built text must equal expectedResult.
4714 expectAux(t
.getID() + ":Keyboard", log
,
4715 rsource
== expectedResult
,
4721 * @param appendTo result is appended to this param.
4722 * @param input the string being transliterated
4723 * @param pos the index struct
// Appends to `appendTo` a rendering of `input` annotated with the four
// indices held in `pos`.
// When 0 <= contextStart <= start <= limit <= contextLimit <= input.length(),
// the input is cut into five pieces at those indices and joined with the
// marker characters 123 ('{'), PIPE, PIPE and 125 ('}').
// The second append further down builds an "INVALID UTransPosition" message
// listing the four index values.
// NOTE(review): the else branch that presumably selects that message, the
// remainder of the message, and the return statement are not visible in this
// excerpt -- confirm against the full file.
4725 UnicodeString
& TransliteratorTest::formatInput(UnicodeString
&appendTo
,
4726 const UnicodeString
& input
,
4727 const UTransPosition
& pos
) {
4728 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4729 // the {} indicate the context start and limit, and the ||
4730 // indicate the start and limit.
4731 if (0 <= pos
.contextStart
&&
4732 pos
.contextStart
<= pos
.start
&&
4733 pos
.start
<= pos
.limit
&&
4734 pos
.limit
<= pos
.contextLimit
&&
4735 pos
.contextLimit
<= input
.length()) {
// a..e are the five consecutive slices of input delimited by the indices.
4737 UnicodeString a
, b
, c
, d
, e
;
4738 input
.extractBetween(0, pos
.contextStart
, a
);
4739 input
.extractBetween(pos
.contextStart
, pos
.start
, b
);
4740 input
.extractBetween(pos
.start
, pos
.limit
, c
);
4741 input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
);
4742 input
.extractBetween(pos
.contextLimit
, input
.length(), e
);
// Emit a{b|c|d}e, with the braces written as numeric code units.
4743 appendTo
.append(a
).append((UChar
)123/*{*/).append(b
).
4744 append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
).
4745 append((UChar
)125/*}*/).append(e
);
// Diagnostic text carrying the raw index values.
4747 appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" +
4748 pos
.contextStart
+ ", s=" + pos
.start
+ ", l=" +
4749 pos
.limit
+ ", cl=" + pos
.contextLimit
+ "} on " +
// Convenience overload: builds the summary text "<source> -> <result>" and
// passes it, together with the verdict (result == expectedResult), to the
// expectAux() overload that takes a summary and a pass flag.
// @param tag             label forwarded unchanged
// @param source          input text shown on the left of the summary
// @param result          produced text shown on the right of the summary
// @param expectedResult  text that `result` is compared against
// NOTE(review): the last argument of the forwarded call is not visible in
// this excerpt.
4755 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4756 const UnicodeString
& source
,
4757 const UnicodeString
& result
,
4758 const UnicodeString
& expectedResult
) {
4759 expectAux(tag
, source
+ " -> " + result
,
4760 result
== expectedResult
,
// Reports the outcome of one check.
// The logln() call writes "(<tag>) " followed by the prettified summary; the
// errln() call writes a message that starts with "FAIL: (<tag>) " and ends
// with ", expected " plus the prettified expected result.
// @param tag             label placed in parentheses at the start of the message
// @param summary         description of the check, prettified before logging
// @param pass            verdict of the check
// @param expectedResult  expected text, included in the failure message
// NOTE(review): the test on `pass` that selects between the two calls, and
// the middle of the failure message, are not visible in this excerpt.
4764 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4765 const UnicodeString
& summary
, UBool pass
,
4766 const UnicodeString
& expectedResult
) {
4768 logln(UnicodeString("(")+tag
+") " + prettify(summary
));
4770 errln(UnicodeString("FAIL: (")+tag
+") "
4772 + ", expected " + prettify(expectedResult
));
4776 #endif /* #if !UCONFIG_NO_TRANSLITERATION */