2 **********************************************************************
3 * Copyright (C) 1999-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
43 /***********************************************************************
45 HOW TO USE THIS TEST FILE
47 How I developed on two platforms
48 without losing (too much of) my mind
51 1. Add new tests by copying/pasting/changing existing tests. On Java,
52 any public void method named Test...() taking no parameters becomes
53 a test. On C++, you need to modify the header and add a line to
54 the runIndexedTest() dispatch method.
56 2. Make liberal use of the expect() method; it is your friend.
58 3. The tests in this file exactly match those in a sister file on the
59 other side. The two files are:
61 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
62 icu4c: source/test/intltest/transtst.cpp
64 ==> THIS IS THE IMPORTANT PART <==
66 When you add a test in this file, add it in TransliteratorTest.java
67 too. Give it the same name and put it in the same relative place.
68 This makes maintenance a lot simpler for any poor soul who ends up
69 trying to synchronize the tests between icu4j and icu4c.
71 4. If you MUST enter a test that is NOT paralleled in the sister file,
72 then add it in the special non-mirrored section. These are
81 Make sure you document the reason the test is here and not there.
86 ***********************************************************************/
88 // Define character constants thusly to be EBCDIC-friendly
90 LEFT_BRACE
=((UChar
)0x007B), /*{*/
91 PIPE
=((UChar
)0x007C), /*|*/
92 ZERO
=((UChar
)0x0030), /*0*/
93 UPPER_A
=((UChar
)0x0041) /*A*/
96 TransliteratorTest::TransliteratorTest()
97 : DESERET_DEE((UChar32
)0x10414),
98 DESERET_dee((UChar32
)0x1043C)
102 TransliteratorTest::~TransliteratorTest() {}
105 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
,
106 const char* &name
, char* /*par*/) {
108 TESTCASE(0,TestInstantiation
);
109 TESTCASE(1,TestSimpleRules
);
110 TESTCASE(2,TestRuleBasedInverse
);
111 TESTCASE(3,TestKeyboard
);
112 TESTCASE(4,TestKeyboard2
);
113 TESTCASE(5,TestKeyboard3
);
114 TESTCASE(6,TestArabic
);
115 TESTCASE(7,TestCompoundKana
);
116 TESTCASE(8,TestCompoundHex
);
117 TESTCASE(9,TestFiltering
);
118 TESTCASE(10,TestInlineSet
);
119 TESTCASE(11,TestPatternQuoting
);
120 TESTCASE(12,TestJ277
);
121 TESTCASE(13,TestJ243
);
122 TESTCASE(14,TestJ329
);
123 TESTCASE(15,TestSegments
);
124 TESTCASE(16,TestCursorOffset
);
125 TESTCASE(17,TestArbitraryVariableValues
);
126 TESTCASE(18,TestPositionHandling
);
127 TESTCASE(19,TestHiraganaKatakana
);
128 TESTCASE(20,TestCopyJ476
);
129 TESTCASE(21,TestAnchors
);
130 TESTCASE(22,TestInterIndic
);
131 TESTCASE(23,TestFilterIDs
);
132 TESTCASE(24,TestCaseMap
);
133 TESTCASE(25,TestNameMap
);
134 TESTCASE(26,TestLiberalizedID
);
135 TESTCASE(27,TestCreateInstance
);
136 TESTCASE(28,TestNormalizationTransliterator
);
137 TESTCASE(29,TestCompoundRBT
);
138 TESTCASE(30,TestCompoundFilter
);
139 TESTCASE(31,TestRemove
);
140 TESTCASE(32,TestToRules
);
141 TESTCASE(33,TestContext
);
142 TESTCASE(34,TestSupplemental
);
143 TESTCASE(35,TestQuantifier
);
144 TESTCASE(36,TestSTV
);
145 TESTCASE(37,TestCompoundInverse
);
146 TESTCASE(38,TestNFDChainRBT
);
147 TESTCASE(39,TestNullInverse
);
148 TESTCASE(40,TestAliasInverseID
);
149 TESTCASE(41,TestCompoundInverseID
);
150 TESTCASE(42,TestUndefinedVariable
);
151 TESTCASE(43,TestEmptyContext
);
152 TESTCASE(44,TestCompoundFilterID
);
153 TESTCASE(45,TestPropertySet
);
154 TESTCASE(46,TestNewEngine
);
155 TESTCASE(47,TestQuantifiedSegment
);
156 TESTCASE(48,TestDevanagariLatinRT
);
157 TESTCASE(49,TestTeluguLatinRT
);
158 TESTCASE(50,TestCompoundLatinRT
);
159 TESTCASE(51,TestSanskritLatinRT
);
160 TESTCASE(52,TestLocaleInstantiation
);
161 TESTCASE(53,TestTitleAccents
);
162 TESTCASE(54,TestLocaleResource
);
163 TESTCASE(55,TestParseError
);
164 TESTCASE(56,TestOutputSet
);
165 TESTCASE(57,TestVariableRange
);
166 TESTCASE(58,TestInvalidPostContext
);
167 TESTCASE(59,TestIDForms
);
168 TESTCASE(60,TestToRulesMark
);
169 TESTCASE(61,TestEscape
);
170 TESTCASE(62,TestAnchorMasking
);
171 TESTCASE(63,TestDisplayName
);
172 TESTCASE(64,TestSpecialCases
);
173 TESTCASE(65,TestIncrementalProgress
);
174 TESTCASE(66,TestSurrogateCasing
);
175 TESTCASE(67,TestFunction
);
176 TESTCASE(68,TestInvalidBackRef
);
177 TESTCASE(69,TestMulticharStringSet
);
178 TESTCASE(70,TestUserFunction
);
179 TESTCASE(71,TestAnyX
);
180 TESTCASE(72,TestSourceTargetSet
);
181 TESTCASE(73,TestGurmukhiDevanagari
);
182 TESTCASE(74,TestRuleWhitespace
);
183 TESTCASE(75,TestAllCodepoints
);
184 TESTCASE(76,TestBoilerplate
);
185 TESTCASE(77,TestAlternateSyntax
);
186 TESTCASE(78,TestBeginEnd
);
187 TESTCASE(79,TestBeginEndToRules
);
188 TESTCASE(80,TestRegisterAlias
);
189 default: name
= ""; break;
193 static const UVersionInfo ICU_37
= {3,7,0,0};
195 * Make sure every system transliterator can be instantiated.
197 * ALSO test that the result of toRules() for each rule is a valid
198 * rule. Do this here so we don't have to have another test that
199 * instantiates everything as well.
201 void TransliteratorTest::TestInstantiation() {
202 UErrorCode ec
= U_ZERO_ERROR
;
203 StringEnumeration
* avail
= Transliterator::getAvailableIDs(ec
);
204 assertSuccess("getAvailableIDs()", ec
);
205 assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
);
206 int32_t n
= Transliterator::countAvailableIDs();
207 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
208 avail
->count(ec
) == n
);
209 assertSuccess("count()", ec
);
211 for (int32_t i
=0; i
<n
; ++i
) {
212 const UnicodeString
& id
= *avail
->snext(ec
);
213 if (!assertSuccess("snext()", ec
) ||
214 !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) {
217 UnicodeString id2
= Transliterator::getAvailableID(i
);
218 if (id
.length() < 1) {
219 errln(UnicodeString("FAIL: getAvailableID(") +
220 i
+ ") returned empty string");
224 errln(UnicodeString("FAIL: getAvailableID(") +
225 i
+ ") != getAvailableIDs().snext()");
228 if(id2
.indexOf("Thai")>-1 && !isICUVersionAtLeast(ICU_37
)){
229 /* The Thai-Latin transliterator doesn't exist in ICU4C yet */
232 UParseError parseError
;
233 UErrorCode status
= U_ZERO_ERROR
;
234 Transliterator
* t
= Transliterator::createInstance(id
,
235 UTRANS_FORWARD
, parseError
,status
);
237 Transliterator::getDisplayName(id
, name
);
239 errln(UnicodeString("FAIL: Couldn't create ") + id
+
240 /*", parse error " + parseError.code +*/
241 ", line " + parseError
.line
+
242 ", offset " + parseError
.offset
+
243 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
244 ", post-context " +prettify(parseError
.postContext
,TRUE
) +
245 ", Error: " + u_errorName(status
));
246 // When createInstance fails, it deletes the failing
247 // entry from the available ID list. We detect this
248 // here by looking for a change in countAvailableIDs.
249 int32_t nn
= Transliterator::countAvailableIDs();
252 --i
; // Compensate for deleted entry
255 logln(UnicodeString("OK: ") + name
+ " (" + id
+ ")");
259 t
->toRules(rules
, TRUE
);
260 Transliterator
*u
= Transliterator::createFromRules("x",
261 rules
, UTRANS_FORWARD
, parseError
,status
);
263 errln(UnicodeString("FAIL: ") + id
+
264 ".createFromRules() => bad rules" +
265 /*", parse error " + parseError.code +*/
266 ", line " + parseError
.line
+
267 ", offset " + parseError
.offset
+
268 ", context " + prettify(parseError
.preContext
, TRUE
) +
269 ", rules: " + prettify(rules
, TRUE
));
276 assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
);
277 assertSuccess("snext()", ec
);
280 // Now test the failure path
281 UParseError parseError
;
282 UErrorCode status
= U_ZERO_ERROR
;
283 UnicodeString
id("<Not a valid Transliterator ID>");
284 Transliterator
* t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
286 errln("FAIL: " + id
+ " returned a transliterator");
289 logln("OK: Bogus ID handled properly");
293 void TransliteratorTest::TestSimpleRules(void) {
294 /* Example: rules 1. ab>x|y
297 * []|eabcd start - no match, copy e to transliterated buffer
298 * [e]|abcd match rule 1 - copy output & adjust cursor
299 * [ex|y]cd match rule 2 - copy output & adjust cursor
300 * [exz]|d no match, copy d to transliterated buffer
303 expect(UnicodeString("ab>x|y;", "") +
307 /* Another set of rules:
319 expect(UnicodeString("ab>x|yzacw;") +
327 UErrorCode status
= U_ZERO_ERROR
;
328 RuleBasedTransliterator
t(
330 UnicodeString("$dummy=").append((UChar
)0xE100) +
332 "$vowel=[aeiouAEIOU];"
334 "$vowel } $lu > '!';"
340 if (U_FAILURE(status
)) {
341 errln("FAIL: RBT constructor failed");
344 expect(t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
348 * Test inline set syntax and set variable syntax.
350 void TransliteratorTest::TestInlineSet(void) {
351 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
352 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
354 expect(UnicodeString(
357 "$alphanumeric = [$digit $alpha];" // ***
358 "$special = [^$alphanumeric];" // ***
359 "$alphanumeric > '-';"
360 "$special > '*';", ""),
362 "thx-1138", "---*----");
366 * Create some inverses and confirm that they work. We have to be
367 * careful how we do this, since the inverses will not be true
368 * inverses -- we can't throw any random string at the composition
369 * of the transliterators and expect the identity function. F x
370 * F' != I. However, if we are careful about the input, we will
371 * get the expected results.
373 void TransliteratorTest::TestRuleBasedInverse(void) {
374 UnicodeString RULES
=
375 UnicodeString("abc>zyx;") +
393 const char* DATA
[] = {
394 // Careful here -- random strings will not work. If we keep
395 // the left side to the domain and the right side to the range
396 // we will be okay though (left, abc; right xyz).
398 "abcacab", "zyxxxyy",
402 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
404 UErrorCode status
= U_ZERO_ERROR
;
405 RuleBasedTransliterator
fwd("<ID>", RULES
, status
);
406 RuleBasedTransliterator
rev("<ID>", RULES
,
407 UTRANS_REVERSE
, status
);
408 if (U_FAILURE(status
)) {
409 errln("FAIL: RBT constructor failed");
412 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
413 expect(fwd
, DATA
[i
], DATA
[i
+1]);
414 expect(rev
, DATA
[i
+1], DATA
[i
]);
419 * Basic test of keyboard.
421 void TransliteratorTest::TestKeyboard(void) {
422 UErrorCode status
= U_ZERO_ERROR
;
423 RuleBasedTransliterator
t("<ID>",
424 UnicodeString("psch>Y;")
429 if (U_FAILURE(status
)) {
430 errln("FAIL: RBT constructor failed");
433 const char* DATA
[] = {
441 0, "AycAY", // null means finishKeyboardTransliteration
444 keyboardAux(t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
448 * Basic test of keyboard with cursor.
450 void TransliteratorTest::TestKeyboard2(void) {
451 UErrorCode status
= U_ZERO_ERROR
;
452 RuleBasedTransliterator
t("<ID>",
453 UnicodeString("ych>Y;")
458 if (U_FAILURE(status
)) {
459 errln("FAIL: RBT constructor failed");
462 const char* DATA
[] = {
466 "s", "Aps", // modified for rollback - "Ay",
467 "c", "Apsc", // modified for rollback - "Ayc",
470 "s", "AycAps", // modified for rollback - "AycAy",
471 "c", "AycApsc", // modified for rollback - "AycAyc",
473 0, "AycAY", // null means finishKeyboardTransliteration
476 keyboardAux(t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
480 * Test keyboard transliteration with back-replacement.
482 void TransliteratorTest::TestKeyboard3(void) {
483 // We want th>z but t>y. Furthermore, during keyboard
484 // transliteration we want t>y then yh>z if t, then h are
486 UnicodeString
RULES("t>|y;"
489 const char* DATA
[] = {
490 // Column 1: characters to add to buffer (as if typed)
491 // Column 2: expected appearance of buffer after
492 // keyboard xliteration.
495 "t", "abt", // modified for rollback - "aby",
497 "t", "abyct", // modified for rollback - "abycy",
499 0, "abycz", // null means finishKeyboardTransliteration
502 UErrorCode status
= U_ZERO_ERROR
;
503 RuleBasedTransliterator
t("<ID>", RULES
, status
);
504 if (U_FAILURE(status
)) {
505 errln("FAIL: RBT constructor failed");
508 keyboardAux(t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
511 void TransliteratorTest::keyboardAux(const Transliterator
& t
,
512 const char* DATA
[], int32_t DATA_length
) {
513 UErrorCode status
= U_ZERO_ERROR
;
514 UTransPosition index
={0, 0, 0, 0};
516 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
522 t
.transliterate(s
, index
, DATA
[i
], status
);
525 t
.finishTransliteration(s
, index
);
527 // Show the start index '{' and the cursor '|'
528 UnicodeString a
, b
, c
;
529 s
.extractBetween(0, index
.contextStart
, a
);
530 s
.extractBetween(index
.contextStart
, index
.start
, b
);
531 s
.extractBetween(index
.start
, s
.length(), c
);
533 append((UChar
)LEFT_BRACE
).
537 if (s
== DATA
[i
+1] && U_SUCCESS(status
)) {
540 errln(UnicodeString("FAIL: ") + log
+ ", expected " + DATA
[i
+1]);
545 void TransliteratorTest::TestArabic(void) {
546 // Test disabled for 2.0 until new Arabic transliterator can be written.
548 // const char* DATA[] = {
549 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
550 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
551 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
552 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
553 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
554 // "\u062c\u0645\u064a\u0644\u0629",
558 // UChar ar_raw[] = {
559 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
560 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
561 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
562 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
563 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
564 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
566 // UnicodeString ar(ar_raw);
567 // UErrorCode status=U_ZERO_ERROR;
568 // UParseError parseError;
569 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
571 // errln("FAIL: createInstance failed");
574 // expect(*t, "Arabic", ar);
579 * Compose the Kana transliterator forward and reverse and try
580 * some strings that should come out unchanged.
582 void TransliteratorTest::TestCompoundKana(void) {
583 UParseError parseError
;
584 UErrorCode status
= U_ZERO_ERROR
;
585 Transliterator
* t
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
);
587 errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
589 expect(*t
, "aaaaa", "aaaaa");
595 * Compose the hex transliterators forward and reverse.
597 void TransliteratorTest::TestCompoundHex(void) {
598 UParseError parseError
;
599 UErrorCode status
= U_ZERO_ERROR
;
600 Transliterator
* a
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
601 Transliterator
* b
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
);
602 Transliterator
* transab
[] = { a
, b
};
603 Transliterator
* transba
[] = { b
, a
};
604 if (a
== 0 || b
== 0) {
605 errln("FAIL: construction failed");
610 // Do some basic tests of a
611 expect(*a
, "01", UnicodeString("\\u0030\\u0031", ""));
612 // Do some basic tests of b
613 expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01");
615 Transliterator
* ab
= new CompoundTransliterator(transab
, 2);
616 UnicodeString
s("abcde", "");
619 UnicodeString
str(s
);
620 a
->transliterate(str
);
621 Transliterator
* ba
= new CompoundTransliterator(transba
, 2);
622 expect(*ba
, str
, str
);
630 int gTestFilterClassID
= 0;
632 * Used by TestFiltering().
634 class TestFilter
: public UnicodeFilter
{
635 virtual UnicodeFunctor
* clone() const {
636 return new TestFilter(*this);
638 virtual UBool
contains(UChar32 c
) const {
639 return c
!= (UChar
)0x0063 /*c*/;
642 virtual UnicodeString
& toPattern(UnicodeString
& result
,
643 UBool
/*escapeUnprintable*/) const {
646 virtual UBool
matchesIndexValue(uint8_t /*v*/) const {
649 virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {}
651 UClassID
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; }
655 * Do some basic tests of filtering.
657 void TransliteratorTest::TestFiltering(void) {
658 UParseError parseError
;
659 UErrorCode status
= U_ZERO_ERROR
;
660 Transliterator
* hex
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
662 errln("FAIL: createInstance(Any-Hex) failed");
665 hex
->adoptFilter(new TestFilter());
666 UnicodeString
s("abcde");
667 hex
->transliterate(s
);
668 UnicodeString
exp("\\u0061\\u0062c\\u0064\\u0065", "");
670 logln(UnicodeString("Ok: \"") + exp
+ "\"");
672 logln(UnicodeString("FAIL: \"") + s
+ "\", wanted \"" + exp
+ "\"");
675 // ICU4C ONLY. Transliterator.orphanFilter() does not exist in ICU4J.
676 UnicodeFilter
*f
= hex
->orphanFilter();
678 errln("FAIL: orphanFilter() should get a UnicodeFilter");
688 void TransliteratorTest::TestAnchors(void) {
689 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
692 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
695 expect(UnicodeString("^ab > 01 ;"
703 expect(UnicodeString("$s = [z$] ;"
710 "abzababbabxzabxabx",
715 * Test pattern quoting and escape mechanisms.
717 void TransliteratorTest::TestPatternQuoting(void) {
719 // Each item is <rules>, <input>, <expected output>
720 const UnicodeString DATA
[] = {
721 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
722 UnicodeString(UChar(0x4E01)),
726 for (int32_t i
=0; i
<3; i
+=3) {
727 logln(UnicodeString("Pattern: ") + prettify(DATA
[i
]));
728 UErrorCode status
= U_ZERO_ERROR
;
729 RuleBasedTransliterator
t("<ID>", DATA
[i
], status
);
730 if (U_FAILURE(status
)) {
731 errln("RBT constructor failed");
733 expect(t
, DATA
[i
+1], DATA
[i
+2]);
739 * Regression test for bugs found in Greek transliteration.
741 void TransliteratorTest::TestJ277(void) {
742 UErrorCode status
= U_ZERO_ERROR
;
743 UParseError parseError
;
744 Transliterator
*gl
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
);
746 errln("FAIL: createInstance(Greek-Latin) returned NULL");
751 UChar upsilon
= 0x3C5;
753 // UChar PHI = 0x3A6;
755 // UChar omega = 0x3C9;
756 // UChar omicron = 0x3BF;
757 // UChar epsilon = 0x3B5;
759 // sigma upsilon nu -> syn
761 syn
.append(sigma
).append(upsilon
).append(nu
);
762 expect(*gl
, syn
, "syn");
764 // sigma alpha upsilon nu -> saun
766 sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
);
767 expect(*gl
, sayn
, "saun");
769 // Again, using a smaller rule set
774 "$ypsilon = \\u03C5;"
775 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
778 "u <> $vowel { $ypsilon;"
782 RuleBasedTransliterator
mini("mini", rules
, UTRANS_REVERSE
, status
);
783 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
784 expect(mini
, syn
, "syn");
785 expect(mini
, sayn
, "saun");
787 #if !UCONFIG_NO_FORMATTING
788 // Transliterate the Greek locale data
790 DateFormatSymbols
syms(el
, status
);
791 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
793 const UnicodeString
* data
= syms
.getMonths(count
);
794 for (i
=0; i
<count
; ++i
) {
795 if (data
[i
].length() == 0) {
798 UnicodeString
out(data
[i
]);
799 gl
->transliterate(out
);
801 if (data
[i
].length() >= 2 && out
.length() >= 2 &&
802 u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) {
803 if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) {
808 logln(prettify(data
[i
] + " -> " + out
));
810 errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
));
819 * Prefix, suffix support in hex transliterators
821 void TransliteratorTest::TestJ243(void) {
822 UErrorCode ec
= U_ZERO_ERROR
;
824 // Test default Hex-Any, which should handle
825 // \u, \U, u+, and U+
826 Transliterator
*hex
=
827 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
);
828 if (assertSuccess("getInstance", ec
)) {
829 expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
833 // // Try a custom Hex-Unicode
834 // // \uXXXX and &#xXXXX;
835 // ec = U_ZERO_ERROR;
836 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
837 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
838 // "abcd5fx0123");
839 // // Try custom Any-Hex (default is tested elsewhere)
840 // ec = U_ZERO_ERROR;
841 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
842 // expect(hex3, "012", "012");
846 * Parsers need better syntax error messages.
848 void TransliteratorTest::TestJ329(void) {
850 struct { UBool containsErrors
; const char* rule
; } DATA
[] = {
851 { FALSE
, "a > b; c > d" },
852 { TRUE
, "a > b; no operator; c > d" },
854 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
856 for (int32_t i
=0; i
<DATA_length
; ++i
) {
857 UErrorCode status
= U_ZERO_ERROR
;
858 UParseError parseError
;
859 RuleBasedTransliterator
rbt("<ID>",
865 UBool gotError
= U_FAILURE(status
);
866 UnicodeString
desc(DATA
[i
].rule
);
867 desc
.append(gotError
? " -> error" : " -> no error");
869 desc
= desc
+ ", ParseError code=" + u_errorName(status
) +
870 " line=" + parseError
.line
+
871 " offset=" + parseError
.offset
+
872 " context=" + parseError
.preContext
;
874 if (gotError
== DATA
[i
].containsErrors
) {
875 logln(UnicodeString("Ok: ") + desc
);
877 errln(UnicodeString("FAIL: ") + desc
);
883 * Test segments and segment references.
885 void TransliteratorTest::TestSegments(void) {
887 // Each item is <rules>, <input>, <expected output>
888 UnicodeString DATA
[] = {
889 "([a-z]) '.' ([0-9]) > $2 '-' $1",
894 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
898 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
900 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
901 logln("Pattern: " + prettify(DATA
[i
]));
902 UErrorCode status
= U_ZERO_ERROR
;
903 RuleBasedTransliterator
t("ID", DATA
[i
], status
);
904 if (U_FAILURE(status
)) {
905 errln("FAIL: RBT constructor");
907 expect(t
, DATA
[i
+1], DATA
[i
+2]);
913 * Test cursor positioning outside of the key
915 void TransliteratorTest::TestCursorOffset(void) {
917 // Each item is <rules>, <input>, <expected output>
918 UnicodeString DATA
[] = {
919 "pre {alpha} post > | @ ALPHA ;"
921 "pre {beta} post > BETA @@ | ;"
924 "prealphapost prebetapost",
926 "prbetaxyz preBETApost",
928 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
930 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
931 logln("Pattern: " + prettify(DATA
[i
]));
932 UErrorCode status
= U_ZERO_ERROR
;
933 RuleBasedTransliterator
t("<ID>", DATA
[i
], status
);
934 if (U_FAILURE(status
)) {
935 errln("FAIL: RBT constructor");
937 expect(t
, DATA
[i
+1], DATA
[i
+2]);
943 * Test zero length and > 1 char length variable values. Test
944 * use of variable refs in UnicodeSets.
946 void TransliteratorTest::TestArbitraryVariableValues(void) {
948 // Each item is <rules>, <input>, <expected output>
949 UnicodeString DATA
[] = {
967 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
969 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
970 logln("Pattern: " + prettify(DATA
[i
]));
971 UErrorCode status
= U_ZERO_ERROR
;
972 RuleBasedTransliterator
t("<ID>", DATA
[i
], status
);
973 if (U_FAILURE(status
)) {
974 errln("FAIL: RBT constructor");
976 expect(t
, DATA
[i
+1], DATA
[i
+2]);
982 * Confirm that the contextStart, contextLimit, start, and limit
983 * behave correctly. J474.
985 void TransliteratorTest::TestPositionHandling(void) {
987 // Each item is <rules>, <input>, <expected output>
988 const char* DATA
[] = {
989 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
990 "xtat txtb", // pos 0,9,0,9
993 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
994 "xtat txtb", // pos 2,9,3,8
997 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
998 "xtat txtb", // pos 3,8,3,8
1002 // Array of 4n positions -- these go with the DATA array
1003 // They are: contextStart, contextLimit, start, limit
1010 int32_t n
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0])) / 3;
1011 for (int32_t i
=0; i
<n
; i
++) {
1012 UErrorCode status
= U_ZERO_ERROR
;
1013 Transliterator
*t
= new RuleBasedTransliterator("<ID>",
1015 if (U_FAILURE(status
)) {
1017 errln("FAIL: RBT constructor");
1021 pos
.contextStart
= POS
[4*i
];
1022 pos
.contextLimit
= POS
[4*i
+1];
1023 pos
.start
= POS
[4*i
+2];
1024 pos
.limit
= POS
[4*i
+3];
1025 UnicodeString
rsource(DATA
[3*i
+1]);
1026 t
->transliterate(rsource
, pos
, status
);
1027 if (U_FAILURE(status
)) {
1029 errln("FAIL: transliterate");
1032 t
->finishTransliteration(rsource
, pos
);
1033 expectAux(DATA
[3*i
],
1042 * Test the Hiragana-Katakana transliterator.
1044 void TransliteratorTest::TestHiraganaKatakana(void) {
1045 UParseError parseError
;
1046 UErrorCode status
= U_ZERO_ERROR
;
1047 Transliterator
* hk
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
);
1048 Transliterator
* kh
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
);
1049 if (hk
== 0 || kh
== 0) {
1050 errln("FAIL: createInstance failed");
1056 // Array of 3n items
1057 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1058 const char* DATA
[] = {
1060 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1061 "\\u30A2\\u30F8\\u30F2\\u30B0",
1064 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1065 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1067 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1069 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1070 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
1071 UnicodeString k
= CharsToUnicodeString(DATA
[i
+2]);
1073 case 0x68: //'h': // Hiragana-Katakana
1076 case 0x6B: //'k': // Katakana-Hiragana
1079 case 0x62: //'b': // both
1090 * Test cloning / copy constructor of RBT.
1092 void TransliteratorTest::TestCopyJ476(void) {
1093 // The real test here is what happens when the destructors are
1094 // called. So we let one object get destructed, and check to
1095 // see that its copy still works.
1096 RuleBasedTransliterator
*t2
= 0;
1098 UErrorCode status
= U_ZERO_ERROR
;
1099 RuleBasedTransliterator
t1("t1", "a>A;b>B;", status
);
1100 if (U_FAILURE(status
)) {
1101 errln("FAIL: RBT constructor");
1104 t2
= new RuleBasedTransliterator(t1
);
1105 expect(t1
, "abc", "ABc");
1107 expect(*t2
, "abc", "ABc");
1112 * Test inter-Indic transliterators. These are composed.
1113 * ICU4C Jitterbug 483.
1115 void TransliteratorTest::TestInterIndic(void) {
1116 UnicodeString
ID("Devanagari-Gujarati", "");
1117 UErrorCode status
= U_ZERO_ERROR
;
1118 UParseError parseError
;
1119 Transliterator
* dg
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1121 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1124 UnicodeString id
= dg
->getID();
1126 errln("FAIL: createInstance(" + ID
+ ")->getID() => " + id
);
1128 UnicodeString dev
= CharsToUnicodeString("\\u0901\\u090B\\u0925");
1129 UnicodeString guj
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1130 expect(*dg
, dev
, guj
);
1135 * Test filter syntax in IDs. (J918)
1137 void TransliteratorTest::TestFilterIDs(void) {
1138 // Array of 4n strings:
1139 // <id>, <inverse id>, <input>, <expected output>
1140 const char* DATA
[] = {
1141 "[aeiou]Any-Hex", // ID
1142 "[aeiou]Hex-Any", // expected inverse ID
1144 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1146 "[aeiou]Any-Hex;[^5]Hex-Any",
1147 "[^5]Any-Hex;[aeiou]Hex-Any",
1156 enum { DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]) };
1158 for (int i
=0; i
<DATA_length
; i
+=4) {
1159 UnicodeString
ID(DATA
[i
], "");
1160 UnicodeString
uID(DATA
[i
+1], "");
1161 UnicodeString
data2(DATA
[i
+2], "");
1162 UnicodeString
data3(DATA
[i
+3], "");
1163 UParseError parseError
;
1164 UErrorCode status
= U_ZERO_ERROR
;
1165 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1167 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1170 expect(*t
, data2
, data3
);
1173 if (ID
!= t
->getID()) {
1174 errln("FAIL: createInstance(" + ID
+ ").getID() => " +
1178 // Check the inverse
1179 Transliterator
*u
= t
->createInverse(status
);
1181 errln("FAIL: " + ID
+ ".createInverse() returned NULL");
1182 } else if (u
->getID() != uID
) {
1183 errln("FAIL: " + ID
+ ".createInverse().getID() => " +
1184 u
->getID() + ", expected " + uID
);
1193 * Test the case mapping transliterators.
1195 void TransliteratorTest::TestCaseMap(void) {
1196 UParseError parseError
;
1197 UErrorCode status
= U_ZERO_ERROR
;
1198 Transliterator
* toUpper
=
1199 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1200 Transliterator
* toLower
=
1201 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1202 Transliterator
* toTitle
=
1203 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1204 if (toUpper
==0 || toLower
==0 || toTitle
==0) {
1205 errln("FAIL: createInstance returned NULL");
1212 expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.",
1213 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1214 expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1215 "the quick brown foX jumped over the lazY dogs.");
1216 expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.",
1217 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1225 * Test the name mapping transliterators.
1227 void TransliteratorTest::TestNameMap(void) {
1228 UParseError parseError
;
1229 UErrorCode status
= U_ZERO_ERROR
;
1230 Transliterator
* uni2name
=
1231 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
);
1232 Transliterator
* name2uni
=
1233 Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
);
1234 if (uni2name
==0 || name2uni
==0) {
1235 errln("FAIL: createInstance returned NULL");
1241 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1242 expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1243 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1244 expect(*name2uni
, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
1245 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1252 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
);
1254 errln("FAIL: createInstance returned NULL");
1259 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1260 UnicodeString s
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1266 * Test liberalized ID syntax. 1006c
// Checks that loosely written IDs (different case, surrounding whitespace,
// inline filter, compound with whitespace) can be instantiated, and, where an
// expected ID is given, that getID() matches it.
// NOTE(review): some original lines (table terminator, null check on `t`,
// declaration of `exp`, cleanup) are not visible in this excerpt.
1268 void TransliteratorTest::TestLiberalizedID(void) {
1269 // Some test cases have an expected getID() value of NULL. This
1270 // means I have disabled the test case for now. This stuff is
1271 // still under development, and I haven't decided whether to make
1272 // getID() return canonical case yet. It will all get rewritten
1273 // with the move to Source-Target/Variant IDs anyway. [aliu]
// Rows are triples: ID passed to createInstance, expected getID() (or NULL),
// and a short description used in the log/error messages.
1274 const char* DATA
[] = {
1275 "latin-greek", NULL
/*"Latin-Greek"*/, "case insensitivity",
1276 " Null ", "Null", "whitespace",
1277 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1278 " null ; latin-greek ", NULL
/*"Null;Latin-Greek"*/, "compound whitespace",
1280 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
1281 UParseError parseError
;
1282 UErrorCode status
= U_ZERO_ERROR
;
// Advance by 3: one table row per iteration.
1283 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1284 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1286 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1287 " cannot create ID \"" + DATA
[i
] + "\"");
1291 exp
= UnicodeString(DATA
[i
+1], "");
1293 // Don't worry about getID() if the expected char*
1294 // is NULL -- see above.
// An empty `exp` is accepted unconditionally; otherwise the IDs must match.
1295 if (exp
.length() == 0 || exp
== t
->getID()) {
1296 logln(UnicodeString("Ok: ") + DATA
[i
+2] +
1297 " create ID \"" + DATA
[i
] + "\" => \"" +
1300 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1301 " create ID \"" + DATA
[i
] + "\" => \"" +
1302 t
->getID() + "\", exp \"" + exp
+ "\"");
1309 /* test for Jitterbug 912 */
// Table-driven check of Transliterator::createInstance(id, dir, ...): each
// row gives an ID, a direction and the ID the result is expected to report,
// with "" meaning creation is expected to fail.
// NOTE(review): parts of the body (table terminator, declarations of `err`
// and the created object, the branch that fills `newID`) are not visible in
// this excerpt.
1310 void TransliteratorTest::TestCreateInstance(){
// Direction tags. Rows below store these exact pointers, and the loop
// compares pointers (not string contents) to decode the direction.
1311 const char* FORWARD
= "F";
1312 const char* REVERSE
= "R";
1313 const char* DATA
[] = {
1315 // Column 2: direction
1316 // Column 3: expected ID, or "" if expect failure
1317 "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912
1319 // JB#2689: bad compound causes crash
1320 "InvalidSource-InvalidTarget", FORWARD
, "",
1321 "InvalidSource-InvalidTarget", REVERSE
, "",
1322 "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "",
1323 "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "",
1324 "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "",
1325 "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "",
// One row (3 entries) per iteration; stops at the first null ID entry.
1330 for (int32_t i
=0; DATA
[i
]; i
+=3) {
// Fresh error code per row so an earlier failure does not leak into the next.
1332 UErrorCode ec
= U_ZERO_ERROR
;
1333 UnicodeString
id(DATA
[i
]);
1334 UTransDirection dir
= (DATA
[i
+1]==FORWARD
)?
1335 UTRANS_FORWARD
:UTRANS_REVERSE
;
1336 UnicodeString
expID(DATA
[i
+2]);
1338 Transliterator::createInstance(id
,dir
,err
,ec
);
1339 UnicodeString newID
;
// The row passes when the reported ID equals the expected one.
1343 UBool ok
= (newID
== expID
);
// Here newID is overwritten with the error name, which the messages print.
1345 newID
= u_errorName(ec
);
1348 logln((UnicodeString
)"Ok: createInstance(" +
1349 id
+ "," + DATA
[i
+1] + ") => " + newID
);
1351 errln((UnicodeString
)"FAIL: createInstance(" +
1352 id
+ "," + DATA
[i
+1] + ") => " + newID
+
1353 ", expected " + expID
);
1360 * Test the normalization transliterator.
// Runs the "NFD"/"NFC" transliterators over the CANON table and the
// "NFKD"/"NFKC" transliterators over the COMPAT table, then checks the
// compound "NFD; [x]Remove" on one input.
// NOTE(review): table terminators, the declaration of `i`, some null checks
// and cleanup are not visible in this excerpt.
1362 void TransliteratorTest::TestNormalizationTransliterator() {
1363 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1364 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
// Rows are triples of escaped strings: input, expected decomposed form,
// expected composed form.
1365 const char* CANON
[] = {
1366 // Input Decomposed Composed
1367 "cat", "cat", "cat" ,
1368 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1370 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1371 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1373 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1374 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1375 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1377 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1378 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1380 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1381 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1382 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1384 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1385 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1387 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1388 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1390 "Henry IV", "Henry IV", "Henry IV" ,
1391 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1393 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1394 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1395 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1396 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1397 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1399 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
// Same row layout as CANON, but the expected columns are the compatibility
// forms (e.g. the ligature and Roman-numeral rows expand here).
1403 const char* COMPAT
[] = {
1404 // Input Decomposed Composed
1405 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1407 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1408 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1410 "Henry IV", "Henry IV", "Henry IV" ,
1411 "Henry \\u2163", "Henry IV", "Henry IV" ,
1413 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1414 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1416 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1421 UParseError parseError
;
1422 UErrorCode status
= U_ZERO_ERROR
;
1423 Transliterator
* NFD
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
);
1424 Transliterator
* NFC
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
);
1426 errln("FAIL: createInstance failed");
// Canonical pass: one CANON row per iteration, stopping at a null entry.
1431 for (i
=0; CANON
[i
]; i
+=3) {
1432 UnicodeString in
= CharsToUnicodeString(CANON
[i
]);
1433 UnicodeString expd
= CharsToUnicodeString(CANON
[i
+1]);
1434 UnicodeString expc
= CharsToUnicodeString(CANON
[i
+2]);
1435 expect(*NFD
, in
, expd
);
1436 expect(*NFC
, in
, expc
);
1441 Transliterator
* NFKD
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
);
1442 Transliterator
* NFKC
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
);
1443 if (!NFKD
|| !NFKC
) {
1444 errln("FAIL: createInstance failed");
// Compatibility pass: one COMPAT row per iteration, stopping at a null entry.
1449 for (i
=0; COMPAT
[i
]; i
+=3) {
1450 UnicodeString in
= CharsToUnicodeString(COMPAT
[i
]);
1451 UnicodeString expkd
= CharsToUnicodeString(COMPAT
[i
+1]);
1452 UnicodeString expkc
= CharsToUnicodeString(COMPAT
[i
+2]);
1453 expect(*NFKD
, in
, expkd
);
1454 expect(*NFKC
, in
, expkc
);
// Reset the status before the compound test so earlier results do not
// affect it.
1460 status
= U_ZERO_ERROR
;
1461 Transliterator
*t
= Transliterator::createInstance("NFD; [x]Remove",
1465 errln("FAIL: createInstance failed");
// Expected: the accented letter is decomposed and the 'x' is removed.
1467 expect(*t
, CharsToUnicodeString("\\u010dx"),
1468 CharsToUnicodeString("c\\u030C"));
1473 * Test compound RBT rules.
// Tests rule sets that contain "::ID" blocks (compound rule-based
// transliterators): creation from rules, transliteration output, toRules()
// output and its round trip through createFromRules(), IDs of the
// "Foo(Bar)" form, and the ID of the inverse.
// NOTE(review): many original lines (middle of the rule string, failure
// checks, declaration of `r`, cleanup) are not visible in this excerpt.
1475 void TransliteratorTest::TestCompoundRBT(void) {
1476 // Careful with spacing and ';' here: Phrase this exactly
1477 // as toRules() is going to return it. If toRules() changes
1478 // with regard to spacing or ';', then adjust this string.
1479 UnicodeString
rule("::Hex-Any;\n"
1483 "::[^t]Any-Upper;", "");
1484 UParseError parseError
;
1485 UErrorCode status
= U_ZERO_ERROR
;
1486 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
);
1488 errln("FAIL: createFromRules failed");
1491 expect(*t
, "\\u0043at in the hat, bat on the mat",
1492 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
// Regenerate the rules (escaped form) so they can be reported against `rule`.
1494 t
->toRules(r
, TRUE
);
1496 logln((UnicodeString
)"OK: toRules() => " + r
);
1498 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1499 ", expected " + rule
);
// Second scenario: a compound built from an ID, not from rules.
1504 t
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
);
1506 errln("FAIL: createInstance failed");
1509 UnicodeString
exp("::Greek-Latin;\n::Latin-Cyrillic;");
1510 t
->toRules(r
, TRUE
);
1512 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1513 ", expected " + exp
);
1515 logln((UnicodeString
)"OK: toRules() => " + r
);
1519 // Round trip the result of toRules
1520 t
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
);
1522 errln("FAIL: createFromRules #2 failed");
1525 logln((UnicodeString
)"OK: createFromRules(" + r
+ ") succeeded");
1528 // Test toRules again
1529 t
->toRules(r
, TRUE
);
1531 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1532 ", expected " + exp
);
1534 logln((UnicodeString
)"OK: toRules() => " + r
);
1539 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1540 // to what the regenerated ID will look like.
1541 UnicodeString
id("Upper(Lower);(NFKC)", "");
1542 t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
1544 errln("FAIL: createInstance #2 failed");
// The instance must report exactly the ID it was created from.
1547 if (t
->getID() == id
) {
1548 logln((UnicodeString
)"OK: created " + id
);
1550 errln((UnicodeString
)"FAIL: createInstance(" + id
+
1551 ").getID() => " + t
->getID());
1554 Transliterator
*u
= t
->createInverse(status
);
1556 errln("FAIL: createInverse failed");
// Expected inverse ID: components in reverse order, each with its
// forward/reverse parts swapped.
1560 exp
= "NFKC();Lower(Upper)";
1561 if (u
->getID() == exp
) {
1562 logln((UnicodeString
)"OK: createInverse(" + id
+ ") => " +
1565 errln((UnicodeString
)"FAIL: createInverse(" + id
+ ") => " +
1573 * Compound filter semantics were originally not implemented
1574 * correctly. Originally, each component filter f(i) is replaced by
1575 * f'(i) = f(i) && g, where g is the filter for the compound
1580 * Suppose I have a transliterator X. Internally X is
1581 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1583 * The compound should convert all greek characters (through latin) to
1584 * cyrillic, then lowercase the result. The filter should say "don't
1585 * touch 'A' in the original". But because an intermediate result
1586 * happens to go through "A", the Greek Alpha gets hung up.
// Applies the filter [^A] to the compound "Greek-Latin; Latin-Greek; Lower"
// and checks the result on one input string.
// NOTE(review): the null/status check after creation, the start of the final
// expect(...) call and cleanup are not visible in this excerpt.
1588 void TransliteratorTest::TestCompoundFilter(void) {
1589 UParseError parseError
;
1590 UErrorCode status
= U_ZERO_ERROR
;
1591 Transliterator
*t
= Transliterator::createInstance
1592 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
);
1594 errln("FAIL: createInstance failed");
// The filter is installed on the compound as a whole; it excludes only 'A'.
1597 t
->adoptFilter(new UnicodeSet("[^A]", status
));
1598 if (U_FAILURE(status
)) {
1599 errln("FAIL: UnicodeSet ct failed");
1604 // Only the 'A' at index 1 should remain unchanged
1606 CharsToUnicodeString("BA\\u039A\\u0391"),
1607 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
// Checks "Remove[abc]": the expected output is the input with every a, b and
// c dropped (the capital 'A' is kept). The same check is repeated on a
// clone().
// NOTE(review): the failure check after creation and cleanup are not visible
// in this excerpt.
1611 void TransliteratorTest::TestRemove(void) {
1612 UParseError parseError
;
1613 UErrorCode status
= U_ZERO_ERROR
;
1614 Transliterator
*t
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
);
1616 errln("FAIL: createInstance failed");
1620 expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts");
1622 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1623 // duplicating the filter
1624 Transliterator
* t2
= t
->clone();
// Same input and expectation as above, so the clone must filter identically.
1625 expect(*t2
, "Able bodied baker's cats", "Ale odied ker's ts");
// Table-driven test of Transliterator::toRules() and UnicodeSet::toPattern().
// The table is walked three entries at a time: a tag (RBT or SET), the input
// rules/pattern, and the expected regenerated text.
// NOTE(review): most tag entries of the table, the failure check after
// createFromRules, the else-branch header for the SET case and cleanup are
// not visible in this excerpt.
1631 void TransliteratorTest::TestToRules(void) {
// Row tags. The loop compares DATA[d] to RBT by pointer, so rows must store
// these exact pointers.
1632 const char* RBT
= "rbt";
1633 const char* SET
= "set";
1634 static const char* DATA
[] = {
1636 "$a=\\u4E61; [$a] > A;",
1640 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1641 "[[:Zs:][:Zl:]]{a} > A;",
1668 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1669 "[^[:Zs:]]{a} > A;",
1672 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1673 "[[a-z]-[:Zs:]]{a} > A;",
1676 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1677 "[[:Zs:]&[a-z]]{a} > A;",
1680 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1681 "[x[:Zs:]]{a} > A;",
1684 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1685 "$macron = \\u0304 ;"
1686 "$evowel = [aeiouyAEIOUY] ;"
1687 "$iotasub = \\u0345 ;"
1688 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1689 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1692 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1693 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1695 static const int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1697 for (int32_t d
=0; d
< DATA_length
; d
+=3) {
1698 if (DATA
[d
] == RBT
) {
1699 // Transliterator test
1700 UParseError parseError
;
1701 UErrorCode status
= U_ZERO_ERROR
;
1702 Transliterator
*t
= Transliterator::createFromRules("ID",
1703 DATA
[d
+1], UTRANS_FORWARD
, parseError
, status
);
1705 errln("FAIL: createFromRules failed");
// Regenerate the rules twice: once with the flag FALSE and once with TRUE.
1708 UnicodeString rules
, escapedRules
;
1709 t
->toRules(rules
, FALSE
);
1710 t
->toRules(escapedRules
, TRUE
);
// The same table entry serves both expectations: passed through
// CharsToUnicodeString for the FALSE form, taken literally for the TRUE form.
1711 UnicodeString expRules
= CharsToUnicodeString(DATA
[d
+2]);
1712 UnicodeString
expEscapedRules(DATA
[d
+2]);
1713 if (rules
== expRules
) {
1714 logln((UnicodeString
)"Ok: " + DATA
[d
+1] +
1717 errln((UnicodeString
)"FAIL: " + DATA
[d
+1] +
1718 " => " + rules
+ ", exp " + expRules
);
1720 if (escapedRules
== expEscapedRules
) {
1721 logln((UnicodeString
)"Ok: " + DATA
[d
+1] +
1722 " => " + escapedRules
);
1724 errln((UnicodeString
)"FAIL: " + DATA
[d
+1] +
1725 " => " + escapedRules
+ ", exp " + expEscapedRules
);
// UnicodeSet case: build the set from the pattern and compare toPattern()
// with the expected text.
1731 UErrorCode status
= U_ZERO_ERROR
;
1732 UnicodeString
pat(DATA
[d
+1]);
1733 UnicodeString
expToPat(DATA
[d
+2]);
1734 UnicodeSet
set(pat
, status
);
1735 if (U_FAILURE(status
)) {
1736 errln("FAIL: UnicodeSet ct failed");
1739 // Adjust spacing etc. as necessary.
1740 UnicodeString toPat
;
1741 set
.toPattern(toPat
);
1742 if (expToPat
== toPat
) {
1743 logln((UnicodeString
)"Ok: " + pat
+
// NOTE(review): the "exp" part of this message prints `pat`, not the
// `expToPat` value that was actually compared above -- confirm intent.
1746 errln((UnicodeString
)"FAIL: " + pat
+
1747 " => " + prettify(toPat
, TRUE
) +
1748 ", exp " + prettify(pat
, TRUE
));
// Tests rules that use context braces ("{d}e", "ab{c}").
// NOTE(review): the remaining expect(...) arguments (input, expected output,
// and presumably `pos`) are not visible in this excerpt.
1754 void TransliteratorTest::TestContext() {
1755 UTransPosition pos
= {0, 2, 0, 1}; // cs cl s l
1756 expect("de > x; {d}e > y;",
1761 expect("ab{c} > z;",
// Tests handling of supplementary code points (written as \U000xxxxx
// escapes): in rule variables and sets, in segment references, with cursor
// positioning, and through the Any-Hex variants and a filtered Remove.
// NOTE(review): a few original lines (part of the first rule string and the
// start of the name-transliterator call) are not visible in this excerpt.
1766 void TransliteratorTest::TestSupplemental() {
1768 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1770 CharsToUnicodeString("ab\\U0001030Fx"),
1771 CharsToUnicodeString("\\U00010300bix"));
// Segment swap: $a and $b each include a supplementary range, and the
// expected output exchanges each matched pair.
1773 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1774 "$b=[A-Z\\U00010400-\\U0001044D];"
1775 "($a)($b) > $2 $1;"),
1776 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1777 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1779 // k|ax\\U00010300xm
1781 // k|a\\U00010400\\U00010300xm
1782 // ky|\\U00010400\\U00010300xm
1783 // ky\\U00010400|\\U00010300xm
1785 // ky\\U00010400|\\U00010300\\U00010400m
1786 // ky\\U00010400y|\\U00010400m
1787 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1788 "$a {x} > | @ \\U00010400;"
1789 "{$a} [^\\u0000-\\uFFFF] > y;"),
1790 CharsToUnicodeString("kax\\U00010300xm"),
1791 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1794 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1795 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
// The Any-Hex checks below all use the same four-code-point input; only the
// expected escape notation differs per variant.
1797 expectT("Any-Hex/Unicode",
1798 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1799 "U+10330U+10FF00U+E0061U+00A0");
1801 expectT("Any-Hex/C",
1802 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1803 "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1805 expectT("Any-Hex/Perl",
1806 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1807 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
// The Java variant expects surrogate pairs written as \u escapes.
1809 expectT("Any-Hex/Java",
1810 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1811 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
// NOTE(review): the two XML expectations below appear here as raw characters;
// presumably the original held numeric character references that were decoded
// during extraction -- confirm against the real file.
1813 expectT("Any-Hex/XML",
1814 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1815 "𐌰􏼀󠁡 ");
1817 expectT("Any-Hex/XML10",
1818 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1819 "𐌰􏼀󠁡 ");
// Filtered Remove: only the code point inside U+E0000..U+E0FFF is dropped.
1821 expectT("[\\U000E0000-\\U000E0FFF] Remove",
1822 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1823 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
// Tests quantifiers (+, *, ?) in rules: combined with cursor offsets (@),
// around segments, sets, quoted literals and variable references.
// NOTE(review): most expect(...) calls are only partly visible in this
// excerpt (input and/or expected strings are missing).
1826 void TransliteratorTest::TestQuantifier() {
1828 // Make sure @ in a quantified anteContext works
1829 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1833 // Make sure @ in a quantified postContext works
1834 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1838 // Make sure @ in a quantified postContext with seg ref works
1839 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1843 // Make sure @ past ante context doesn't enter ante context
// Position fields presumably follow the order abbreviated "cs cl s l" in
// TestContext -- TODO confirm.
1844 UTransPosition pos
= {0, 5, 3, 5};
1845 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1850 // Make sure @ past post context doesn't pass limit
1851 UTransPosition pos2
= {0, 4, 0, 2};
1852 expect("{b} a+ > c @@ |; x > y; a > A;",
1857 // Make sure @ past post context doesn't enter post context
1858 expect("{b} a+ > c @@ |; x > y; a > A;",
1862 expect("(ab)? c > d;",
1866 // NOTE: The (ab)+ when referenced just yields a single "ab",
1867 // not the full sequence of them. This accords with perl behavior.
1868 expect("(ab)+ {x} > '(' $1 ')';",
1870 "x ab(ab) abab(ab)y");
1873 "ac abc abbc abbbc",
1876 expect("[abc]+ > x;",
1877 "qac abrc abbcs abtbbc",
1880 expect("q{(ab)+} > x;",
1881 "qa qab qaba qababc qaba",
1882 "qa qx qxa qxc qxa");
1884 expect("q(ab)* > x;",
1885 "qa qab qaba qababc",
1888 // NOTE: The (ab)* when referenced just yields a single "ab",
1889 // not the full sequence of them. This accords with perl behavior.
1890 expect("q(ab)* > '(' $1 ')';",
1891 "qa qab qaba qababc",
1892 "()a (ab) (ab)a (ab)c");
1894 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1896 expect("'ab'+ > x;",
1900 // $foo+ and $foo* -- the quantifier should apply to the entire
1901 // variable reference
1902 expect("$var = ab; $var+ > x;",
// Helper transliterator for the registration checks in TestSTV: a
// NullTransliterator subclass constructed from an ID.
// NOTE(review): the access specifier, constructor body and the rest of the
// class are not visible in this excerpt.
1907 class TestTrans
: public NullTransliterator
{
1909 TestTrans(const UnicodeString
& id
) {
1915 * Test Source-Target/Variant.
// Tests the Source-Target/Variant enumeration API and instance registration:
//   1. walks every available source, its targets and their variants, checking
//      that counts are in a sane range and names are non-empty;
//   2. registers three TestTrans instances, creates them by ID, unregisters
//      them, and tries to create them again;
//   3. verifies that the getAvailable* enumerations no longer list them.
// NOTE(review): declarations of `i`, `j`, `s` and the string `t` used in the
// last part, several failure conditions, and cleanup are not visible in this
// excerpt.
1917 void TransliteratorTest::TestSTV(void) {
1918 int32_t ns
= Transliterator::countAvailableSources();
// Counts outside 0..255 are reported as failures here and for the target
// and variant counts below.
1919 if (ns
< 0 || ns
> 255) {
1920 errln((UnicodeString
)"FAIL: Bad source count: " + ns
);
1924 for (i
=0; i
<ns
; ++i
) {
1925 UnicodeString source
;
1926 Transliterator::getAvailableSource(i
, source
);
1927 logln((UnicodeString
)"" + i
+ ": " + source
);
1928 if (source
.length() == 0) {
1929 errln("FAIL: empty source");
1932 int32_t nt
= Transliterator::countAvailableTargets(source
);
1933 if (nt
< 0 || nt
> 255) {
1934 errln((UnicodeString
)"FAIL: Bad target count: " + nt
);
1937 for (int32_t j
=0; j
<nt
; ++j
) {
1938 UnicodeString target
;
1939 Transliterator::getAvailableTarget(j
, source
, target
);
1940 logln((UnicodeString
)" " + j
+ ": " + target
);
1941 if (target
.length() == 0) {
1942 errln("FAIL: empty target");
1945 int32_t nv
= Transliterator::countAvailableVariants(source
, target
);
1946 if (nv
< 0 || nv
> 255) {
1947 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
);
1950 for (int32_t k
=0; k
<nv
; ++k
) {
1951 UnicodeString variant
;
1952 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
// Unlike sources and targets, an empty variant is only logged, not failed.
1953 if (variant
.length() == 0) {
1954 logln((UnicodeString
)" " + k
+ ": <empty>");
1956 logln((UnicodeString
)" " + k
+ ": " + variant
);
1962 // Test registration
// Parallel arrays, one entry per registered test instance: the ID as
// registered, the full ID checked against getAvailableID(), and the source
// checked against getAvailableSource() (NULL = skipped in that check).
1963 const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1964 const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1965 const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" };
1966 for (i
=0; i
<3; ++i
) {
1967 Transliterator
*t
= new TestTrans(IDS
[i
]);
1969 errln("FAIL: out of memory");
1972 if (t
->getID() != IDS
[i
]) {
1973 errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]);
// Register the instance, then obtain one by ID through createInstance().
1977 Transliterator::registerInstance(t
);
1978 UErrorCode status
= U_ZERO_ERROR
;
1979 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
1981 errln((UnicodeString
)"FAIL: Registration/creation failed for ID " +
1984 logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " +
// After unregister(), creation by the same ID is attempted again.
1988 Transliterator::unregister(IDS
[i
]);
1989 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
1991 errln((UnicodeString
)"FAIL: Unregistration failed for ID " +
1997 // Make sure getAvailable API reflects removal
1998 int32_t n
= Transliterator::countAvailableIDs();
1999 for (i
=0; i
<n
; ++i
) {
2000 UnicodeString id
= Transliterator::getAvailableID(i
);
2001 for (j
=0; j
<3; ++j
) {
// Case-insensitive comparison against each unregistered full ID.
2002 if (id
.caseCompare(FULL_IDS
[j
],0)==0) {
2003 errln((UnicodeString
)"FAIL: unregister(" + id
+ ") failed");
// Targets listed under source "Any" must not include the first test ID.
2007 n
= Transliterator::countAvailableTargets("Any");
2008 for (i
=0; i
<n
; ++i
) {
2010 Transliterator::getAvailableTarget(i
, "Any", t
);
2011 if (t
.caseCompare(IDS
[0],0)==0) {
2012 errln((UnicodeString
)"FAIL: unregister(Any-" + t
+ ") failed");
// The test sources must no longer appear among the available sources.
2015 n
= Transliterator::countAvailableSources();
2016 for (i
=0; i
<n
; ++i
) {
2018 Transliterator::getAvailableSource(i
, s
);
2019 for (j
=0; j
<3; ++j
) {
2020 if (SOURCES
[j
] == NULL
) continue;
2021 if (s
.caseCompare(SOURCES
[j
],0)==0) {
2022 errln((UnicodeString
)"FAIL: unregister(" + s
+ "-*) failed");
2029 * Test inverse of Greek-Latin; Title()
// Creates "Greek-Latin; Title()" in the REVERSE direction and checks that the
// resulting ID is "(Title);Latin-Greek".
// NOTE(review): the failure check after creation, parts of the log message
// and cleanup are not visible in this excerpt.
2031 void TransliteratorTest::TestCompoundInverse(void) {
2032 UParseError parseError
;
2033 UErrorCode status
= U_ZERO_ERROR
;
2034 Transliterator
*t
= Transliterator::createInstance
2035 ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
);
2037 errln("FAIL: createInstance");
2040 UnicodeString
exp("(Title);Latin-Greek");
2041 if (t
->getID() == exp
) {
2042 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2045 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2046 t
->getID() + "\", expected \"" + exp
+ "\"");
2052 * Test NFD chaining with RBT
// Builds a rule set that starts with "::NFD" followed by two ordinary rules
// and checks that "aa" becomes "Q" (the first rule, not the second applied
// twice).
// NOTE(review): the declaration of `pe`, the early return and cleanup are not
// visible in this excerpt.
2054 void TransliteratorTest::TestNFDChainRBT() {
2056 UErrorCode ec
= U_ZERO_ERROR
;
2057 Transliterator
* t
= Transliterator::createFromRules(
2058 "TEST", "::NFD; aa > Q; a > q;",
2059 UTRANS_FORWARD
, pe
, ec
);
// Report the error name when creation fails.
2060 if (t
== NULL
|| U_FAILURE(ec
)) {
2061 errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
));
2064 expect(*t
, "aa", "Q");
2067 // TEMPORARY TESTS -- BEING DEBUGGED
2068 //=- UnicodeString s, s2;
2069 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2070 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2071 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2072 //=- expect(*t, s, s2);
2075 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2076 //=- expect(*t, s2, s);
2079 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2080 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2081 //=- expect(*t, s, s);
2084 // const char* source[] = {
2086 // "\\u015Br\\u012Bmad",
2087 // "bhagavadg\\u012Bt\\u0101",
2090 // "vi\\u1E63\\u0101da",
2092 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2093 // "uv\\u0101cr\\u0325",
2095 // "rmk\\u1E63\\u0113t",
2096 // //"dharmak\\u1E63\\u0113tr\\u0113",
2098 // "kuruk\\u1E63\\u0113tr\\u0113",
2099 // "samav\\u0113t\\u0101",
2100 // "yuyutsava-\\u1E25",
2101 // "m\\u0101mak\\u0101-\\u1E25",
2102 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2104 // "san\\u0304java",
2109 // const char* expected[] = {
2111 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2112 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2113 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2114 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2115 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2116 // "\\u092f\\u094b\\u0917",
2117 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2118 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2121 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2123 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2124 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2125 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2126 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2127 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2128 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2129 // "\\u0938\\u0902\\u091c\\u0935",
2133 // UErrorCode status = U_ZERO_ERROR;
2134 // UParseError parseError;
2135 // UnicodeString message;
2136 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2137 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2138 // if(U_FAILURE(status)){
2139 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2140 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2141 // delete latinToDevToLatin;
2142 // delete devToLatinToDev;
2145 // UnicodeString gotResult;
2146 // for(int i= 0; source[i] != 0; i++){
2147 // gotResult = source[i];
2148 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2149 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2151 // delete latinToDevToLatin;
2152 // delete devToLatinToDev;
2156 * Inverse of "Null" should be "Null". (J21)
// Creates "Null", takes its inverse, and checks that the inverse's ID is
// also "Null".
// NOTE(review): the declaration of `pe`, early returns and cleanup are not
// visible in this excerpt.
2158 void TransliteratorTest::TestNullInverse() {
2160 UErrorCode ec
= U_ZERO_ERROR
;
2161 Transliterator
*t
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
);
2162 if (t
== 0 || U_FAILURE(ec
)) {
2163 errln("FAIL: createInstance");
2166 Transliterator
*u
= t
->createInverse(ec
);
2167 if (u
== 0 || U_FAILURE(ec
)) {
2168 errln("FAIL: createInverse");
2172 if (u
->getID() != "Null") {
2173 errln("FAIL: Inverse of Null should be Null");
2180 * Check ID of inverse of alias. (J22)
// Creates "Latin-Hangul", takes its inverse, and compares the inverse's ID
// with "Hangul-Latin".
// NOTE(review): the declaration of `pe`, the comparison of `got` with `exp`,
// early returns and cleanup are not visible in this excerpt.
2182 void TransliteratorTest::TestAliasInverseID() {
2183 UnicodeString
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2185 UErrorCode ec
= U_ZERO_ERROR
;
2186 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2187 if (t
== 0 || U_FAILURE(ec
)) {
2188 errln("FAIL: createInstance");
2191 Transliterator
*u
= t
->createInverse(ec
);
2192 if (u
== 0 || U_FAILURE(ec
)) {
2193 errln("FAIL: createInverse");
// Expected vs. actual ID of the inverse.
2197 UnicodeString exp
= "Hangul-Latin";
2198 UnicodeString got
= u
->getID();
2200 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2201 ", expected " + exp
);
2208 * Test IDs of inverses of compound transliterators. (J20)
// Creates the compound "Latin-Jamo;NFC(NFD)", takes its inverse, and compares
// the inverse's ID with "NFD(NFC);Jamo-Latin".
// NOTE(review): the declaration of `pe`, the comparison of `got` with `exp`,
// early returns and cleanup are not visible in this excerpt.
2210 void TransliteratorTest::TestCompoundInverseID() {
2211 UnicodeString ID
= "Latin-Jamo;NFC(NFD)";
2213 UErrorCode ec
= U_ZERO_ERROR
;
2214 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2215 if (t
== 0 || U_FAILURE(ec
)) {
2216 errln("FAIL: createInstance");
2219 Transliterator
*u
= t
->createInverse(ec
);
2220 if (u
== 0 || U_FAILURE(ec
)) {
2221 errln("FAIL: createInverse");
// Expected vs. actual ID of the inverse.
2225 UnicodeString exp
= "NFD(NFC);Jamo-Latin";
2226 UnicodeString got
= u
->getID();
2228 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2229 ", expected " + exp
);
2236 * Test undefined variable.
// Constructs a RuleBasedTransliterator from a rule that references $initial,
// which the rule string never defines. A failing error code is the expected
// outcome; success is reported as a test failure.
// NOTE(review): the declaration of `pe`, cleanup and the tails of both
// messages are not visible in this excerpt.
2239 void TransliteratorTest::TestUndefinedVariable() {
2240 UnicodeString rule
= "$initial } a <> \\u1161;";
2242 UErrorCode ec
= U_ZERO_ERROR
;
2243 Transliterator
*t
= new RuleBasedTransliterator("<ID>", rule
, UTRANS_FORWARD
, 0, pe
, ec
);
// Inverted sense compared with most tests in this file: failure is the
// passing case.
2245 if (U_FAILURE(ec
)) {
2246 logln((UnicodeString
)"OK: Got exception for " + rule
+ ", as expected: " +
2250 errln((UnicodeString
)"Fail: bogus rule " + rule
+ " compiled with error " +
2255 * Test empty context.
// A rule whose key is wrapped in braces with no text outside them should
// behave like the plain rule: every 'a' in the input becomes 'b'.
2257 void TransliteratorTest::TestEmptyContext() {
2258 expect(" { a } > b;", "xay a ", "xby b ");
2262 * Test compound filter ID syntax
// Table-driven test of compound IDs and "::" rule sets that carry filters.
// Each row gives an ID or rule set, a direction, a source string and the
// expected result; rows whose later columns are NULL are expected to be
// rejected.
// NOTE(review): the table terminator, declarations of `exp` and `pe`, the
// comparison of `ok` with `expOk`, and cleanup are not visible in this
// excerpt.
2264 void TransliteratorTest::TestCompoundFilterID(void) {
2265 static const char* DATA
[] = {
2266 // Col. 1 = ID or rule set (latter must start with #)
2268 // = columns > 1 are null if expect col. 1 to be illegal =
2270 // Col. 2 = direction, "F..." or "R..."
2271 // Col. 3 = source string
2272 // Col. 4 = exp result
2274 "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters
2275 "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter
2276 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2277 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2278 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2279 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
// One row (4 entries) per iteration; stops at the first null ID entry.
2283 for (int32_t i
=0; DATA
[i
]; i
+=4) {
2284 UnicodeString id
= CharsToUnicodeString(DATA
[i
]);
// Reverse only when the direction column starts with 'R'; a NULL column
// falls back to forward.
2285 UTransDirection direction
= (DATA
[i
+1] != NULL
&& DATA
[i
+1][0] == 'R') ?
2286 UTRANS_REVERSE
: UTRANS_FORWARD
;
2287 UnicodeString source
;
2289 if (DATA
[i
+2] != NULL
) {
2290 source
= CharsToUnicodeString(DATA
[i
+2]);
2291 exp
= CharsToUnicodeString(DATA
[i
+3]);
// A row is expected to be creatable exactly when its direction is non-NULL.
2293 UBool expOk
= (DATA
[i
+1] != NULL
);
2294 Transliterator
* t
= NULL
;
2296 UErrorCode ec
= U_ZERO_ERROR
;
// A leading '#' selects createFromRules; anything else goes to
// createInstance.
2297 if (id
.charAt(0) == 0x23/*#*/) {
2298 t
= Transliterator::createFromRules("ID", id
, direction
, pe
, ec
);
2300 t
= Transliterator::createInstance(id
, direction
, pe
, ec
);
2302 UBool ok
= (t
!= NULL
&& U_SUCCESS(ec
));
2303 UnicodeString transID
;
2305 transID
= t
->getID();
2308 transID
= UnicodeString("NULL", "");
2311 logln((UnicodeString
)"Ok: " + id
+ " => " + transID
+ ", " +
// Transliteration is only checked for rows that supply a source string.
2313 if (source
.length() != 0) {
2314 expect(*t
, source
, exp
);
2318 errln((UnicodeString
)"FAIL: " + id
+ " => " + transID
+ ", " +
2325 * Test new property set syntax
// Tests \p{...} property sets inside rules, and '.' with a quantifier: in the
// second case the expected output brackets each run separately and leaves the
// line-break characters outside the brackets.
2327 void TransliteratorTest::TestPropertySet() {
2328 expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
2329 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2330 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2334 * Test various failure points of the new 2.0 engine.
// Regression checks for the rule engine: Latin-Hiragana leaving Katakana
// alone, a hand-assembled CompoundTransliterator with a global filter and its
// element accessors, and two rule sets using variables with quantifiers.
// NOTE(review): the declaration of `pe`, the assignments to array[0] and
// array[2], parts of the rule strings, the expected values of one expect(...)
// call, and cleanup are not visible in this excerpt.
2336 void TransliteratorTest::TestNewEngine() {
2338 UErrorCode ec
= U_ZERO_ERROR
;
2339 Transliterator
*t
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
);
2340 if (t
== 0 || U_FAILURE(ec
)) {
2341 errln("FAIL: createInstance Latin-Hiragana");
2344 // Katakana should be untouched
2345 expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"),
2346 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2351 // This test will only work if Transliterator.ROLLBACK is
2352 // true. Otherwise, this test will fail, revealing a
2353 // limitation of global filters in incremental mode.
// Two single-rule transliterators that are later reported as elements 0 and 2
// of the compound.
2355 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
);
2357 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
);
2358 if (U_FAILURE(ec
)) {
2364 Transliterator
* array
[3];
2366 array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
);
2368 if (U_FAILURE(ec
)) {
2369 errln("FAIL: createInstance NFD");
// Compound over the three elements, with the set [:Ll:] passed as its filter.
2376 t
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
));
2377 if (U_FAILURE(ec
)) {
2378 errln("FAIL: UnicodeSet constructor");
// Expected: each lowercase 'a' ends up as 'b'; the uppercase 'A' characters
// of the input are unchanged.
2386 expect(*t
, "aAaA", "bAbA");
// The compound must expose its three elements in order.
2388 assertTrue("countElements", t
->countElements() == 3);
2389 assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A");
2390 assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD");
2391 assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b");
2392 assertSuccess("getElement", ec
);
2400 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2404 UnicodeString gr
= CharsToUnicodeString(
2406 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2407 "$rough = \\u0314 ;"
2408 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2412 expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2416 * Test quantified segment behavior. We want:
2417 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
// Tests segments combined with quantifiers. When the quantifier is inside the
// segment, $1 is the whole matched run; when it is outside, $1 is a single
// element (see the expected strings below). Also round-trips two such rules
// through toRules().
// NOTE(review): declarations of `pe` and `rr`, the comparisons of `rr` with
// `r`, early returns and cleanup are not visible in this excerpt.
2419 void TransliteratorTest::TestQuantifiedSegment(void) {
// Quantifier inside the segment: $1 is the full run "cba".
2421 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2423 // The tricky case; the quantifier is around the segment
2424 expect("([abc])+ > x $1 x;", "cba", "xax");
2426 // Tricky case in reverse direction
2427 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2429 // Check post-context segment
2430 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2432 // Test toRule/toPattern for non-quantified segment.
2433 // Careful with spacing here.
2434 UnicodeString
r("([a-c]){q} > x $1 x;");
2436 UErrorCode ec
= U_ZERO_ERROR
;
2437 Transliterator
* t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2438 if (U_FAILURE(ec
)) {
2439 errln("FAIL: createFromRules");
// Regenerated rules go into `rr`, which the messages print next to `r`.
2444 t
->toRules(rr
, TRUE
);
2446 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2448 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2452 // Test toRule/toPattern for quantified segment.
2453 // Careful with spacing here.
2454 r
= "([a-c])+{q} > x $1 x;";
2455 t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2456 if (U_FAILURE(ec
)) {
2457 errln("FAIL: createFromRules");
2461 t
->toRules(rr
, TRUE
);
2463 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2465 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2470 //======================================================================
2472 //======================================================================
// Round-trip check for the Latin <-> Devanagari transliterators: source[i]
// (romanized text) must become expected[i] under "Latin-Devanagari", and
// expected[i] must become source[i] again under "Devanagari-Latin".
// The table entries are ASCII strings holding backslash-u escapes; they are
// converted with CharsToUnicodeString() at the point of use.
2473 void TransliteratorTest::TestDevanagariLatinRT(){
// Both tables are declared with MAX_LEN slots and are indexed in parallel by
// the loop at the end of the function.
2474 const int MAX_LEN
= 52;
2475 const char* const source
[MAX_LEN
] = {
2490 //"r\\u0323ya", // \u095c is not valid in Devanagari
2516 "\\u1E6Dh\\u1E6Dha",
2523 // Not roundtrippable --
2524 // \\u0939\\u094d\\u094d\\u092E - hma
2525 // \\u0939\\u094d\\u092E - hma
2526 // CharsToUnicodeString("hma"),
2531 "san\\u0304j\\u012Bb s\\u0113nagupta",
2532 "\\u0101nand vaddir\\u0101ju",
// Devanagari counterparts; the trailing comment on each row gives the
// romanized form.
2536 const char* const expected
[MAX_LEN
] = {
2537 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2538 "\\u0915\\u094D\\u0930", /* kra */
2539 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2540 "\\u0916\\u094D\\u0930", /* khra */
2541 "\\u0917\\u094D\\u0930", /* gra */
2542 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2543 "\\u091A\\u094D\\u0930", /* cra */
2544 "\\u091B\\u094D\\u0930", /* chra */
2545 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2546 "\\u091D\\u094D\\u0930", /* jhra */
2547 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2548 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2549 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2550 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2551 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2552 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2553 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2554 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2555 "\\u0924\\u094D\\u0924", /* tta */
2556 "\\u0925\\u094D\\u0930", /* thra */
2557 "\\u0926\\u094D\\u0926", /* dda */
2558 "\\u0927\\u094D\\u0930", /* dhra */
2559 "\\u0928\\u094D\\u0928", /* nna */
2560 "\\u092A\\u094D\\u0930", /* pra */
2561 "\\u092B\\u094D\\u0930", /* phra */
2562 "\\u092C\\u094D\\u0930", /* bra */
2563 "\\u092D\\u094D\\u0930", /* bhra */
2564 "\\u092E\\u094D\\u0930", /* mra */
2565 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2566 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2567 "\\u092F\\u094D\\u0930", /* yra */
2568 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2570 "\\u0935\\u094D\\u0930", /* vra */
2571 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2572 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2573 "\\u0938\\u094D\\u0930", /* sra */
2574 "\\u0939\\u094d\\u092E", /* hma */
2575 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2576 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2577 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2578 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2579 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2580 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2581 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2582 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2583 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2585 "\\u0939\\u094D\\u092F", /* hya */
2586 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2587 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2588 "\\u090d", /* e\\u0306 */
2589 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2590 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2594 UErrorCode status
= U_ZERO_ERROR
;
2595 UParseError parseError
;
2596 UnicodeString message
;
// Both directions share one status/parseError pair, so a single failure check
// covers the two createInstance() calls.
2597 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2598 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2599 if(U_FAILURE(status
)){
2600 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2601 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2604 UnicodeString gotResult
;
2605 for(int i
= 0; i
<MAX_LEN
; i
++){
// NOTE(review): gotResult is assigned here but not read in the visible lines.
2606 gotResult
= source
[i
];
2607 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2608 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Round-trip check for the Latin <-> Telugu transliterators: source[i] must
// become expected[i] under "Latin-Telugu" and expected[i] must become
// source[i] again under "Telugu-Latin". Table entries are ASCII strings with
// backslash-u escapes, converted by CharsToUnicodeString() when used.
2614 void TransliteratorTest::TestTeluguLatinRT(){
2615 const int MAX_LEN
=10;
// Romanized names; the trailing comment on each row is the plain spelling.
2616 const char* const source
[MAX_LEN
] = {
2617 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2618 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2619 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2620 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2621 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2622 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2623 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2624 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2625 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2626 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
// Telugu-script counterparts, in the same order as source[].
2629 const char* const expected
[MAX_LEN
] = {
2630 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2631 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2632 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2633 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2634 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2635 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2636 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2637 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2638 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2639 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2642 UErrorCode status
= U_ZERO_ERROR
;
2643 UParseError parseError
;
2644 UnicodeString message
;
// The variable names say "Dev" but these are the Telugu transliterators, as
// the IDs passed to createInstance() show.
2645 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
);
2646 Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2647 if(U_FAILURE(status
)){
2648 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2649 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2652 UnicodeString gotResult
;
2653 for(int i
= 0; i
<MAX_LEN
; i
++){
// NOTE(review): gotResult is assigned here but not read in the visible lines.
2654 gotResult
= source
[i
];
2655 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2656 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Round-trip check using Sanskrit words: source[i] (romanized) must become
// expected[i] (Devanagari) under "Latin-Devanagari" and expected[i] must
// become source[i] again under "Devanagari-Latin". Table entries are ASCII
// strings with backslash-u escapes, converted by CharsToUnicodeString().
2662 void TransliteratorTest::TestSanskritLatinRT(){
// Both tables are declared with MAX_LEN slots and indexed in parallel.
2663 const int MAX_LEN
=16;
2664 const char* const source
[MAX_LEN
] = {
2665 "rmk\\u1E63\\u0113t",
2666 "\\u015Br\\u012Bmad",
2667 "bhagavadg\\u012Bt\\u0101",
2670 "vi\\u1E63\\u0101da",
2672 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2673 "uv\\u0101cr\\u0325",
2674 "dharmak\\u1E63\\u0113tr\\u0113",
2675 "kuruk\\u1E63\\u0113tr\\u0113",
2676 "samav\\u0113t\\u0101",
2678 "m\\u0101mak\\u0101\\u1E25",
2679 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2683 const char* const expected
[MAX_LEN
] = {
2684 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2685 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2686 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2687 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2688 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2689 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2690 "\\u092f\\u094b\\u0917",
2691 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2692 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2693 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2694 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2695 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2696 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2697 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2698 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2699 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2700 "\\u0938\\u0902\\u091c\\u0935",
2702 UErrorCode status
= U_ZERO_ERROR
;
2703 UParseError parseError
;
2704 UnicodeString message
;
// One status/parseError pair is shared by both createInstance() calls, so the
// single failure check below covers either of them.
2705 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2706 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2707 if(U_FAILURE(status
)){
2708 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2709 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2712 UnicodeString gotResult
;
2713 for(int i
= 0; i
<MAX_LEN
; i
++){
// NOTE(review): gotResult is assigned here but not read in the visible lines.
2714 gotResult
= source
[i
];
2715 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2716 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Compound round trips: each transliterator is created from an ID that
// chains a transform with its inverse ("A-B;B-A"), so every input is expected
// to come back unchanged. source[] holds romanized text and expected[] the
// Devanagari equivalents, both as ASCII strings with backslash-u escapes.
2723 void TransliteratorTest::TestCompoundLatinRT(){
2724 const char* const source
[] = {
2725 "rmk\\u1E63\\u0113t",
2726 "\\u015Br\\u012Bmad",
2727 "bhagavadg\\u012Bt\\u0101",
2730 "vi\\u1E63\\u0101da",
2732 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2733 "uv\\u0101cr\\u0325",
2734 "dharmak\\u1E63\\u0113tr\\u0113",
2735 "kuruk\\u1E63\\u0113tr\\u0113",
2736 "samav\\u0113t\\u0101",
2738 "m\\u0101mak\\u0101\\u1E25",
2739 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
// Element count of source[]; also used as the declared size of expected[].
2743 const int MAX_LEN
= sizeof(source
)/sizeof(source
[0]);
2744 const char* const expected
[MAX_LEN
] = {
2745 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2746 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2747 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2748 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2749 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2750 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2751 "\\u092f\\u094b\\u0917",
2752 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2753 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2754 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2755 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2756 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2757 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2758 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2759 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2760 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2761 "\\u0938\\u0902\\u091c\\u0935"
// NOTE(review): expected[] is declared with MAX_LEN elements, so
// sizeof(expected)/sizeof(expected[0]) is MAX_LEN by construction and this
// check cannot fire; a short initializer list would leave trailing null
// pointers instead of changing the array length.
2763 if(MAX_LEN
!= sizeof(expected
)/sizeof(expected
[0])) {
2764 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2768 UErrorCode status
= U_ZERO_ERROR
;
2769 UParseError parseError
;
2770 UnicodeString message
;
// All four instances share one status, so the single check after them reports
// a failure from any of the createInstance() calls.
2771 Transliterator
* devToLatinToDev
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2772 Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2773 Transliterator
* devToTelToDev
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2774 Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2776 if(U_FAILURE(status
)){
2777 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2778 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2781 UnicodeString gotResult
;
2782 for(int i
= 0; i
<MAX_LEN
; i
++){
// NOTE(review): gotResult is assigned here but not read in the visible lines.
2783 gotResult
= source
[i
];
// Input and expected output are the same string in each call below.
// NOTE(review): devToTelToDev is created and deleted but has no expect()
// call in the visible lines.
2784 expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
]));
2785 expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2786 expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2789 delete(latinToDevToLatin
);
2790 delete(devToLatinToDev
);
2791 delete(devToTelToDev
);
2792 delete(latinToTelToLatin
);
2796 * Test Gurmukhi-Devanagari Tippi and Bindi
// Verifies how "Devanagari-Gurmukhi" maps the Devanagari sign U+0902
// depending on the character before it: U+0A02 after a member of `vowel`,
// U+0A70 after a member of `non_vowel`. Each test string has two characters:
// slot 0 is overwritten with the set member under test, slot 1 is the sign.
2798 void TransliteratorTest::TestGurmukhiDevanagari(){
2800 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2801 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2802 UErrorCode status
= U_ZERO_ERROR
;
// The set patterns are written with backslash-u escapes and unescaped before
// being handed to the UnicodeSet constructor.
2803 UnicodeSet
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status
);
2804 UnicodeSet
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status
);
2805 UParseError parseError
;
2807 UnicodeSetIterator
vIter(vowel
);
2808 UnicodeSetIterator
nvIter(non_vowel
);
2809 Transliterator
* trans
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
);
2810 if(U_FAILURE(status
)) {
2811 errln("Error creating transliterator %s", u_errorName(status
));
// Templates: a placeholder space followed by the sign (source) or by the
// result expected for the vowel case.
2815 UnicodeString
src (" \\u0902");
2816 UnicodeString
expected(" \\u0A02");
2817 src
= src
.unescape();
2818 expected
= expected
.unescape();
2820 while(vIter
.next()){
// The expected first character is the source character shifted by 0x0100,
// the same offset that separates U+0902 from U+0A02.
2821 src
.setCharAt(0,(UChar
) vIter
.getCodepoint());
2822 expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100));
2823 expect(*trans
,src
,expected
);
// Switch the expected second character to U+0A70 for the non_vowel pass.
2826 expected
.setCharAt(1,0x0A70);
2827 while(nvIter
.next()){
2828 //src.setCharAt(0,(char) nvIter.codepoint);
2829 src
.setCharAt(0,(UChar
)nvIter
.getCodepoint());
2830 expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100));
2831 expect(*trans
,src
,expected
);
2836 * Test instantiation from a locale.
// Creates transliterators from IDs written with locale names ("ru_RU-Latin",
// "en-el") and checks a single character mapping for each one.
2838 void TransliteratorTest::TestLocaleInstantiation(void) {
2840 UErrorCode ec
= U_ZERO_ERROR
;
2841 Transliterator
*t
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
);
2842 if (U_FAILURE(ec
)) {
2843 errln("FAIL: createInstance(ru_RU-Latin)");
// U+0430 (Cyrillic small a) must come out as Latin "a".
2847 expect(*t
, CharsToUnicodeString("\\u0430"), "a");
// ec is reused from the first createInstance() call without being reset in
// the visible lines.
2850 t
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
);
2851 if (U_FAILURE(ec
)) {
2852 errln("FAIL: createInstance(en-el)");
// Latin "a" must come out as U+03B1 (Greek small alpha).
2856 expect(*t
, "a", CharsToUnicodeString("\\u03B1"));
2861 * Test title case handling of accent (should ignore accents)
// Checks that the "Title" transliterator does not treat a combining accent or
// an apostrophe as the start of a new word.
2863 void TransliteratorTest::TestTitleAccents(void) {
2865 UErrorCode ec
= U_ZERO_ERROR
;
2866 Transliterator
*t
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
);
2867 if (U_FAILURE(ec
)) {
2868 errln("FAIL: createInstance(Title)");
// In the expected text the "b" after U+0300 and the "t" after the apostrophe
// stay lowercase; only the first letter of each word is capitalized.
2872 expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2877 * Basic test of a locale resource based rule.
// Table-driven test: for each (ID, input, expected output) triple in DATA,
// create the transliterator named by the ID and check that it converts the
// input to the expected output.
2879 void TransliteratorTest::TestLocaleResource() {
// Flat array of triples; input and output use backslash-u escapes and are
// converted by CharsToUnicodeString() when used.
2880 const char* DATA
[] = {
2882 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2883 "Latin-el", "b", "\\u03bc\\u03c0",
2884 "Latin-Greek", "b", "\\u03B2",
2885 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2886 "el-Latin", "\\u03B2", "v",
2887 "Greek-Latin", "\\u03B2", "b",
2889 const int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
// Step by 3: one triple per iteration.
2890 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
2892 UErrorCode ec
= U_ZERO_ERROR
;
2893 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
);
2894 if (U_FAILURE(ec
)) {
2895 errln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ")");
2899 expect(*t
, CharsToUnicodeString(DATA
[i
+1]),
2900 CharsToUnicodeString(DATA
[i
+2]));
2906 * Make sure parse errors reference the right line.
// Builds a transliterator from `rule` and expects the parse to fail with the
// error context pointing at the offending rule text.
// NOTE(review): the definition of `rule` and the declaration of `pe` are not
// visible in this excerpt.
2908 void TransliteratorTest::TestParseError() {
2913 UErrorCode ec
= U_ZERO_ERROR
;
2915 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2917 if (U_FAILURE(ec
)) {
// Join the text before and after the error position with '|' (code 124) and
// require that the result contains "d << b".
2918 UnicodeString
err(pe
.preContext
);
2919 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2920 if (err
.indexOf("d << b") >= 0) {
2921 logln("Ok: " + err
);
2923 errln("FAIL: " + err
);
// Reported when no error was raised (presumably the else branch of the
// U_FAILURE check; the branch line itself is not visible here).
2927 errln("FAIL: no syntax error");
2931 * Make sure sets on output are disallowed.
// The rule puts a set-valued variable ($set) on the output side of a rule.
// A parse failure is the passing outcome: it is logged as "Ok:" together with
// the error context, while a successful parse is reported as a failure.
2933 void TransliteratorTest::TestOutputSet() {
2934 UnicodeString rule
= "$set = [a-cm-n]; b > $set;";
2935 UErrorCode ec
= U_ZERO_ERROR
;
2937 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2939 if (U_FAILURE(ec
)) {
// Pre- and post-context joined with '|' (code 124), for the log only.
2940 UnicodeString
err(pe
.preContext
);
2941 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2942 logln("Ok: " + err
);
2945 errln("FAIL: No syntax error");
2949 * Test the use variable range pragma, making sure that use of
2950 * variable range characters is detected and flagged as an error.
// The rule text first reserves 0x70..0x72 with "use variable range" and then
// uses 'q' (0x71, inside that range) in a rule. A parse failure is the
// passing outcome and is logged as "Ok:"; a successful parse is reported as a
// failure.
2952 void TransliteratorTest::TestVariableRange() {
2953 UnicodeString rule
= "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2954 UErrorCode ec
= U_ZERO_ERROR
;
2956 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2958 if (U_FAILURE(ec
)) {
// Pre- and post-context joined with '|' (code 124), for the log only.
2959 UnicodeString
err(pe
.preContext
);
2960 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2961 logln("Ok: " + err
);
2964 errln("FAIL: No syntax error");
2968 * Test invalid post context error handling
// The rule places the '}' marker before the '{' marker. The parse is expected
// to fail, and the reported error context must contain the offending text
// "a}b{c".
2970 void TransliteratorTest::TestInvalidPostContext() {
2971 UnicodeString rule
= "a}b{c>d;";
2972 UErrorCode ec
= U_ZERO_ERROR
;
2974 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2976 if (U_FAILURE(ec
)) {
// Join the text before and after the error position with '|' (code 124).
2977 UnicodeString
err(pe
.preContext
);
2978 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2979 if (err
.indexOf("a}b{c") >= 0) {
2980 logln("Ok: " + err
);
2982 errln("FAIL: " + err
);
// Reported when the malformed rule was accepted without an error.
2986 errln("FAIL: No syntax error");
2990 * Test ID form variants
// Table-driven test of transliterator ID spellings. For each triple the test
// creates an instance from the ID, creates its inverse, and compares the IDs
// reported by both objects with the expected values.
2992 void TransliteratorTest::TestIDForms() {
// Triples of: ID to create, expected ID of the created object, expected ID of
// its inverse. A NULL third element marks the ID as expected to be rejected
// (see expValid below). A NULL second element is handled by the
// "expID == NULL" check in the loop, whose body is not visible here.
2993 const char* DATA
[] = {
2995 "nfd", NULL
, "NFC", // make sure case is ignored
2996 "Any-NFKD", NULL
, "Any-NFKC",
2997 "Null", NULL
, "Null",
2998 "-nfkc", "nfkc", "NFKD",
2999 "-nfkc/", "nfkc", "NFKD",
3000 "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN",
3001 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3002 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3003 "Source-", NULL
, NULL
,
3004 "Source/Variant-", NULL
, NULL
,
3005 "Source-/Variant", NULL
, NULL
,
3006 "/Variant", NULL
, NULL
,
3007 "/Variant-", NULL
, NULL
,
3008 "-/Variant", NULL
, NULL
,
3013 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
// Step by 3: one triple per iteration.
3015 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3016 const char* ID
= DATA
[i
];
3017 const char* expID
= DATA
[i
+1];
3018 const char* expInvID
= DATA
[i
+2];
// The ID is expected to be valid exactly when an inverse ID is listed.
3019 UBool expValid
= (expInvID
!= NULL
);
3020 if (expID
== NULL
) {
3024 UErrorCode ec
= U_ZERO_ERROR
;
3026 Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
3027 if (U_FAILURE(ec
)) {
// A creation failure is logged as "Ok" or reported as "FAIL"; presumably the
// choice depends on expValid (the selecting condition is not visible here).
3029 logln((UnicodeString
)"Ok: getInstance(" + ID
+") => " + u_errorName(ec
));
3031 errln((UnicodeString
)"FAIL: Couldn't create " + ID
);
3036 Transliterator
*u
= t
->createInverse(ec
);
3037 if (U_FAILURE(ec
)) {
3038 errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
);
// Both the forward ID and the inverse ID must match the table.
3043 if (t
->getID() == expID
&&
3044 u
->getID() == expInvID
) {
3045 logln((UnicodeString
)"Ok: " + ID
+ ".getInverse() => " + expInvID
);
3047 errln((UnicodeString
)"FAIL: getInstance(" + ID
+ ") => " +
3048 t
->getID() + " x getInverse() => " + u
->getID() +
3049 ", expected " + expInvID
);
// NUL-terminated UChar strings passed to UnicodeString::findAndReplace() in
// checkRules(): space (32), line feed (10), carriage return (13), and the
// empty string used as the replacement.
3056 static const UChar SPACE
[] = {32,0};
3057 static const UChar NEWLINE
[] = {10,0};
3058 static const UChar RETURN
[] = {13,0};
3059 static const UChar EMPTY
[] = {0};
// Compares the rules regenerated from t2 (via toRules) with the expected rule
// text, ignoring whitespace differences, and logs both strings when they
// differ.
//   label            - message identifying the check (its use is not visible
//                      in this excerpt)
//   t2               - transliterator whose rules are regenerated
//   testRulesForward - expected rule text
3061 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
,
3062 const UnicodeString
& testRulesForward
) {
3063 UnicodeString rules2
; t2
.toRules(rules2
, TRUE
);
3064 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
// Remove spaces, line feeds and carriage returns from the generated rules.
3065 rules2
.findAndReplace(SPACE
, EMPTY
);
3066 rules2
.findAndReplace(NEWLINE
, EMPTY
);
3067 rules2
.findAndReplace(RETURN
, EMPTY
);
// Only spaces are removed from the expected text, on a copy so that the
// original can be logged unchanged.
3069 UnicodeString
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
);
3071 if (rules2
!= testRules
) {
3073 logln((UnicodeString
)"GENERATED RULES: " + rules2
);
3074 logln((UnicodeString
)"SHOULD BE: " + testRulesForward
);
3079 * Mark's toRules test.
// Builds a forward (t2) and a reverse (t3) transliterator from the same
// bidirectional rule text, checks one conversion in each direction, and then
// compares each object's regenerated rules with the expected one-directional
// text via checkRules().
// NOTE(review): only parts of the three rule strings are visible in this
// excerpt.
3081 void TransliteratorTest::TestToRulesMark() {
// Bidirectional source rules.
3082 const char* testRules
=
3083 "::[[:Latin:][:Mark:]];"
3086 "a <> \\u03B1;" // alpha
3090 "::([[:Greek:][:Mark:]]);"
// Expected toRules() output for the forward transliterator.
3092 const char* testRulesForward
=
3093 "::[[:Latin:][:Mark:]];"
// Expected toRules() output for the reverse transliterator.
3101 const char* testRulesBackward
=
3102 "::[[:Greek:][:Mark:]];"
3109 UnicodeString source
= CharsToUnicodeString("\\u00E1"); // a-acute
3110 UnicodeString target
= CharsToUnicodeString("\\u03AC"); // alpha-acute
3113 UErrorCode ec
= U_ZERO_ERROR
;
// Same rule text, opposite directions; one ec covers both calls.
3114 Transliterator
*t2
= Transliterator::createFromRules("source-target", testRules
, UTRANS_FORWARD
, pe
, ec
);
3115 Transliterator
*t3
= Transliterator::createFromRules("target-source", testRules
, UTRANS_REVERSE
, pe
, ec
);
3117 if (U_FAILURE(ec
)) {
3120 errln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
));
3124 expect(*t2
, source
, target
);
3125 expect(*t3
, target
, source
);
3127 checkRules("Failed toRules FORWARD", *t2
, testRulesForward
);
3128 checkRules("Failed toRules BACKWARD", *t3
, testRulesBackward
);
3135 * Test Escape and Unescape transliterators.
// Exercises the escaping transliterators: "Hex-Any" (escape sequences to
// characters) and three "Any-Hex" variants (characters to escape sequences in
// C, Java and Perl notation).
// NOTE(review): the declarations of t, pe and ec and the opening part of each
// expect() call are not visible in this excerpt.
3137 void TransliteratorTest::TestEscape() {
3143 t
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
);
3144 if (U_FAILURE(ec
)) {
3145 errln((UnicodeString
)"FAIL: createInstance");
3148 "\\x{40}\\U000000312Q",
// C variant: the supplementary character U+10BEEF is written as one
// 8-digit \U escape.
3154 t
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
);
3155 if (U_FAILURE(ec
)) {
3156 errln((UnicodeString
)"FAIL: createInstance");
3159 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3160 "\\u0041\\U0010BEEF\\uFEED");
// Java variant: U+10BEEF is written as its surrogate pair DBEF DEEF.
3165 t
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
);
3166 if (U_FAILURE(ec
)) {
3167 errln((UnicodeString
)"FAIL: createInstance");
3170 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3171 "\\u0041\\uDBEF\\uDEEF\\uFEED");
// Perl variant: every character is written as \x{...} without zero padding.
3176 t
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
);
3177 if (U_FAILURE(ec
)) {
3178 errln((UnicodeString
)"FAIL: createInstance");
3181 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3182 "\\x{41}\\x{10BEEF}\\x{FEED}");
// Builds a transliterator from a rule set in which an anchored rule ("^a")
// precedes an unanchored rule for the same character, and reports a detailed
// diagnostic if the rules fail to parse.
// NOTE(review): any checks performed after successful creation are not
// visible in this excerpt.
3188 void TransliteratorTest::TestAnchorMasking(){
3189 UnicodeString
rule ("^a > Q; a > q;");
3190 UErrorCode status
= U_ZERO_ERROR
;
3191 UParseError parseError
;
3193 Transliterator
* t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
);
3194 if(U_FAILURE(status
)){
// The message includes the line, offset and pre-context recorded in
// parseError, plus the rule text itself.
3195 errln(UnicodeString("FAIL: ") + "ID" +
3196 ".createFromRules() => bad rules" +
3197 /*", parse error " + parseError.code +*/
3198 ", line " + parseError
.line
+
3199 ", offset " + parseError
.offset
+
3200 ", context " + prettify(parseError
.preContext
, TRUE
) +
3201 ", rules: " + prettify(rule
, TRUE
));
3207 * Make sure display names of variants look reasonable.
// Checks Transliterator::getDisplayName() in the en_US locale for each ID in
// DATA, and for the ID of the transliterator created from it in the reverse
// direction.
3209 void TransliteratorTest::TestDisplayName() {
// The test is skipped with a log message when formatting support is
// configured out.
3210 #if UCONFIG_NO_FORMATTING
3211 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3214 static const char* DATA
[] = {
3215 // ID, forward name, reverse name
3216 // Update the text as necessary -- the important thing is
3217 // not the text itself, but how various cases are handled.
3220 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3223 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3226 "NFC", "Any to NFC", "Any to NFD",
3229 int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
3231 Locale
US("en", "US");
// Step by 3: one (ID, forward name, reverse name) triple per iteration.
3233 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// Forward name: looked up directly from the ID string.
3235 Transliterator::getDisplayName(DATA
[i
], US
, name
);
3236 if (name
!= DATA
[i
+1]) {
3237 errln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " +
3238 name
+ ", expected " + DATA
[i
+1]);
3240 logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
);
3242 UErrorCode ec
= U_ZERO_ERROR
;
// Reverse name: create the reverse-direction transliterator and look up the
// display name of the ID it reports.
3244 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
);
3245 if (U_FAILURE(ec
)) {
3247 errln("FAIL: createInstance failed");
3250 name
= Transliterator::getDisplayName(t
->getID(), US
, name
);
3251 if (name
!= DATA
[i
+2]) {
3252 errln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " +
3253 name
+ ", expected " + DATA
[i
+2]);
3255 logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
);
// Table-driven regression test. It first registers the rule-based
// transliterators listed in registerRules, then runs every
// (ID, source, target) case in testCases, and finally unregisters what it
// registered. An empty target means "compute the expected result
// independently" (normalization or case mapping, chosen by the ID).
// Both tables are scanned until an entry with length 0 is reached.
3262 void TransliteratorTest::TestSpecialCases(void) {
// Pairs of (ID, rule text) to register before the test cases run.
3263 const UnicodeString registerRules
[] = {
3264 "Any-Dev1", "x > X; y > Y;",
3265 "Any-Dev2", "XY > Z",
3267 CharsToUnicodeString
3268 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
// Triples of (transliterator ID, source text, expected target or "").
3272 const UnicodeString testCases
[] = {
3274 // should add more test cases
3275 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3276 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3277 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3278 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3281 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3282 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3284 // check for devanagari bug
3285 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3287 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3288 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3289 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE
+ DESERET_dee
,
3291 //TODO: enable this test once Titlecase works right
3293 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3294 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3296 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3297 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE
+ DESERET_DEE
,
3298 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3299 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee
+ DESERET_dee
,
3301 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3302 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3305 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3306 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3307 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3308 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3309 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3310 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3311 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3312 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3314 // Upper: TAT\\u02B9\\u00C2NA
3315 // Lower: tat\\u02B9\\u00E2na
3316 // Title: Tat\\u02B9\\u00E2na
3317 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3318 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3319 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3320 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3321 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3322 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
// Phase 1: compile and register each (ID, rules) pair.
3329 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3330 UErrorCode status
= U_ZERO_ERROR
;
3332 Transliterator
*t
= Transliterator::createFromRules(registerRules
[0+i
],
3333 registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
);
3334 if (U_FAILURE(status
)) {
3335 errln("Fails: Unable to create the transliterator from rules.");
3337 Transliterator::registerInstance(t
);
// Phase 2: run each test case.
3340 for (i
= 0; testCases
[i
].length()!=0; i
+=3) {
3341 UErrorCode ec
= U_ZERO_ERROR
;
3343 const UnicodeString
& name
= testCases
[i
];
3344 Transliterator
*t
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
);
3345 if (U_FAILURE(ec
)) {
3346 errln((UnicodeString
)"FAIL: Couldn't create " + name
);
3350 const UnicodeString
& id
= t
->getID();
3351 const UnicodeString
& source
= testCases
[i
+1];
3352 UnicodeString target
;
3354 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
// An explicit target wins; otherwise the expected value is produced by the
// Normalizer or by UnicodeString case mapping, selected by a case-insensitive
// match on the transliterator's ID.
3356 if (testCases
[i
+2].length() > 0) {
3357 target
= testCases
[i
+2];
3358 } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) {
3359 Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
);
3360 } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) {
3361 Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
);
3362 } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) {
3363 Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
);
3364 } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) {
3365 Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
);
3366 } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) {
3368 target
.toLower(Locale::getUS());
3369 } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) {
3371 target
.toUpper(Locale::getUS());
3373 if (U_FAILURE(ec
)) {
3374 errln((UnicodeString
)"FAIL: Internal error normalizing " + source
);
3378 expect(*t
, source
, target
);
// Phase 3: remove the transliterators registered in phase 1.
3381 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3382 Transliterator::unregister(registerRules
[i
]);
// Writes an escaped form of code point ch into buffer: a backslash, 'u' and
// four lowercase hex digits, or a backslash, 'U' and eight lowercase hex
// digits. The condition choosing between the two forms and the return
// statement are not visible in this excerpt.
// sprintf() does no bounds checking, so buffer must have room for the longer
// form: 10 characters plus the terminating NUL.
3386 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) {
3388 sprintf(buffer
, "\\u%04x", (int)ch
);
3390 sprintf(buffer
, "\\U%08x", (int)ch
);
// Checks that the low-level case-mapping functions handle supplementary
// (surrogate-pair) characters, using the DESERET_dee / DESERET_DEE strings:
// u_totitle() on a single code point, then u_strToUpper() and u_strToLower()
// on a two-character string.
// NOTE(review): the declarations of dee, buffer and buffer2 are not visible
// in this excerpt.
3395 void TransliteratorTest::TestSurrogateCasing (void) {
3396 // check that casing handles surrogates
3397 // titlecase is currently defective
// Read the code point at index 0 of DESERET_dee into dee.
3401 UTF_GET_CHAR(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
);
3402 UnicodeString
DEE(u_totitle(dee
));
3403 if (DEE
!= DESERET_DEE
) {
3404 err("Fails titlecase of surrogates");
3405 err(Char32ToEscapedChars(dee
, buffer
));
3407 errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
));
3410 UnicodeString deeDEETest
=DESERET_dee
+ DESERET_DEE
;
3411 UnicodeString deedeeTest
= DESERET_dee
+ DESERET_dee
;
3412 UnicodeString DEEDEETest
= DESERET_DEE
+ DESERET_DEE
;
3413 UErrorCode status
= U_ZERO_ERROR
;
// Destination capacity is passed as 20 UChars; the locale argument is NULL.
3415 u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3416 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) {
3417 errln("Fails: Can't uppercase surrogates.");
// Reset status so the lowercase check is independent of the uppercase one.
3420 status
= U_ZERO_ERROR
;
3421 u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3422 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) {
3423 errln("Fails: Can't lowercase surrogates.");
3427 static void _trans(Transliterator
& t
, const UnicodeString
& src
,
3428 UnicodeString
& result
) {
3430 t
.transliterate(result
);
3433 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
,
3434 UnicodeString
& result
, UErrorCode ec
) {
3436 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3437 if (U_SUCCESS(ec
)) {
3438 _trans(*t
, src
, result
);
3443 static UnicodeString
_findMatch(const UnicodeString
& source
,
3444 const UnicodeString
* pairs
) {
3445 UnicodeString empty
;
3446 for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) {
3447 if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) {
3454 // Check to see that incremental gets at least part way through a reasonable string.
3456 void TransliteratorTest::TestIncrementalProgress(void) {
3457 UErrorCode ec
= U_ZERO_ERROR
;
3458 UnicodeString latinTest
= "The Quick Brown Fox.";
3459 UnicodeString devaTest
;
3460 _trans("Latin-Devanagari", latinTest
, devaTest
, ec
);
3461 UnicodeString kataTest
;
3462 _trans("Latin-Katakana", latinTest
, kataTest
, ec
);
3463 if (U_FAILURE(ec
)) {
3464 errln("FAIL: Internal error");
3467 const UnicodeString tests
[] = {
3470 "Halfwidth", latinTest
,
3471 "Devanagari", devaTest
,
3472 "Katakana", kataTest
,
3476 UnicodeString
test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3477 int32_t i
= 0, j
=0, k
=0;
3478 int32_t sources
= Transliterator::countAvailableSources();
3479 for (i
= 0; i
< sources
; i
++) {
3480 UnicodeString source
;
3481 Transliterator::getAvailableSource(i
, source
);
3482 UnicodeString test
= _findMatch(source
, tests
);
3483 if (test
.length() == 0) {
3484 logln((UnicodeString
)"Skipping " + source
+ "-X");
3487 int32_t targets
= Transliterator::countAvailableTargets(source
);
3488 for (j
= 0; j
< targets
; j
++) {
3489 UnicodeString target
;
3490 Transliterator::getAvailableTarget(j
, source
, target
);
3491 int32_t variants
= Transliterator::countAvailableVariants(source
, target
);
3492 for (k
=0; k
< variants
; k
++) {
3493 UnicodeString variant
;
3495 UErrorCode status
= U_ZERO_ERROR
;
3497 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
3498 UnicodeString id
= source
+ "-" + target
+ "/" + variant
;
3500 if(id
.indexOf("Thai")>-1 && !isICUVersionAtLeast(ICU_37
)){
3501 /* The Thai-Latin transliterator doesn't exist in ICU4C yet */
3504 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
);
3505 if (U_FAILURE(status
)) {
3506 errln((UnicodeString
)"FAIL: Could not create " + id
);
3510 status
= U_ZERO_ERROR
;
3511 CheckIncrementalAux(t
, test
);
3514 _trans(*t
, test
, rev
);
3515 Transliterator
*inv
= t
->createInverse(status
);
3516 if (U_FAILURE(status
)) {
3517 errln((UnicodeString
)"FAIL: Could not create inverse of " + id
);
3522 CheckIncrementalAux(inv
, rev
);
3530 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,
3531 const UnicodeString
& input
) {
3532 UErrorCode ec
= U_ZERO_ERROR
;
3534 UnicodeString test
= input
;
3536 pos
.contextStart
= 0;
3537 pos
.contextLimit
= input
.length();
3539 pos
.limit
= input
.length();
3541 t
->transliterate(test
, pos
, ec
);
3542 if (U_FAILURE(ec
)) {
3543 errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
));
3546 UBool gotError
= FALSE
;
3548 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3550 if (pos
.start
== 0 && pos
.limit
!= 0 && t
->getID() != "Hex-Any/Unicode") {
3551 errln((UnicodeString
)"No Progress, " +
3552 t
->getID() + ": " + formatInput(test
, input
, pos
));
3555 logln((UnicodeString
)"PASS Progress, " +
3556 t
->getID() + ": " + formatInput(test
, input
, pos
));
3558 t
->finishTransliteration(test
, pos
);
3559 if (pos
.start
!= pos
.limit
) {
3560 errln((UnicodeString
)"Incomplete, " +
3561 t
->getID() + ": " + formatInput(test
, input
, pos
));
3566 void TransliteratorTest::TestFunction() {
3567 // Careful with spacing and ';' here: Phrase this exactly
3568 // as toRules() is going to return it. If toRules() changes
3569 // with regard to spacing or ';', then adjust this string.
3570 UnicodeString rule
=
3571 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3574 UErrorCode ec
= U_ZERO_ERROR
;
3575 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3577 errln("FAIL: createFromRules failed");
3582 t
->toRules(r
, TRUE
);
3584 logln((UnicodeString
)"OK: toRules() => " + r
);
3586 errln((UnicodeString
)"FAIL: toRules() => " + r
+
3587 ", expected " + rule
);
3590 expect(*t
, "The Quick Brown Fox",
3591 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
3596 void TransliteratorTest::TestInvalidBackRef(void) {
3597 UnicodeString rule
= ". > $1;";
3598 UnicodeString rule2
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3600 UErrorCode ec
= U_ZERO_ERROR
;
3601 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3602 Transliterator
*t2
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
);
3605 errln("FAIL: createFromRules should have returned NULL");
3610 errln("FAIL: createFromRules should have returned NULL");
3614 if (U_SUCCESS(ec
)) {
3615 errln("FAIL: Ok: . > $1; => no error");
3617 logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
));
3621 void TransliteratorTest::TestMulticharStringSet() {
3628 " e } [{fg}] > r;" ;
3631 UErrorCode ec
= U_ZERO_ERROR
;
3632 Transliterator
* t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3633 if (t
== NULL
|| U_FAILURE(ec
)) {
3635 errln("FAIL: createFromRules failed");
3639 expect(*t
, "a aa ab bc d gd de gde gdefg ddefg",
3640 "y x yz z d gd de gdq gdqfg ddrfg");
3643 // Overlapped string test. Make sure that when multiple
3644 // strings can match that the longest one is matched.
3646 " [a {ab} {abc}] > x;"
3649 " q [t {st} {rst}] { e > p;" ;
3651 t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3652 if (t
== NULL
|| U_FAILURE(ec
)) {
3654 errln("FAIL: createFromRules failed");
3658 expect(*t
, "a ab abc qte qste qrste",
3659 "x x x qtp qstp qrstp");
3663 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3664 // BEGIN TestUserFunction support factory
3666 Transliterator
* _TUFF
[4];
3667 UnicodeString
* _TUFID
[4];
3669 static Transliterator
* U_EXPORT2
_TUFFactory(const UnicodeString
& /*ID*/,
3670 Transliterator::Token context
) {
3671 return _TUFF
[context
.integer
]->clone();
3674 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) {
3676 _TUFID
[n
] = new UnicodeString(ID
);
3677 Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
));
3680 static void _TUFUnreg(int32_t n
) {
3681 if (_TUFF
[n
] != NULL
) {
3682 Transliterator::unregister(*_TUFID
[n
]);
3688 // END TestUserFunction support factory
3689 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3692 * Test that user-registered transliterators can be used under function
3695 void TransliteratorTest::TestUserFunction() {
3699 UErrorCode ec
= U_ZERO_ERROR
;
3701 // Setup our factory
3703 for (i
=0; i
<4; ++i
) {
3707 // There's no need to register inverses if we don't use them
3708 t
= Transliterator::createFromRules("gif",
3709 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
3710 UTRANS_FORWARD
, pe
, ec
);
3711 if (t
== NULL
|| U_FAILURE(ec
)) {
3712 errln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
));
3715 _TUFReg("Any-gif", t
, 0);
3717 t
= Transliterator::createFromRules("RemoveCurly",
3718 "[\\{\\}] > ; '\\N' > ;",
3719 UTRANS_FORWARD
, pe
, ec
);
3720 if (t
== NULL
|| U_FAILURE(ec
)) {
3721 errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
));
3724 expect(*t
, "\\N{name}", "name");
3725 _TUFReg("Any-RemoveCurly", t
, 1);
3727 logln("Trying &hex");
3728 t
= Transliterator::createFromRules("hex2",
3730 UTRANS_FORWARD
, pe
, ec
);
3731 if (t
== NULL
|| U_FAILURE(ec
)) {
3732 errln("FAIL: createFromRules");
3735 logln("Registering");
3736 _TUFReg("Any-hex2", t
, 2);
3737 t
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
);
3738 if (t
== NULL
|| U_FAILURE(ec
)) {
3739 errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
));
3742 expect(*t
, "abc", "\\u0061\\u0062\\u0063");
3745 logln("Trying &gif");
3746 t
= Transliterator::createFromRules("gif2",
3747 "(.) > &Gif(&Hex2($1));",
3748 UTRANS_FORWARD
, pe
, ec
);
3749 if (t
== NULL
|| U_FAILURE(ec
)) {
3750 errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
));
3753 logln("Registering");
3754 _TUFReg("Any-gif2", t
, 3);
3755 t
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
);
3756 if (t
== NULL
|| U_FAILURE(ec
)) {
3757 errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
));
3760 expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3761 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3764 // Test that filters are allowed after &
3765 t
= Transliterator::createFromRules("test",
3766 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3767 UTRANS_FORWARD
, pe
, ec
);
3768 if (t
== NULL
|| U_FAILURE(ec
)) {
3769 errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
));
3773 "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
3777 for (i
=0; i
<4; ++i
) {
3783 * Test the Any-X transliterators.
3785 void TransliteratorTest::TestAnyX(void) {
3786 UParseError parseError
;
3787 UErrorCode status
= U_ZERO_ERROR
;
3788 Transliterator
* anyLatin
=
3789 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3791 errln("FAIL: createInstance returned NULL");
3797 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3798 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3804 * Test the source and target set API. These are only implemented
3805 * for RBT and CompoundTransliterator at this time.
3807 void TransliteratorTest::TestSourceTargetSet() {
3808 UErrorCode ec
= U_ZERO_ERROR
;
3816 UnicodeSet
expSrc("[arx{lu}]", ec
);
3819 UnicodeSet
expTrg("[bq]", ec
);
3822 Transliterator
* t
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
);
3824 if (U_FAILURE(ec
)) {
3826 errln("FAIL: Couldn't set up test");
3830 UnicodeSet src
; t
->getSourceSet(src
);
3831 UnicodeSet trg
; t
->getTargetSet(trg
);
3833 if (src
== expSrc
&& trg
== expTrg
) {
3835 logln((UnicodeString
)"Ok: " +
3836 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3837 ", target = " + trg
.toPattern(b
, TRUE
));
3839 UnicodeString a
, b
, c
, d
;
3840 errln((UnicodeString
)"FAIL: " +
3841 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3842 ", expected " + expSrc
.toPattern(b
, TRUE
) +
3843 "; target = " + trg
.toPattern(c
, TRUE
) +
3844 ", expected " + expTrg
.toPattern(d
, TRUE
));
3851 * Test handling of rule whitespace, for both RBT and UnicodeSet.
3853 void TransliteratorTest::TestRuleWhitespace() {
3855 const char* r
= "a > \\u200E b;";
3857 UErrorCode ec
= U_ZERO_ERROR
;
3859 Transliterator
* t
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
);
3861 if (U_FAILURE(ec
)) {
3862 errln("FAIL: Couldn't set up test");
3864 expect(*t
, "a", "b");
3870 UnicodeSet
set(CharsToUnicodeString("[a \\u200E]"), ec
);
3872 if (U_FAILURE(ec
)) {
3873 errln("FAIL: Couldn't set up test");
3875 if (set
.contains(0x200E)) {
3876 errln("FAIL: U+200E not being ignored by UnicodeSet");
3880 //======================================================================
3881 // this method is in TestUScript.java
3882 //======================================================================
3883 void TransliteratorTest::TestAllCodepoints(){
3884 UScriptCode code
= USCRIPT_INVALID_CODE
;
3885 char id
[256]={'\0'};
3886 char abbr
[256]={'\0'};
3887 char newId
[256]={'\0'};
3888 char newAbbrId
[256]={'\0'};
3889 char oldId
[256]={'\0'};
3890 char oldAbbrId
[256]={'\0'};
3892 UErrorCode status
=U_ZERO_ERROR
;
3895 for(uint32_t i
= 0; i
<=0x10ffff; i
++){
3896 code
= uscript_getScript(i
,&status
);
3897 if(code
== USCRIPT_INVALID_CODE
){
3898 errln("uscript_getScript for codepoint \\U%08X failed.\n", i
);
3900 const char* myId
= uscript_getName(code
);
3902 errln("Valid script code returned NULL name. Check your data!");
3905 uprv_strcpy(id
,myId
);
3906 uprv_strcpy(abbr
,uscript_getShortName(code
));
3908 uprv_strcpy(newId
,"[:");
3909 uprv_strcat(newId
,id
);
3910 uprv_strcat(newId
,":];NFD");
3912 uprv_strcpy(newAbbrId
,"[:");
3913 uprv_strcat(newAbbrId
,abbr
);
3914 uprv_strcat(newAbbrId
,":];NFD");
3916 if(uprv_strcmp(newId
,oldId
)!=0){
3917 Transliterator
* t
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
);
3918 if(t
==NULL
|| U_FAILURE(status
)){
3919 errln((UnicodeString
)"FAIL: Could not create " + id
);
3923 if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){
3924 Transliterator
* t
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
);
3925 if(t
==NULL
|| U_FAILURE(status
)){
3926 errln((UnicodeString
)"FAIL: Could not create " + id
);
3930 uprv_strcpy(oldId
,newId
);
3931 uprv_strcpy(oldAbbrId
, newAbbrId
);
3937 #define TEST_TRANSLIT_ID(id, cls) { \
3938 UErrorCode ec = U_ZERO_ERROR; \
3939 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
3940 if (U_FAILURE(ec)) { \
3941 errln("FAIL: Couldn't create " id); \
3943 if (t->getDynamicClassID() != cls::getStaticClassID()) { \
3944 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
3946 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
3951 #define TEST_TRANSLIT_RULE(rule, cls) { \
3952 UErrorCode ec = U_ZERO_ERROR; \
3954 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
3955 if (U_FAILURE(ec)) { \
3956 errln("FAIL: Couldn't create " rule); \
3958 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
3959 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
3961 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
3966 void TransliteratorTest::TestBoilerplate() {
3967 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
);
3968 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
);
3969 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
);
3970 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
);
3971 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
);
3972 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
);
3973 TEST_TRANSLIT_ID("Null", NullTransliterator
);
3974 TEST_TRANSLIT_ID("Remove", RemoveTransliterator
);
3975 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
);
3976 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
);
3977 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
);
3978 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
);
3979 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
);
3982 void TransliteratorTest::TestAlternateSyntax() {
3987 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
3990 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
3991 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
3992 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3995 static const char* BEGIN_END_RULES
[] = {
4009 "", // test case commented out below, this is here to keep from messing up the indexes
4018 "", // test case commented out below, this is here to keep from messing up the indexes
4027 "", // test case commented out below, this is here to keep from messing up the indexes
4046 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4047 "$delim = [\\-$ws];"
4048 "$ws $delim* > ' ';"
4049 "'-' $delim* > '-';",
4053 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4054 "$delim = [\\-$ws];"
4055 "$ws $delim* > ' ';"
4056 "'-' $delim* > '-';",
4059 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4060 "$delim = [\\-$ws];"
4061 "$ws $delim* > ' ';"
4062 "'-' $delim* > '-';"
4066 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4067 "$delim = [\\-$ws];"
4069 "$ws $delim* > ' ';"
4070 "'-' $delim* > '-';",
4075 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4076 "$delim = [\\-$ws];"
4078 "$ws $delim* > ' ';"
4079 "'-' $delim* > '-';",
4081 "", // test case commented out below, this is here to keep from messing up the indexes
4085 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4086 "$delim = [\\-$ws];"
4088 "$ws $delim* > ' ';"
4089 "'-' $delim* > '-';"
4092 "", // test case commented out below, this is here to keep from messing up the indexes
4096 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4097 "$delim = [\\-$ws];"
4100 "$ws $delim* > ' ';"
4101 "'-' $delim* > '-';"
4104 "$ab { ' ' } $ab > '-';"
4111 "", // test case commented out below, this is here to keep from messing up the indexes
4114 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4115 "$delim = [\\-$ws];"
4118 "$ws $delim* > ' ';"
4119 "'-' $delim* > '-';"
4121 "$ab { ' ' } $ab > '-';"
4137 "", // test case commented out below, this is here to keep from messing up the indexes
4158 "", // test case commented out below, this is here to keep from messing up the indexes
4168 static const int32_t BEGIN_END_RULES_length
= (int32_t)(sizeof(BEGIN_END_RULES
) / sizeof(BEGIN_END_RULES
[0]));
4171 (This entire test is commented out below and will need some heavy revision when we re-add
4172 the ::BEGIN/::END stuff)
4173 static const char* BOGUS_BEGIN_END_RULES[] = {
4192 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4195 static const char* BEGIN_END_TEST_CASES
[] = {
4196 // rules input expected output
4197 BEGIN_END_RULES
[0], "abc ababc aba", "xy zbc z",
4198 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4199 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4200 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4201 BEGIN_END_RULES
[4], "abc ababc aba", "xy abxy z",
4202 BEGIN_END_RULES
[5], "abccabaacababcbc", "PXAARXQBR",
4204 BEGIN_END_RULES
[6], "e e - e---e- e", "e e e-e-e",
4205 BEGIN_END_RULES
[7], "e e - e---e- e", "e e e-e-e",
4206 BEGIN_END_RULES
[8], "e e - e---e- e", "e e e-e-e",
4207 BEGIN_END_RULES
[9], "e e - e---e- e", "e e e-e-e",
4208 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4209 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4210 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4211 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4212 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4213 BEGIN_END_RULES
[13], "e e - e---e- e", "e e e-e-e",
4214 BEGIN_END_RULES
[13], "a a a a", "a%a%a%a",
4215 BEGIN_END_RULES
[13], "a a-b c b a", "a%a-b cb-a",
4217 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4218 BEGIN_END_RULES
[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4219 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4220 BEGIN_END_RULES
[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4222 static const int32_t BEGIN_END_TEST_CASES_length
= (int32_t)(sizeof(BEGIN_END_TEST_CASES
) / sizeof(BEGIN_END_TEST_CASES
[0]));
4224 void TransliteratorTest::TestBeginEnd() {
4225 // run through the list of test cases above
4227 for (i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4228 expect((UnicodeString
)"Test case #" + (i
/ 3),
4229 UnicodeString(BEGIN_END_TEST_CASES
[i
]),
4230 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1]),
4231 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2]));
4234 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4235 UParseError parseError
;
4236 UErrorCode status
= U_ZERO_ERROR
;
4237 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4238 UTRANS_REVERSE
, parseError
, status
);
4239 if (reversed
== 0 || U_FAILURE(status
)) {
4240 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4242 expect(*reversed
, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4246 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4247 // that all of them cause errors
4249 (commented out until we have the real ::BEGIN/::END stuff in place
4250 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4251 UParseError parseError;
4252 UErrorCode status = U_ZERO_ERROR;
4253 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4254 UTRANS_FORWARD, parseError, status);
4255 if (!U_FAILURE(status)) {
4257 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4263 void TransliteratorTest::TestBeginEndToRules() {
4264 // run through the same list of test cases we used above, but this time, instead of just
4265 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4266 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4267 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4268 // to (i.e., does the same thing as) the original rule set
4269 for (int32_t i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4270 UParseError parseError
;
4271 UErrorCode status
= U_ZERO_ERROR
;
4272 Transliterator
* t
= Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES
[i
]),
4273 UTRANS_FORWARD
, parseError
, status
);
4274 if (U_FAILURE(status
)) {
4275 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError
, status
);
4277 UnicodeString rules
;
4278 t
->toRules(rules
, TRUE
);
4279 Transliterator
* t2
= Transliterator::createFromRules((UnicodeString
)"Test case #" + (i
/ 3), rules
,
4280 UTRANS_FORWARD
, parseError
, status
);
4281 if (U_FAILURE(status
)) {
4282 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4283 parseError
, status
);
4287 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1]),
4288 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2]));
4295 // do the same thing for the reversible test case
4296 UParseError parseError
;
4297 UErrorCode status
= U_ZERO_ERROR
;
4298 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4299 UTRANS_REVERSE
, parseError
, status
);
4300 if (U_FAILURE(status
)) {
4301 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4303 UnicodeString rules
;
4304 reversed
->toRules(rules
, FALSE
);
4305 Transliterator
* reversed2
= Transliterator::createFromRules("Reversed", rules
, UTRANS_FORWARD
,
4306 parseError
, status
);
4307 if (U_FAILURE(status
)) {
4308 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4309 parseError
, status
);
4313 UnicodeString("xy XY XYZ yz YZ"),
4314 UnicodeString("xy abc xaba yz aba"));
4321 void TransliteratorTest::TestRegisterAlias() {
4322 UnicodeString
longID("Lower;[aeiou]Upper");
4323 UnicodeString
shortID("Any-CapVowels");
4324 UnicodeString
reallyShortID("CapVowels");
4326 Transliterator::registerAlias(shortID
, longID
);
4328 UErrorCode err
= U_ZERO_ERROR
;
4329 Transliterator
* t1
= Transliterator::createInstance(longID
, UTRANS_FORWARD
, err
);
4330 if (U_FAILURE(err
)) {
4331 errln("Failed to instantiate transliterator with long ID");
4332 Transliterator::unregister(shortID
);
4335 Transliterator
* t2
= Transliterator::createInstance(reallyShortID
, UTRANS_FORWARD
, err
);
4336 if (U_FAILURE(err
)) {
4337 errln("Failed to instantiate transliterator with short ID");
4339 Transliterator::unregister(shortID
);
4343 if (t1
->getID() != longID
)
4344 errln("Transliterator instantiated with long ID doesn't have long ID");
4345 if (t2
->getID() != reallyShortID
)
4346 errln("Transliterator instantiated with short ID doesn't have short ID");
4348 UnicodeString rules1
;
4349 UnicodeString rules2
;
4351 t1
->toRules(rules1
, TRUE
);
4352 t2
->toRules(rules2
, TRUE
);
4353 if (rules1
!= rules2
)
4354 errln("Alias transliterators aren't the same");
4358 Transliterator::unregister(shortID
);
4360 t1
= Transliterator::createInstance(shortID
, UTRANS_FORWARD
, err
);
4361 if (U_SUCCESS(err
)) {
4362 errln("Instantiation with short ID succeeded after short ID was unregistered");
4366 // try the same thing again, but this time with something other than
4367 // an instance of CompoundTransliterator
4368 UnicodeString
realID("Latin-Greek");
4369 UnicodeString
fakeID("Latin-dlgkjdflkjdl");
4370 Transliterator::registerAlias(fakeID
, realID
);
4373 t1
= Transliterator::createInstance(realID
, UTRANS_FORWARD
, err
);
4374 if (U_FAILURE(err
)) {
4375 errln("Failed to instantiate transliterator with real ID");
4376 Transliterator::unregister(realID
);
4379 t2
= Transliterator::createInstance(fakeID
, UTRANS_FORWARD
, err
);
4380 if (U_FAILURE(err
)) {
4381 errln("Failed to instantiate transliterator with fake ID");
4383 Transliterator::unregister(realID
);
4387 t1
->toRules(rules1
, TRUE
);
4388 t2
->toRules(rules2
, TRUE
);
4389 if (rules1
!= rules2
)
4390 errln("Alias transliterators aren't the same");
4394 Transliterator::unregister(fakeID
);
4397 //======================================================================
4399 //======================================================================
4400 void TransliteratorTest::expectT(const UnicodeString
& id
,
4401 const UnicodeString
& source
,
4402 const UnicodeString
& expectedResult
) {
4403 UErrorCode ec
= U_ZERO_ERROR
;
4405 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
4406 if (U_FAILURE(ec
)) {
4407 errln((UnicodeString
)"FAIL: Could not create " + id
);
4411 expect(*t
, source
, expectedResult
);
4415 void TransliteratorTest::reportParseError(const UnicodeString
& message
,
4416 const UParseError
& parseError
,
4417 const UErrorCode
& status
) {
4419 /*", parse error " + parseError.code +*/
4420 ", line " + parseError
.line
+
4421 ", offset " + parseError
.offset
+
4422 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
4423 ", post-context " + prettify(parseError
.postContext
,TRUE
) +
4424 ", Error: " + u_errorName(status
));
4427 void TransliteratorTest::expect(const UnicodeString
& rules
,
4428 const UnicodeString
& source
,
4429 const UnicodeString
& expectedResult
,
4430 UTransPosition
*pos
) {
4431 expect("<ID>", rules
, source
, expectedResult
, pos
);
4434 void TransliteratorTest::expect(const UnicodeString
& id
,
4435 const UnicodeString
& rules
,
4436 const UnicodeString
& source
,
4437 const UnicodeString
& expectedResult
,
4438 UTransPosition
*pos
) {
4439 UErrorCode status
= U_ZERO_ERROR
;
4440 UParseError parseError
;
4441 Transliterator
* t
= Transliterator::createFromRules(id
, rules
, UTRANS_FORWARD
, parseError
, status
);
4442 if (U_FAILURE(status
)) {
4443 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules
, parseError
, status
);
4445 expect(*t
, source
, expectedResult
, pos
);
4450 void TransliteratorTest::expect(const Transliterator
& t
,
4451 const UnicodeString
& source
,
4452 const UnicodeString
& expectedResult
,
4453 const Transliterator
& reverseTransliterator
) {
4454 expect(t
, source
, expectedResult
);
4455 expect(reverseTransliterator
, expectedResult
, source
);
4458 void TransliteratorTest::expect(const Transliterator
& t
,
4459 const UnicodeString
& source
,
4460 const UnicodeString
& expectedResult
,
4461 UTransPosition
*pos
) {
4463 UnicodeString
result(source
);
4464 t
.transliterate(result
);
4465 expectAux(t
.getID() + ":String", source
, result
, expectedResult
);
4467 UTransPosition index
={0, 0, 0, 0};
4472 UnicodeString
rsource(source
);
4474 t
.transliterate(rsource
);
4476 // Do it all at once -- below we do it incrementally
4477 t
.finishTransliteration(rsource
, *pos
);
4479 expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
);
4481 // Test keyboard (incremental) transliteration -- this result
4482 // must be the same after we finalize (see below).
4487 formatInput(log
, rsource
, index
);
4489 UErrorCode status
= U_ZERO_ERROR
;
4490 t
.transliterate(rsource
, index
, status
);
4491 formatInput(log
, rsource
, index
);
4493 for (int32_t i
=0; i
<source
.length(); ++i
) {
4497 log
.append(source
.charAt(i
)).append(" -> ");
4498 UErrorCode status
= U_ZERO_ERROR
;
4499 t
.transliterate(rsource
, index
, source
.charAt(i
), status
);
4500 formatInput(log
, rsource
, index
);
4504 // As a final step in keyboard transliteration, we must call
4505 // transliterate to finish off any pending partial matches that
4506 // were waiting for more input.
4507 t
.finishTransliteration(rsource
, index
);
4508 log
.append(" => ").append(rsource
);
4510 expectAux(t
.getID() + ":Keyboard", log
,
4511 rsource
== expectedResult
,
4517 * @param appendTo result is appended to this param.
4518 * @param input the string being transliterated
4519 * @param pos the index struct
4521 UnicodeString
& TransliteratorTest::formatInput(UnicodeString
&appendTo
,
4522 const UnicodeString
& input
,
4523 const UTransPosition
& pos
) {
4524 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4525 // the {} indicate the context start and limit, and the ||
4526 // indicate the start and limit.
4527 if (0 <= pos
.contextStart
&&
4528 pos
.contextStart
<= pos
.start
&&
4529 pos
.start
<= pos
.limit
&&
4530 pos
.limit
<= pos
.contextLimit
&&
4531 pos
.contextLimit
<= input
.length()) {
4533 UnicodeString a
, b
, c
, d
, e
;
4534 input
.extractBetween(0, pos
.contextStart
, a
);
4535 input
.extractBetween(pos
.contextStart
, pos
.start
, b
);
4536 input
.extractBetween(pos
.start
, pos
.limit
, c
);
4537 input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
);
4538 input
.extractBetween(pos
.contextLimit
, input
.length(), e
);
4539 appendTo
.append(a
).append((UChar
)123/*{*/).append(b
).
4540 append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
).
4541 append((UChar
)125/*}*/).append(e
);
4543 appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" +
4544 pos
.contextStart
+ ", s=" + pos
.start
+ ", l=" +
4545 pos
.limit
+ ", cl=" + pos
.contextLimit
+ "} on " +
4551 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4552 const UnicodeString
& source
,
4553 const UnicodeString
& result
,
4554 const UnicodeString
& expectedResult
) {
4555 expectAux(tag
, source
+ " -> " + result
,
4556 result
== expectedResult
,
4560 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4561 const UnicodeString
& summary
, UBool pass
,
4562 const UnicodeString
& expectedResult
) {
4564 logln(UnicodeString("(")+tag
+") " + prettify(summary
));
4566 errln(UnicodeString("FAIL: (")+tag
+") "
4568 + ", expected " + prettify(expectedResult
));
4572 #endif /* #if !UCONFIG_NO_TRANSLITERATION */