2 **********************************************************************
3 * Copyright (C) 1999-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
43 /***********************************************************************
45 HOW TO USE THIS TEST FILE
47 How I developed on two platforms
48 without losing (too much of) my mind
51 1. Add new tests by copying/pasting/changing existing tests. On Java,
52 any public void method named Test...() taking no parameters becomes
53 a test. On C++, you need to modify the header and add a line to
54 the runIndexedTest() dispatch method.
56 2. Make liberal use of the expect() method; it is your friend.
58 3. The tests in this file exactly match those in a sister file on the
59 other side. The two files are:
61 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
62 icu4c: source/test/intltest/transtst.cpp
64 ==> THIS IS THE IMPORTANT PART <==
66 When you add a test in this file, add it in TransliteratorTest.java
67 too. Give it the same name and put it in the same relative place.
68 This makes maintenance a lot simpler for any poor soul who ends up
69 trying to synchronize the tests between icu4j and icu4c.
71 4. If you MUST enter a test that is NOT paralleled in the sister file,
72 then add it in the special non-mirrored section. These are
81 Make sure you document the reason the test is here and not there.
86 ***********************************************************************/
88 // Define character constants thusly to be EBCDIC-friendly
// Character constants spelled as numeric UChar values instead of character
// literals; the trailing comment on each entry shows the ASCII glyph it stands
// for.
90 LEFT_BRACE
=((UChar
)0x007B), /*{*/
91 PIPE
=((UChar
)0x007C), /*|*/
92 ZERO
=((UChar
)0x0030), /*0*/
93 UPPER_A
=((UChar
)0x0041) /*A*/
// Constructor: initializes the members DESERET_DEE and DESERET_dee to the
// supplementary code points 0x10414 and 0x1043C.
96 TransliteratorTest::TransliteratorTest()
97 : DESERET_DEE((UChar32
)0x10414),
98 DESERET_dee((UChar32
)0x1043C)
// Destructor; the body is empty.
102 TransliteratorTest::~TransliteratorTest() {}
// Test dispatcher. Each TESTCASE(n, method) entry ties index n to one test
// method; an index without an entry reaches `default`, which sets `name` to
// the empty string.
// NOTE(review): TESTCASE is a macro defined outside this file; presumably it
// reports the method name through `name` and runs the method when `exec` is
// true -- confirm against its definition.
105 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
,
106 const char* &name
, char* /*par*/) {
108 TESTCASE(0,TestInstantiation
);
109 TESTCASE(1,TestSimpleRules
);
110 TESTCASE(2,TestRuleBasedInverse
);
111 TESTCASE(3,TestKeyboard
);
112 TESTCASE(4,TestKeyboard2
);
113 TESTCASE(5,TestKeyboard3
);
114 TESTCASE(6,TestArabic
);
115 TESTCASE(7,TestCompoundKana
);
116 TESTCASE(8,TestCompoundHex
);
117 TESTCASE(9,TestFiltering
);
118 TESTCASE(10,TestInlineSet
);
119 TESTCASE(11,TestPatternQuoting
);
120 TESTCASE(12,TestJ277
);
121 TESTCASE(13,TestJ243
);
122 TESTCASE(14,TestJ329
);
123 TESTCASE(15,TestSegments
);
124 TESTCASE(16,TestCursorOffset
);
125 TESTCASE(17,TestArbitraryVariableValues
);
126 TESTCASE(18,TestPositionHandling
);
127 TESTCASE(19,TestHiraganaKatakana
);
128 TESTCASE(20,TestCopyJ476
);
129 TESTCASE(21,TestAnchors
);
130 TESTCASE(22,TestInterIndic
);
131 TESTCASE(23,TestFilterIDs
);
132 TESTCASE(24,TestCaseMap
);
133 TESTCASE(25,TestNameMap
);
134 TESTCASE(26,TestLiberalizedID
);
135 TESTCASE(27,TestCreateInstance
);
136 TESTCASE(28,TestNormalizationTransliterator
);
137 TESTCASE(29,TestCompoundRBT
);
138 TESTCASE(30,TestCompoundFilter
);
139 TESTCASE(31,TestRemove
);
140 TESTCASE(32,TestToRules
);
141 TESTCASE(33,TestContext
);
142 TESTCASE(34,TestSupplemental
);
143 TESTCASE(35,TestQuantifier
);
144 TESTCASE(36,TestSTV
);
145 TESTCASE(37,TestCompoundInverse
);
146 TESTCASE(38,TestNFDChainRBT
);
147 TESTCASE(39,TestNullInverse
);
148 TESTCASE(40,TestAliasInverseID
);
149 TESTCASE(41,TestCompoundInverseID
);
150 TESTCASE(42,TestUndefinedVariable
);
151 TESTCASE(43,TestEmptyContext
);
152 TESTCASE(44,TestCompoundFilterID
);
153 TESTCASE(45,TestPropertySet
);
154 TESTCASE(46,TestNewEngine
);
155 TESTCASE(47,TestQuantifiedSegment
);
156 TESTCASE(48,TestDevanagariLatinRT
);
157 TESTCASE(49,TestTeluguLatinRT
);
158 TESTCASE(50,TestCompoundLatinRT
);
159 TESTCASE(51,TestSanskritLatinRT
);
160 TESTCASE(52,TestLocaleInstantiation
);
161 TESTCASE(53,TestTitleAccents
);
162 TESTCASE(54,TestLocaleResource
);
163 TESTCASE(55,TestParseError
);
164 TESTCASE(56,TestOutputSet
);
165 TESTCASE(57,TestVariableRange
);
166 TESTCASE(58,TestInvalidPostContext
);
167 TESTCASE(59,TestIDForms
);
168 TESTCASE(60,TestToRulesMark
);
169 TESTCASE(61,TestEscape
);
170 TESTCASE(62,TestAnchorMasking
);
171 TESTCASE(63,TestDisplayName
);
172 TESTCASE(64,TestSpecialCases
);
173 TESTCASE(65,TestIncrementalProgress
);
174 TESTCASE(66,TestSurrogateCasing
);
175 TESTCASE(67,TestFunction
);
176 TESTCASE(68,TestInvalidBackRef
);
177 TESTCASE(69,TestMulticharStringSet
);
178 TESTCASE(70,TestUserFunction
);
179 TESTCASE(71,TestAnyX
);
180 TESTCASE(72,TestSourceTargetSet
);
181 TESTCASE(73,TestGurmukhiDevanagari
);
182 TESTCASE(74,TestRuleWhitespace
);
183 TESTCASE(75,TestAllCodepoints
);
184 TESTCASE(76,TestBoilerplate
);
185 TESTCASE(77,TestAlternateSyntax
);
186 default: name
= ""; break;
// Version constant {3,1,0,0}; TestInstantiation passes it to
// isICUVersionAtLeast() when an available ID contains "Thai".
190 static const UVersionInfo ICU_31
= {3,1,0,0};
192 * Make sure every system transliterator can be instantiated.
194 * ALSO test that the result of toRules() for each rule is a valid
195 * rule. Do this here so we don't have to have another test that
196 * instantiates everything as well.
// Walks the IDs returned by Transliterator::getAvailableIDs(), cross-checks
// each one against getAvailableID(i), instantiates it with createInstance(),
// and feeds the result of toRules() back through createFromRules(). After the
// loop it checks that the enumeration is exhausted and that a deliberately
// invalid ID does not yield a transliterator.
198 void TransliteratorTest::TestInstantiation() {
199 UErrorCode ec
= U_ZERO_ERROR
;
200 StringEnumeration
* avail
= Transliterator::getAvailableIDs(ec
);
201 assertSuccess("getAvailableIDs()", ec
);
202 assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
);
203 int32_t n
= Transliterator::countAvailableIDs();
204 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
205 avail
->count(ec
) == n
);
206 assertSuccess("count()", ec
);
208 for (int32_t i
=0; i
<n
; ++i
) {
// NOTE(review): snext() can return NULL (the end-of-list assertion after the
// loop depends on it), so dereferencing the result here happens before the
// (&id)!=NULL check below and is undefined behavior if NULL ever comes back.
// Holding the pointer and testing it before binding the reference would make
// the check meaningful.
209 const UnicodeString
& id
= *avail
->snext(ec
);
210 if (!assertSuccess("snext()", ec
) ||
211 !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) {
214 UnicodeString id2
= Transliterator::getAvailableID(i
);
// NOTE(review): the length test is on `id` (from snext()), but the message
// names getAvailableID(), whose result is `id2` -- confirm which was meant.
215 if (id
.length() < 1) {
216 errln(UnicodeString("FAIL: getAvailableID(") +
217 i
+ ") returned empty string");
221 errln(UnicodeString("FAIL: getAvailableID(") +
222 i
+ ") != getAvailableIDs().snext()");
225 if(id2
.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31
)){
228 UParseError parseError
;
229 UErrorCode status
= U_ZERO_ERROR
;
230 Transliterator
* t
= Transliterator::createInstance(id
,
231 UTRANS_FORWARD
, parseError
,status
);
233 Transliterator::getDisplayName(id
, name
);
235 errln(UnicodeString("FAIL: Couldn't create ") + id
+
236 /*", parse error " + parseError.code +*/
237 ", line " + parseError
.line
+
238 ", offset " + parseError
.offset
+
239 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
240 ", post-context " +prettify(parseError
.postContext
,TRUE
) +
241 ", Error: " + u_errorName(status
));
242 // When createInstance fails, it deletes the failing
243 // entry from the available ID list. We detect this
244 // here by looking for a change in countAvailableIDs.
245 int32_t nn
= Transliterator::countAvailableIDs();
248 --i
; // Compensate for deleted entry
251 logln(UnicodeString("OK: ") + name
+ " (" + id
+ ")");
// Round-trip: the rules emitted by toRules() must themselves parse.
255 t
->toRules(rules
, TRUE
);
256 Transliterator
*u
= Transliterator::createFromRules("x",
257 rules
, UTRANS_FORWARD
, parseError
,status
);
259 errln(UnicodeString("FAIL: ") + id
+
260 ".createFromRules() => bad rules" +
261 /*", parse error " + parseError.code +*/
262 ", line " + parseError
.line
+
263 ", offset " + parseError
.offset
+
264 ", context " + prettify(parseError
.preContext
, TRUE
) +
265 ", rules: " + prettify(rules
, TRUE
));
272 assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
);
273 assertSuccess("snext()", ec
);
276 // Now test the failure path
277 UParseError parseError
;
278 UErrorCode status
= U_ZERO_ERROR
;
279 UnicodeString
id("<Not a valid Transliterator ID>");
280 Transliterator
* t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
282 errln("FAIL: " + id
+ " returned a transliterator");
285 logln("OK: Bogus ID handled properly");
289 void TransliteratorTest::TestSimpleRules(void) {
290 /* Example: rules 1. ab>x|y
293 * []|eabcd start - no match, copy e to transliterated buffer
294 * [e]|abcd match rule 1 - copy output & adjust cursor
295 * [ex|y]cd match rule 2 - copy output & adjust cursor
296 * [exz]|d no match, copy d to transliterated buffer
299 expect(UnicodeString("ab>x|y;", "") +
303 /* Another set of rules:
315 expect(UnicodeString("ab>x|yzacw;") +
323 UErrorCode status
= U_ZERO_ERROR
;
324 RuleBasedTransliterator
t(
326 UnicodeString("$dummy=").append((UChar
)0xE100) +
328 "$vowel=[aeiouAEIOU];"
330 "$vowel } $lu > '!';"
336 if (U_FAILURE(status
)) {
337 errln("FAIL: RBT constructor failed");
340 expect(t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
344 * Test inline set syntax and set variable syntax.
// Runs expect() on rule strings that use inline [...] sets, first directly in
// rules and then through set-valued variables ($alphanumeric, $special).
346 void TransliteratorTest::TestInlineSet(void) {
347 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
348 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
350 expect(UnicodeString(
353 "$alphanumeric = [$digit $alpha];" // ***
354 "$special = [^$alphanumeric];" // ***
355 "$alphanumeric > '-';"
356 "$special > '*';", ""),
358 "thx-1138", "---*----");
362 * Create some inverses and confirm that they work. We have to be
363 * careful how we do this, since the inverses will not be true
364 * inverses -- we can't throw any random string at the composition
365 * of the transliterators and expect the identity function. F x
366 * F' != I. However, if we are careful about the input, we will
367 * get the expected results.
// Builds a forward and a reverse RuleBasedTransliterator from the same RULES
// string and checks every DATA pair in both directions: fwd must map DATA[i]
// to DATA[i+1], and rev must map DATA[i+1] back to DATA[i].
369 void TransliteratorTest::TestRuleBasedInverse(void) {
370 UnicodeString RULES
=
371 UnicodeString("abc>zyx;") +
389 const char* DATA
[] = {
390 // Careful here -- random strings will not work. If we keep
391 // the left side to the domain and the right side to the range
392 // we will be okay though (left, abc; right xyz).
394 "abcacab", "zyxxxyy",
398 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
400 UErrorCode status
= U_ZERO_ERROR
;
401 RuleBasedTransliterator
fwd("<ID>", RULES
, status
);
402 RuleBasedTransliterator
rev("<ID>", RULES
,
403 UTRANS_REVERSE
, status
);
404 if (U_FAILURE(status
)) {
405 errln("FAIL: RBT constructor failed");
// DATA is laid out as (input, output) pairs, hence the stride of 2.
408 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
409 expect(fwd
, DATA
[i
], DATA
[i
+1]);
410 expect(rev
, DATA
[i
+1], DATA
[i
]);
415 * Basic test of keyboard.
// Builds a RuleBasedTransliterator from the rules below and passes the DATA
// table, together with its element count, to keyboardAux().
417 void TransliteratorTest::TestKeyboard(void) {
418 UErrorCode status
= U_ZERO_ERROR
;
419 RuleBasedTransliterator
t("<ID>",
420 UnicodeString("psch>Y;")
425 if (U_FAILURE(status
)) {
426 errln("FAIL: RBT constructor failed");
429 const char* DATA
[] = {
437 0, "AycAY", // null means finishKeyboardTransliteration
440 keyboardAux(t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
444 * Basic test of keyboard with cursor.
// Same shape as TestKeyboard with a different rule set: builds the
// transliterator, then drives it with the DATA table through keyboardAux().
446 void TransliteratorTest::TestKeyboard2(void) {
447 UErrorCode status
= U_ZERO_ERROR
;
448 RuleBasedTransliterator
t("<ID>",
449 UnicodeString("ych>Y;")
454 if (U_FAILURE(status
)) {
455 errln("FAIL: RBT constructor failed");
458 const char* DATA
[] = {
462 "s", "Aps", // modified for rollback - "Ay",
463 "c", "Apsc", // modified for rollback - "Ayc",
466 "s", "AycAps", // modified for rollback - "AycAy",
467 "c", "AycApsc", // modified for rollback - "AycAyc",
469 0, "AycAY", // null means finishKeyboardTransliteration
472 keyboardAux(t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
476 * Test keyboard transliteration with back-replacement.
// Keyboard test for rules that rewrite earlier output (see the th>z / t>y
// description below); the DATA table is run through keyboardAux().
478 void TransliteratorTest::TestKeyboard3(void) {
479 // We want th>z but t>y. Furthermore, during keyboard
480 // transliteration we want t>y then yh>z if t, then h are
482 UnicodeString
RULES("t>|y;"
485 const char* DATA
[] = {
486 // Column 1: characters to add to buffer (as if typed)
487 // Column 2: expected appearance of buffer after
488 // keyboard xliteration.
491 "t", "abt", // modified for rollback - "aby",
493 "t", "abyct", // modified for rollback - "abycy",
495 0, "abycz", // null means finishKeyboardTransliteration
498 UErrorCode status
= U_ZERO_ERROR
;
499 RuleBasedTransliterator
t("<ID>", RULES
, status
);
500 if (U_FAILURE(status
)) {
501 errln("FAIL: RBT constructor failed");
504 keyboardAux(t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
// Shared driver for the keyboard tests. Steps through DATA two entries at a
// time with a single UTransPosition (initially all zero) carried across the
// whole run, and reports a failure unless the buffer equals DATA[i+1] and
// status is still a success code.
// NOTE(review): the lines that choose between transliterate() and
// finishTransliteration() are missing from this listing; going by the comments
// in the callers' tables, a 0 entry presumably selects the finish call --
// confirm against the full source.
507 void TransliteratorTest::keyboardAux(const Transliterator
& t
,
508 const char* DATA
[], int32_t DATA_length
) {
509 UErrorCode status
= U_ZERO_ERROR
;
510 UTransPosition index
={0, 0, 0, 0};
512 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
518 t
.transliterate(s
, index
, DATA
[i
], status
);
521 t
.finishTransliteration(s
, index
);
523 // Show the start index '{' and the cursor '|'
// The buffer is cut into three pieces at contextStart and start so the
// markers can be placed between them.
524 UnicodeString a
, b
, c
;
525 s
.extractBetween(0, index
.contextStart
, a
);
526 s
.extractBetween(index
.contextStart
, index
.start
, b
);
527 s
.extractBetween(index
.start
, s
.length(), c
);
529 append((UChar
)LEFT_BRACE
).
533 if (s
== DATA
[i
+1] && U_SUCCESS(status
)) {
536 errln(UnicodeString("FAIL: ") + log
+ ", expected " + DATA
[i
+1]);
// Disabled test: every visible line of the body is commented out (see the
// note on the first line of the body).
541 void TransliteratorTest::TestArabic(void) {
542 // Test disabled for 2.0 until new Arabic transliterator can be written.
544 // const char* DATA[] = {
545 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
546 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
547 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
548 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
549 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
550 // "\u062c\u0645\u064a\u0644\u0629",
554 // UChar ar_raw[] = {
555 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
556 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
557 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
558 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
559 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
560 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
562 // UnicodeString ar(ar_raw);
563 // UErrorCode status=U_ZERO_ERROR;
564 // UParseError parseError;
565 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
567 // errln("FAIL: createInstance failed");
570 // expect(*t, "Arabic", ar);
575 * Compose the Kana transliterator forward and reverse and try
576 * some strings that should come out unchanged.
// Creates the compound ID "Latin-Hiragana;Hiragana-Latin" and expects the
// input "aaaaa" to come back unchanged.
578 void TransliteratorTest::TestCompoundKana(void) {
579 UParseError parseError
;
580 UErrorCode status
= U_ZERO_ERROR
;
581 Transliterator
* t
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
);
583 errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
585 expect(*t
, "aaaaa", "aaaaa");
591 * Compose the hex transliterators forward and reverse.
// Creates Any-Hex (a) and Hex-Any (b), checks each on its own, then wraps them
// in CompoundTransliterators in both orders: {a, b} and {b, a}.
// NOTE(review): a, b, ab and ba are heap objects; the statements releasing
// them are not visible in this listing -- confirm they are deleted on every
// path.
593 void TransliteratorTest::TestCompoundHex(void) {
594 UParseError parseError
;
595 UErrorCode status
= U_ZERO_ERROR
;
596 Transliterator
* a
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
597 Transliterator
* b
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
);
598 Transliterator
* transab
[] = { a
, b
};
599 Transliterator
* transba
[] = { b
, a
};
600 if (a
== 0 || b
== 0) {
601 errln("FAIL: construction failed");
606 // Do some basic tests of a
607 expect(*a
, "01", UnicodeString("\\u0030\\u0031", ""));
608 // Do some basic tests of b
609 expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01");
611 Transliterator
* ab
= new CompoundTransliterator(transab
, 2);
612 UnicodeString
s("abcde", "");
// str is s after Any-Hex; the {b, a} compound is expected to leave it as is.
615 UnicodeString
str(s
);
616 a
->transliterate(str
);
617 Transliterator
* ba
= new CompoundTransliterator(transba
, 2);
618 expect(*ba
, str
, str
);
// Only the address of this variable is used: TestFilter::getDynamicClassID()
// returns it as the class ID.
626 int gTestFilterClassID
= 0;
628 * Used by TestFiltering().
// UnicodeFilter used by TestFiltering(): contains() accepts every code point
// except 0x0063 ('c').
630 class TestFilter
: public UnicodeFilter
{
631 virtual UnicodeFunctor
* clone() const {
632 return new TestFilter(*this);
// Accept everything but 'c'.
634 virtual UBool
contains(UChar32 c
) const {
635 return c
!= (UChar
)0x0063 /*c*/;
638 virtual UnicodeString
& toPattern(UnicodeString
& result
,
639 UBool
/*escapeUnprintable*/) const {
642 virtual UBool
matchesIndexValue(uint8_t /*v*/) const {
// Deliberately empty.
645 virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {}
// The class ID is the address of the file-level gTestFilterClassID.
647 UClassID
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; }
651 * Do some basic tests of filtering.
// Installs a TestFilter on an Any-Hex transliterator and transliterates
// "abcde"; the expected result has every letter hex-escaped except 'c', the
// one character the filter rejects.
653 void TransliteratorTest::TestFiltering(void) {
654 UParseError parseError
;
655 UErrorCode status
= U_ZERO_ERROR
;
656 Transliterator
* hex
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
658 errln("FAIL: createInstance(Any-Hex) failed");
661 hex
->adoptFilter(new TestFilter());
662 UnicodeString
s("abcde");
663 hex
->transliterate(s
);
664 UnicodeString
exp("\\u0061\\u0062c\\u0064\\u0065", "");
666 logln(UnicodeString("Ok: \"") + exp
+ "\"");
// NOTE(review): this FAIL message goes through logln(), while every other
// failure in this file is reported with errln(). If logln() does not mark the
// test as failed (presumably it only logs -- confirm), a wrong result here is
// never counted; this should most likely be errln().
668 logln(UnicodeString("FAIL: \"") + s
+ "\", wanted \"" + exp
+ "\"");
// Runs expect() on rule sets that use the ^ and $ anchors, both written
// directly in rules and placed inside a set variable ($s=[z$]).
676 void TransliteratorTest::TestAnchors(void) {
677 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
680 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
683 expect(UnicodeString("^ab > 01 ;"
691 expect(UnicodeString("$s = [z$] ;"
698 "abzababbabxzabxabx",
703 * Test pattern quoting and escape mechanisms.
// Data-driven test: DATA holds <rules>, <input>, <expected output> triples;
// each triple builds an RBT from the rules and runs expect() on the pair.
705 void TransliteratorTest::TestPatternQuoting(void) {
707 // Each item is <rules>, <input>, <expected output>
708 const UnicodeString DATA
[] = {
709 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
710 UnicodeString(UChar(0x4E01)),
// NOTE(review): the bound is the literal 3, so exactly one triple is run no
// matter how many DATA contains; the other data-driven tests in this file
// derive the bound from sizeof(DATA). Switch to a computed length if more
// triples are ever added.
714 for (int32_t i
=0; i
<3; i
+=3) {
715 logln(UnicodeString("Pattern: ") + prettify(DATA
[i
]));
716 UErrorCode status
= U_ZERO_ERROR
;
717 RuleBasedTransliterator
t("<ID>", DATA
[i
], status
);
718 if (U_FAILURE(status
)) {
719 errln("RBT constructor failed");
721 expect(t
, DATA
[i
+1], DATA
[i
+2]);
727 * Regression test for bugs found in Greek transliteration.
// Regression checks for Greek transliteration, in three parts: (1) the
// compound "Greek-Latin; NFD; [:M:]Remove; NFC" on two short Greek strings,
// (2) the same two strings through a small hand-written rule set applied in
// reverse, and (3) when formatting is compiled in, the month names from
// DateFormatSymbols, checking that an Upper+lower start stays Upper+lower
// after transliteration.
729 void TransliteratorTest::TestJ277(void) {
730 UErrorCode status
= U_ZERO_ERROR
;
731 UParseError parseError
;
732 Transliterator
*gl
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
);
734 errln("FAIL: createInstance(Greek-Latin) returned NULL");
739 UChar upsilon
= 0x3C5;
741 // UChar PHI = 0x3A6;
743 // UChar omega = 0x3C9;
744 // UChar omicron = 0x3BF;
745 // UChar epsilon = 0x3B5;
747 // sigma upsilon nu -> syn
749 syn
.append(sigma
).append(upsilon
).append(nu
);
750 expect(*gl
, syn
, "syn");
752 // sigma alpha upsilon nu -> saun
754 sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
);
755 expect(*gl
, sayn
, "saun");
757 // Again, using a smaller rule set
762 "$ypsilon = \\u03C5;"
763 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
766 "u <> $vowel { $ypsilon;"
770 RuleBasedTransliterator
mini("mini", rules
, UTRANS_REVERSE
, status
);
771 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
772 expect(mini
, syn
, "syn");
773 expect(mini
, sayn
, "saun");
775 #if !UCONFIG_NO_FORMATTING
776 // Transliterate the Greek locale data
778 DateFormatSymbols
syms(el
, status
);
// NOTE(review): this check follows the DateFormatSymbols constructor, yet the
// message still says "Transliterator constructor failed" -- misleading when
// diagnosing a failure.
// NOTE(review): both early returns in this function leave without any visible
// release of gl; the listing has gaps, so confirm gl is deleted on these
// paths.
779 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
781 const UnicodeString
* data
= syms
.getMonths(count
);
782 for (i
=0; i
<count
; ++i
) {
783 if (data
[i
].length() == 0) {
786 UnicodeString
out(data
[i
]);
787 gl
->transliterate(out
);
// Only names that start Upper+lower are held to the case-preservation check.
789 if (data
[i
].length() >= 2 && out
.length() >= 2 &&
790 u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) {
791 if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) {
796 logln(prettify(data
[i
] + " -> " + out
));
798 errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
));
807 * Prefix, suffix support in hex transliterators
// Checks that the default Hex-Any transliterator decodes the \u, \U and U+
// escape forms in one input string. The remainder of the visible body is a
// commented-out block for custom hex transliterators.
809 void TransliteratorTest::TestJ243(void) {
810 UErrorCode ec
= U_ZERO_ERROR
;
812 // Test default Hex-Any, which should handle
813 // \u, \U, u+, and U+
814 Transliterator
*hex
=
815 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
);
// NOTE(review): the label says "getInstance" although the call above is
// createInstance(); harmless, but confusing in a failure log.
816 if (assertSuccess("getInstance", ec
)) {
817 expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
821 // // Try a custom Hex-Unicode
822 // // \uXXXX and &#xXXXX;
823 // ec = U_ZERO_ERROR;
824 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
825 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
826 // "abcd5fx0123");
827 // // Try custom Any-Hex (default is tested elsewhere)
828 // ec = U_ZERO_ERROR;
829 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
830 // expect(hex3, "012", "012");
834 * Parsers need better syntax error messages.
// For each DATA row, constructs an RBT from `rule` and passes when the
// construction failed exactly if `containsErrors` is TRUE. The logged
// description carries the error name plus line, offset and pre-context from
// the UParseError.
836 void TransliteratorTest::TestJ329(void) {
838 struct { UBool containsErrors
; const char* rule
; } DATA
[] = {
839 { FALSE
, "a > b; c > d" },
840 { TRUE
, "a > b; no operator; c > d" },
842 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
844 for (int32_t i
=0; i
<DATA_length
; ++i
) {
845 UErrorCode status
= U_ZERO_ERROR
;
846 UParseError parseError
;
847 RuleBasedTransliterator
rbt("<ID>",
853 UBool gotError
= U_FAILURE(status
);
854 UnicodeString
desc(DATA
[i
].rule
);
855 desc
.append(gotError
? " -> error" : " -> no error");
857 desc
= desc
+ ", ParseError code=" + u_errorName(status
) +
858 " line=" + parseError
.line
+
859 " offset=" + parseError
.offset
+
860 " context=" + parseError
.preContext
;
862 if (gotError
== DATA
[i
].containsErrors
) {
863 logln(UnicodeString("Ok: ") + desc
);
865 errln(UnicodeString("FAIL: ") + desc
);
871 * Test segments and segment references.
// Data-driven test of segments "( )" and back-references ($1, $2, ...): each
// <rules>, <input>, <expected output> triple builds an RBT and runs expect().
873 void TransliteratorTest::TestSegments(void) {
875 // Each item is <rules>, <input>, <expected output>
876 UnicodeString DATA
[] = {
877 "([a-z]) '.' ([0-9]) > $2 '-' $1",
882 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
886 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
// Stride of 3 matches the triple layout of DATA.
888 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
889 logln("Pattern: " + prettify(DATA
[i
]));
890 UErrorCode status
= U_ZERO_ERROR
;
891 RuleBasedTransliterator
t("ID", DATA
[i
], status
);
892 if (U_FAILURE(status
)) {
893 errln("FAIL: RBT constructor");
895 expect(t
, DATA
[i
+1], DATA
[i
+2]);
901 * Test cursor positioning outside of the key
// Data-driven test of rules whose output uses the cursor marker '|' together
// with '@' placeholders: each <rules>, <input>, <expected output> triple
// builds an RBT and runs expect().
903 void TransliteratorTest::TestCursorOffset(void) {
905 // Each item is <rules>, <input>, <expected output>
906 UnicodeString DATA
[] = {
907 "pre {alpha} post > | @ ALPHA ;"
909 "pre {beta} post > BETA @@ | ;"
912 "prealphapost prebetapost",
914 "prbetaxyz preBETApost",
916 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
// Stride of 3 matches the triple layout of DATA.
918 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
919 logln("Pattern: " + prettify(DATA
[i
]));
920 UErrorCode status
= U_ZERO_ERROR
;
921 RuleBasedTransliterator
t("<ID>", DATA
[i
], status
);
922 if (U_FAILURE(status
)) {
923 errln("FAIL: RBT constructor");
925 expect(t
, DATA
[i
+1], DATA
[i
+2]);
931 * Test zero length and > 1 char length variable values. Test
932 * use of variable refs in UnicodeSets.
// Data-driven test: each <rules>, <input>, <expected output> triple in DATA
// builds an RBT and runs expect(). The table entries themselves are not
// visible in this listing.
934 void TransliteratorTest::TestArbitraryVariableValues(void) {
936 // Each item is <rules>, <input>, <expected output>
937 UnicodeString DATA
[] = {
955 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
// Stride of 3 matches the triple layout of DATA.
957 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
958 logln("Pattern: " + prettify(DATA
[i
]));
959 UErrorCode status
= U_ZERO_ERROR
;
960 RuleBasedTransliterator
t("<ID>", DATA
[i
], status
);
961 if (U_FAILURE(status
)) {
962 errln("FAIL: RBT constructor");
964 expect(t
, DATA
[i
+1], DATA
[i
+2]);
970 * Confirm that the contextStart, contextLimit, start, and limit
971 * behave correctly. J474.
// For each case i: builds an RBT, seeds a position from POS[4*i .. 4*i+3]
// (contextStart, contextLimit, start, limit), transliterates DATA[3*i+1] with
// that position, finishes the transliteration, and hands the result to
// expectAux().
// NOTE(review): t is allocated with new; the statements releasing it (also on
// the two failure paths) are not visible in this listing -- confirm.
973 void TransliteratorTest::TestPositionHandling(void) {
975 // Each item is <rules>, <input>, <expected output>
976 const char* DATA
[] = {
977 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
978 "xtat txtb", // pos 0,9,0,9
981 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
982 "xtat txtb", // pos 2,9,3,8
985 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
986 "xtat txtb", // pos 3,8,3,8
990 // Array of 4n positions -- these go with the DATA array
991 // They are: contextStart, contextLimit, start, limit
// n is the number of cases: DATA holds three strings per case.
998 int32_t n
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0])) / 3;
999 for (int32_t i
=0; i
<n
; i
++) {
1000 UErrorCode status
= U_ZERO_ERROR
;
1001 Transliterator
*t
= new RuleBasedTransliterator("<ID>",
1003 if (U_FAILURE(status
)) {
1005 errln("FAIL: RBT constructor");
1009 pos
.contextStart
= POS
[4*i
];
1010 pos
.contextLimit
= POS
[4*i
+1];
1011 pos
.start
= POS
[4*i
+2];
1012 pos
.limit
= POS
[4*i
+3];
1013 UnicodeString
rsource(DATA
[3*i
+1]);
1014 t
->transliterate(rsource
, pos
, status
);
1015 if (U_FAILURE(status
)) {
1017 errln("FAIL: transliterate");
1020 t
->finishTransliteration(rsource
, pos
);
1021 expectAux(DATA
[3*i
],
1030 * Test the Hiragana-Katakana transliterator.
// Creates the Hiragana-Katakana and Katakana-Hiragana transliterators and
// runs them over DATA, which holds (direction tag, Hiragana, Katakana)
// triples.
// NOTE(review): the switch expression is missing from this listing; the case
// labels 0x68 'h', 0x6B 'k' and 0x62 'b' suggest it switches on the first
// character of the direction tag -- confirm.
1032 void TransliteratorTest::TestHiraganaKatakana(void) {
1033 UParseError parseError
;
1034 UErrorCode status
= U_ZERO_ERROR
;
1035 Transliterator
* hk
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
);
1036 Transliterator
* kh
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
);
1037 if (hk
== 0 || kh
== 0) {
1038 errln("FAIL: createInstance failed");
1044 // Array of 3n items
1045 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1046 const char* DATA
[] = {
1048 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1049 "\\u30A2\\u30F8\\u30F2\\u30B0",
1052 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1053 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1055 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1057 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// The table stores \u escapes as plain chars; unescape them first.
1058 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
1059 UnicodeString k
= CharsToUnicodeString(DATA
[i
+2]);
1061 case 0x68: //'h': // Hiragana-Katakana
1064 case 0x6B: //'k': // Katakana-Hiragana
1067 case 0x62: //'b': // both
1078 * Test cloning / copy constructor of RBT.
// Copy-constructs t2 from t1 and checks that both turn "abc" into "ABc". Per
// the comment below, the point is that the copy keeps working once the
// original has been destroyed.
// NOTE(review): the braces that bound t1's lifetime and the release of t2 are
// not visible in this listing -- confirm t2 is deleted.
1080 void TransliteratorTest::TestCopyJ476(void) {
1081 // The real test here is what happens when the destructors are
1082 // called. So we let one object get destructed, and check to
1083 // see that its copy still works.
1084 RuleBasedTransliterator
*t2
= 0;
1086 UErrorCode status
= U_ZERO_ERROR
;
1087 RuleBasedTransliterator
t1("t1", "a>A;b>B;", status
);
1088 if (U_FAILURE(status
)) {
1089 errln("FAIL: RBT constructor");
1092 t2
= new RuleBasedTransliterator(t1
);
1093 expect(t1
, "abc", "ABc");
1095 expect(*t2
, "abc", "ABc");
1100 * Test inter-Indic transliterators. These are composed.
1101 * ICU4C Jitterbug 483.
// Creates "Devanagari-Gujarati", reports a failure tied to the value of
// getID(), and expects the three-character Devanagari string to map to its
// Gujarati counterpart.
1103 void TransliteratorTest::TestInterIndic(void) {
1104 UnicodeString
ID("Devanagari-Gujarati", "");
1105 UErrorCode status
= U_ZERO_ERROR
;
1106 UParseError parseError
;
1107 Transliterator
* dg
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1109 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1112 UnicodeString id
= dg
->getID();
1114 errln("FAIL: createInstance(" + ID
+ ")->getID() => " + id
);
1116 UnicodeString dev
= CharsToUnicodeString("\\u0901\\u090B\\u0925");
1117 UnicodeString guj
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1118 expect(*dg
, dev
, guj
);
1123 * Test filter syntax in IDs. (J918)
// Data-driven test of filter syntax inside IDs. For each group of four
// strings: create the transliterator from <id>, run it on <input> expecting
// <expected output>, check that getID() returns <id>, then check that
// createInverse() yields a transliterator whose ID is <inverse id>.
1125 void TransliteratorTest::TestFilterIDs(void) {
1126 // Array of 4n strings:
1127 // <id>, <inverse id>, <input>, <expected output>
1128 const char* DATA
[] = {
1129 "[aeiou]Any-Hex", // ID
1130 "[aeiou]Hex-Any", // expected inverse ID
1132 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1134 "[aeiou]Any-Hex;[^5]Hex-Any",
1135 "[^5]Any-Hex;[aeiou]Hex-Any",
1144 enum { DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]) };
// Stride of 4: one group is <id>, <inverse id>, <input>, <expected output>.
1146 for (int i
=0; i
<DATA_length
; i
+=4) {
1147 UnicodeString
ID(DATA
[i
], "");
1148 UnicodeString
uID(DATA
[i
+1], "");
1149 UnicodeString
data2(DATA
[i
+2], "");
1150 UnicodeString
data3(DATA
[i
+3], "");
1151 UParseError parseError
;
1152 UErrorCode status
= U_ZERO_ERROR
;
1153 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1155 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1158 expect(*t
, data2
, data3
);
1161 if (ID
!= t
->getID()) {
1162 errln("FAIL: createInstance(" + ID
+ ").getID() => " +
1166 // Check the inverse
1167 Transliterator
*u
= t
->createInverse(status
);
1169 errln("FAIL: " + ID
+ ".createInverse() returned NULL");
1170 } else if (u
->getID() != uID
) {
1171 errln("FAIL: " + ID
+ ".createInverse().getID() => " +
1172 u
->getID() + ", expected " + uID
);
1181 * Test the case mapping transliterators.
// Creates Any-Upper, Any-Lower and Any-Title, each with the filter [^xyzXYZ],
// and checks one sentence per mapping. In the expected strings the letters
// x, y and z keep their original case (for example "FOx", "LAzy").
1183 void TransliteratorTest::TestCaseMap(void) {
1184 UParseError parseError
;
1185 UErrorCode status
= U_ZERO_ERROR
;
1186 Transliterator
* toUpper
=
1187 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1188 Transliterator
* toLower
=
1189 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1190 Transliterator
* toTitle
=
1191 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1192 if (toUpper
==0 || toLower
==0 || toTitle
==0) {
1193 errln("FAIL: createInstance returned NULL");
1200 expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.",
1201 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1202 expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1203 "the quick brown foX jumped over the lazY dogs.");
1204 expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.",
1205 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1213 * Test the name mapping transliterators.
// Checks Any-Name (with filter [^abc], so "abc" is left as is in the expected
// output) and Name-Any, then creates the compound "Any-Name;Name-Any". The
// Name-Any input deliberately includes malformed pieces such as "\N{x" and a
// trailing "\N{", which appear unchanged in the expected output.
1215 void TransliteratorTest::TestNameMap(void) {
1216 UParseError parseError
;
1217 UErrorCode status
= U_ZERO_ERROR
;
1218 Transliterator
* uni2name
=
1219 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
);
1220 Transliterator
* name2uni
=
1221 Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
);
1222 if (uni2name
==0 || name2uni
==0) {
1223 errln("FAIL: createInstance returned NULL");
1229 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1230 expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1231 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1232 expect(*name2uni
, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
1233 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1240 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
);
1242 errln("FAIL: createInstance returned NULL");
1247 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1248 UnicodeString s
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1254 * Test liberalized ID syntax. 1006c
// Data-driven test of relaxed ID syntax. DATA holds (ID to create, expected
// getID() or NULL, description) triples. A row passes when the expected ID is
// empty (the NULL rows, see the comment below) or equals t->getID().
1256 void TransliteratorTest::TestLiberalizedID(void) {
1257 // Some test cases have an expected getID() value of NULL. This
1258 // means I have disabled the test case for now. This stuff is
1259 // still under development, and I haven't decided whether to make
1260 // getID() return canonical case yet. It will all get rewritten
1261 // with the move to Source-Target/Variant IDs anyway. [aliu]
1262 const char* DATA
[] = {
1263 "latin-greek", NULL
/*"Latin-Greek"*/, "case insensitivity",
1264 " Null ", "Null", "whitespace",
1265 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1266 " null ; latin-greek ", NULL
/*"Null;Latin-Greek"*/, "compound whitespace",
1268 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
1269 UParseError parseError
;
// NOTE(review): status is declared once, outside the loop. No reset is
// visible in this listing; if there is none, a failure on one row could carry
// over into the createInstance() calls for later rows -- confirm.
1270 UErrorCode status
= U_ZERO_ERROR
;
1271 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1272 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1274 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1275 " cannot create ID \"" + DATA
[i
] + "\"");
1279 exp
= UnicodeString(DATA
[i
+1], "");
1281 // Don't worry about getID() if the expected char*
1282 // is NULL -- see above.
1283 if (exp
.length() == 0 || exp
== t
->getID()) {
1284 logln(UnicodeString("Ok: ") + DATA
[i
+2] +
1285 " create ID \"" + DATA
[i
] + "\" => \"" +
1288 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1289 " create ID \"" + DATA
[i
] + "\" => \"" +
1290 t
->getID() + "\", exp \"" + exp
+ "\"");
1297 /* test for Jitterbug 912 */
1298 void TransliteratorTest::TestCreateInstance(){
1299 const char* FORWARD
= "F";
1300 const char* REVERSE
= "R";
1301 const char* DATA
[] = {
1303 // Column 2: direction
1304 // Column 3: expected ID, or "" if expect failure
1305 "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912
1307 // JB#2689: bad compound causes crash
1308 "InvalidSource-InvalidTarget", FORWARD
, "",
1309 "InvalidSource-InvalidTarget", REVERSE
, "",
1310 "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "",
1311 "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "",
1312 "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "",
1313 "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "",
1318 for (int32_t i
=0; DATA
[i
]; i
+=3) {
1320 UErrorCode ec
= U_ZERO_ERROR
;
1321 UnicodeString
id(DATA
[i
]);
1322 UTransDirection dir
= (DATA
[i
+1]==FORWARD
)?
1323 UTRANS_FORWARD
:UTRANS_REVERSE
;
1324 UnicodeString
expID(DATA
[i
+2]);
1326 Transliterator::createInstance(id
,dir
,err
,ec
);
1327 UnicodeString newID
;
1331 UBool ok
= (newID
== expID
);
1333 newID
= u_errorName(ec
);
1336 logln((UnicodeString
)"Ok: createInstance(" +
1337 id
+ "," + DATA
[i
+1] + ") => " + newID
);
1339 errln((UnicodeString
)"FAIL: createInstance(" +
1340 id
+ "," + DATA
[i
+1] + ") => " + newID
+
1341 ", expected " + expID
);
1348 * Test the normalization transliterator.
1350 void TransliteratorTest::TestNormalizationTransliterator() {
1351 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1352 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1353 const char* CANON
[] = {
1354 // Input Decomposed Composed
1355 "cat", "cat", "cat" ,
1356 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1358 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1359 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1361 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1362 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1363 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1365 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1366 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1368 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1369 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1370 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1372 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1373 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1375 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1376 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1378 "Henry IV", "Henry IV", "Henry IV" ,
1379 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1381 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1382 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1383 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1384 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1385 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1387 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1391 const char* COMPAT
[] = {
1392 // Input Decomposed Composed
1393 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1395 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1396 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1398 "Henry IV", "Henry IV", "Henry IV" ,
1399 "Henry \\u2163", "Henry IV", "Henry IV" ,
1401 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1402 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1404 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1409 UParseError parseError
;
1410 UErrorCode status
= U_ZERO_ERROR
;
1411 Transliterator
* NFD
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
);
1412 Transliterator
* NFC
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
);
1414 errln("FAIL: createInstance failed");
1419 for (i
=0; CANON
[i
]; i
+=3) {
1420 UnicodeString in
= CharsToUnicodeString(CANON
[i
]);
1421 UnicodeString expd
= CharsToUnicodeString(CANON
[i
+1]);
1422 UnicodeString expc
= CharsToUnicodeString(CANON
[i
+2]);
1423 expect(*NFD
, in
, expd
);
1424 expect(*NFC
, in
, expc
);
1429 Transliterator
* NFKD
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
);
1430 Transliterator
* NFKC
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
);
1431 if (!NFKD
|| !NFKC
) {
1432 errln("FAIL: createInstance failed");
1437 for (i
=0; COMPAT
[i
]; i
+=3) {
1438 UnicodeString in
= CharsToUnicodeString(COMPAT
[i
]);
1439 UnicodeString expkd
= CharsToUnicodeString(COMPAT
[i
+1]);
1440 UnicodeString expkc
= CharsToUnicodeString(COMPAT
[i
+2]);
1441 expect(*NFKD
, in
, expkd
);
1442 expect(*NFKC
, in
, expkc
);
1448 status
= U_ZERO_ERROR
;
1449 Transliterator
*t
= Transliterator::createInstance("NFD; [x]Remove",
1453 errln("FAIL: createInstance failed");
1455 expect(*t
, CharsToUnicodeString("\\u010dx"),
1456 CharsToUnicodeString("c\\u030C"));
1461 * Test compound RBT rules.
1463 void TransliteratorTest::TestCompoundRBT(void) {
1464 // Careful with spacing and ';' here: Phrase this exactly
1465 // as toRules() is going to return it. If toRules() changes
1466 // with regard to spacing or ';', then adjust this string.
1467 UnicodeString
rule("::Hex-Any;\n"
1471 "::[^t]Any-Upper;", "");
1472 UParseError parseError
;
1473 UErrorCode status
= U_ZERO_ERROR
;
1474 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
);
1476 errln("FAIL: createFromRules failed");
1479 expect(*t
, "\\u0043at in the hat, bat on the mat",
1480 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1482 t
->toRules(r
, TRUE
);
1484 logln((UnicodeString
)"OK: toRules() => " + r
);
1486 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1487 ", expected " + rule
);
1492 t
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
);
1494 errln("FAIL: createInstance failed");
1497 UnicodeString
exp("::Greek-Latin;\n::Latin-Cyrillic;");
1498 t
->toRules(r
, TRUE
);
1500 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1501 ", expected " + exp
);
1503 logln((UnicodeString
)"OK: toRules() => " + r
);
1507 // Round trip the result of toRules
1508 t
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
);
1510 errln("FAIL: createFromRules #2 failed");
1513 logln((UnicodeString
)"OK: createFromRules(" + r
+ ") succeeded");
1516 // Test toRules again
1517 t
->toRules(r
, TRUE
);
1519 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1520 ", expected " + exp
);
1522 logln((UnicodeString
)"OK: toRules() => " + r
);
1527 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1528 // to what the regenerated ID will look like.
1529 UnicodeString
id("Upper(Lower);(NFKC)", "");
1530 t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
1532 errln("FAIL: createInstance #2 failed");
1535 if (t
->getID() == id
) {
1536 logln((UnicodeString
)"OK: created " + id
);
1538 errln((UnicodeString
)"FAIL: createInstance(" + id
+
1539 ").getID() => " + t
->getID());
1542 Transliterator
*u
= t
->createInverse(status
);
1544 errln("FAIL: createInverse failed");
1548 exp
= "NFKC();Lower(Upper)";
1549 if (u
->getID() == exp
) {
1550 logln((UnicodeString
)"OK: createInverse(" + id
+ ") => " +
1553 errln((UnicodeString
)"FAIL: createInverse(" + id
+ ") => " +
1561 * Compound filter semantics were originally not implemented
1562 * correctly. Originally, each component filter f(i) was replaced by
1563 * f'(i) = f(i) && g, where g is the filter for the compound
1568 * Suppose I have a transliterator X. Internally X is
1569 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1571 * The compound should convert all greek characters (through latin) to
1572 * cyrillic, then lowercase the result. The filter should say "don't
1573 * touch 'A' in the original". But because an intermediate result
1574 * happens to go through "A", the Greek Alpha gets hung up.
1576 void TransliteratorTest::TestCompoundFilter(void) {
1577 UParseError parseError
;
1578 UErrorCode status
= U_ZERO_ERROR
;
1579 Transliterator
*t
= Transliterator::createInstance
1580 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
);
1582 errln("FAIL: createInstance failed");
1585 t
->adoptFilter(new UnicodeSet("[^A]", status
));
1586 if (U_FAILURE(status
)) {
1587 errln("FAIL: UnicodeSet ct failed");
1592 // Only the 'A' at index 1 should remain unchanged
1594 CharsToUnicodeString("BA\\u039A\\u0391"),
1595 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1599 void TransliteratorTest::TestRemove(void) {
1600 UParseError parseError
;
1601 UErrorCode status
= U_ZERO_ERROR
;
1602 Transliterator
*t
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
);
1604 errln("FAIL: createInstance failed");
1608 expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts");
1612 void TransliteratorTest::TestToRules(void) {
1613 const char* RBT
= "rbt";
1614 const char* SET
= "set";
1615 static const char* DATA
[] = {
1617 "$a=\\u4E61; [$a] > A;",
1621 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1622 "[[:Zs:][:Zl:]]{a} > A;",
1649 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1650 "[^[:Zs:]]{a} > A;",
1653 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1654 "[[a-z]-[:Zs:]]{a} > A;",
1657 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1658 "[[:Zs:]&[a-z]]{a} > A;",
1661 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1662 "[x[:Zs:]]{a} > A;",
1665 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1666 "$macron = \\u0304 ;"
1667 "$evowel = [aeiouyAEIOUY] ;"
1668 "$iotasub = \\u0345 ;"
1669 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1670 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1673 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1674 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1676 static const int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1678 for (int32_t d
=0; d
< DATA_length
; d
+=3) {
1679 if (DATA
[d
] == RBT
) {
1680 // Transliterator test
1681 UParseError parseError
;
1682 UErrorCode status
= U_ZERO_ERROR
;
1683 Transliterator
*t
= Transliterator::createFromRules("ID",
1684 DATA
[d
+1], UTRANS_FORWARD
, parseError
, status
);
1686 errln("FAIL: createFromRules failed");
1689 UnicodeString rules
, escapedRules
;
1690 t
->toRules(rules
, FALSE
);
1691 t
->toRules(escapedRules
, TRUE
);
1692 UnicodeString expRules
= CharsToUnicodeString(DATA
[d
+2]);
1693 UnicodeString
expEscapedRules(DATA
[d
+2]);
1694 if (rules
== expRules
) {
1695 logln((UnicodeString
)"Ok: " + DATA
[d
+1] +
1698 errln((UnicodeString
)"FAIL: " + DATA
[d
+1] +
1699 " => " + rules
+ ", exp " + expRules
);
1701 if (escapedRules
== expEscapedRules
) {
1702 logln((UnicodeString
)"Ok: " + DATA
[d
+1] +
1703 " => " + escapedRules
);
1705 errln((UnicodeString
)"FAIL: " + DATA
[d
+1] +
1706 " => " + escapedRules
+ ", exp " + expEscapedRules
);
1712 UErrorCode status
= U_ZERO_ERROR
;
1713 UnicodeString
pat(DATA
[d
+1]);
1714 UnicodeString
expToPat(DATA
[d
+2]);
1715 UnicodeSet
set(pat
, status
);
1716 if (U_FAILURE(status
)) {
1717 errln("FAIL: UnicodeSet ct failed");
1720 // Adjust spacing etc. as necessary.
1721 UnicodeString toPat
;
1722 set
.toPattern(toPat
);
1723 if (expToPat
== toPat
) {
1724 logln((UnicodeString
)"Ok: " + pat
+
1727 errln((UnicodeString
)"FAIL: " + pat
+
1728 " => " + prettify(toPat
, TRUE
) +
1729 ", exp " + prettify(pat
, TRUE
));
1735 void TransliteratorTest::TestContext() {
1736 UTransPosition pos
= {0, 2, 0, 1}; // cs cl s l
1737 expect("de > x; {d}e > y;",
1742 expect("ab{c} > z;",
1747 void TransliteratorTest::TestSupplemental() {
1749 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1751 CharsToUnicodeString("ab\\U0001030Fx"),
1752 CharsToUnicodeString("\\U00010300bix"));
1754 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1755 "$b=[A-Z\\U00010400-\\U0001044D];"
1756 "($a)($b) > $2 $1;"),
1757 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1758 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1760 // k|ax\\U00010300xm
1762 // k|a\\U00010400\\U00010300xm
1763 // ky|\\U00010400\\U00010300xm
1764 // ky\\U00010400|\\U00010300xm
1766 // ky\\U00010400|\\U00010300\\U00010400m
1767 // ky\\U00010400y|\\U00010400m
1768 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1769 "$a {x} > | @ \\U00010400;"
1770 "{$a} [^\\u0000-\\uFFFF] > y;"),
1771 CharsToUnicodeString("kax\\U00010300xm"),
1772 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1775 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1776 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1778 expectT("Any-Hex/Unicode",
1779 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1780 "U+10330U+10FF00U+E0061U+00A0");
1782 expectT("Any-Hex/C",
1783 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1784 "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1786 expectT("Any-Hex/Perl",
1787 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1788 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1790 expectT("Any-Hex/Java",
1791 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1792 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1794 expectT("Any-Hex/XML",
1795 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1796 "𐌰􏼀󠁡 ");
1798 expectT("Any-Hex/XML10",
1799 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1800 "𐌰􏼀󠁡 ");
1802 expectT("[\\U000E0000-\\U000E0FFF] Remove",
1803 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1804 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1807 void TransliteratorTest::TestQuantifier() {
1809 // Make sure @ in a quantified anteContext works
1810 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1814 // Make sure @ in a quantified postContext works
1815 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1819 // Make sure @ in a quantified postContext with seg ref works
1820 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1824 // Make sure @ past ante context doesn't enter ante context
1825 UTransPosition pos
= {0, 5, 3, 5};
1826 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1831 // Make sure @ past post context doesn't pass limit
1832 UTransPosition pos2
= {0, 4, 0, 2};
1833 expect("{b} a+ > c @@ |; x > y; a > A;",
1838 // Make sure @ past post context doesn't enter post context
1839 expect("{b} a+ > c @@ |; x > y; a > A;",
1843 expect("(ab)? c > d;",
1847 // NOTE: The (ab)+ when referenced just yields a single "ab",
1848 // not the full sequence of them. This accords with perl behavior.
1849 expect("(ab)+ {x} > '(' $1 ')';",
1851 "x ab(ab) abab(ab)y");
1854 "ac abc abbc abbbc",
1857 expect("[abc]+ > x;",
1858 "qac abrc abbcs abtbbc",
1861 expect("q{(ab)+} > x;",
1862 "qa qab qaba qababc qaba",
1863 "qa qx qxa qxc qxa");
1865 expect("q(ab)* > x;",
1866 "qa qab qaba qababc",
1869 // NOTE: The (ab)* when referenced just yields a single "ab",
1870 // not the full sequence of them. This accords with perl behavior.
1871 expect("q(ab)* > '(' $1 ')';",
1872 "qa qab qaba qababc",
1873 "()a (ab) (ab)a (ab)c");
1875 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1877 expect("'ab'+ > x;",
1881 // $foo+ and $foo* -- the quantifier should apply to the entire
1882 // variable reference
1883 expect("$var = ab; $var+ > x;",
1888 class TestTrans
: public NullTransliterator
{
1890 TestTrans(const UnicodeString
& id
) {
1896 * Test Source-Target/Variant.
1898 void TransliteratorTest::TestSTV(void) {
1899 int32_t ns
= Transliterator::countAvailableSources();
1900 if (ns
< 0 || ns
> 255) {
1901 errln((UnicodeString
)"FAIL: Bad source count: " + ns
);
1905 for (i
=0; i
<ns
; ++i
) {
1906 UnicodeString source
;
1907 Transliterator::getAvailableSource(i
, source
);
1908 logln((UnicodeString
)"" + i
+ ": " + source
);
1909 if (source
.length() == 0) {
1910 errln("FAIL: empty source");
1913 int32_t nt
= Transliterator::countAvailableTargets(source
);
1914 if (nt
< 0 || nt
> 255) {
1915 errln((UnicodeString
)"FAIL: Bad target count: " + nt
);
1918 for (int32_t j
=0; j
<nt
; ++j
) {
1919 UnicodeString target
;
1920 Transliterator::getAvailableTarget(j
, source
, target
);
1921 logln((UnicodeString
)" " + j
+ ": " + target
);
1922 if (target
.length() == 0) {
1923 errln("FAIL: empty target");
1926 int32_t nv
= Transliterator::countAvailableVariants(source
, target
);
1927 if (nv
< 0 || nv
> 255) {
1928 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
);
1931 for (int32_t k
=0; k
<nv
; ++k
) {
1932 UnicodeString variant
;
1933 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
1934 if (variant
.length() == 0) {
1935 logln((UnicodeString
)" " + k
+ ": <empty>");
1937 logln((UnicodeString
)" " + k
+ ": " + variant
);
1943 // Test registration
1944 const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1945 const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1946 const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" };
1947 for (i
=0; i
<3; ++i
) {
1948 Transliterator
*t
= new TestTrans(IDS
[i
]);
1950 errln("FAIL: out of memory");
1953 if (t
->getID() != IDS
[i
]) {
1954 errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]);
1958 Transliterator::registerInstance(t
);
1959 UErrorCode status
= U_ZERO_ERROR
;
1960 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
1962 errln((UnicodeString
)"FAIL: Registration/creation failed for ID " +
1965 logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " +
1969 Transliterator::unregister(IDS
[i
]);
1970 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
1972 errln((UnicodeString
)"FAIL: Unregistration failed for ID " +
1978 // Make sure getAvailable API reflects removal
1979 int32_t n
= Transliterator::countAvailableIDs();
1980 for (i
=0; i
<n
; ++i
) {
1981 UnicodeString id
= Transliterator::getAvailableID(i
);
1982 for (j
=0; j
<3; ++j
) {
1983 if (id
.caseCompare(FULL_IDS
[j
],0)==0) {
1984 errln((UnicodeString
)"FAIL: unregister(" + id
+ ") failed");
1988 n
= Transliterator::countAvailableTargets("Any");
1989 for (i
=0; i
<n
; ++i
) {
1991 Transliterator::getAvailableTarget(i
, "Any", t
);
1992 if (t
.caseCompare(IDS
[0],0)==0) {
1993 errln((UnicodeString
)"FAIL: unregister(Any-" + t
+ ") failed");
1996 n
= Transliterator::countAvailableSources();
1997 for (i
=0; i
<n
; ++i
) {
1999 Transliterator::getAvailableSource(i
, s
);
2000 for (j
=0; j
<3; ++j
) {
2001 if (SOURCES
[j
] == NULL
) continue;
2002 if (s
.caseCompare(SOURCES
[j
],0)==0) {
2003 errln((UnicodeString
)"FAIL: unregister(" + s
+ "-*) failed");
2010 * Test inverse of Greek-Latin; Title()
2012 void TransliteratorTest::TestCompoundInverse(void) {
2013 UParseError parseError
;
2014 UErrorCode status
= U_ZERO_ERROR
;
2015 Transliterator
*t
= Transliterator::createInstance
2016 ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
);
2018 errln("FAIL: createInstance");
2021 UnicodeString
exp("(Title);Latin-Greek");
2022 if (t
->getID() == exp
) {
2023 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2026 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2027 t
->getID() + "\", expected \"" + exp
+ "\"");
2033 * Test NFD chaining with RBT
2035 void TransliteratorTest::TestNFDChainRBT() {
2037 UErrorCode ec
= U_ZERO_ERROR
;
2038 Transliterator
* t
= Transliterator::createFromRules(
2039 "TEST", "::NFD; aa > Q; a > q;",
2040 UTRANS_FORWARD
, pe
, ec
);
2041 if (t
== NULL
|| U_FAILURE(ec
)) {
2042 errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
));
2045 expect(*t
, "aa", "Q");
2048 // TEMPORARY TESTS -- BEING DEBUGGED
2049 //=- UnicodeString s, s2;
2050 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2051 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2052 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2053 //=- expect(*t, s, s2);
2056 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2057 //=- expect(*t, s2, s);
2060 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2061 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2062 //=- expect(*t, s, s);
2065 // const char* source[] = {
2067 // "\\u015Br\\u012Bmad",
2068 // "bhagavadg\\u012Bt\\u0101",
2071 // "vi\\u1E63\\u0101da",
2073 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2074 // "uv\\u0101cr\\u0325",
2076 // "rmk\\u1E63\\u0113t",
2077 // //"dharmak\\u1E63\\u0113tr\\u0113",
2079 // "kuruk\\u1E63\\u0113tr\\u0113",
2080 // "samav\\u0113t\\u0101",
2081 // "yuyutsava-\\u1E25",
2082 // "m\\u0101mak\\u0101-\\u1E25",
2083 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2085 // "san\\u0304java",
2090 // const char* expected[] = {
2092 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2093 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2094 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2095 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2096 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2097 // "\\u092f\\u094b\\u0917",
2098 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2099 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2102 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2104 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2105 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2106 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2107 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2108 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2109 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2110 // "\\u0938\\u0902\\u091c\\u0935",
2114 // UErrorCode status = U_ZERO_ERROR;
2115 // UParseError parseError;
2116 // UnicodeString message;
2117 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2118 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2119 // if(U_FAILURE(status)){
2120 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2121 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2122 // delete latinToDevToLatin;
2123 // delete devToLatinToDev;
2126 // UnicodeString gotResult;
2127 // for(int i= 0; source[i] != 0; i++){
2128 // gotResult = source[i];
2129 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2130 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2132 // delete latinToDevToLatin;
2133 // delete devToLatinToDev;
2137 * Inverse of "Null" should be "Null". (J21)
2139 void TransliteratorTest::TestNullInverse() {
2141 UErrorCode ec
= U_ZERO_ERROR
;
2142 Transliterator
*t
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
);
2143 if (t
== 0 || U_FAILURE(ec
)) {
2144 errln("FAIL: createInstance");
2147 Transliterator
*u
= t
->createInverse(ec
);
2148 if (u
== 0 || U_FAILURE(ec
)) {
2149 errln("FAIL: createInverse");
2153 if (u
->getID() != "Null") {
2154 errln("FAIL: Inverse of Null should be Null");
2161 * Check ID of inverse of alias. (J22)
2163 void TransliteratorTest::TestAliasInverseID() {
2164 UnicodeString
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2166 UErrorCode ec
= U_ZERO_ERROR
;
2167 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2168 if (t
== 0 || U_FAILURE(ec
)) {
2169 errln("FAIL: createInstance");
2172 Transliterator
*u
= t
->createInverse(ec
);
2173 if (u
== 0 || U_FAILURE(ec
)) {
2174 errln("FAIL: createInverse");
2178 UnicodeString exp
= "Hangul-Latin";
2179 UnicodeString got
= u
->getID();
2181 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2182 ", expected " + exp
);
2189 * Test IDs of inverses of compound transliterators. (J20)
2191 void TransliteratorTest::TestCompoundInverseID() {
2192 UnicodeString ID
= "Latin-Jamo;NFC(NFD)";
2194 UErrorCode ec
= U_ZERO_ERROR
;
2195 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2196 if (t
== 0 || U_FAILURE(ec
)) {
2197 errln("FAIL: createInstance");
2200 Transliterator
*u
= t
->createInverse(ec
);
2201 if (u
== 0 || U_FAILURE(ec
)) {
2202 errln("FAIL: createInverse");
2206 UnicodeString exp
= "NFD(NFC);Jamo-Latin";
2207 UnicodeString got
= u
->getID();
2209 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2210 ", expected " + exp
);
2217 * Test undefined variable.
2220 void TransliteratorTest::TestUndefinedVariable() {
2221 UnicodeString rule
= "$initial } a <> \\u1161;";
2223 UErrorCode ec
= U_ZERO_ERROR
;
2224 Transliterator
*t
= new RuleBasedTransliterator("<ID>", rule
, UTRANS_FORWARD
, 0, pe
, ec
);
2226 if (U_FAILURE(ec
)) {
2227 logln((UnicodeString
)"OK: Got exception for " + rule
+ ", as expected: " +
2231 errln((UnicodeString
)"Fail: bogus rule " + rule
+ " compiled with error " +
2236 * Test empty context.
2238 void TransliteratorTest::TestEmptyContext() {
2239 expect(" { a } > b;", "xay a ", "xby b ");
2243 * Test compound filter ID syntax
2245 void TransliteratorTest::TestCompoundFilterID(void) {
2246 static const char* DATA
[] = {
2247 // Col. 1 = ID or rule set (latter must start with #)
2249 // = columns > 1 are null if expect col. 1 to be illegal =
2251 // Col. 2 = direction, "F..." or "R..."
2252 // Col. 3 = source string
2253 // Col. 4 = exp result
2255 "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters
2256 "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter
2257 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2258 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2259 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2260 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2264 for (int32_t i
=0; DATA
[i
]; i
+=4) {
2265 UnicodeString id
= CharsToUnicodeString(DATA
[i
]);
2266 UTransDirection direction
= (DATA
[i
+1] != NULL
&& DATA
[i
+1][0] == 'R') ?
2267 UTRANS_REVERSE
: UTRANS_FORWARD
;
2268 UnicodeString source
;
2270 if (DATA
[i
+2] != NULL
) {
2271 source
= CharsToUnicodeString(DATA
[i
+2]);
2272 exp
= CharsToUnicodeString(DATA
[i
+3]);
2274 UBool expOk
= (DATA
[i
+1] != NULL
);
2275 Transliterator
* t
= NULL
;
2277 UErrorCode ec
= U_ZERO_ERROR
;
2278 if (id
.charAt(0) == 0x23/*#*/) {
2279 t
= Transliterator::createFromRules("ID", id
, direction
, pe
, ec
);
2281 t
= Transliterator::createInstance(id
, direction
, pe
, ec
);
2283 UBool ok
= (t
!= NULL
&& U_SUCCESS(ec
));
2284 UnicodeString transID
;
2286 transID
= t
->getID();
2289 transID
= UnicodeString("NULL", "");
2292 logln((UnicodeString
)"Ok: " + id
+ " => " + transID
+ ", " +
2294 if (source
.length() != 0) {
2295 expect(*t
, source
, exp
);
2299 errln((UnicodeString
)"FAIL: " + id
+ " => " + transID
+ ", " +
2306 * Test new property set syntax
2308 void TransliteratorTest::TestPropertySet() {
2309 expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
2310 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2311 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2315 * Test various failure points of the new 2.0 engine.
2317 void TransliteratorTest::TestNewEngine() {
2319 UErrorCode ec
= U_ZERO_ERROR
;
2320 Transliterator
*t
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
);
2321 if (t
== 0 || U_FAILURE(ec
)) {
2322 errln("FAIL: createInstance Latin-Hiragana");
2325 // Katakana should be untouched
2326 expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"),
2327 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2332 // This test will only work if Transliterator.ROLLBACK is
2333 // true. Otherwise, this test will fail, revealing a
2334 // limitation of global filters in incremental mode.
2336 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
);
2338 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
);
2339 if (U_FAILURE(ec
)) {
2345 Transliterator
* array
[3];
2347 array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
);
2349 if (U_FAILURE(ec
)) {
2350 errln("FAIL: createInstance NFD");
2357 t
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
));
2358 if (U_FAILURE(ec
)) {
2359 errln("FAIL: UnicodeSet constructor");
2367 expect(*t
, "aAaA", "bAbA");
2369 assertTrue("countElements", t
->countElements() == 3);
2370 assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A");
2371 assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD");
2372 assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b");
2373 assertSuccess("getElement", ec
);
2381 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2385 UnicodeString gr
= CharsToUnicodeString(
2387 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2388 "$rough = \\u0314 ;"
2389 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2393 expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2397 * Test quantified segment behavior. We want:
2398 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
// Checks how a capture group interacts with the plus quantifier in rule
// patterns, then builds two such rules with createFromRules() and reports
// what toRules() gives back for each of them.
2400 void TransliteratorTest::TestQuantifiedSegment(void) {
// Quantifier inside the group: the whole input cba is expected back as $1.
2402 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2404 // The tricky case; the quantifier is around the segment
// Expected output keeps only the last character matched by the group.
2405 expect("([abc])+ > x $1 x;", "cba", "xax");
2407 // Tricky case in reverse direction
2408 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2410 // Check post-context segment
2411 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2413 // Test toRule/toPattern for non-quantified segment.
2414 // Careful with spacing here.
2415 UnicodeString
r("([a-c]){q} > x $1 x;");
2417 UErrorCode ec
= U_ZERO_ERROR
;
2418 Transliterator
* t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2419 if (U_FAILURE(ec
)) {
2420 errln("FAIL: createFromRules");
// rr receives the regenerated rule text; both r and rr are printed in the
// FAIL and Ok messages below.
2425 t
->toRules(rr
, TRUE
);
2427 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2429 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2433 // Test toRule/toPattern for quantified segment.
2434 // Careful with spacing here.
// Same procedure as above, reusing r, t and ec for the quantified rule.
2435 r
= "([a-c])+{q} > x $1 x;";
2436 t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2437 if (U_FAILURE(ec
)) {
2438 errln("FAIL: createFromRules");
2442 t
->toRules(rr
, TRUE
);
2444 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2446 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2451 //======================================================================
2453 //======================================================================
// Latin <-> Devanagari round trip. Entry i of source[] (Latin) is expected to
// become entry i of expected[] (Devanagari) under Latin-Devanagari, and to be
// restored from it under Devanagari-Latin. Table strings carry escaped code
// points that are decoded with CharsToUnicodeString() at the point of use.
2454 void TransliteratorTest::TestDevanagariLatinRT(){
// MAX_LEN is both the declared size of the two tables and the loop bound, so
// the tables must stay the same length and in the same order.
2455 const int MAX_LEN
= 52;
2456 const char* const source
[MAX_LEN
] = {
2471 //"r\\u0323ya", // \u095c is not valid in Devanagari
2497 "\\u1E6Dh\\u1E6Dha",
2504 // Not roundtrippable --
2505 // \\u0939\\u094d\\u094d\\u092E - hma
2506 // \\u0939\\u094d\\u092E - hma
2507 // CharsToUnicodeString("hma"),
2512 "san\\u0304j\\u012Bb s\\u0113nagupta",
2513 "\\u0101nand vaddir\\u0101ju",
// Devanagari counterparts; the trailing comment on each row spells the Latin
// form it pairs with.
2517 const char* const expected
[MAX_LEN
] = {
2518 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2519 "\\u0915\\u094D\\u0930", /* kra */
2520 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2521 "\\u0916\\u094D\\u0930", /* khra */
2522 "\\u0917\\u094D\\u0930", /* gra */
2523 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2524 "\\u091A\\u094D\\u0930", /* cra */
2525 "\\u091B\\u094D\\u0930", /* chra */
2526 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2527 "\\u091D\\u094D\\u0930", /* jhra */
2528 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2529 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2530 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2531 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2532 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2533 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2534 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2535 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2536 "\\u0924\\u094D\\u0924", /* tta */
2537 "\\u0925\\u094D\\u0930", /* thra */
2538 "\\u0926\\u094D\\u0926", /* dda */
2539 "\\u0927\\u094D\\u0930", /* dhra */
2540 "\\u0928\\u094D\\u0928", /* nna */
2541 "\\u092A\\u094D\\u0930", /* pra */
2542 "\\u092B\\u094D\\u0930", /* phra */
2543 "\\u092C\\u094D\\u0930", /* bra */
2544 "\\u092D\\u094D\\u0930", /* bhra */
2545 "\\u092E\\u094D\\u0930", /* mra */
2546 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2547 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2548 "\\u092F\\u094D\\u0930", /* yra */
2549 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2551 "\\u0935\\u094D\\u0930", /* vra */
2552 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2553 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2554 "\\u0938\\u094D\\u0930", /* sra */
2555 "\\u0939\\u094d\\u092E", /* hma */
2556 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2557 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2558 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2559 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2560 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2561 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2562 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2563 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2564 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2566 "\\u0939\\u094D\\u092F", /* hya */
2567 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2568 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2569 "\\u090d", /* e\\u0306 */
2570 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2571 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2575 UErrorCode status
= U_ZERO_ERROR
;
2576 UParseError parseError
;
// NOTE(review): message is declared but not used in the visible lines.
2577 UnicodeString message
;
// Both transliterators share one status and one parseError, so the check
// below reports a failure from either createInstance() call.
2578 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2579 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2580 if(U_FAILURE(status
)){
2581 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2582 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
// NOTE(review): gotResult is assigned inside the loop but never read in the
// visible lines.
2585 UnicodeString gotResult
;
2586 for(int i
= 0; i
<MAX_LEN
; i
++){
2587 gotResult
= source
[i
];
// Forward direction first, then the reverse direction on the same pair.
2588 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2589 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Latin <-> Telugu round trip over a table of personal names. Entry i of
// source[] (Latin) is expected to become entry i of expected[] (Telugu) under
// Latin-Telugu and to be restored from it under Telugu-Latin.
2595 void TransliteratorTest::TestTeluguLatinRT(){
// Declared size of both tables and the loop bound.
2596 const int MAX_LEN
=10;
2597 const char* const source
[MAX_LEN
] = {
2598 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2599 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2600 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2601 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2602 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupta */
2603 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2604 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2605 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2606 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2607 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
// Telugu counterparts, in the same order as source[].
2610 const char* const expected
[MAX_LEN
] = {
2611 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2612 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2613 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2614 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2615 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2616 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2617 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2618 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2619 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2620 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2623 UErrorCode status
= U_ZERO_ERROR
;
2624 UParseError parseError
;
// NOTE(review): message is declared but not used in the visible lines.
2625 UnicodeString message
;
// NOTE(review): the variables are named latinToDev and devToLatin, but here
// they hold the Latin-Telugu and Telugu-Latin transliterators.
2626 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
);
2627 Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2628 if(U_FAILURE(status
)){
2629 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2630 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
// NOTE(review): gotResult is assigned inside the loop but never read in the
// visible lines.
2633 UnicodeString gotResult
;
2634 for(int i
= 0; i
<MAX_LEN
; i
++){
2635 gotResult
= source
[i
];
// Forward direction first, then the reverse direction on the same pair.
2636 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2637 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Latin <-> Devanagari round trip over a table of Sanskrit words. Entry i of
// source[] (Latin) is expected to become entry i of expected[] (Devanagari)
// under Latin-Devanagari and to be restored from it under Devanagari-Latin.
2643 void TransliteratorTest::TestSanskritLatinRT(){
// Declared size of both tables and the loop bound.
2644 const int MAX_LEN
=16;
2645 const char* const source
[MAX_LEN
] = {
2646 "rmk\\u1E63\\u0113t",
2647 "\\u015Br\\u012Bmad",
2648 "bhagavadg\\u012Bt\\u0101",
2651 "vi\\u1E63\\u0101da",
2653 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2654 "uv\\u0101cr\\u0325",
2655 "dharmak\\u1E63\\u0113tr\\u0113",
2656 "kuruk\\u1E63\\u0113tr\\u0113",
2657 "samav\\u0113t\\u0101",
2659 "m\\u0101mak\\u0101\\u1E25",
2660 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
// Devanagari counterparts; the commented-out row mirrors the one disabled in
// source[] so the two tables stay index-aligned.
2664 const char* const expected
[MAX_LEN
] = {
2665 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2666 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2667 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2668 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2669 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2670 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2671 "\\u092f\\u094b\\u0917",
2672 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2673 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2674 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2675 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2676 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2677 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2678 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2679 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2680 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2681 "\\u0938\\u0902\\u091c\\u0935",
2683 UErrorCode status
= U_ZERO_ERROR
;
2684 UParseError parseError
;
// NOTE(review): message is declared but not used in the visible lines.
2685 UnicodeString message
;
// Both transliterators share one status and one parseError, so the check
// below reports a failure from either createInstance() call.
2686 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2687 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2688 if(U_FAILURE(status
)){
2689 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2690 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
// NOTE(review): gotResult is assigned inside the loop but never read in the
// visible lines.
2693 UnicodeString gotResult
;
2694 for(int i
= 0; i
<MAX_LEN
; i
++){
2695 gotResult
= source
[i
];
// Forward direction first, then the reverse direction on the same pair.
2696 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2697 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Identity round trips through compound transliterator IDs: each test string
// is pushed through a there-and-back compound ID and is expected to come out
// unchanged (input and expected output of every expect() call are the same).
2704 void TransliteratorTest::TestCompoundLatinRT(){
2705 const char* const source
[] = {
2706 "rmk\\u1E63\\u0113t",
2707 "\\u015Br\\u012Bmad",
2708 "bhagavadg\\u012Bt\\u0101",
2711 "vi\\u1E63\\u0101da",
2713 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2714 "uv\\u0101cr\\u0325",
2715 "dharmak\\u1E63\\u0113tr\\u0113",
2716 "kuruk\\u1E63\\u0113tr\\u0113",
2717 "samav\\u0113t\\u0101",
2719 "m\\u0101mak\\u0101\\u1E25",
2720 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
// Unlike the sibling tests, the length is derived from source[] itself.
2724 const int MAX_LEN
= sizeof(source
)/sizeof(source
[0]);
2725 const char* const expected
[MAX_LEN
] = {
2726 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2727 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2728 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2729 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2730 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2731 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2732 "\\u092f\\u094b\\u0917",
2733 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2734 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2735 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2736 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2737 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2738 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2739 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2740 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2741 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2742 "\\u0938\\u0902\\u091c\\u0935"
// NOTE(review): expected[] is declared with MAX_LEN elements, so its element
// count always equals MAX_LEN and this comparison cannot fail; a short
// initializer list would leave trailing null pointers instead.
2744 if(MAX_LEN
!= sizeof(expected
)/sizeof(expected
[0])) {
2745 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2749 UErrorCode status
= U_ZERO_ERROR
;
2750 UParseError parseError
;
// NOTE(review): message is declared but not used in the visible lines.
2751 UnicodeString message
;
// Four compound IDs, each of the form X-Y;Y-X. They share one status, so the
// check below reports a failure from any of the four createInstance() calls.
2752 Transliterator
* devToLatinToDev
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2753 Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2754 Transliterator
* devToTelToDev
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2755 Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2757 if(U_FAILURE(status
)){
2758 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2759 errln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
// NOTE(review): gotResult is assigned inside the loop but never read in the
// visible lines.
2762 UnicodeString gotResult
;
2763 for(int i
= 0; i
<MAX_LEN
; i
++){
2764 gotResult
= source
[i
];
// NOTE(review): devToTelToDev is created and deleted but is not passed to
// expect() in the visible lines.
2765 expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
]));
2766 expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2767 expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
// Release all four transliterators created above.
2770 delete(latinToDevToLatin
);
2771 delete(devToLatinToDev
);
2772 delete(devToTelToDev
);
2773 delete(latinToTelToLatin
);
2777 * Test Gurmukhi-Devanagari Tippi and Bindi
// Checks what Devanagari-Gurmukhi produces for U+0902 depending on the
// character in front of it. Two-character strings are built for every member
// of the vowel set and of the non_vowel set and run through expect().
2779 void TransliteratorTest::TestGurmukhiDevanagari(){
2781 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2782 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2783 UErrorCode status
= U_ZERO_ERROR
;
// The set patterns are written with escapes and unescaped before being parsed
// by the UnicodeSet constructor.
2784 UnicodeSet
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status
);
2785 UnicodeSet
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status
);
2786 UParseError parseError
;
2788 UnicodeSetIterator
vIter(vowel
);
2789 UnicodeSetIterator
nvIter(non_vowel
);
2790 Transliterator
* trans
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
);
2791 if(U_FAILURE(status
)) {
2792 errln("Error creating transliterator %s", u_errorName(status
));
// Templates: position 0 is a placeholder that each loop overwrites, position
// 1 is the mark under test (U+0902 in, U+0A02 out).
2796 UnicodeString
src (" \\u0902");
2797 UnicodeString
expected(" \\u0A02");
2798 src
= src
.unescape();
2799 expected
= expected
.unescape();
// Vowel case: the expected lead character is the input code point plus
// 0x0100, and the expected mark stays U+0A02.
2801 while(vIter
.next()){
2802 src
.setCharAt(0,(UChar
) vIter
.getCodepoint());
2803 expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100));
2804 expect(*trans
,src
,expected
);
// Non-vowel case: same offset for the lead character, but the expected mark
// is switched to U+0A70 first.
2807 expected
.setCharAt(1,0x0A70);
2808 while(nvIter
.next()){
2809 //src.setCharAt(0,(char) nvIter.codepoint);
2810 src
.setCharAt(0,(UChar
)nvIter
.getCodepoint());
2811 expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100));
2812 expect(*trans
,src
,expected
);
2817 * Test instantiation from a locale.
// Creates transliterators from IDs that contain locale identifiers (ru_RU and
// el) instead of script names and spot-checks one character with each.
2819 void TransliteratorTest::TestLocaleInstantiation(void) {
2821 UErrorCode ec
= U_ZERO_ERROR
;
2822 Transliterator
*t
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
);
2823 if (U_FAILURE(ec
)) {
2824 errln("FAIL: createInstance(ru_RU-Latin)");
// U+0430 is expected to come out as the Latin letter a.
2828 expect(*t
, CharsToUnicodeString("\\u0430"), "a");
// Second case reuses t and ec; the Latin letter a is expected to come out as
// U+03B1.
2831 t
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
);
2832 if (U_FAILURE(ec
)) {
2833 errln("FAIL: createInstance(en-el)");
2837 expect(*t
, "a", CharsToUnicodeString("\\u03B1"));
2842 * Test title case handling of accent (should ignore accents)
// Runs the Title transliterator over text that has a combining mark (U+0300)
// after the first letter and an apostrophe inside a word. In the expected
// output only the first letter of each space-separated word is capitalized.
2844 void TransliteratorTest::TestTitleAccents(void) {
2846 UErrorCode ec
= U_ZERO_ERROR
;
2847 Transliterator
*t
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
);
2848 if (U_FAILURE(ec
)) {
2849 errln("FAIL: createInstance(Title)");
2853 expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2858 * Basic test of a locale resource based rule.
// Table-driven check of several Greek/Latin transliterator IDs, including
// ones that name the locale el or the UNGEGN variant.
2860 void TransliteratorTest::TestLocaleResource() {
// Flat list of triples: transliterator ID, input, expected output. Input and
// expected output are decoded with CharsToUnicodeString() when used.
2861 const char* DATA
[] = {
2863 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2864 "Latin-el", "b", "\\u03bc\\u03c0",
2865 "Latin-Greek", "b", "\\u03B2",
2866 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2867 "el-Latin", "\\u03B2", "v",
2868 "Greek-Latin", "\\u03B2", "b",
2870 const int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
// Step by 3 so that i always indexes the ID of a triple.
2871 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// A fresh error code per row keeps one failing ID from affecting later rows.
2873 UErrorCode ec
= U_ZERO_ERROR
;
2874 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
);
2875 if (U_FAILURE(ec
)) {
2876 errln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ")");
2880 expect(*t
, CharsToUnicodeString(DATA
[i
+1]),
2881 CharsToUnicodeString(DATA
[i
+2]));
2887 * Make sure parse errors reference the right line.
// Builds a transliterator from a rule string that is expected to be rejected
// and checks that the reported parse context contains the text d << b.
// The rule text itself is defined on lines not visible in this excerpt.
2889 void TransliteratorTest::TestParseError() {
2894 UErrorCode ec
= U_ZERO_ERROR
;
2896 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
// Failure is the expected outcome here.
2898 if (U_FAILURE(ec
)) {
// err is the pre-context and post-context joined with a vertical bar
// (character code 124) marking the error position.
2899 UnicodeString
err(pe
.preContext
);
2900 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2901 if (err
.indexOf("d << b") >= 0) {
2902 logln("Ok: " + err
);
2904 errln("FAIL: " + err
);
2908 errln("FAIL: no syntax error");
2912 * Make sure sets on output are disallowed.
// Builds a transliterator from a rule that uses a set variable on the output
// side. createFromRules() is expected to fail; on failure the parse context
// is logged, otherwise a test failure is reported.
2914 void TransliteratorTest::TestOutputSet() {
2915 UnicodeString rule
= "$set = [a-cm-n]; b > $set;";
2916 UErrorCode ec
= U_ZERO_ERROR
;
2918 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
// Failure is the expected outcome here.
2920 if (U_FAILURE(ec
)) {
// err is the pre-context and post-context joined with a vertical bar
// (character code 124); its content is only logged, not checked.
2921 UnicodeString
err(pe
.preContext
);
2922 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2923 logln("Ok: " + err
);
2926 errln("FAIL: No syntax error");
2930 * Test the use variable range pragma, making sure that use of
2931 * variable range characters is detected and flagged as an error.
// Declares a variable range of 0x70 to 0x72 and then uses q (0x71) as a
// literal in a rule. createFromRules() is expected to fail; on failure the
// parse context is logged, otherwise a test failure is reported.
2933 void TransliteratorTest::TestVariableRange() {
2934 UnicodeString rule
= "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2935 UErrorCode ec
= U_ZERO_ERROR
;
2937 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
// Failure is the expected outcome here.
2939 if (U_FAILURE(ec
)) {
// err is the pre-context and post-context joined with a vertical bar
// (character code 124); its content is only logged, not checked.
2940 UnicodeString
err(pe
.preContext
);
2941 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2942 logln("Ok: " + err
);
2945 errln("FAIL: No syntax error");
2949 * Test invalid post context error handling
// Builds a transliterator from a rule whose context markers are in the wrong
// order (closing brace before opening brace). createFromRules() is expected
// to fail and the reported parse context must contain the offending text.
2951 void TransliteratorTest::TestInvalidPostContext() {
2952 UnicodeString rule
= "a}b{c>d;";
2953 UErrorCode ec
= U_ZERO_ERROR
;
2955 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
// Failure is the expected outcome here.
2957 if (U_FAILURE(ec
)) {
// err is the pre-context and post-context joined with a vertical bar
// (character code 124) marking the error position.
2958 UnicodeString
err(pe
.preContext
);
2959 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2960 if (err
.indexOf("a}b{c") >= 0) {
2961 logln("Ok: " + err
);
2963 errln("FAIL: " + err
);
2967 errln("FAIL: No syntax error");
2971 * Test ID form variants
// Table-driven check of transliterator ID spellings: for each ID the test
// creates the transliterator and its inverse and compares both getID()
// results with the expected values, or expects creation to fail.
2973 void TransliteratorTest::TestIDForms() {
// Flat list of triples: ID to create, expected canonical ID, expected ID of
// the inverse. A NULL third entry marks the ID as expected to be invalid (see
// expValid below). A NULL second entry is handled by the expID == NULL branch
// whose body is not visible here -- presumably it falls back to ID; confirm.
2974 const char* DATA
[] = {
2976 "nfd", NULL
, "NFC", // make sure case is ignored
2977 "Any-NFKD", NULL
, "Any-NFKC",
2978 "Null", NULL
, "Null",
2979 "-nfkc", "nfkc", "NFKD",
2980 "-nfkc/", "nfkc", "NFKD",
2981 "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN",
2982 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2983 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2984 "Source-", NULL
, NULL
,
2985 "Source/Variant-", NULL
, NULL
,
2986 "Source-/Variant", NULL
, NULL
,
2987 "/Variant", NULL
, NULL
,
2988 "/Variant-", NULL
, NULL
,
2989 "-/Variant", NULL
, NULL
,
2994 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
// Step by 3 so that i always indexes the first entry of a triple.
2996 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
2997 const char* ID
= DATA
[i
];
2998 const char* expID
= DATA
[i
+1];
2999 const char* expInvID
= DATA
[i
+2];
3000 UBool expValid
= (expInvID
!= NULL
);
3001 if (expID
== NULL
) {
3005 UErrorCode ec
= U_ZERO_ERROR
;
3007 Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
// A creation failure is logged as Ok or reported as FAIL; the condition that
// selects between the two is on a line not visible here.
3008 if (U_FAILURE(ec
)) {
3010 logln((UnicodeString
)"Ok: getInstance(" + ID
+") => " + u_errorName(ec
));
3012 errln((UnicodeString
)"FAIL: Couldn't create " + ID
);
3017 Transliterator
*u
= t
->createInverse(ec
);
3018 if (U_FAILURE(ec
)) {
3019 errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
);
// Both the forward ID and the inverse ID must match their expected values.
3024 if (t
->getID() == expID
&&
3025 u
->getID() == expInvID
) {
3026 logln((UnicodeString
)"Ok: " + ID
+ ".getInverse() => " + expInvID
);
3028 errln((UnicodeString
)"FAIL: getInstance(" + ID
+ ") => " +
3029 t
->getID() + " x getInverse() => " + u
->getID() +
3030 ", expected " + expInvID
);
// Zero-terminated UChar strings used by checkRules() as findAndReplace()
// arguments: a single space (32), a line feed (10), a carriage return (13)
// and the empty string used as the replacement text.
3037 static const UChar SPACE
[] = {32,0};
3038 static const UChar NEWLINE
[] = {10,0};
3039 static const UChar RETURN
[] = {13,0};
3040 static const UChar EMPTY
[] = {0};
// Compares the rules regenerated by t2.toRules() with the reference text in
// testRulesForward after removing whitespace from both sides, and logs both
// texts when they differ.
//   label            - caller-supplied description of the check
//   t2               - transliterator whose rules are regenerated
//   testRulesForward - reference rule text
3042 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
,
3043 const UnicodeString
& testRulesForward
) {
3044 UnicodeString rules2
; t2
.toRules(rules2
, TRUE
);
3045 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
// Spaces, line feeds and carriage returns are removed from the generated text.
3046 rules2
.findAndReplace(SPACE
, EMPTY
);
3047 rules2
.findAndReplace(NEWLINE
, EMPTY
);
3048 rules2
.findAndReplace(RETURN
, EMPTY
);
// NOTE(review): only spaces are removed from the reference copy, so a
// reference text containing line breaks would never compare equal.
3050 UnicodeString
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
);
3052 if (rules2
!= testRules
) {
// The generated text is logged after stripping; the reference is logged in
// its original, unstripped form.
3054 logln((UnicodeString
)"GENERATED RULES: " + rules2
);
3055 logln((UnicodeString
)"SHOULD BE: " + testRulesForward
);
3060 * Mark's toRules test.
// Builds a forward and a reverse transliterator from one bidirectional rule
// set, checks one character in each direction, and then compares the output
// of toRules() for each direction with a reference text via checkRules().
3062 void TransliteratorTest::TestToRulesMark() {
// Rule source shared by both directions (only part of it is visible here).
3063 const char* testRules
=
3064 "::[[:Latin:][:Mark:]];"
3067 "a <> \\u03B1;" // alpha
3071 "::([[:Greek:][:Mark:]]);"
// Reference text for toRules() of the forward transliterator.
3073 const char* testRulesForward
=
3074 "::[[:Latin:][:Mark:]];"
// Reference text for toRules() of the reverse transliterator.
3082 const char* testRulesBackward
=
3083 "::[[:Greek:][:Mark:]];"
3090 UnicodeString source
= CharsToUnicodeString("\\u00E1"); // a-acute
3091 UnicodeString target
= CharsToUnicodeString("\\u03AC"); // alpha-acute
3094 UErrorCode ec
= U_ZERO_ERROR
;
// Same rule text, opposite directions; both calls share ec, so the check
// below reports a failure from either one.
3095 Transliterator
*t2
= Transliterator::createFromRules("source-target", testRules
, UTRANS_FORWARD
, pe
, ec
);
3096 Transliterator
*t3
= Transliterator::createFromRules("target-source", testRules
, UTRANS_REVERSE
, pe
, ec
);
3098 if (U_FAILURE(ec
)) {
3101 errln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
));
3105 expect(*t2
, source
, target
);
3106 expect(*t3
, target
, source
);
3108 checkRules("Failed toRules FORWARD", *t2
, testRulesForward
);
3109 checkRules("Failed toRules BACKWARD", *t3
, testRulesBackward
);
3116 * Test Escape and Unescape transliterators.
// Exercises the hex escaping transliterators: Hex-Any for decoding, and the
// C, Java and Perl variants of Any-Hex for encoding. The encoding cases all
// use the same three-character input (A, U+10BEEF, U+FEED); only the expected
// escape syntax differs. The expect() calls are partly outside this excerpt.
3118 void TransliteratorTest::TestEscape() {
3124 t
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
);
3125 if (U_FAILURE(ec
)) {
3126 errln((UnicodeString
)"FAIL: createInstance");
3129 "\\x{40}\\U000000312Q",
// C variant: the supplementary character is expected as one 8-digit escape.
3135 t
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
);
3136 if (U_FAILURE(ec
)) {
3137 errln((UnicodeString
)"FAIL: createInstance");
3140 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3141 "\\u0041\\U0010BEEF\\uFEED");
// Java variant: the supplementary character is expected as two 4-digit
// escapes.
3146 t
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
);
3147 if (U_FAILURE(ec
)) {
3148 errln((UnicodeString
)"FAIL: createInstance");
3151 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3152 "\\u0041\\uDBEF\\uDEEF\\uFEED");
// Perl variant: every character is expected in braces with no zero padding.
3157 t
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
);
3158 if (U_FAILURE(ec
)) {
3159 errln((UnicodeString
)"FAIL: createInstance");
3162 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3163 "\\x{41}\\x{10BEEF}\\x{FEED}");
// Builds a transliterator from two rules for the same letter, one of them
// anchored with a caret, and reports a detailed failure message if the rules
// are rejected. Nothing is transliterated in the visible lines; only rule
// construction is checked.
3169 void TransliteratorTest::TestAnchorMasking(){
3170 UnicodeString
rule ("^a > Q; a > q;");
3171 UErrorCode status
= U_ZERO_ERROR
;
3172 UParseError parseError
;
3174 Transliterator
* t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
);
3175 if(U_FAILURE(status
)){
// The message includes the parse line, offset and pre-context plus the rule
// text itself.
3176 errln(UnicodeString("FAIL: ") + "ID" +
3177 ".createFromRules() => bad rules" +
3178 /*", parse error " + parseError.code +*/
3179 ", line " + parseError
.line
+
3180 ", offset " + parseError
.offset
+
3181 ", context " + prettify(parseError
.preContext
, TRUE
) +
3182 ", rules: " + prettify(rule
, TRUE
));
3188 * Make sure display names of variants look reasonable.
// Table-driven check of Transliterator::getDisplayName() in the en_US locale,
// for each listed ID and for the ID of the transliterator created from it in
// the reverse direction.
3190 void TransliteratorTest::TestDisplayName() {
// With UCONFIG_NO_FORMATTING set the test only logs that it is skipped; the
// rest of the conditional is on lines not visible here.
3191 #if UCONFIG_NO_FORMATTING
3192 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3195 static const char* DATA
[] = {
3196 // ID, forward name, reverse name
3197 // Update the text as necessary -- the important thing is
3198 // not the text itself, but how various cases are handled.
3201 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3204 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3207 "NFC", "Any to NFC", "Any to NFD",
3210 int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
3212 Locale
US("en", "US");
// Step by 3 so that i always indexes the ID of a triple.
3214 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// Forward name: looked up directly from the ID string.
3216 Transliterator::getDisplayName(DATA
[i
], US
, name
);
3217 if (name
!= DATA
[i
+1]) {
3218 errln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " +
3219 name
+ ", expected " + DATA
[i
+1]);
3221 logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
);
3223 UErrorCode ec
= U_ZERO_ERROR
;
// Reverse name: create the transliterator with UTRANS_REVERSE and look up the
// display name of whatever ID it reports.
3225 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
);
3226 if (U_FAILURE(ec
)) {
3228 errln("FAIL: createInstance failed");
3231 name
= Transliterator::getDisplayName(t
->getID(), US
, name
);
3232 if (name
!= DATA
[i
+2]) {
3233 errln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " +
3234 name
+ ", expected " + DATA
[i
+2]);
3236 logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
);
// Table-driven test of assorted transliterators (normalization, case mapping,
// Greek/Latin variants). It first registers a few rule-based transliterators,
// then runs every test case through expect(), and finally unregisters what it
// registered.
3243 void TransliteratorTest::TestSpecialCases(void) {
// Pairs of (ID, rule text) for transliterators to register. The loops over
// this table stop at the first zero-length entry.
3244 const UnicodeString registerRules
[] = {
3245 "Any-Dev1", "x > X; y > Y;",
3246 "Any-Dev2", "XY > Z",
3248 CharsToUnicodeString
3249 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
// Triples of (transliterator ID, source text, expected target). An empty
// target means the expected value is computed in the loop below. The loop
// over this table stops at the first zero-length ID.
3253 const UnicodeString testCases
[] = {
3255 // should add more test cases
3256 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3257 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3258 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3259 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3262 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3263 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3265 // check for devanagari bug
3266 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3268 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3269 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3270 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE
+ DESERET_dee
,
3272 //TODO: enable this test once Titlecase works right
3274 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3275 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3277 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3278 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE
+ DESERET_DEE
,
3279 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3280 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee
+ DESERET_dee
,
3282 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3283 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3286 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3287 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3288 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3289 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3290 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3291 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3292 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3293 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3295 // Upper: TAT\\u02B9\\u00C2NA
3296 // Lower: tat\\u02B9\\u00E2na
3297 // Title: Tat\\u02B9\\u00E2na
3298 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3299 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3300 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3301 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3302 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3303 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
// Phase 1: build each rule-based transliterator and register it so the test
// cases can refer to it by ID.
3310 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3311 UErrorCode status
= U_ZERO_ERROR
;
3313 Transliterator
*t
= Transliterator::createFromRules(registerRules
[0+i
],
3314 registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
);
3315 if (U_FAILURE(status
)) {
3316 errln("Fails: Unable to create the transliterator from rules.");
3318 Transliterator::registerInstance(t
);
// Phase 2: run every test case.
3321 for (i
= 0; testCases
[i
].length()!=0; i
+=3) {
3322 UErrorCode ec
= U_ZERO_ERROR
;
3324 const UnicodeString
& name
= testCases
[i
];
3325 Transliterator
*t
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
);
3326 if (U_FAILURE(ec
)) {
3327 errln((UnicodeString
)"FAIL: Couldn't create " + name
);
// The ID reported by the created transliterator, not the table entry, selects
// how a missing target is computed.
3331 const UnicodeString
& id
= t
->getID();
3332 const UnicodeString
& source
= testCases
[i
+1];
3333 UnicodeString target
;
3335 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
// An explicit target wins; otherwise the target comes from Normalizer for the
// four normalization IDs, or from toLower()/toUpper() in the US locale for
// Lower and Upper. The comparisons on id are case-insensitive.
3337 if (testCases
[i
+2].length() > 0) {
3338 target
= testCases
[i
+2];
3339 } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) {
3340 Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
);
3341 } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) {
3342 Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
);
3343 } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) {
3344 Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
);
3345 } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) {
3346 Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
);
3347 } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) {
3349 target
.toLower(Locale::getUS());
3350 } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) {
3352 target
.toUpper(Locale::getUS());
3354 if (U_FAILURE(ec
)) {
3355 errln((UnicodeString
)"FAIL: Internal error normalizing " + source
);
3359 expect(*t
, source
, target
);
// Phase 3: remove the registrations made in phase 1.
3362 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3363 Transliterator::unregister(registerRules
[i
]);
// Writes a textual escape for code point ch into buffer, using one of two
// formats: backslash, lowercase u and at least four hex digits, or backslash,
// uppercase U and at least eight hex digits. The condition that selects the
// format is on a line not visible here.
//   ch     - code point to format
//   buffer - caller-provided output storage
// NOTE(review): sprintf is unbounded. The longer form needs 11 bytes including
// the terminator, so buffer must be at least that large.
3367 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) {
3369 sprintf(buffer
, "\\u%04x", (int)ch
);
3371 sprintf(buffer
, "\\U%08x", (int)ch
);
// Checks that case mapping works on the Deseret test letters DESERET_dee and
// DESERET_DEE: u_totitle() on the single code point, then u_strToUpper() and
// u_strToLower() on a two-letter string.
3376 void TransliteratorTest::TestSurrogateCasing (void) {
3377 // check that casing handles surrogates
3378 // titlecase is currently defective
// Read the first code point of DESERET_dee into dee.
3382 UTF_GET_CHAR(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
);
3383 UnicodeString
DEE(u_totitle(dee
));
3384 if (DEE
!= DESERET_DEE
) {
// On mismatch, print the input code point and the result as escapes.
3385 err("Fails titlecase of surrogates");
3386 err(Char32ToEscapedChars(dee
, buffer
));
3388 errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
));
// Mixed-case input plus the all-lowercase and all-uppercase strings it should
// map to.
3391 UnicodeString deeDEETest
=DESERET_dee
+ DESERET_DEE
;
3392 UnicodeString deedeeTest
= DESERET_dee
+ DESERET_dee
;
3393 UnicodeString DEEDEETest
= DESERET_DEE
+ DESERET_DEE
;
3394 UErrorCode status
= U_ZERO_ERROR
;
// buffer2 is passed with a capacity of 20 in both calls; its declaration is
// on a line not visible here.
3396 u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3397 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) {
3398 errln("Fails: Can't uppercase surrogates.");
// Reset status so the lowercase check is independent of the uppercase one.
3401 status
= U_ZERO_ERROR
;
3402 u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3403 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) {
3404 errln("Fails: Can't lowercase surrogates.");
// Helper: applies transliterator t to result in place by calling
// t.transliterate(result).
//   t      - transliterator to apply
//   src    - source text (how it reaches result is on a line not visible here)
//   result - receives the transliterated text
3408 static void _trans(Transliterator
& t
, const UnicodeString
& src
,
3409 UnicodeString
& result
) {
3411 t
.transliterate(result
);
3414 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
,
3415 UnicodeString
& result
, UErrorCode ec
) {
3417 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3418 if (U_SUCCESS(ec
)) {
3419 _trans(*t
, src
, result
);
3424 static UnicodeString
_findMatch(const UnicodeString
& source
,
3425 const UnicodeString
* pairs
) {
3426 UnicodeString empty
;
3427 for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) {
3428 if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) {
3435 // Check to see that incremental gets at least part way through a reasonable string.
3437 void TransliteratorTest::TestIncrementalProgress(void) {
3438 UErrorCode ec
= U_ZERO_ERROR
;
3439 UnicodeString latinTest
= "The Quick Brown Fox.";
3440 UnicodeString devaTest
;
3441 _trans("Latin-Devanagari", latinTest
, devaTest
, ec
);
3442 UnicodeString kataTest
;
3443 _trans("Latin-Katakana", latinTest
, kataTest
, ec
);
3444 if (U_FAILURE(ec
)) {
3445 errln("FAIL: Internal error");
3448 const UnicodeString tests
[] = {
3451 "Halfwidth", latinTest
,
3452 "Devanagari", devaTest
,
3453 "Katakana", kataTest
,
3457 UnicodeString
test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3458 int32_t i
= 0, j
=0, k
=0;
3459 int32_t sources
= Transliterator::countAvailableSources();
3460 for (i
= 0; i
< sources
; i
++) {
3461 UnicodeString source
;
3462 Transliterator::getAvailableSource(i
, source
);
3463 UnicodeString test
= _findMatch(source
, tests
);
3464 if (test
.length() == 0) {
3465 logln((UnicodeString
)"Skipping " + source
+ "-X");
3468 int32_t targets
= Transliterator::countAvailableTargets(source
);
3469 for (j
= 0; j
< targets
; j
++) {
3470 UnicodeString target
;
3471 Transliterator::getAvailableTarget(j
, source
, target
);
3472 int32_t variants
= Transliterator::countAvailableVariants(source
, target
);
3473 for (k
=0; k
< variants
; k
++) {
3474 UnicodeString variant
;
3476 UErrorCode status
= U_ZERO_ERROR
;
3478 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
3479 UnicodeString id
= source
+ "-" + target
+ "/" + variant
;
3481 if(id
.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31
)){
3484 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
);
3485 if (U_FAILURE(status
)) {
3486 errln((UnicodeString
)"FAIL: Could not create " + id
);
3490 status
= U_ZERO_ERROR
;
3491 CheckIncrementalAux(t
, test
);
3494 _trans(*t
, test
, rev
);
3495 Transliterator
*inv
= t
->createInverse(status
);
3496 if (U_FAILURE(status
)) {
3497 errln((UnicodeString
)"FAIL: Could not create inverse of " + id
);
3502 CheckIncrementalAux(inv
, rev
);
3510 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,
3511 const UnicodeString
& input
) {
3512 UErrorCode ec
= U_ZERO_ERROR
;
3514 UnicodeString test
= input
;
3516 pos
.contextStart
= 0;
3517 pos
.contextLimit
= input
.length();
3519 pos
.limit
= input
.length();
3521 t
->transliterate(test
, pos
, ec
);
3522 if (U_FAILURE(ec
)) {
3523 errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
));
3526 UBool gotError
= FALSE
;
3528 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3530 if (pos
.start
== 0 && pos
.limit
!= 0 && t
->getID() != "Hex-Any/Unicode") {
3531 errln((UnicodeString
)"No Progress, " +
3532 t
->getID() + ": " + formatInput(test
, input
, pos
));
3535 logln((UnicodeString
)"PASS Progress, " +
3536 t
->getID() + ": " + formatInput(test
, input
, pos
));
3538 t
->finishTransliteration(test
, pos
);
3539 if (pos
.start
!= pos
.limit
) {
3540 errln((UnicodeString
)"Incomplete, " +
3541 t
->getID() + ": " + formatInput(test
, input
, pos
));
3546 void TransliteratorTest::TestFunction() {
3547 // Careful with spacing and ';' here: Phrase this exactly
3548 // as toRules() is going to return it. If toRules() changes
3549 // with regard to spacing or ';', then adjust this string.
3550 UnicodeString rule
=
3551 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3554 UErrorCode ec
= U_ZERO_ERROR
;
3555 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3557 errln("FAIL: createFromRules failed");
3562 t
->toRules(r
, TRUE
);
3564 logln((UnicodeString
)"OK: toRules() => " + r
);
3566 errln((UnicodeString
)"FAIL: toRules() => " + r
+
3567 ", expected " + rule
);
3570 expect(*t
, "The Quick Brown Fox",
3571 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
3576 void TransliteratorTest::TestInvalidBackRef(void) {
3577 UnicodeString rule
= ". > $1;";
3578 UnicodeString rule2
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3580 UErrorCode ec
= U_ZERO_ERROR
;
3581 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3582 Transliterator
*t2
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
);
3585 errln("FAIL: createFromRules should have returned NULL");
3590 errln("FAIL: createFromRules should have returned NULL");
3594 if (U_SUCCESS(ec
)) {
3595 errln("FAIL: Ok: . > $1; => no error");
3597 logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
));
3601 void TransliteratorTest::TestMulticharStringSet() {
3608 " e } [{fg}] > r;" ;
3611 UErrorCode ec
= U_ZERO_ERROR
;
3612 Transliterator
* t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3613 if (t
== NULL
|| U_FAILURE(ec
)) {
3615 errln("FAIL: createFromRules failed");
3619 expect(*t
, "a aa ab bc d gd de gde gdefg ddefg",
3620 "y x yz z d gd de gdq gdqfg ddrfg");
3623 // Overlapped string test. Make sure that when multiple
3624 // strings can match that the longest one is matched.
3626 " [a {ab} {abc}] > x;"
3629 " q [t {st} {rst}] { e > p;" ;
3631 t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3632 if (t
== NULL
|| U_FAILURE(ec
)) {
3634 errln("FAIL: createFromRules failed");
3638 expect(*t
, "a ab abc qte qste qrste",
3639 "x x x qtp qstp qrstp");
3643 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3644 // BEGIN TestUserFunction support factory
3646 Transliterator
* _TUFF
[4];
3647 UnicodeString
* _TUFID
[4];
3649 static Transliterator
* U_EXPORT2
_TUFFactory(const UnicodeString
& /*ID*/,
3650 Transliterator::Token context
) {
3651 return _TUFF
[context
.integer
]->clone();
3654 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) {
3656 _TUFID
[n
] = new UnicodeString(ID
);
3657 Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
));
3660 static void _TUFUnreg(int32_t n
) {
3661 if (_TUFF
[n
] != NULL
) {
3662 Transliterator::unregister(*_TUFID
[n
]);
3668 // END TestUserFunction support factory
3669 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3672 * Test that user-registered transliterators can be used under function
3675 void TransliteratorTest::TestUserFunction() {
3679 UErrorCode ec
= U_ZERO_ERROR
;
3681 // Setup our factory
3683 for (i
=0; i
<4; ++i
) {
3687 // There's no need to register inverses if we don't use them
3688 t
= Transliterator::createFromRules("gif",
3689 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
3690 UTRANS_FORWARD
, pe
, ec
);
3691 if (t
== NULL
|| U_FAILURE(ec
)) {
3692 errln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
));
3695 _TUFReg("Any-gif", t
, 0);
3697 t
= Transliterator::createFromRules("RemoveCurly",
3698 "[\\{\\}] > ; '\\N' > ;",
3699 UTRANS_FORWARD
, pe
, ec
);
3700 if (t
== NULL
|| U_FAILURE(ec
)) {
3701 errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
));
3704 expect(*t
, "\\N{name}", "name");
3705 _TUFReg("Any-RemoveCurly", t
, 1);
3707 logln("Trying &hex");
3708 t
= Transliterator::createFromRules("hex2",
3710 UTRANS_FORWARD
, pe
, ec
);
3711 if (t
== NULL
|| U_FAILURE(ec
)) {
3712 errln("FAIL: createFromRules");
3715 logln("Registering");
3716 _TUFReg("Any-hex2", t
, 2);
3717 t
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
);
3718 if (t
== NULL
|| U_FAILURE(ec
)) {
3719 errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
));
3722 expect(*t
, "abc", "\\u0061\\u0062\\u0063");
3725 logln("Trying &gif");
3726 t
= Transliterator::createFromRules("gif2",
3727 "(.) > &Gif(&Hex2($1));",
3728 UTRANS_FORWARD
, pe
, ec
);
3729 if (t
== NULL
|| U_FAILURE(ec
)) {
3730 errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
));
3733 logln("Registering");
3734 _TUFReg("Any-gif2", t
, 3);
3735 t
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
);
3736 if (t
== NULL
|| U_FAILURE(ec
)) {
3737 errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
));
3740 expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3741 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3744 // Test that filters are allowed after &
3745 t
= Transliterator::createFromRules("test",
3746 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3747 UTRANS_FORWARD
, pe
, ec
);
3748 if (t
== NULL
|| U_FAILURE(ec
)) {
3749 errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
));
3753 "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
3757 for (i
=0; i
<4; ++i
) {
3763 * Test the Any-X transliterators.
3765 void TransliteratorTest::TestAnyX(void) {
3766 UParseError parseError
;
3767 UErrorCode status
= U_ZERO_ERROR
;
3768 Transliterator
* anyLatin
=
3769 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3771 errln("FAIL: createInstance returned NULL");
3777 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3778 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3784 * Test the source and target set API. These are only implemented
3785 * for RBT and CompoundTransliterator at this time.
3787 void TransliteratorTest::TestSourceTargetSet() {
3788 UErrorCode ec
= U_ZERO_ERROR
;
3796 UnicodeSet
expSrc("[arx{lu}]", ec
);
3799 UnicodeSet
expTrg("[bq]", ec
);
3802 Transliterator
* t
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
);
3804 if (U_FAILURE(ec
)) {
3806 errln("FAIL: Couldn't set up test");
3810 UnicodeSet src
; t
->getSourceSet(src
);
3811 UnicodeSet trg
; t
->getTargetSet(trg
);
3813 if (src
== expSrc
&& trg
== expTrg
) {
3815 logln((UnicodeString
)"Ok: " +
3816 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3817 ", target = " + trg
.toPattern(b
, TRUE
));
3819 UnicodeString a
, b
, c
, d
;
3820 errln((UnicodeString
)"FAIL: " +
3821 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3822 ", expected " + expSrc
.toPattern(b
, TRUE
) +
3823 "; target = " + trg
.toPattern(c
, TRUE
) +
3824 ", expected " + expTrg
.toPattern(d
, TRUE
));
3831 * Test handling of rule whitespace, for both RBT and UnicodeSet.
3833 void TransliteratorTest::TestRuleWhitespace() {
3835 const char* r
= "a > \\u200E b;";
3837 UErrorCode ec
= U_ZERO_ERROR
;
3839 Transliterator
* t
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
);
3841 if (U_FAILURE(ec
)) {
3842 errln("FAIL: Couldn't set up test");
3844 expect(*t
, "a", "b");
3850 UnicodeSet
set(CharsToUnicodeString("[a \\u200E]"), ec
);
3852 if (U_FAILURE(ec
)) {
3853 errln("FAIL: Couldn't set up test");
3855 if (set
.contains(0x200E)) {
3856 errln("FAIL: U+200E not being ignored by UnicodeSet");
3860 //======================================================================
3861 // this method is in TestUScript.java
3862 //======================================================================
3863 void TransliteratorTest::TestAllCodepoints(){
3864 UScriptCode code
= USCRIPT_INVALID_CODE
;
3865 char id
[256]={'\0'};
3866 char abbr
[256]={'\0'};
3867 char newId
[256]={'\0'};
3868 char newAbbrId
[256]={'\0'};
3869 char oldId
[256]={'\0'};
3870 char oldAbbrId
[256]={'\0'};
3872 UErrorCode status
=U_ZERO_ERROR
;
3875 for(uint32_t i
= 0; i
<=0x10ffff; i
++){
3876 code
= uscript_getScript(i
,&status
);
3877 if(code
== USCRIPT_INVALID_CODE
){
3878 errln("uscript_getScript for codepoint \\U%08X failed.\n", i
);
3880 const char* myId
= uscript_getName(code
);
3882 errln("Valid script code returned NULL name. Check your data!");
3885 uprv_strcpy(id
,myId
);
3886 uprv_strcpy(abbr
,uscript_getShortName(code
));
3888 uprv_strcpy(newId
,"[:");
3889 uprv_strcat(newId
,id
);
3890 uprv_strcat(newId
,":];NFD");
3892 uprv_strcpy(newAbbrId
,"[:");
3893 uprv_strcat(newAbbrId
,abbr
);
3894 uprv_strcat(newAbbrId
,":];NFD");
3896 if(uprv_strcmp(newId
,oldId
)!=0){
3897 Transliterator
* t
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
);
3898 if(t
==NULL
|| U_FAILURE(status
)){
3899 errln((UnicodeString
)"FAIL: Could not create " + id
);
3903 if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){
3904 Transliterator
* t
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
);
3905 if(t
==NULL
|| U_FAILURE(status
)){
3906 errln((UnicodeString
)"FAIL: Could not create " + id
);
3910 uprv_strcpy(oldId
,newId
);
3911 uprv_strcpy(oldAbbrId
, newAbbrId
);
/*
 * Creates the transliterator named by the string literal `id` and checks
 * that its dynamic class ID equals cls::getStaticClassID(). `id` must be
 * a string literal because it is concatenated into the failure message.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_SUCCESS(ec)) { \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
  } else { \
    errln("FAIL: Couldn't create " id); \
  } \
  delete t; \
}
/*
 * Same check as TEST_TRANSLIT_ID, but the transliterator is built from the
 * string literal `rule` with createFromRules(). `rule` must be a string
 * literal because it is concatenated into the failure message.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_SUCCESS(ec)) { \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
  } else { \
    errln("FAIL: Couldn't create " rule); \
  } \
  delete t; \
}
3946 void TransliteratorTest::TestBoilerplate() {
3947 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
);
3948 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
);
3949 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
);
3950 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
);
3951 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
);
3952 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
);
3953 TEST_TRANSLIT_ID("Null", NullTransliterator
);
3954 TEST_TRANSLIT_ID("Remove", RemoveTransliterator
);
3955 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
);
3956 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
);
3957 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
);
3958 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
);
3959 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
);
3962 void TransliteratorTest::TestAlternateSyntax() {
3967 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
3970 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
3971 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
3972 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3975 //======================================================================
3977 //======================================================================
3978 void TransliteratorTest::expectT(const UnicodeString
& id
,
3979 const UnicodeString
& source
,
3980 const UnicodeString
& expectedResult
) {
3981 UErrorCode ec
= U_ZERO_ERROR
;
3983 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3984 if (U_FAILURE(ec
)) {
3985 errln((UnicodeString
)"FAIL: Could not create " + id
);
3989 expect(*t
, source
, expectedResult
);
3993 void TransliteratorTest::expect(const UnicodeString
& rules
,
3994 const UnicodeString
& source
,
3995 const UnicodeString
& expectedResult
,
3996 UTransPosition
*pos
) {
3997 UErrorCode status
= U_ZERO_ERROR
;
3998 Transliterator
*t
= new RuleBasedTransliterator("<ID>", rules
, status
);
3999 if (U_FAILURE(status
)) {
4000 errln("FAIL: Transliterator constructor failed");
4002 expect(*t
, source
, expectedResult
, pos
);
4007 void TransliteratorTest::expect(const Transliterator
& t
,
4008 const UnicodeString
& source
,
4009 const UnicodeString
& expectedResult
,
4010 const Transliterator
& reverseTransliterator
) {
4011 expect(t
, source
, expectedResult
);
4012 expect(reverseTransliterator
, expectedResult
, source
);
4015 void TransliteratorTest::expect(const Transliterator
& t
,
4016 const UnicodeString
& source
,
4017 const UnicodeString
& expectedResult
,
4018 UTransPosition
*pos
) {
4020 UnicodeString
result(source
);
4021 t
.transliterate(result
);
4022 expectAux(t
.getID() + ":String", source
, result
, expectedResult
);
4025 UTransPosition index
={0, 0, 0, 0};
4030 UnicodeString
rsource(source
);
4032 t
.transliterate(rsource
);
4034 // Do it all at once -- below we do it incrementally
4035 t
.finishTransliteration(rsource
, *pos
);
4037 expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
);
4039 // Test keyboard (incremental) transliteration -- this result
4040 // must be the same after we finalize (see below).
4045 formatInput(log
, rsource
, index
);
4047 UErrorCode status
= U_ZERO_ERROR
;
4048 t
.transliterate(rsource
, index
, status
);
4049 formatInput(log
, rsource
, index
);
4051 for (int32_t i
=0; i
<source
.length(); ++i
) {
4055 log
.append(source
.charAt(i
)).append(" -> ");
4056 UErrorCode status
= U_ZERO_ERROR
;
4057 t
.transliterate(rsource
, index
, source
.charAt(i
), status
);
4058 formatInput(log
, rsource
, index
);
4062 // As a final step in keyboard transliteration, we must call
4063 // transliterate to finish off any pending partial matches that
4064 // were waiting for more input.
4065 t
.finishTransliteration(rsource
, index
);
4066 log
.append(" => ").append(rsource
);
4068 expectAux(t
.getID() + ":Keyboard", log
,
4069 rsource
== expectedResult
,
4075 * @param appendTo result is appended to this param.
4076 * @param input the string being transliterated
4077 * @param pos the index struct
4079 UnicodeString
& TransliteratorTest::formatInput(UnicodeString
&appendTo
,
4080 const UnicodeString
& input
,
4081 const UTransPosition
& pos
) {
4082 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4083 // the {} indicate the context start and limit, and the ||
4084 // indicate the start and limit.
4085 if (0 <= pos
.contextStart
&&
4086 pos
.contextStart
<= pos
.start
&&
4087 pos
.start
<= pos
.limit
&&
4088 pos
.limit
<= pos
.contextLimit
&&
4089 pos
.contextLimit
<= input
.length()) {
4091 UnicodeString a
, b
, c
, d
, e
;
4092 input
.extractBetween(0, pos
.contextStart
, a
);
4093 input
.extractBetween(pos
.contextStart
, pos
.start
, b
);
4094 input
.extractBetween(pos
.start
, pos
.limit
, c
);
4095 input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
);
4096 input
.extractBetween(pos
.contextLimit
, input
.length(), e
);
4097 appendTo
.append(a
).append((UChar
)123/*{*/).append(b
).
4098 append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
).
4099 append((UChar
)125/*}*/).append(e
);
4101 appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" +
4102 pos
.contextStart
+ ", s=" + pos
.start
+ ", l=" +
4103 pos
.limit
+ ", cl=" + pos
.contextLimit
+ "} on " +
4109 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4110 const UnicodeString
& source
,
4111 const UnicodeString
& result
,
4112 const UnicodeString
& expectedResult
) {
4113 expectAux(tag
, source
+ " -> " + result
,
4114 result
== expectedResult
,
4118 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4119 const UnicodeString
& summary
, UBool pass
,
4120 const UnicodeString
& expectedResult
) {
4122 logln(UnicodeString("(")+tag
+") " + prettify(summary
));
4124 errln(UnicodeString("FAIL: (")+tag
+") "
4126 + ", expected " + prettify(expectedResult
));
4130 #endif /* #if !UCONFIG_NO_TRANSLITERATION */