1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 1999-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/10/99 aliu Creation.
10 **********************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION
18 #include "unicode/locid.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/translit.h"
22 #include "unicode/uchar.h"
23 #include "unicode/unifilt.h"
24 #include "unicode/uniset.h"
25 #include "unicode/ustring.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/uscript.h"
28 #include "unicode/utf16.h"
47 /***********************************************************************
49 HOW TO USE THIS TEST FILE
51 How I developed on two platforms
52 without losing (too much of) my mind
55 1. Add new tests by copying/pasting/changing existing tests. On Java,
56 any public void method named Test...() taking no parameters becomes
57 a test. On C++, you need to modify the header and add a line to
58 the runIndexedTest() dispatch method.
60 2. Make liberal use of the expect() method; it is your friend.
62 3. The tests in this file exactly match those in a sister file on the
63 other side. The two files are:
65 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
66 icu4c: source/test/intltest/transtst.cpp
68 ==> THIS IS THE IMPORTANT PART <==
70 When you add a test in this file, add it in TransliteratorTest.java
71 too. Give it the same name and put it in the same relative place.
72 This makes maintenance a lot simpler for any poor soul who ends up
73 trying to synchronize the tests between icu4j and icu4c.
75 4. If you MUST enter a test that is NOT paralleled in the sister file,
76 then add it in the special non-mirrored section. These are
85 Make sure you document the reason the test is here and not there.
90 ***********************************************************************/
92 // Define character constants thusly to be EBCDIC-friendly
// Named UTF-16 code unit constants (LEFT_BRACE, PIPE, ZERO, UPPER_A) written
// as numeric code points cast to UChar instead of character literals.
// NOTE(review): presumably enumerators of an anonymous enum; the enum header
// and closing brace are not present in this listing - confirm upstream.
94 LEFT_BRACE
=((UChar
)0x007B), /*{*/
95 PIPE
=((UChar
)0x007C), /*|*/
96 ZERO
=((UChar
)0x0030), /*0*/
97 UPPER_A
=((UChar
)0x0041) /*A*/
// Constructor: initializes the members DESERET_DEE and DESERET_dee with the
// supplementary code points U+10414 and U+1043C.
// NOTE(review): the constructor body braces are missing from this listing.
100 TransliteratorTest::TransliteratorTest()
101 : DESERET_DEE((UChar32
)0x10414),
102 DESERET_dee((UChar32
)0x1043C)
// Destructor: empty body, nothing is released here.
106 TransliteratorTest::~TransliteratorTest() {}
// Test dispatcher: associates each numeric index (0 through 85, consecutive)
// with one test method via the TESTCASE macro. Any other index falls to the
// default label, which sets name to the empty string.
// The last parameter (par) is unused.
// NOTE(review): index 65 (TestIncrementalProgress) sits behind
// !UCONFIG_NO_FILE_IO; the matching #endif, the switch header and the
// return type line are not visible in this listing - confirm upstream.
109 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
,
110 const char* &name
, char* /*par*/) {
112 TESTCASE(0,TestInstantiation
);
113 TESTCASE(1,TestSimpleRules
);
114 TESTCASE(2,TestRuleBasedInverse
);
115 TESTCASE(3,TestKeyboard
);
116 TESTCASE(4,TestKeyboard2
);
117 TESTCASE(5,TestKeyboard3
);
118 TESTCASE(6,TestArabic
);
119 TESTCASE(7,TestCompoundKana
);
120 TESTCASE(8,TestCompoundHex
);
121 TESTCASE(9,TestFiltering
);
122 TESTCASE(10,TestInlineSet
);
123 TESTCASE(11,TestPatternQuoting
);
124 TESTCASE(12,TestJ277
);
125 TESTCASE(13,TestJ243
);
126 TESTCASE(14,TestJ329
);
127 TESTCASE(15,TestSegments
);
128 TESTCASE(16,TestCursorOffset
);
129 TESTCASE(17,TestArbitraryVariableValues
);
130 TESTCASE(18,TestPositionHandling
);
131 TESTCASE(19,TestHiraganaKatakana
);
132 TESTCASE(20,TestCopyJ476
);
133 TESTCASE(21,TestAnchors
);
134 TESTCASE(22,TestInterIndic
);
135 TESTCASE(23,TestFilterIDs
);
136 TESTCASE(24,TestCaseMap
);
137 TESTCASE(25,TestNameMap
);
138 TESTCASE(26,TestLiberalizedID
);
139 TESTCASE(27,TestCreateInstance
);
140 TESTCASE(28,TestNormalizationTransliterator
);
141 TESTCASE(29,TestCompoundRBT
);
142 TESTCASE(30,TestCompoundFilter
);
143 TESTCASE(31,TestRemove
);
144 TESTCASE(32,TestToRules
);
145 TESTCASE(33,TestContext
);
146 TESTCASE(34,TestSupplemental
);
147 TESTCASE(35,TestQuantifier
);
148 TESTCASE(36,TestSTV
);
149 TESTCASE(37,TestCompoundInverse
);
150 TESTCASE(38,TestNFDChainRBT
);
151 TESTCASE(39,TestNullInverse
);
152 TESTCASE(40,TestAliasInverseID
);
153 TESTCASE(41,TestCompoundInverseID
);
154 TESTCASE(42,TestUndefinedVariable
);
155 TESTCASE(43,TestEmptyContext
);
156 TESTCASE(44,TestCompoundFilterID
);
157 TESTCASE(45,TestPropertySet
);
158 TESTCASE(46,TestNewEngine
);
159 TESTCASE(47,TestQuantifiedSegment
);
160 TESTCASE(48,TestDevanagariLatinRT
);
161 TESTCASE(49,TestTeluguLatinRT
);
162 TESTCASE(50,TestCompoundLatinRT
);
163 TESTCASE(51,TestSanskritLatinRT
);
164 TESTCASE(52,TestLocaleInstantiation
);
165 TESTCASE(53,TestTitleAccents
);
166 TESTCASE(54,TestLocaleResource
);
167 TESTCASE(55,TestParseError
);
168 TESTCASE(56,TestOutputSet
);
169 TESTCASE(57,TestVariableRange
);
170 TESTCASE(58,TestInvalidPostContext
);
171 TESTCASE(59,TestIDForms
);
172 TESTCASE(60,TestToRulesMark
);
173 TESTCASE(61,TestEscape
);
174 TESTCASE(62,TestAnchorMasking
);
175 TESTCASE(63,TestDisplayName
);
176 TESTCASE(64,TestSpecialCases
);
177 #if !UCONFIG_NO_FILE_IO
178 TESTCASE(65,TestIncrementalProgress
);
180 TESTCASE(66,TestSurrogateCasing
);
181 TESTCASE(67,TestFunction
);
182 TESTCASE(68,TestInvalidBackRef
);
183 TESTCASE(69,TestMulticharStringSet
);
184 TESTCASE(70,TestUserFunction
);
185 TESTCASE(71,TestAnyX
);
186 TESTCASE(72,TestSourceTargetSet
);
187 TESTCASE(73,TestGurmukhiDevanagari
);
188 TESTCASE(74,TestPatternWhiteSpace
);
189 TESTCASE(75,TestAllCodepoints
);
190 TESTCASE(76,TestBoilerplate
);
191 TESTCASE(77,TestAlternateSyntax
);
192 TESTCASE(78,TestBeginEnd
);
193 TESTCASE(79,TestBeginEndToRules
);
194 TESTCASE(80,TestRegisterAlias
);
195 TESTCASE(81,TestRuleStripping
);
196 TESTCASE(82,TestHalfwidthFullwidth
);
197 TESTCASE(83,TestThai
);
198 TESTCASE(84,TestAny
);
199 TESTCASE(85,TestHansHant
);
200 default: name
= ""; break;
205 * Make sure every system transliterator can be instantiated.
207 * ALSO test that the result of toRules() for each rule is a valid
208 * rule. Do this here so we don't have to have another test that
209 * instantiates everything as well.
// Walks every ID reported by Transliterator::getAvailableIDs(), cross-checks
// it against getAvailableID(i), instantiates it with createInstance(), and
// feeds the text produced by toRules() back through createFromRules().
// After the loop it expects snext() to return NULL, then verifies that a
// bogus ID does not yield a transliterator.
// NOTE(review): id is bound by dereferencing the pointer returned by snext()
// before the (&id)!=NULL check runs; if snext() can return NULL that
// dereference is already undefined behavior - confirm and consider testing
// the pointer itself.
// NOTE(review): several condition and brace lines are missing from this
// listing (the embedded numbering skips), so branch structure is not fully
// visible here.
211 void TransliteratorTest::TestInstantiation() {
212 UErrorCode ec
= U_ZERO_ERROR
;
213 StringEnumeration
* avail
= Transliterator::getAvailableIDs(ec
);
214 assertSuccess("getAvailableIDs()", ec
);
215 assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
);
216 int32_t n
= Transliterator::countAvailableIDs();
217 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
218 avail
->count(ec
) == n
);
219 assertSuccess("count()", ec
);
221 for (int32_t i
=0; i
<n
; ++i
) {
222 const UnicodeString
& id
= *avail
->snext(ec
);
223 if (!assertSuccess("snext()", ec
) ||
224 !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) {
227 UnicodeString id2
= Transliterator::getAvailableID(i
);
228 if (id
.length() < 1) {
229 errln(UnicodeString("FAIL: getAvailableID(") +
230 i
+ ") returned empty string");
234 errln(UnicodeString("FAIL: getAvailableID(") +
235 i
+ ") != getAvailableIDs().snext()");
238 UParseError parseError
;
239 UErrorCode status
= U_ZERO_ERROR
;
240 Transliterator
* t
= Transliterator::createInstance(id
,
241 UTRANS_FORWARD
, parseError
,status
);
243 Transliterator::getDisplayName(id
, name
);
245 #if UCONFIG_NO_BREAK_ITERATION
246 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
247 if (id
.compare((UnicodeString
)"Thai-Latn") != 0 &&
248 id
.compare((UnicodeString
)"Thai-Latin") != 0)
250 dataerrln(UnicodeString("FAIL: Couldn't create ") + id
+
251 /*", parse error " + parseError.code +*/
252 ", line " + parseError
.line
+
253 ", offset " + parseError
.offset
+
254 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
255 ", post-context " +prettify(parseError
.postContext
,TRUE
) +
256 ", Error: " + u_errorName(status
));
257 // When createInstance fails, it deletes the failing
258 // entry from the available ID list. We detect this
259 // here by looking for a change in countAvailableIDs.
260 int32_t nn
= Transliterator::countAvailableIDs();
263 --i
; // Compensate for deleted entry
266 logln(UnicodeString("OK: ") + name
+ " (" + id
+ ")");
270 t
->toRules(rules
, TRUE
);
271 Transliterator
*u
= Transliterator::createFromRules("x",
272 rules
, UTRANS_FORWARD
, parseError
,status
);
274 errln(UnicodeString("FAIL: ") + id
+
275 ".createFromRules() => bad rules" +
276 /*", parse error " + parseError.code +*/
277 ", line " + parseError
.line
+
278 ", offset " + parseError
.offset
+
279 ", context " + prettify(parseError
.preContext
, TRUE
) +
280 ", rules: " + prettify(rules
, TRUE
));
287 assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
);
288 assertSuccess("snext()", ec
);
291 // Now test the failure path
292 UParseError parseError
;
293 UErrorCode status
= U_ZERO_ERROR
;
294 UnicodeString
id("<Not a valid Transliterator ID>");
295 Transliterator
* t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
297 errln("FAIL: " + id
+ " returned a transliterator");
300 logln("OK: Bogus ID handled properly");
// Exercises small rule sets through expect(), then builds a rule-based
// transliterator with createFromRules() (forward direction) and checks one
// fixed input/output pair. Construction failure is reported via dataerrln.
// NOTE(review): most of the rule text and the expect() arguments are missing
// from this listing; only fragments of the rule strings are visible.
304 void TransliteratorTest::TestSimpleRules(void) {
305 /* Example: rules 1. ab>x|y
308 * []|eabcd start - no match, copy e to tranlated buffer
309 * [e]|abcd match rule 1 - copy output & adjust cursor
310 * [ex|y]cd match rule 2 - copy output & adjust cursor
311 * [exz]|d no match, copy d to transliterated buffer
314 expect(UnicodeString("ab>x|y;", "") +
318 /* Another set of rules:
330 expect(UnicodeString("ab>x|yzacw;") +
338 UErrorCode status
= U_ZERO_ERROR
;
339 UParseError parseError
;
340 Transliterator
*t
= Transliterator::createFromRules(
342 UnicodeString("$dummy=").append((UChar
)0xE100) +
344 "$vowel=[aeiouAEIOU];"
346 "$vowel } $lu > '!';"
351 UTRANS_FORWARD
, parseError
,
353 if (U_FAILURE(status
)) {
354 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status
));
357 expect(*t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
362 * Test inline set syntax and set variable syntax.
// Checks rules that embed set patterns directly in a rule and rules that go
// through set-valued variables, including a set built from other variables
// and a negated set (the two lines marked with ***).
// NOTE(review): the definitions of $digit and $alpha are not visible in this
// listing.
364 void TransliteratorTest::TestInlineSet(void) {
365 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
366 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
368 expect(UnicodeString(
371 "$alphanumeric = [$digit $alpha];" // ***
372 "$special = [^$alphanumeric];" // ***
373 "$alphanumeric > '-';"
374 "$special > '*';", ""),
376 "thx-1138", "---*----");
380 * Create some inverses and confirm that they work. We have to be
381 * careful how we do this, since the inverses will not be true
382 * inverses -- we can't throw any random string at the composition
383 * of the transliterators and expect the identity function. F x
384 * F' != I. However, if we are careful about the input, we will
385 * get the expected results.
// Builds a forward and a reverse transliterator from the same RULES text and
// checks every DATA pair in both directions: fwd maps the first string of a
// pair to the second, rev maps the second back to the first.
// Both createFromRules() calls share one status variable, so a single
// U_FAILURE check covers both.
387 void TransliteratorTest::TestRuleBasedInverse(void) {
388 UnicodeString RULES
=
389 UnicodeString("abc>zyx;") +
407 const char* DATA
[] = {
408 // Careful here -- random strings will not work. If we keep
409 // the left side to the domain and the right side to the range
410 // we will be okay though (left, abc; right xyz).
412 "abcacab", "zyxxxyy",
416 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
418 UErrorCode status
= U_ZERO_ERROR
;
419 UParseError parseError
;
420 Transliterator
*fwd
= Transliterator::createFromRules("<ID>", RULES
,
421 UTRANS_FORWARD
, parseError
, status
);
422 Transliterator
*rev
= Transliterator::createFromRules("<ID>", RULES
,
423 UTRANS_REVERSE
, parseError
, status
);
424 if (U_FAILURE(status
)) {
425 errln("FAIL: RBT constructor failed");
428 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
429 expect(*fwd
, DATA
[i
], DATA
[i
+1]);
430 expect(*rev
, DATA
[i
+1], DATA
[i
]);
437 * Basic test of keyboard.
// Creates a rule-based transliterator and drives it through keyboardAux()
// with a table of pairs (text to append, expected buffer). A 0 in the first
// column asks for the keyboard transliteration to be finished.
// NOTE(review): most of the rule text and DATA rows are missing from this
// listing.
439 void TransliteratorTest::TestKeyboard(void) {
440 UParseError parseError
;
441 UErrorCode status
= U_ZERO_ERROR
;
442 Transliterator
*t
= Transliterator::createFromRules("<ID>",
443 UnicodeString("psch>Y;")
447 UTRANS_FORWARD
, parseError
,
449 if (U_FAILURE(status
)) {
450 errln("FAIL: RBT constructor failed");
453 const char* DATA
[] = {
461 0, "AycAY", // null means finishKeyboardTransliteration
464 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
469 * Basic test of keyboard with cursor.
// Same shape as TestKeyboard but with a different rule set. The visible DATA
// rows carry expectations that were changed for rollback; the earlier
// expected values are kept in the trailing comments.
// A 0 in the first column asks for the keyboard transliteration to be
// finished.
471 void TransliteratorTest::TestKeyboard2(void) {
472 UParseError parseError
;
473 UErrorCode status
= U_ZERO_ERROR
;
474 Transliterator
*t
= Transliterator::createFromRules("<ID>",
475 UnicodeString("ych>Y;")
479 UTRANS_FORWARD
, parseError
,
481 if (U_FAILURE(status
)) {
482 errln("FAIL: RBT constructor failed");
485 const char* DATA
[] = {
489 "s", "Aps", // modified for rollback - "Ay",
490 "c", "Apsc", // modified for rollback - "Ayc",
493 "s", "AycAps", // modified for rollback - "AycAy",
494 "c", "AycApsc", // modified for rollback - "AycAyc",
496 0, "AycAY", // null means finishKeyboardTransliteration
499 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
504 * Test keyboard transliteration with back-replacement.
// Keyboard test for rules that rewrite earlier output: the first rule leaves
// the cursor in front of its own replacement (t>|y) so that a later rule can
// match it again. DATA is handed to keyboardAux() as pairs of
// (text to append, expected buffer); a 0 entry finishes the transliteration.
// NOTE(review): the remaining rules and several DATA rows are missing from
// this listing.
506 void TransliteratorTest::TestKeyboard3(void) {
507 // We want th>z but t>y. Furthermore, during keyboard
508 // transliteration we want t>y then yh>z if t, then h are
510 UnicodeString
RULES("t>|y;"
513 const char* DATA
[] = {
514 // Column 1: characters to add to buffer (as if typed)
515 // Column 2: expected appearance of buffer after
516 // keyboard xliteration.
519 "t", "abt", // modified for rollback - "aby",
521 "t", "abyct", // modified for rollback - "abycy",
523 0, "abycz", // null means finishKeyboardTransliteration
526 UParseError parseError
;
527 UErrorCode status
= U_ZERO_ERROR
;
528 Transliterator
*t
= Transliterator::createFromRules("<ID>", RULES
, UTRANS_FORWARD
, parseError
, status
);
529 if (U_FAILURE(status
)) {
530 errln("FAIL: RBT constructor failed");
533 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
// Shared driver for the keyboard tests.
//   t           - transliterator under test
//   DATA        - flat array of pairs: text to insert, expected buffer
//   DATA_length - number of entries in DATA (not number of pairs)
// The position struct starts at all zeros and is carried across iterations.
// Each step either passes DATA[i] to t.transliterate() or calls
// t.finishTransliteration(); the buffer is then split at index.contextStart
// and index.start for the log line, and compared with DATA[i+1]. A step only
// passes when the strings match and status is still a success code.
// NOTE(review): the condition choosing between transliterate() and
// finishTransliteration() is missing from this listing; presumably it tests
// DATA[i] against 0 - confirm upstream.
537 void TransliteratorTest::keyboardAux(const Transliterator
& t
,
538 const char* DATA
[], int32_t DATA_length
) {
539 UErrorCode status
= U_ZERO_ERROR
;
540 UTransPosition index
={0, 0, 0, 0};
542 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
548 t
.transliterate(s
, index
, DATA
[i
], status
);
551 t
.finishTransliteration(s
, index
);
553 // Show the start index '{' and the cursor '|'
554 UnicodeString a
, b
, c
;
555 s
.extractBetween(0, index
.contextStart
, a
);
556 s
.extractBetween(index
.contextStart
, index
.start
, b
);
557 s
.extractBetween(index
.start
, s
.length(), c
);
559 append((UChar
)LEFT_BRACE
).
563 if (s
== DATA
[i
+1] && U_SUCCESS(status
)) {
566 errln(UnicodeString("FAIL: ") + log
+ ", expected " + DATA
[i
+1]);
// Disabled test: every visible statement in the body is commented out, so
// calling it does nothing. The commented code built an Arabic expected
// string and compared it with the output of Latin-Arabic.
571 void TransliteratorTest::TestArabic(void) {
572 // Test disabled for 2.0 until new Arabic transliterator can be written.
574 // const char* DATA[] = {
575 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
576 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
577 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
578 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
579 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
580 // "\u062c\u0645\u064a\u0644\u0629",
584 // UChar ar_raw[] = {
585 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
586 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
587 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
588 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
589 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
590 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
592 // UnicodeString ar(ar_raw);
593 // UErrorCode status=U_ZERO_ERROR;
594 // UParseError parseError;
595 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
597 // errln("FAIL: createInstance failed");
600 // expect(*t, "Arabic", ar);
605 * Compose the Kana transliterator forward and reverse and try
606 * some strings that should come out unchanged.
// Creates the compound ID Latin-Hiragana;Hiragana-Latin and expects the
// sample input aaaaa to come back unchanged. A construction failure is
// reported through dataerrln together with the error name.
// NOTE(review): the condition guarding the dataerrln call is missing from
// this listing.
608 void TransliteratorTest::TestCompoundKana(void) {
609 UParseError parseError
;
610 UErrorCode status
= U_ZERO_ERROR
;
611 Transliterator
* t
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
);
613 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status
));
615 expect(*t
, "aaaaa", "aaaaa");
621 * Compose the hex transliterators forward and reverse.
// Creates Any-Hex (a) and Hex-Any (b), checks each one on its own, then
// wraps them in CompoundTransliterator objects in both orders (a,b and b,a).
// For the b,a order the input is first run through a, and that text is
// expected to pass through ba unchanged.
// NOTE(review): ab and ba are allocated with new; the cleanup code and the
// check performed with ab are not visible in this listing.
623 void TransliteratorTest::TestCompoundHex(void) {
624 UParseError parseError
;
625 UErrorCode status
= U_ZERO_ERROR
;
626 Transliterator
* a
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
627 Transliterator
* b
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
);
628 Transliterator
* transab
[] = { a
, b
};
629 Transliterator
* transba
[] = { b
, a
};
630 if (a
== 0 || b
== 0) {
631 errln("FAIL: construction failed");
636 // Do some basic tests of a
637 expect(*a
, "01", UnicodeString("\\u0030\\u0031", ""));
638 // Do some basic tests of b
639 expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01");
641 Transliterator
* ab
= new CompoundTransliterator(transab
, 2);
642 UnicodeString
s("abcde", "");
645 UnicodeString
str(s
);
646 a
->transliterate(str
);
647 Transliterator
* ba
= new CompoundTransliterator(transba
, 2);
648 expect(*ba
, str
, str
);
// Only the address of this variable is used: TestFilter::getDynamicClassID()
// returns it as the class ID. The stored value 0 is never read in the
// visible code.
656 int gTestFilterClassID
= 0;
658 * Used by TestFiltering().
// UnicodeFilter used by TestFiltering(). contains() accepts every code point
// except U+0063 (lowercase c). clone() returns a heap copy of this object,
// addMatchSetTo() does nothing, and getDynamicClassID() returns the address
// of gTestFilterClassID.
// NOTE(review): the bodies of toPattern() and matchesIndexValue() are not
// visible in this listing.
660 class TestFilter
: public UnicodeFilter
{
661 virtual TestFilter
* clone() const {
662 return new TestFilter(*this);
664 virtual UBool
contains(UChar32 c
) const {
665 return c
!= (UChar
)0x0063 /*c*/;
668 virtual UnicodeString
& toPattern(UnicodeString
& result
,
669 UBool
/*escapeUnprintable*/) const {
672 virtual UBool
matchesIndexValue(uint8_t /*v*/) const {
675 virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {}
677 UClassID
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; }
681 * Do some basic tests of filtering.
// Installs a TestFilter on an Any-Hex transliterator via adoptFilter(),
// transliterates abcde and compares with an expected string in which every
// letter except c is hex-escaped. Afterwards orphanFilter() is called and a
// missing filter is reported as an error.
// NOTE(review): the mismatch message starts with FAIL but is sent through
// logln, not errln, so a wrong result would presumably not fail the test -
// confirm whether that is intentional.
683 void TransliteratorTest::TestFiltering(void) {
684 UParseError parseError
;
685 UErrorCode status
= U_ZERO_ERROR
;
686 Transliterator
* hex
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
688 errln("FAIL: createInstance(Any-Hex) failed");
691 hex
->adoptFilter(new TestFilter());
692 UnicodeString
s("abcde");
693 hex
->transliterate(s
);
694 UnicodeString
exp("\\u0061\\u0062c\\u0064\\u0065", "");
696 logln(UnicodeString("Ok: \"") + exp
+ "\"");
698 logln(UnicodeString("FAIL: \"") + s
+ "\", wanted \"" + exp
+ "\"");
701 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
702 UnicodeFilter
*f
= hex
->orphanFilter();
704 errln("FAIL: orphanFilter() should get a UnicodeFilter");
// Checks rules that use the ^ (start) and $ (end) anchors, both written
// directly in a rule and placed inside a set variable ($s).
// NOTE(review): most rule text, inputs and expected outputs are missing from
// this listing.
714 void TransliteratorTest::TestAnchors(void) {
715 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
718 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
721 expect(UnicodeString("^ab > 01 ;"
729 expect(UnicodeString("$s = [z$] ;"
736 "abzababbabxzabxabx",
741 * Test pattern quoting and escape mechanisms.
// Table-driven: DATA holds triples of rules, input and expected output. A
// transliterator is built from DATA[i] and checked with expect() against
// DATA[i+1] and DATA[i+2].
// NOTE(review): the loop bound is the literal 3 rather than the array
// length, so only the first triple runs; update it if more rows are added.
743 void TransliteratorTest::TestPatternQuoting(void) {
745 // Each item is <rules>, <input>, <expected output>
746 const UnicodeString DATA
[] = {
747 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
748 UnicodeString(UChar(0x4E01)),
752 for (int32_t i
=0; i
<3; i
+=3) {
753 logln(UnicodeString("Pattern: ") + prettify(DATA
[i
]));
754 UParseError parseError
;
755 UErrorCode status
= U_ZERO_ERROR
;
756 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
757 if (U_FAILURE(status
)) {
758 errln("RBT constructor failed");
760 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
767 * Regression test for bugs found in Greek transliteration.
// Regression checks for Greek to Latin. Two short Greek strings (syn, sayn)
// are run through the compound Greek-Latin; NFD; [:M:]Remove; NFC and
// through a reduced rule set (mini, created in the REVERSE direction).
// When formatting support is compiled in, the month names from
// DateFormatSymbols are transliterated as well: if a name starts with an
// uppercase letter followed by a lowercase one, the output must too.
// NOTE(review): the failure message after constructing DateFormatSymbols
// repeats the text Transliterator constructor failed, which looks like a
// copy-paste leftover.
// NOTE(review): the early return statements leave gl (and mini) undeleted
// unless they are released elsewhere; cleanup is not visible in this listing.
769 void TransliteratorTest::TestJ277(void) {
770 UErrorCode status
= U_ZERO_ERROR
;
771 UParseError parseError
;
772 Transliterator
*gl
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
);
774 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status
));
779 UChar upsilon
= 0x3C5;
781 // UChar PHI = 0x3A6;
783 // UChar omega = 0x3C9;
784 // UChar omicron = 0x3BF;
785 // UChar epsilon = 0x3B5;
787 // sigma upsilon nu -> syn
789 syn
.append(sigma
).append(upsilon
).append(nu
);
790 expect(*gl
, syn
, "syn");
792 // sigma alpha upsilon nu -> saun
794 sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
);
795 expect(*gl
, sayn
, "saun");
797 // Again, using a smaller rule set
802 "$ypsilon = \\u03C5;"
803 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
806 "u <> $vowel { $ypsilon;"
810 Transliterator
*mini
= Transliterator::createFromRules("mini", rules
, UTRANS_REVERSE
, parseError
, status
);
811 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
812 expect(*mini
, syn
, "syn");
813 expect(*mini
, sayn
, "saun");
817 #if !UCONFIG_NO_FORMATTING
818 // Transliterate the Greek locale data
820 DateFormatSymbols
syms(el
, status
);
821 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
823 const UnicodeString
* data
= syms
.getMonths(count
);
824 for (i
=0; i
<count
; ++i
) {
825 if (data
[i
].length() == 0) {
828 UnicodeString
out(data
[i
]);
829 gl
->transliterate(out
);
831 if (data
[i
].length() >= 2 && out
.length() >= 2 &&
832 u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) {
833 if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) {
838 logln(prettify(data
[i
] + " -> " + out
));
840 errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
));
849 * Prefix, suffix support in hex transliterators
// Checks that the default Hex-Any transliterator decodes all of the escape
// prefixes named in the comment inside the body, using one mixed input
// string. The expect() call only runs when createInstance() succeeded.
// The custom-pattern cases that follow are commented out.
851 void TransliteratorTest::TestJ243(void) {
852 UErrorCode ec
= U_ZERO_ERROR
;
854 // Test default Hex-Any, which should handle
855 // \u, \U, u+, and U+
856 Transliterator
*hex
=
857 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
);
858 if (assertSuccess("getInstance", ec
)) {
859 expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
863 // // Try a custom Hex-Unicode
864 // // \uXXXX and &#xXXXX;
865 // ec = U_ZERO_ERROR;
866 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
867 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
868 // "abcd5fx0123");
869 // // Try custom Any-Hex (default is tested elsewhere)
870 // ec = U_ZERO_ERROR;
871 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
872 // expect(hex3, "012", "012");
876 * Parsers need better syntax error messages.
// Table-driven parser check. Each DATA entry pairs a rule string with a flag
// saying whether createFromRules() is expected to fail. The actual outcome
// (U_FAILURE(status)) is compared with the flag; the description string also
// carries the error name, line, offset and pre-context from parseError.
// NOTE(review): the condition around the parse-error details is missing from
// this listing; presumably they are appended only when an error occurred.
878 void TransliteratorTest::TestJ329(void) {
880 struct { UBool containsErrors
; const char* rule
; } DATA
[] = {
881 { FALSE
, "a > b; c > d" },
882 { TRUE
, "a > b; no operator; c > d" },
884 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
886 for (int32_t i
=0; i
<DATA_length
; ++i
) {
887 UErrorCode status
= U_ZERO_ERROR
;
888 UParseError parseError
;
889 Transliterator
*rbt
= Transliterator::createFromRules("<ID>",
894 UBool gotError
= U_FAILURE(status
);
895 UnicodeString
desc(DATA
[i
].rule
);
896 desc
.append(gotError
? " -> error" : " -> no error");
898 desc
= desc
+ ", ParseError code=" + u_errorName(status
) +
899 " line=" + parseError
.line
+
900 " offset=" + parseError
.offset
+
901 " context=" + parseError
.preContext
;
903 if (gotError
== DATA
[i
].containsErrors
) {
904 logln(UnicodeString("Ok: ") + desc
);
906 errln(UnicodeString("FAIL: ") + desc
);
913 * Test segments and segment references.
// Table-driven check of capture segments and their references ($1, $2, $3),
// including a nested segment. DATA is read in triples of rules, input and
// expected output; each rule string is compiled with createFromRules() and
// checked with expect().
// NOTE(review): the input and expected-output rows are missing from this
// listing.
915 void TransliteratorTest::TestSegments(void) {
917 // Each item is <rules>, <input>, <expected output>
918 UnicodeString DATA
[] = {
919 "([a-z]) '.' ([0-9]) > $2 '-' $1",
924 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
928 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
930 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
931 logln("Pattern: " + prettify(DATA
[i
]));
932 UParseError parseError
;
933 UErrorCode status
= U_ZERO_ERROR
;
934 Transliterator
*t
= Transliterator::createFromRules("ID", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
935 if (U_FAILURE(status
)) {
936 errln("FAIL: RBT constructor");
938 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
945 * Test cursor positioning outside of the key
// Table-driven check of rules whose output side places the cursor with the
// | and @ markers, before or after the replacement text. DATA is read in
// triples of rules, input and expected output.
// NOTE(review): parts of the rule text are missing from this listing.
947 void TransliteratorTest::TestCursorOffset(void) {
949 // Each item is <rules>, <input>, <expected output>
950 UnicodeString DATA
[] = {
951 "pre {alpha} post > | @ ALPHA ;"
953 "pre {beta} post > BETA @@ | ;"
956 "prealphapost prebetapost",
958 "prbetaxyz preBETApost",
960 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
962 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
963 logln("Pattern: " + prettify(DATA
[i
]));
964 UParseError parseError
;
965 UErrorCode status
= U_ZERO_ERROR
;
966 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
967 if (U_FAILURE(status
)) {
968 errln("FAIL: RBT constructor");
970 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
977 * Test zero length and > 1 char length variable values. Test
978 * use of variable refs in UnicodeSets.
// Table-driven: DATA is read in triples of rules, input and expected output;
// each rule string is compiled with createFromRules() and checked with
// expect().
// NOTE(review): every DATA row is missing from this listing, so the rule
// texts exercised here cannot be described from what is visible.
980 void TransliteratorTest::TestArbitraryVariableValues(void) {
982 // Each item is <rules>, <input>, <expected output>
983 UnicodeString DATA
[] = {
1001 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1003 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1004 logln("Pattern: " + prettify(DATA
[i
]));
1005 UParseError parseError
;
1006 UErrorCode status
= U_ZERO_ERROR
;
1007 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1008 if (U_FAILURE(status
)) {
1009 errln("FAIL: RBT constructor");
1011 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
1018 * Confirm that the contextStart, contextLimit, start, and limit
1019 * behave correctly. J474.
// For each DATA triple (rules, input, expected output) this builds a
// transliterator, fills a position struct from four consecutive POS values
// (contextStart, contextLimit, start, limit), runs the incremental
// transliterate() overload, finishes with finishTransliteration() and hands
// the result to expectAux().
// n is the number of triples, so DATA is indexed with 3*i and POS with 4*i.
// NOTE(review): the POS table, the declaration of pos and the expected
// output rows are missing from this listing.
1021 void TransliteratorTest::TestPositionHandling(void) {
1022 // Array of 3n items
1023 // Each item is <rules>, <input>, <expected output>
1024 const char* DATA
[] = {
1025 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1026 "xtat txtb", // pos 0,9,0,9
1029 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1030 "xtat txtb", // pos 2,9,3,8
1033 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1034 "xtat txtb", // pos 3,8,3,8
1038 // Array of 4n positions -- these go with the DATA array
1039 // They are: contextStart, contextLimit, start, limit
1046 int32_t n
= UPRV_LENGTHOF(DATA
) / 3;
1047 for (int32_t i
=0; i
<n
; i
++) {
1048 UErrorCode status
= U_ZERO_ERROR
;
1049 UParseError parseError
;
1050 Transliterator
*t
= Transliterator::createFromRules("<ID>",
1051 DATA
[3*i
], UTRANS_FORWARD
, parseError
, status
);
1052 if (U_FAILURE(status
)) {
1054 errln("FAIL: RBT constructor");
1058 pos
.contextStart
= POS
[4*i
];
1059 pos
.contextLimit
= POS
[4*i
+1];
1060 pos
.start
= POS
[4*i
+2];
1061 pos
.limit
= POS
[4*i
+3];
1062 UnicodeString
rsource(DATA
[3*i
+1]);
1063 t
->transliterate(rsource
, pos
, status
);
1064 if (U_FAILURE(status
)) {
1066 errln("FAIL: transliterate");
1069 t
->finishTransliteration(rsource
, pos
);
1070 expectAux(DATA
[3*i
],
1079 * Test the Hiragana-Katakana transliterator.
// Checks Hiragana-Katakana (hk) and Katakana-Hiragana (kh). DATA is read in
// triples: a direction tag (hk, kh or both), the Hiragana text and the
// Katakana text, the latter two given as escaped strings that are converted
// with CharsToUnicodeString(). The case labels compare a character of the
// tag against 0x68 (h), 0x6B (k) and 0x62 (b).
// NOTE(review): the switch header, the direction tags in DATA and the
// expect() calls inside each case are missing from this listing.
1081 void TransliteratorTest::TestHiraganaKatakana(void) {
1082 UParseError parseError
;
1083 UErrorCode status
= U_ZERO_ERROR
;
1084 Transliterator
* hk
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
);
1085 Transliterator
* kh
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
);
1086 if (hk
== 0 || kh
== 0) {
1087 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1093 // Array of 3n items
1094 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1095 const char* DATA
[] = {
1097 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1098 "\\u30A2\\u30F8\\u30F2\\u30B0",
1101 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1102 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1104 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1106 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1107 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
1108 UnicodeString k
= CharsToUnicodeString(DATA
[i
+2]);
1110 case 0x68: //'h': // Hiragana-Katakana
1113 case 0x6B: //'k': // Katakana-Hiragana
1116 case 0x62: //'b': // both
1127 * Test cloning / copy constructor of RBT.
// Creates a rule-based transliterator t1, clones it into t2, and checks that
// both produce the same expected output for the same input. Per the comment
// in the body, the point is that the clone must keep working after the
// original has been destroyed.
// NOTE(review): the statements that destroy t1 and t2 are not visible in
// this listing.
1129 void TransliteratorTest::TestCopyJ476(void) {
1130 // The real test here is what happens when the destructors are
1131 // called. So we let one object get destructed, and check to
1132 // see that its copy still works.
1133 Transliterator
*t2
= 0;
1135 UParseError parseError
;
1136 UErrorCode status
= U_ZERO_ERROR
;
1137 Transliterator
*t1
= Transliterator::createFromRules("t1",
1138 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD
, parseError
, status
);
1139 if (U_FAILURE(status
)) {
1140 errln("FAIL: RBT constructor");
1143 t2
= t1
->clone(); // Call copy constructor under the covers.
1144 expect(*t1
, "abcfoofoo", "ABcbar");
1147 expect(*t2
, "abcfoofoo", "ABcbar");
1152 * Test inter-Indic transliterators. These are composed.
1153 * ICU4C Jitterbug 483.
// Creates the Devanagari-Gujarati transliterator, looks at the ID reported
// by getID(), and converts a three-code-point Devanagari sample to the
// corresponding Gujarati text.
// NOTE(review): the conditions guarding the dataerrln and errln calls are
// missing from this listing; presumably a null result and an ID mismatch.
1155 void TransliteratorTest::TestInterIndic(void) {
1156 UnicodeString
ID("Devanagari-Gujarati", "");
1157 UErrorCode status
= U_ZERO_ERROR
;
1158 UParseError parseError
;
1159 Transliterator
* dg
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1161 dataerrln("FAIL: createInstance(" + ID
+ ") returned NULL - " + u_errorName(status
));
1164 UnicodeString id
= dg
->getID();
1166 errln("FAIL: createInstance(" + ID
+ ")->getID() => " + id
);
1168 UnicodeString dev
= CharsToUnicodeString("\\u0901\\u090B\\u0925");
1169 UnicodeString guj
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1170 expect(*dg
, dev
, guj
);
1175 * Test filter syntax in IDs. (J918)
// Table-driven check of filter syntax inside transliterator IDs. Each case
// uses four strings: the ID, the ID expected from its inverse, an input and
// the expected output. For every case the transliterator is created, run
// through expect(), its getID() is compared with the requested ID, and the
// inverse obtained from createInverse() is checked for the expected ID.
// NOTE(review): the first comment in the body says 3n strings, but the loop
// steps by 4 and reads DATA[i] through DATA[i+3]; the comment looks stale.
1177 void TransliteratorTest::TestFilterIDs(void) {
1178 // Array of 3n strings:
1179 // <id>, <inverse id>, <input>, <expected output>
1180 const char* DATA
[] = {
1181 "[aeiou]Any-Hex", // ID
1182 "[aeiou]Hex-Any", // expected inverse ID
1184 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1186 "[aeiou]Any-Hex;[^5]Hex-Any",
1187 "[^5]Any-Hex;[aeiou]Hex-Any",
1196 enum { DATA_length
= UPRV_LENGTHOF(DATA
) };
1198 for (int i
=0; i
<DATA_length
; i
+=4) {
1199 UnicodeString
ID(DATA
[i
], "");
1200 UnicodeString
uID(DATA
[i
+1], "");
1201 UnicodeString
data2(DATA
[i
+2], "");
1202 UnicodeString
data3(DATA
[i
+3], "");
1203 UParseError parseError
;
1204 UErrorCode status
= U_ZERO_ERROR
;
1205 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1207 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1210 expect(*t
, data2
, data3
);
1213 if (ID
!= t
->getID()) {
1214 errln("FAIL: createInstance(" + ID
+ ").getID() => " +
1218 // Check the inverse
1219 Transliterator
*u
= t
->createInverse(status
);
1221 errln("FAIL: " + ID
+ ".createInverse() returned NULL");
1222 } else if (u
->getID() != uID
) {
1223 errln("FAIL: " + ID
+ ".createInverse().getID() => " +
1224 u
->getID() + ", expected " + uID
);
1233 * Test the case mapping transliterators.
// Creates Any-Upper, Any-Lower and Any-Title, each with the trailing filter
// [^xyzXYZ], and checks one sentence per transliterator. In the expected
// strings the letters x, y and z keep whatever case they had in the input,
// while all other letters are case-mapped.
// All three createInstance() calls share one status variable; failure is
// detected by a null pointer check.
1235 void TransliteratorTest::TestCaseMap(void) {
1236 UParseError parseError
;
1237 UErrorCode status
= U_ZERO_ERROR
;
1238 Transliterator
* toUpper
=
1239 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1240 Transliterator
* toLower
=
1241 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1242 Transliterator
* toTitle
=
1243 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1244 if (toUpper
==0 || toLower
==0 || toTitle
==0) {
1245 errln("FAIL: createInstance returned NULL");
1252 expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.",
1253 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1254 expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1255 "the quick brown foX jumped over the lazY dogs.");
1256 expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.",
1257 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1265 * Test the name mapping transliterators.
// Checks the character-name transliterators. Any-Name is created with the
// filter [^abc], so in its expected output the letters abc stay literal
// while the other code points become name escapes. Name-Any is checked with
// an input that also contains malformed or unterminated escapes, which the
// expected output leaves in place. Finally the compound Any-Name;Name-Any is
// created and a sample string s is prepared for it.
// NOTE(review): the variable receiving the compound transliterator and the
// check performed with s are not visible in this listing.
1267 void TransliteratorTest::TestNameMap(void) {
1268 UParseError parseError
;
1269 UErrorCode status
= U_ZERO_ERROR
;
1270 Transliterator
* uni2name
=
1271 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
);
1272 Transliterator
* name2uni
=
1273 Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
);
1274 if (uni2name
==0 || name2uni
==0) {
1275 errln("FAIL: createInstance returned NULL");
1281 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1282 expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1283 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1284 expect(*name2uni
, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1285 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1292 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
);
1294 errln("FAIL: createInstance returned NULL");
1299 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1300 UnicodeString s
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1306 * Test liberalized ID syntax. 1006c
1308 void TransliteratorTest::TestLiberalizedID(void) {
1309 // Some test cases have an expected getID() value of NULL. This
1310 // means I have disabled the test case for now. This stuff is
1311 // still under development, and I haven't decided whether to make
1312 // getID() return canonical case yet. It will all get rewritten
1313 // with the move to Source-Target/Variant IDs anyway. [aliu]
1314 const char* DATA
[] = {
1315 "latin-greek", NULL
/*"Latin-Greek"*/, "case insensitivity",
1316 " Null ", "Null", "whitespace",
1317 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1318 " null ; latin-greek ", NULL
/*"Null;Latin-Greek"*/, "compound whitespace",
1320 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1321 UParseError parseError
;
1322 UErrorCode status
= U_ZERO_ERROR
;
1323 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1324 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1326 dataerrln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1327 " cannot create ID \"" + DATA
[i
] + "\" - " + u_errorName(status
));
1331 exp
= UnicodeString(DATA
[i
+1], "");
1333 // Don't worry about getID() if the expected char*
1334 // is NULL -- see above.
1335 if (exp
.length() == 0 || exp
== t
->getID()) {
1336 logln(UnicodeString("Ok: ") + DATA
[i
+2] +
1337 " create ID \"" + DATA
[i
] + "\" => \"" +
1340 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1341 " create ID \"" + DATA
[i
] + "\" => \"" +
1342 t
->getID() + "\", exp \"" + exp
+ "\"");
1349 /* test for Jitterbug 912 */
1350 void TransliteratorTest::TestCreateInstance(){
1351 const char* FORWARD
= "F";
1352 const char* REVERSE
= "R";
1353 const char* DATA
[] = {
1355 // Column 2: direction
1356 // Column 3: expected ID, or "" if expect failure
1357 "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912
1359 // JB#2689: bad compound causes crash
1360 "InvalidSource-InvalidTarget", FORWARD
, "",
1361 "InvalidSource-InvalidTarget", REVERSE
, "",
1362 "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "",
1363 "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "",
1364 "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "",
1365 "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "",
1370 for (int32_t i
=0; DATA
[i
]; i
+=3) {
1372 UErrorCode ec
= U_ZERO_ERROR
;
1373 UnicodeString
id(DATA
[i
]);
1374 UTransDirection dir
= (DATA
[i
+1]==FORWARD
)?
1375 UTRANS_FORWARD
:UTRANS_REVERSE
;
1376 UnicodeString
expID(DATA
[i
+2]);
1378 Transliterator::createInstance(id
,dir
,err
,ec
);
1379 UnicodeString newID
;
1383 UBool ok
= (newID
== expID
);
1385 newID
= u_errorName(ec
);
1388 logln((UnicodeString
)"Ok: createInstance(" +
1389 id
+ "," + DATA
[i
+1] + ") => " + newID
);
1391 dataerrln((UnicodeString
)"FAIL: createInstance(" +
1392 id
+ "," + DATA
[i
+1] + ") => " + newID
+
1393 ", expected " + expID
);
1400 * Test the normalization transliterator.
1402 void TransliteratorTest::TestNormalizationTransliterator() {
1403 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1404 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1405 const char* CANON
[] = {
1406 // Input Decomposed Composed
1407 "cat", "cat", "cat" ,
1408 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1410 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1411 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1413 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1414 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1415 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1417 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1418 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1420 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1421 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1422 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1424 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1425 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1427 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1428 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1430 "Henry IV", "Henry IV", "Henry IV" ,
1431 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1433 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1434 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1435 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1436 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1437 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1439 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1443 const char* COMPAT
[] = {
1444 // Input Decomposed Composed
1445 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1447 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1448 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1450 "Henry IV", "Henry IV", "Henry IV" ,
1451 "Henry \\u2163", "Henry IV", "Henry IV" ,
1453 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1454 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1456 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1461 UParseError parseError
;
1462 UErrorCode status
= U_ZERO_ERROR
;
1463 Transliterator
* NFD
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
);
1464 Transliterator
* NFC
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
);
1466 dataerrln("FAIL: createInstance failed: %s", u_errorName(status
));
1471 for (i
=0; CANON
[i
]; i
+=3) {
1472 UnicodeString in
= CharsToUnicodeString(CANON
[i
]);
1473 UnicodeString expd
= CharsToUnicodeString(CANON
[i
+1]);
1474 UnicodeString expc
= CharsToUnicodeString(CANON
[i
+2]);
1475 expect(*NFD
, in
, expd
);
1476 expect(*NFC
, in
, expc
);
1481 Transliterator
* NFKD
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
);
1482 Transliterator
* NFKC
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
);
1483 if (!NFKD
|| !NFKC
) {
1484 dataerrln("FAIL: createInstance failed");
1489 for (i
=0; COMPAT
[i
]; i
+=3) {
1490 UnicodeString in
= CharsToUnicodeString(COMPAT
[i
]);
1491 UnicodeString expkd
= CharsToUnicodeString(COMPAT
[i
+1]);
1492 UnicodeString expkc
= CharsToUnicodeString(COMPAT
[i
+2]);
1493 expect(*NFKD
, in
, expkd
);
1494 expect(*NFKC
, in
, expkc
);
1500 status
= U_ZERO_ERROR
;
1501 Transliterator
*t
= Transliterator::createInstance("NFD; [x]Remove",
1505 errln("FAIL: createInstance failed");
1507 expect(*t
, CharsToUnicodeString("\\u010dx"),
1508 CharsToUnicodeString("c\\u030C"));
1513 * Test compound RBT rules.
1515 void TransliteratorTest::TestCompoundRBT(void) {
1516 // Careful with spacing and ';' here: Phrase this exactly
1517 // as toRules() is going to return it. If toRules() changes
1518 // with regard to spacing or ';', then adjust this string.
1519 UnicodeString
rule("::Hex-Any;\n"
1523 "::[^t]Any-Upper;", "");
1524 UParseError parseError
;
1525 UErrorCode status
= U_ZERO_ERROR
;
1526 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
);
1528 errln("FAIL: createFromRules failed");
1531 expect(*t
, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1532 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1534 t
->toRules(r
, TRUE
);
1536 logln((UnicodeString
)"OK: toRules() => " + r
);
1538 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1539 ", expected " + rule
);
1544 t
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
);
1546 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1549 UnicodeString
exp("::Greek-Latin;\n::Latin-Cyrillic;");
1550 t
->toRules(r
, TRUE
);
1552 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1553 ", expected " + exp
);
1555 logln((UnicodeString
)"OK: toRules() => " + r
);
1559 // Round trip the result of toRules
1560 t
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
);
1562 errln("FAIL: createFromRules #2 failed");
1565 logln((UnicodeString
)"OK: createFromRules(" + r
+ ") succeeded");
1568 // Test toRules again
1569 t
->toRules(r
, TRUE
);
1571 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1572 ", expected " + exp
);
1574 logln((UnicodeString
)"OK: toRules() => " + r
);
1579 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1580 // to what the regenerated ID will look like.
1581 UnicodeString
id("Upper(Lower);(NFKC)", "");
1582 t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
1584 errln("FAIL: createInstance #2 failed");
1587 if (t
->getID() == id
) {
1588 logln((UnicodeString
)"OK: created " + id
);
1590 errln((UnicodeString
)"FAIL: createInstance(" + id
+
1591 ").getID() => " + t
->getID());
1594 Transliterator
*u
= t
->createInverse(status
);
1596 errln("FAIL: createInverse failed");
1600 exp
= "NFKC();Lower(Upper)";
1601 if (u
->getID() == exp
) {
1602 logln((UnicodeString
)"OK: createInverse(" + id
+ ") => " +
1605 errln((UnicodeString
)"FAIL: createInverse(" + id
+ ") => " +
1613 * Compound filter semantics were originally not implemented
1614 * correctly. Originally, each component filter f(i) is replaced by
1615 * f'(i) = f(i) && g, where g is the filter for the compound
1620 * Suppose I have a transliterator X. Internally X is
1621 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1623 * The compound should convert all greek characters (through latin) to
1624 * cyrillic, then lowercase the result. The filter should say "don't
1625 * touch 'A' in the original". But because an intermediate result
1626 * happens to go through "A", the Greek Alpha gets hung up.
1628 void TransliteratorTest::TestCompoundFilter(void) {
1629 UParseError parseError
;
1630 UErrorCode status
= U_ZERO_ERROR
;
1631 Transliterator
*t
= Transliterator::createInstance
1632 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
);
1634 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1637 t
->adoptFilter(new UnicodeSet("[^A]", status
));
1638 if (U_FAILURE(status
)) {
1639 errln("FAIL: UnicodeSet ct failed");
1644 // Only the 'A' at index 1 should remain unchanged
1646 CharsToUnicodeString("BA\\u039A\\u0391"),
1647 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1651 void TransliteratorTest::TestRemove(void) {
1652 UParseError parseError
;
1653 UErrorCode status
= U_ZERO_ERROR
;
1654 Transliterator
*t
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
);
1656 errln("FAIL: createInstance failed");
1660 expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts");
1662 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1663 // duplicating the filter
1664 Transliterator
* t2
= t
->clone();
1665 expect(*t2
, "Able bodied baker's cats", "Ale odied ker's ts");
1671 void TransliteratorTest::TestToRules(void) {
1672 const char* RBT
= "rbt";
1673 const char* SET
= "set";
1674 static const char* DATA
[] = {
1676 "$a=\\u4E61; [$a] > A;",
1680 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1681 "[[:Zs:][:Zl:]]{a} > A;",
1708 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1709 "[^[:Zs:]]{a} > A;",
1712 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1713 "[[a-z]-[:Zs:]]{a} > A;",
1716 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1717 "[[:Zs:]&[a-z]]{a} > A;",
1720 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1721 "[x[:Zs:]]{a} > A;",
1724 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1725 "$macron = \\u0304 ;"
1726 "$evowel = [aeiouyAEIOUY] ;"
1727 "$iotasub = \\u0345 ;"
1728 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1729 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1732 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1735 static const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1737 for (int32_t d
=0; d
< DATA_length
; d
+=3) {
1738 if (DATA
[d
] == RBT
) {
1739 // Transliterator test
1740 UParseError parseError
;
1741 UErrorCode status
= U_ZERO_ERROR
;
1742 Transliterator
*t
= Transliterator::createFromRules("ID",
1743 UnicodeString(DATA
[d
+1], -1, US_INV
), UTRANS_FORWARD
, parseError
, status
);
1745 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status
));
1748 UnicodeString rules
, escapedRules
;
1749 t
->toRules(rules
, FALSE
);
1750 t
->toRules(escapedRules
, TRUE
);
1751 UnicodeString expRules
= CharsToUnicodeString(DATA
[d
+2]);
1752 UnicodeString
expEscapedRules(DATA
[d
+2], -1, US_INV
);
1753 if (rules
== expRules
) {
1754 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1757 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1758 " => " + rules
+ ", exp " + expRules
);
1760 if (escapedRules
== expEscapedRules
) {
1761 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1762 " => " + escapedRules
);
1764 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1765 " => " + escapedRules
+ ", exp " + expEscapedRules
);
1771 UErrorCode status
= U_ZERO_ERROR
;
1772 UnicodeString
pat(DATA
[d
+1], -1, US_INV
);
1773 UnicodeString
expToPat(DATA
[d
+2], -1, US_INV
);
1774 UnicodeSet
set(pat
, status
);
1775 if (U_FAILURE(status
)) {
1776 errln("FAIL: UnicodeSet ct failed");
1779 // Adjust spacing etc. as necessary.
1780 UnicodeString toPat
;
1781 set
.toPattern(toPat
);
1782 if (expToPat
== toPat
) {
1783 logln((UnicodeString
)"Ok: " + pat
+
1786 errln((UnicodeString
)"FAIL: " + pat
+
1787 " => " + prettify(toPat
, TRUE
) +
1788 ", exp " + prettify(pat
, TRUE
));
1794 void TransliteratorTest::TestContext() {
1795 UTransPosition pos
= {0, 2, 0, 1}; // cs cl s l
1796 expect("de > x; {d}e > y;",
1801 expect("ab{c} > z;",
1806 void TransliteratorTest::TestSupplemental() {
1808 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1810 CharsToUnicodeString("ab\\U0001030Fx"),
1811 CharsToUnicodeString("\\U00010300bix"));
1813 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1814 "$b=[A-Z\\U00010400-\\U0001044D];"
1815 "($a)($b) > $2 $1;"),
1816 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1817 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1819 // k|ax\\U00010300xm
1821 // k|a\\U00010400\\U00010300xm
1822 // ky|\\U00010400\\U00010300xm
1823 // ky\\U00010400|\\U00010300xm
1825 // ky\\U00010400|\\U00010300\\U00010400m
1826 // ky\\U00010400y|\\U00010400m
1827 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1828 "$a {x} > | @ \\U00010400;"
1829 "{$a} [^\\u0000-\\uFFFF] > y;"),
1830 CharsToUnicodeString("kax\\U00010300xm"),
1831 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1834 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1835 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1837 expectT("Any-Hex/Unicode",
1838 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1839 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1841 expectT("Any-Hex/C",
1842 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1843 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1845 expectT("Any-Hex/Perl",
1846 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1847 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1849 expectT("Any-Hex/Java",
1850 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1851 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1853 expectT("Any-Hex/XML",
1854 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1855 "𐌰􏼀󠁡 ");
1857 expectT("Any-Hex/XML10",
1858 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1859 "𐌰􏼀󠁡 ");
1861 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1862 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1863 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1866 void TransliteratorTest::TestQuantifier() {
1868 // Make sure @ in a quantified anteContext works
1869 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1873 // Make sure @ in a quantified postContext works
1874 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1878 // Make sure @ in a quantified postContext with seg ref works
1879 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1883 // Make sure @ past ante context doesn't enter ante context
1884 UTransPosition pos
= {0, 5, 3, 5};
1885 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1890 // Make sure @ past post context doesn't pass limit
1891 UTransPosition pos2
= {0, 4, 0, 2};
1892 expect("{b} a+ > c @@ |; x > y; a > A;",
1897 // Make sure @ past post context doesn't enter post context
1898 expect("{b} a+ > c @@ |; x > y; a > A;",
1902 expect("(ab)? c > d;",
1906 // NOTE: The (ab)+ when referenced just yields a single "ab",
1907 // not the full sequence of them. This accords with perl behavior.
1908 expect("(ab)+ {x} > '(' $1 ')';",
1910 "x ab(ab) abab(ab)y");
1913 "ac abc abbc abbbc",
1916 expect("[abc]+ > x;",
1917 "qac abrc abbcs abtbbc",
1920 expect("q{(ab)+} > x;",
1921 "qa qab qaba qababc qaba",
1922 "qa qx qxa qxc qxa");
1924 expect("q(ab)* > x;",
1925 "qa qab qaba qababc",
1928 // NOTE: The (ab)* when referenced just yields a single "ab",
1929 // not the full sequence of them. This accords with perl behavior.
1930 expect("q(ab)* > '(' $1 ')';",
1931 "qa qab qaba qababc",
1932 "()a (ab) (ab)a (ab)c");
1934 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1936 expect("'ab'+ > x;",
1940 // $foo+ and $foo* -- the quantifier should apply to the entire
1941 // variable reference
1942 expect("$var = ab; $var+ > x;",
1947 class TestTrans
: public Transliterator
{
1949 TestTrans(const UnicodeString
& id
) : Transliterator(id
, 0) {
1951 virtual TestTrans
* clone(void) const {
1952 return new TestTrans(getID());
1954 virtual void handleTransliterate(Replaceable
& /*text*/, UTransPosition
& offsets
,
1955 UBool
/*isIncremental*/) const
1957 offsets
.start
= offsets
.limit
;
1959 virtual UClassID
getDynamicClassID() const;
1960 static UClassID U_EXPORT2
getStaticClassID();
1962 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans
)
1965 * Test Source-Target/Variant.
1967 void TransliteratorTest::TestSTV(void) {
1968 int32_t ns
= Transliterator::countAvailableSources();
1969 if (ns
< 0 || ns
> 255) {
1970 errln((UnicodeString
)"FAIL: Bad source count: " + ns
);
1974 for (i
=0; i
<ns
; ++i
) {
1975 UnicodeString source
;
1976 Transliterator::getAvailableSource(i
, source
);
1977 logln((UnicodeString
)"" + i
+ ": " + source
);
1978 if (source
.length() == 0) {
1979 errln("FAIL: empty source");
1982 int32_t nt
= Transliterator::countAvailableTargets(source
);
1983 if (nt
< 0 || nt
> 255) {
1984 errln((UnicodeString
)"FAIL: Bad target count: " + nt
);
1987 for (int32_t j
=0; j
<nt
; ++j
) {
1988 UnicodeString target
;
1989 Transliterator::getAvailableTarget(j
, source
, target
);
1990 logln((UnicodeString
)" " + j
+ ": " + target
);
1991 if (target
.length() == 0) {
1992 errln("FAIL: empty target");
1995 int32_t nv
= Transliterator::countAvailableVariants(source
, target
);
1996 if (nv
< 0 || nv
> 255) {
1997 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
);
2000 for (int32_t k
=0; k
<nv
; ++k
) {
2001 UnicodeString variant
;
2002 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
2003 if (variant
.length() == 0) {
2004 logln((UnicodeString
)" " + k
+ ": <empty>");
2006 logln((UnicodeString
)" " + k
+ ": " + variant
);
2012 // Test registration
2013 const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014 const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2015 const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" };
2016 for (i
=0; i
<3; ++i
) {
2017 Transliterator
*t
= new TestTrans(IDS
[i
]);
2019 errln("FAIL: out of memory");
2022 if (t
->getID() != IDS
[i
]) {
2023 errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]);
2027 Transliterator::registerInstance(t
);
2028 UErrorCode status
= U_ZERO_ERROR
;
2029 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2031 errln((UnicodeString
)"FAIL: Registration/creation failed for ID " +
2034 logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " +
2038 Transliterator::unregister(IDS
[i
]);
2039 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2041 errln((UnicodeString
)"FAIL: Unregistration failed for ID " +
2047 // Make sure getAvailable API reflects removal
2048 int32_t n
= Transliterator::countAvailableIDs();
2049 for (i
=0; i
<n
; ++i
) {
2050 UnicodeString id
= Transliterator::getAvailableID(i
);
2051 for (j
=0; j
<3; ++j
) {
2052 if (id
.caseCompare(FULL_IDS
[j
],0)==0) {
2053 errln((UnicodeString
)"FAIL: unregister(" + id
+ ") failed");
2057 n
= Transliterator::countAvailableTargets("Any");
2058 for (i
=0; i
<n
; ++i
) {
2060 Transliterator::getAvailableTarget(i
, "Any", t
);
2061 if (t
.caseCompare(IDS
[0],0)==0) {
2062 errln((UnicodeString
)"FAIL: unregister(Any-" + t
+ ") failed");
2065 n
= Transliterator::countAvailableSources();
2066 for (i
=0; i
<n
; ++i
) {
2068 Transliterator::getAvailableSource(i
, s
);
2069 for (j
=0; j
<3; ++j
) {
2070 if (SOURCES
[j
] == NULL
) continue;
2071 if (s
.caseCompare(SOURCES
[j
],0)==0) {
2072 errln((UnicodeString
)"FAIL: unregister(" + s
+ "-*) failed");
2079 * Test inverse of Greek-Latin; Title()
2081 void TransliteratorTest::TestCompoundInverse(void) {
2082 UParseError parseError
;
2083 UErrorCode status
= U_ZERO_ERROR
;
2084 Transliterator
*t
= Transliterator::createInstance
2085 ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
);
2087 dataerrln("FAIL: createInstance - %s", u_errorName(status
));
2090 UnicodeString
exp("(Title);Latin-Greek");
2091 if (t
->getID() == exp
) {
2092 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2095 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2096 t
->getID() + "\", expected \"" + exp
+ "\"");
2102 * Test NFD chaining with RBT
2104 void TransliteratorTest::TestNFDChainRBT() {
2106 UErrorCode ec
= U_ZERO_ERROR
;
2107 Transliterator
* t
= Transliterator::createFromRules(
2108 "TEST", "::NFD; aa > Q; a > q;",
2109 UTRANS_FORWARD
, pe
, ec
);
2110 if (t
== NULL
|| U_FAILURE(ec
)) {
2111 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
));
2114 expect(*t
, "aa", "Q");
2117 // TEMPORARY TESTS -- BEING DEBUGGED
2118 //=- UnicodeString s, s2;
2119 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2120 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2121 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2122 //=- expect(*t, s, s2);
2125 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2126 //=- expect(*t, s2, s);
2129 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2130 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2131 //=- expect(*t, s, s);
2134 // const char* source[] = {
2136 // "\\u015Br\\u012Bmad",
2137 // "bhagavadg\\u012Bt\\u0101",
2140 // "vi\\u1E63\\u0101da",
2142 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2143 // "uv\\u0101cr\\u0325",
2145 // "rmk\\u1E63\\u0113t",
2146 // //"dharmak\\u1E63\\u0113tr\\u0113",
2148 // "kuruk\\u1E63\\u0113tr\\u0113",
2149 // "samav\\u0113t\\u0101",
2150 // "yuyutsava-\\u1E25",
2151 // "m\\u0101mak\\u0101-\\u1E25",
2152 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2154 // "san\\u0304java",
2159 // const char* expected[] = {
2161 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2162 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2163 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2164 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2165 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2166 // "\\u092f\\u094b\\u0917",
2167 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2168 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2171 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2174 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2175 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2176 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2177 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2178 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2179 // "\\u0938\\u0902\\u091c\\u0935",
2183 // UErrorCode status = U_ZERO_ERROR;
2184 // UParseError parseError;
2185 // UnicodeString message;
2186 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2187 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2188 // if(U_FAILURE(status)){
2189 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2190 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2191 // delete latinToDevToLatin;
2192 // delete devToLatinToDev;
2195 // UnicodeString gotResult;
2196 // for(int i= 0; source[i] != 0; i++){
2197 // gotResult = source[i];
2198 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2199 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2201 // delete latinToDevToLatin;
2202 // delete devToLatinToDev;
2206 * Inverse of "Null" should be "Null". (J21)
2208 void TransliteratorTest::TestNullInverse() {
2210 UErrorCode ec
= U_ZERO_ERROR
;
2211 Transliterator
*t
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
);
2212 if (t
== 0 || U_FAILURE(ec
)) {
2213 errln("FAIL: createInstance");
2216 Transliterator
*u
= t
->createInverse(ec
);
2217 if (u
== 0 || U_FAILURE(ec
)) {
2218 errln("FAIL: createInverse");
2222 if (u
->getID() != "Null") {
2223 errln("FAIL: Inverse of Null should be Null");
2230 * Check ID of inverse of alias. (J22)
2232 void TransliteratorTest::TestAliasInverseID() {
2233 UnicodeString
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2235 UErrorCode ec
= U_ZERO_ERROR
;
2236 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2237 if (t
== 0 || U_FAILURE(ec
)) {
2238 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2241 Transliterator
*u
= t
->createInverse(ec
);
2242 if (u
== 0 || U_FAILURE(ec
)) {
2243 errln("FAIL: createInverse");
2247 UnicodeString exp
= "Hangul-Latin";
2248 UnicodeString got
= u
->getID();
2250 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2251 ", expected " + exp
);
2258 * Test IDs of inverses of compound transliterators. (J20)
2260 void TransliteratorTest::TestCompoundInverseID() {
2261 UnicodeString ID
= "Latin-Jamo;NFC(NFD)";
2263 UErrorCode ec
= U_ZERO_ERROR
;
2264 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2265 if (t
== 0 || U_FAILURE(ec
)) {
2266 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2269 Transliterator
*u
= t
->createInverse(ec
);
2270 if (u
== 0 || U_FAILURE(ec
)) {
2271 errln("FAIL: createInverse");
2275 UnicodeString exp
= "NFD(NFC);Jamo-Latin";
2276 UnicodeString got
= u
->getID();
2278 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2279 ", expected " + exp
);
2286 * Test undefined variable.
2289 void TransliteratorTest::TestUndefinedVariable() {
2290 UnicodeString rule
= "$initial } a <> \\u1161;";
2292 UErrorCode ec
= U_ZERO_ERROR
;
2293 Transliterator
*t
= Transliterator::createFromRules("<ID>", rule
, UTRANS_FORWARD
, pe
, ec
);
2295 if (U_FAILURE(ec
)) {
2296 logln((UnicodeString
)"OK: Got exception for " + rule
+ ", as expected: " +
2300 errln((UnicodeString
)"Fail: bogus rule " + rule
+ " compiled with error " +
2305 * Test empty context.
2307 void TransliteratorTest::TestEmptyContext() {
2308 expect(" { a } > b;", "xay a ", "xby b ");
2312 * Test compound filter ID syntax
2314 void TransliteratorTest::TestCompoundFilterID(void) {
2315 static const char* DATA
[] = {
2316 // Col. 1 = ID or rule set (latter must start with #)
2318 // = columns > 1 are null if expect col. 1 to be illegal =
2320 // Col. 2 = direction, "F..." or "R..."
2321 // Col. 3 = source string
2322 // Col. 4 = exp result
2324 "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters
2325 "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter
2326 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2327 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2328 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2329 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2333 for (int32_t i
=0; DATA
[i
]; i
+=4) {
2334 UnicodeString id
= CharsToUnicodeString(DATA
[i
]);
2335 UTransDirection direction
= (DATA
[i
+1] != NULL
&& DATA
[i
+1][0] == 'R') ?
2336 UTRANS_REVERSE
: UTRANS_FORWARD
;
2337 UnicodeString source
;
2339 if (DATA
[i
+2] != NULL
) {
2340 source
= CharsToUnicodeString(DATA
[i
+2]);
2341 exp
= CharsToUnicodeString(DATA
[i
+3]);
2343 UBool expOk
= (DATA
[i
+1] != NULL
);
2344 LocalPointer
<Transliterator
> t
;
2346 UErrorCode ec
= U_ZERO_ERROR
;
2347 if (id
.charAt(0) == 0x23/*#*/) {
2348 t
.adoptInstead(Transliterator::createFromRules("ID", id
, direction
, pe
, ec
));
2350 t
.adoptInstead(Transliterator::createInstance(id
, direction
, pe
, ec
));
2352 UBool ok
= (t
.isValid() && U_SUCCESS(ec
));
2353 UnicodeString transID
;
2355 transID
= t
->getID();
2358 transID
= UnicodeString("NULL", "");
2361 logln((UnicodeString
)"Ok: " + id
+ " => " + transID
+ ", " +
2363 if (source
.length() != 0) {
2364 expect(*t
, source
, exp
);
2367 dataerrln((UnicodeString
)"FAIL: " + id
+ " => " + transID
+ ", " +
2374 * Test new property set syntax
2376 void TransliteratorTest::TestPropertySet() {
2377 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2383 * Test various failure points of the new 2.0 engine.
2385 void TransliteratorTest::TestNewEngine() {
2387 UErrorCode ec
= U_ZERO_ERROR
;
2388 Transliterator
*t
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
);
2389 if (t
== 0 || U_FAILURE(ec
)) {
2390 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec
));
2393 // Katakana should be untouched
2394 expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"),
2395 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2400 // This test will only work if Transliterator.ROLLBACK is
2401 // true. Otherwise, this test will fail, revealing a
2402 // limitation of global filters in incremental mode.
2404 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
);
2406 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
);
2407 if (U_FAILURE(ec
)) {
2413 Transliterator
* array
[3];
2415 array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
);
2417 if (U_FAILURE(ec
)) {
2418 errln("FAIL: createInstance NFD");
2425 t
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
));
2426 if (U_FAILURE(ec
)) {
2427 errln("FAIL: UnicodeSet constructor");
2435 expect(*t
, "aAaA", "bAbA");
2437 assertTrue("countElements", t
->countElements() == 3);
2438 assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A");
2439 assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD");
2440 assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b");
2441 assertSuccess("getElement", ec
);
2449 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2453 UnicodeString gr
= CharsToUnicodeString(
2455 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456 "$rough = \\u0314 ;"
2457 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2461 expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2465 * Test quantified segment behavior. We want:
2466 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2470 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2472 // The tricky case; the quantifier is around the segment
2473 expect("([abc])+ > x $1 x;", "cba", "xax");
2475 // Tricky case in reverse direction
2476 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2478 // Check post-context segment
2479 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2481 // Test toRule/toPattern for non-quantified segment.
2482 // Careful with spacing here.
2483 UnicodeString
r("([a-c]){q} > x $1 x;");
2485 UErrorCode ec
= U_ZERO_ERROR
;
2486 Transliterator
* t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2487 if (U_FAILURE(ec
)) {
2488 errln("FAIL: createFromRules");
2493 t
->toRules(rr
, TRUE
);
2495 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2497 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2501 // Test toRule/toPattern for quantified segment.
2502 // Careful with spacing here.
2503 r
= "([a-c])+{q} > x $1 x;";
2504 t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2505 if (U_FAILURE(ec
)) {
2506 errln("FAIL: createFromRules");
2510 t
->toRules(rr
, TRUE
);
2512 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2514 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2519 //======================================================================
2521 //======================================================================
2522 void TransliteratorTest::TestDevanagariLatinRT(){
2523 const int MAX_LEN
= 52;
2524 const char* const source
[MAX_LEN
] = {
2539 //"r\\u0323ya", // \u095c is not valid in Devanagari
2565 "\\u1E6Dh\\u1E6Dha",
2572 // Not roundtrippable --
2573 // \\u0939\\u094d\\u094d\\u092E - hma
2574 // \\u0939\\u094d\\u092E - hma
2575 // CharsToUnicodeString("hma"),
2580 "san\\u0304j\\u012Bb s\\u0113nagupta",
2581 "\\u0101nand vaddir\\u0101ju",
2585 const char* const expected
[MAX_LEN
] = {
2586 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2587 "\\u0915\\u094D\\u0930", /* kra */
2588 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2589 "\\u0916\\u094D\\u0930", /* khra */
2590 "\\u0917\\u094D\\u0930", /* gra */
2591 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2592 "\\u091A\\u094D\\u0930", /* cra */
2593 "\\u091B\\u094D\\u0930", /* chra */
2594 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2595 "\\u091D\\u094D\\u0930", /* jhra */
2596 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2597 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2598 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2599 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2600 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2601 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2602 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2603 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2604 "\\u0924\\u094D\\u0924", /* tta */
2605 "\\u0925\\u094D\\u0930", /* thra */
2606 "\\u0926\\u094D\\u0926", /* dda */
2607 "\\u0927\\u094D\\u0930", /* dhra */
2608 "\\u0928\\u094D\\u0928", /* nna */
2609 "\\u092A\\u094D\\u0930", /* pra */
2610 "\\u092B\\u094D\\u0930", /* phra */
2611 "\\u092C\\u094D\\u0930", /* bra */
2612 "\\u092D\\u094D\\u0930", /* bhra */
2613 "\\u092E\\u094D\\u0930", /* mra */
2614 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2615 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2616 "\\u092F\\u094D\\u0930", /* yra */
2617 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2619 "\\u0935\\u094D\\u0930", /* vra */
2620 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2621 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2622 "\\u0938\\u094D\\u0930", /* sra */
2623 "\\u0939\\u094d\\u092E", /* hma */
2624 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2625 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2626 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2627 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2628 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2629 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2630 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2631 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2632 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2634 "\\u0939\\u094D\\u092F", /* hya */
2635 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2636 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2637 "\\u090d", /* e\\u0306 */
2638 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2643 UErrorCode status
= U_ZERO_ERROR
;
2644 UParseError parseError
;
2645 UnicodeString message
;
2646 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2647 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2648 if(U_FAILURE(status
)){
2649 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2650 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2653 UnicodeString gotResult
;
2654 for(int i
= 0; i
<MAX_LEN
; i
++){
2655 gotResult
= source
[i
];
2656 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2657 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2663 void TransliteratorTest::TestTeluguLatinRT(){
2664 const int MAX_LEN
=10;
2665 const char* const source
[MAX_LEN
] = {
2666 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2667 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2668 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2669 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2670 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2671 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2672 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2673 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2674 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2675 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2678 const char* const expected
[MAX_LEN
] = {
2679 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2691 UErrorCode status
= U_ZERO_ERROR
;
2692 UParseError parseError
;
2693 UnicodeString message
;
2694 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
);
2695 Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2696 if(U_FAILURE(status
)){
2697 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2698 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2701 UnicodeString gotResult
;
2702 for(int i
= 0; i
<MAX_LEN
; i
++){
2703 gotResult
= source
[i
];
2704 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2705 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2711 void TransliteratorTest::TestSanskritLatinRT(){
2712 const int MAX_LEN
=16;
2713 const char* const source
[MAX_LEN
] = {
2714 "rmk\\u1E63\\u0113t",
2715 "\\u015Br\\u012Bmad",
2716 "bhagavadg\\u012Bt\\u0101",
2719 "vi\\u1E63\\u0101da",
2721 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722 "uv\\u0101cr\\u0325",
2723 "dharmak\\u1E63\\u0113tr\\u0113",
2724 "kuruk\\u1E63\\u0113tr\\u0113",
2725 "samav\\u0113t\\u0101",
2727 "m\\u0101mak\\u0101\\u1E25",
2728 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2732 const char* const expected
[MAX_LEN
] = {
2733 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739 "\\u092f\\u094b\\u0917",
2740 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749 "\\u0938\\u0902\\u091c\\u0935",
2751 UErrorCode status
= U_ZERO_ERROR
;
2752 UParseError parseError
;
2753 UnicodeString message
;
2754 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2755 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2756 if(U_FAILURE(status
)){
2757 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2758 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2761 UnicodeString gotResult
;
2762 for(int i
= 0; i
<MAX_LEN
; i
++){
2763 gotResult
= source
[i
];
2764 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2765 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2772 void TransliteratorTest::TestCompoundLatinRT(){
2773 const char* const source
[] = {
2774 "rmk\\u1E63\\u0113t",
2775 "\\u015Br\\u012Bmad",
2776 "bhagavadg\\u012Bt\\u0101",
2779 "vi\\u1E63\\u0101da",
2781 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782 "uv\\u0101cr\\u0325",
2783 "dharmak\\u1E63\\u0113tr\\u0113",
2784 "kuruk\\u1E63\\u0113tr\\u0113",
2785 "samav\\u0113t\\u0101",
2787 "m\\u0101mak\\u0101\\u1E25",
2788 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2792 const int MAX_LEN
= UPRV_LENGTHOF(source
);
2793 const char* const expected
[MAX_LEN
] = {
2794 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800 "\\u092f\\u094b\\u0917",
2801 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810 "\\u0938\\u0902\\u091c\\u0935"
2812 if(MAX_LEN
!= UPRV_LENGTHOF(expected
)) {
2813 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2817 UErrorCode status
= U_ZERO_ERROR
;
2818 UParseError parseError
;
2819 UnicodeString message
;
2820 Transliterator
* devToLatinToDev
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2821 Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2822 Transliterator
* devToTelToDev
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2823 Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2825 if(U_FAILURE(status
)){
2826 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2827 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2830 UnicodeString gotResult
;
2831 for(int i
= 0; i
<MAX_LEN
; i
++){
2832 gotResult
= source
[i
];
2833 expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
]));
2834 expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2835 expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2838 delete(latinToDevToLatin
);
2839 delete(devToLatinToDev
);
2840 delete(devToTelToDev
);
2841 delete(latinToTelToLatin
);
2845 * Test Gurmukhi-Devanagari Tippi and Bindi
2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2849 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2850 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2851 UErrorCode status
= U_ZERO_ERROR
;
2852 UnicodeSet
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV
).unescape(), status
);
2853 UnicodeSet
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV
).unescape(), status
);
2854 UParseError parseError
;
2856 UnicodeSetIterator
vIter(vowel
);
2857 UnicodeSetIterator
nvIter(non_vowel
);
2858 Transliterator
* trans
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
);
2859 if(U_FAILURE(status
)) {
2860 dataerrln("Error creating transliterator %s", u_errorName(status
));
2864 UnicodeString
src (" \\u0902", -1, US_INV
);
2865 UnicodeString
expected(" \\u0A02", -1, US_INV
);
2866 src
= src
.unescape();
2867 expected
= expected
.unescape();
2869 while(vIter
.next()){
2870 src
.setCharAt(0,(UChar
) vIter
.getCodepoint());
2871 expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100));
2872 expect(*trans
,src
,expected
);
2875 expected
.setCharAt(1,0x0A70);
2876 while(nvIter
.next()){
2877 //src.setCharAt(0,(char) nvIter.codepoint);
2878 src
.setCharAt(0,(UChar
)nvIter
.getCodepoint());
2879 expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100));
2880 expect(*trans
,src
,expected
);
2885 * Test instantiation from a locale.
2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2889 UErrorCode ec
= U_ZERO_ERROR
;
2890 Transliterator
*t
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
);
2891 if (U_FAILURE(ec
)) {
2892 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec
));
2896 expect(*t
, CharsToUnicodeString("\\u0430"), "a");
2899 t
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
);
2900 if (U_FAILURE(ec
)) {
2901 errln("FAIL: createInstance(en-el)");
2905 expect(*t
, "a", CharsToUnicodeString("\\u03B1"));
2910 * Test title case handling of accent (should ignore accents)
2912 void TransliteratorTest::TestTitleAccents(void) {
2914 UErrorCode ec
= U_ZERO_ERROR
;
2915 Transliterator
*t
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
);
2916 if (U_FAILURE(ec
)) {
2917 errln("FAIL: createInstance(Title)");
2921 expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2926 * Basic test of a locale resource based rule.
2928 void TransliteratorTest::TestLocaleResource() {
2929 const char* DATA
[] = {
2931 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2932 "Latin-el", "b", "\\u03bc\\u03c0",
2933 "Latin-Greek", "b", "\\u03B2",
2934 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2935 "el-Latin", "\\u03B2", "v",
2936 "Greek-Latin", "\\u03B2", "b",
2938 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
2939 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
2941 UErrorCode ec
= U_ZERO_ERROR
;
2942 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
);
2943 if (U_FAILURE(ec
)) {
2944 dataerrln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ") - " + u_errorName(ec
));
2948 expect(*t
, CharsToUnicodeString(DATA
[i
+1]),
2949 CharsToUnicodeString(DATA
[i
+2]));
2955 * Make sure parse errors reference the right line.
2957 void TransliteratorTest::TestParseError() {
2958 static const char* rule
=
2962 UErrorCode ec
= U_ZERO_ERROR
;
2964 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2966 if (U_FAILURE(ec
)) {
2967 UnicodeString
err(pe
.preContext
);
2968 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2969 if (err
.indexOf("d << b") >= 0) {
2970 logln("Ok: " + err
);
2972 errln("FAIL: " + err
);
2976 errln("FAIL: no syntax error");
2978 static const char* maskingRule
=
2983 delete Transliterator::createFromRules("ID", maskingRule
, UTRANS_FORWARD
, pe
, ec
);
2984 if (ec
!= U_RULE_MASK_ERROR
) {
2985 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec
));
2987 else if (UnicodeString("a > x;") != UnicodeString(pe
.preContext
)) {
2988 errln("FAIL: did not get expected precontext");
2990 else if (UnicodeString("ab > y;") != UnicodeString(pe
.postContext
)) {
2991 errln("FAIL: did not get expected postcontext");
2996 * Make sure sets on output are disallowed.
2998 void TransliteratorTest::TestOutputSet() {
2999 UnicodeString rule
= "$set = [a-cm-n]; b > $set;";
3000 UErrorCode ec
= U_ZERO_ERROR
;
3002 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3004 if (U_FAILURE(ec
)) {
3005 UnicodeString
err(pe
.preContext
);
3006 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3007 logln("Ok: " + err
);
3010 errln("FAIL: No syntax error");
3014 * Test the use variable range pragma, making sure that use of
3015 * variable range characters is detected and flagged as an error.
3017 void TransliteratorTest::TestVariableRange() {
3018 UnicodeString rule
= "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019 UErrorCode ec
= U_ZERO_ERROR
;
3021 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3023 if (U_FAILURE(ec
)) {
3024 UnicodeString
err(pe
.preContext
);
3025 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3026 logln("Ok: " + err
);
3029 errln("FAIL: No syntax error");
3033 * Test invalid post context error handling
3035 void TransliteratorTest::TestInvalidPostContext() {
3036 UnicodeString rule
= "a}b{c>d;";
3037 UErrorCode ec
= U_ZERO_ERROR
;
3039 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3041 if (U_FAILURE(ec
)) {
3042 UnicodeString
err(pe
.preContext
);
3043 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3044 if (err
.indexOf("a}b{c") >= 0) {
3045 logln("Ok: " + err
);
3047 errln("FAIL: " + err
);
3051 errln("FAIL: No syntax error");
3055 * Test ID form variants
3057 void TransliteratorTest::TestIDForms() {
3058 const char* DATA
[] = {
3060 "nfd", NULL
, "NFC", // make sure case is ignored
3061 "Any-NFKD", NULL
, "Any-NFKC",
3062 "Null", NULL
, "Null",
3063 "-nfkc", "nfkc", "NFKD",
3064 "-nfkc/", "nfkc", "NFKD",
3065 "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN",
3066 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068 "Source-", NULL
, NULL
,
3069 "Source/Variant-", NULL
, NULL
,
3070 "Source-/Variant", NULL
, NULL
,
3071 "/Variant", NULL
, NULL
,
3072 "/Variant-", NULL
, NULL
,
3073 "-/Variant", NULL
, NULL
,
3078 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
3080 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3081 const char* ID
= DATA
[i
];
3082 const char* expID
= DATA
[i
+1];
3083 const char* expInvID
= DATA
[i
+2];
3084 UBool expValid
= (expInvID
!= NULL
);
3085 if (expID
== NULL
) {
3089 UErrorCode ec
= U_ZERO_ERROR
;
3091 Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
3092 if (U_FAILURE(ec
)) {
3094 logln((UnicodeString
)"Ok: getInstance(" + ID
+") => " + u_errorName(ec
));
3096 dataerrln((UnicodeString
)"FAIL: Couldn't create " + ID
+ " - " + u_errorName(ec
));
3101 Transliterator
*u
= t
->createInverse(ec
);
3102 if (U_FAILURE(ec
)) {
3103 errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
);
3108 if (t
->getID() == expID
&&
3109 u
->getID() == expInvID
) {
3110 logln((UnicodeString
)"Ok: " + ID
+ ".getInverse() => " + expInvID
);
3112 errln((UnicodeString
)"FAIL: getInstance(" + ID
+ ") => " +
3113 t
->getID() + " x getInverse() => " + u
->getID() +
3114 ", expected " + expInvID
);
3121 static const UChar SPACE
[] = {32,0};
3122 static const UChar NEWLINE
[] = {10,0};
3123 static const UChar RETURN
[] = {13,0};
3124 static const UChar EMPTY
[] = {0};
3126 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
,
3127 const UnicodeString
& testRulesForward
) {
3128 UnicodeString rules2
; t2
.toRules(rules2
, TRUE
);
3129 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130 rules2
.findAndReplace(SPACE
, EMPTY
);
3131 rules2
.findAndReplace(NEWLINE
, EMPTY
);
3132 rules2
.findAndReplace(RETURN
, EMPTY
);
3134 UnicodeString
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
);
3136 if (rules2
!= testRules
) {
3138 logln((UnicodeString
)"GENERATED RULES: " + rules2
);
3139 logln((UnicodeString
)"SHOULD BE: " + testRulesForward
);
3144 * Mark's toRules test.
3146 void TransliteratorTest::TestToRulesMark() {
3147 const char* testRules
=
3148 "::[[:Latin:][:Mark:]];"
3151 "a <> \\u03B1;" // alpha
3155 "::([[:Greek:][:Mark:]]);"
3157 const char* testRulesForward
=
3158 "::[[:Latin:][:Mark:]];"
3166 const char* testRulesBackward
=
3167 "::[[:Greek:][:Mark:]];"
3174 UnicodeString source
= CharsToUnicodeString("\\u00E1"); // a-acute
3175 UnicodeString target
= CharsToUnicodeString("\\u03AC"); // alpha-acute
3178 UErrorCode ec
= U_ZERO_ERROR
;
3179 LocalPointer
<Transliterator
> t2(
3180 Transliterator::createFromRules("source-target", UnicodeString(testRules
, -1, US_INV
), UTRANS_FORWARD
, pe
, ec
));
3181 LocalPointer
<Transliterator
> t3(
3182 Transliterator::createFromRules("target-source", UnicodeString(testRules
, -1, US_INV
), UTRANS_REVERSE
, pe
, ec
));
3184 if (U_FAILURE(ec
)) {
3185 dataerrln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
));
3189 expect(*t2
, source
, target
);
3190 expect(*t3
, target
, source
);
3192 checkRules("Failed toRules FORWARD", *t2
, UnicodeString(testRulesForward
, -1, US_INV
));
3193 checkRules("Failed toRules BACKWARD", *t3
, UnicodeString(testRulesBackward
, -1, US_INV
));
3197 * Test Escape and Unescape transliterators.
3199 void TransliteratorTest::TestEscape() {
3205 t
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
);
3206 if (U_FAILURE(ec
)) {
3207 errln((UnicodeString
)"FAIL: createInstance");
3210 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
3216 t
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
);
3217 if (U_FAILURE(ec
)) {
3218 errln((UnicodeString
)"FAIL: createInstance");
3221 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3222 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3227 t
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
);
3228 if (U_FAILURE(ec
)) {
3229 errln((UnicodeString
)"FAIL: createInstance");
3232 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3233 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3238 t
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
);
3239 if (U_FAILURE(ec
)) {
3240 errln((UnicodeString
)"FAIL: createInstance");
3243 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3244 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3250 void TransliteratorTest::TestAnchorMasking(){
3251 UnicodeString
rule ("^a > Q; a > q;");
3252 UErrorCode status
= U_ZERO_ERROR
;
3253 UParseError parseError
;
3255 Transliterator
* t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
);
3256 if(U_FAILURE(status
)){
3257 errln(UnicodeString("FAIL: ") + "ID" +
3258 ".createFromRules() => bad rules" +
3259 /*", parse error " + parseError.code +*/
3260 ", line " + parseError
.line
+
3261 ", offset " + parseError
.offset
+
3262 ", context " + prettify(parseError
.preContext
, TRUE
) +
3263 ", rules: " + prettify(rule
, TRUE
));
3269 * Make sure display names of variants look reasonable.
3271 void TransliteratorTest::TestDisplayName() {
3272 #if UCONFIG_NO_FORMATTING
3273 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3276 static const char* DATA
[] = {
3277 // ID, forward name, reverse name
3278 // Update the text as necessary -- the important thing is
3279 // not the text itself, but how various cases are handled.
3282 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3285 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3288 "NFC", "Any to NFC", "Any to NFD",
3291 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
3293 Locale
US("en", "US");
3295 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3297 Transliterator::getDisplayName(DATA
[i
], US
, name
);
3298 if (name
!= DATA
[i
+1]) {
3299 dataerrln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " +
3300 name
+ ", expected " + DATA
[i
+1]);
3302 logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
);
3304 UErrorCode ec
= U_ZERO_ERROR
;
3306 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
);
3307 if (U_FAILURE(ec
)) {
3309 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec
));
3312 name
= Transliterator::getDisplayName(t
->getID(), US
, name
);
3313 if (name
!= DATA
[i
+2]) {
3314 dataerrln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " +
3315 name
+ ", expected " + DATA
[i
+2]);
3317 logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
);
3324 void TransliteratorTest::TestSpecialCases(void) {
3325 const UnicodeString registerRules
[] = {
3326 "Any-Dev1", "x > X; y > Y;",
3327 "Any-Dev2", "XY > Z",
3329 CharsToUnicodeString
3330 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3334 const UnicodeString testCases
[] = {
3336 // should add more test cases
3337 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3338 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3344 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346 // check for devanagari bug
3347 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3349 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3350 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3351 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE
+ DESERET_dee
,
3353 //TODO: enable this test once Titlecase works right
3355 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3358 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3359 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE
+ DESERET_DEE
,
3360 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3361 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee
+ DESERET_dee
,
3363 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3364 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3367 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3369 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3370 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3371 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3372 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3373 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3374 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3376 // Upper: TAT\\u02B9\\u00C2NA
3377 // Lower: tat\\u02B9\\u00E2na
3378 // Title: Tat\\u02B9\\u00E2na
3379 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3380 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3381 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3391 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3392 UErrorCode status
= U_ZERO_ERROR
;
3394 Transliterator
*t
= Transliterator::createFromRules(registerRules
[0+i
],
3395 registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
);
3396 if (U_FAILURE(status
)) {
3397 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status
));
3399 Transliterator::registerInstance(t
);
3402 for (i
= 0; testCases
[i
].length()!=0; i
+=3) {
3403 UErrorCode ec
= U_ZERO_ERROR
;
3405 const UnicodeString
& name
= testCases
[i
];
3406 Transliterator
*t
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
);
3407 if (U_FAILURE(ec
)) {
3408 dataerrln((UnicodeString
)"FAIL: Couldn't create " + name
+ " - " + u_errorName(ec
));
3412 const UnicodeString
& id
= t
->getID();
3413 const UnicodeString
& source
= testCases
[i
+1];
3414 UnicodeString target
;
3416 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3418 if (testCases
[i
+2].length() > 0) {
3419 target
= testCases
[i
+2];
3420 } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) {
3421 Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
);
3422 } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) {
3423 Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
);
3424 } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) {
3425 Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
);
3426 } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) {
3427 Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
);
3428 } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) {
3430 target
.toLower(Locale::getUS());
3431 } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) {
3433 target
.toUpper(Locale::getUS());
3435 if (U_FAILURE(ec
)) {
3436 errln((UnicodeString
)"FAIL: Internal error normalizing " + source
);
3440 expect(*t
, source
, target
);
3443 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3444 Transliterator::unregister(registerRules
[i
]);
// Writes an escaped spelling of code point `ch` into `buffer` using sprintf():
// either "\u" followed by 4 lowercase hex digits, or "\U" followed by 8.
// The condition that chooses between the two calls and the return statement
// are not part of the visible lines.
// NOTE(review): sprintf() is unbounded; the "\U%08x" form needs at least 11
// bytes in `buffer` (2 + 8 digits + NUL) -- confirm every caller's buffer size.
3448 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) {
3450 sprintf(buffer
, "\\u%04x", (int)ch
);
3452 sprintf(buffer
, "\\U%08x", (int)ch
);
// Checks case mapping of supplementary (surrogate-pair) characters:
//   1. u_totitle() of the code point read from DESERET_dee must equal DESERET_DEE;
//      on mismatch both code points are printed via Char32ToEscapedChars().
//   2. u_strToUpper() of dee+DEE must equal DEE+DEE.
//   3. u_strToLower() of dee+DEE must equal dee+dee.
// Both string calls are given a destination capacity of 20 and a NULL locale.
// NOTE(review): the declarations of `dee`, `buffer` and `buffer2` are not in the
// visible lines -- confirm `buffer2` really holds at least 20 UChars.
3457 void TransliteratorTest::TestSurrogateCasing (void) {
3458 // check that casing handles surrogates
3459 // titlecase is currently defective
3463 U16_GET(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
);
3464 UnicodeString
DEE(u_totitle(dee
));
3465 if (DEE
!= DESERET_DEE
) {
3466 err("Fails titlecase of surrogates");
3467 err(Char32ToEscapedChars(dee
, buffer
));
3469 errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
));
3472 UnicodeString deeDEETest
=DESERET_dee
+ DESERET_DEE
;
3473 UnicodeString deedeeTest
= DESERET_dee
+ DESERET_dee
;
3474 UnicodeString DEEDEETest
= DESERET_DEE
+ DESERET_DEE
;
3475 UErrorCode status
= U_ZERO_ERROR
;
3477 u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3478 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) {
3479 errln("Fails: Can't uppercase surrogates.");
3482 status
= U_ZERO_ERROR
;
3483 u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3484 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) {
3485 errln("Fails: Can't lowercase surrogates.");
// File-local helper: runs transliterator `t` over `result` in place with the
// non-incremental transliterate(Replaceable&) overload.
// NOTE(review): the statement that uses `src` is not in the visible lines;
// presumably `result` is first set from `src` -- confirm.
3489 static void _trans(Transliterator
& t
, const UnicodeString
& src
,
3490 UnicodeString
& result
) {
3492 t
.transliterate(result
);
3495 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
,
3496 UnicodeString
& result
, UErrorCode ec
) {
3498 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3499 if (U_SUCCESS(ec
)) {
3500 _trans(*t
, src
, result
);
// File-local helper: walks `pairs` two entries at a time, stopping at the first
// even-indexed entry whose length is 0, and compares `source` with pairs[i]
// case-insensitively (default case folding).
// NOTE(review): the return statements are not in the visible lines; presumably
// the entry after the matching key is returned on a match and `empty`
// otherwise -- confirm.
3505 static UnicodeString
_findMatch(const UnicodeString
& source
,
3506 const UnicodeString
* pairs
) {
3507 UnicodeString empty
;
3508 for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) {
3509 if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) {
// Outline of the visible logic:
//   1. Build sample text: a Latin sentence plus its Latin-Devanagari and
//      Latin-Katakana transliterations (via the id-based _trans() helper).
//   2. `tests` pairs a source name with the sample text to feed transliterators
//      of that source; sources with no sample (empty _findMatch() result) are
//      logged as skipped.
//   3. For every available source / target / variant, create "source-target/variant"
//      and run CheckIncrementalAux() on it, then on its inverse using the forward
//      output `rev` as input.
//   4. Failure to create an inverse is reported unless the ID is one of the
//      forward-only cases listed in the comments below.
// NOTE(review): the loop-local `UnicodeString test` shadows the outer `test`
// sentence declared just before the loops, so the outer one is not used by the
// visible code inside the loop.
// NOTE(review): declarations of `err` (the UParseError passed to createInstance)
// and `rev` are not in the visible lines.
3516 // Check to see that incremental gets at least part way through a reasonable string.
3518 void TransliteratorTest::TestIncrementalProgress(void) {
3519 UErrorCode ec
= U_ZERO_ERROR
;
3520 UnicodeString latinTest
= "The Quick Brown Fox.";
3521 UnicodeString devaTest
;
3522 _trans("Latin-Devanagari", latinTest
, devaTest
, ec
);
3523 UnicodeString kataTest
;
3524 _trans("Latin-Katakana", latinTest
, kataTest
, ec
);
3525 if (U_FAILURE(ec
)) {
3526 errln("FAIL: Internal error");
3529 const UnicodeString tests
[] = {
3532 "Halfwidth", latinTest
,
3533 "Devanagari", devaTest
,
3534 "Katakana", kataTest
,
3538 UnicodeString
test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3539 int32_t i
= 0, j
=0, k
=0;
3540 int32_t sources
= Transliterator::countAvailableSources();
3541 for (i
= 0; i
< sources
; i
++) {
3542 UnicodeString source
;
3543 Transliterator::getAvailableSource(i
, source
);
3544 UnicodeString test
= _findMatch(source
, tests
);
3545 if (test
.length() == 0) {
3546 logln((UnicodeString
)"Skipping " + source
+ "-X");
3549 int32_t targets
= Transliterator::countAvailableTargets(source
);
3550 for (j
= 0; j
< targets
; j
++) {
3551 UnicodeString target
;
3552 Transliterator::getAvailableTarget(j
, source
, target
);
3553 int32_t variants
= Transliterator::countAvailableVariants(source
, target
);
3554 for (k
=0; k
< variants
; k
++) {
3555 UnicodeString variant
;
3557 UErrorCode status
= U_ZERO_ERROR
;
3559 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
3560 UnicodeString id
= source
+ "-" + target
+ "/" + variant
;
3562 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
);
3563 if (U_FAILURE(status
)) {
3564 dataerrln((UnicodeString
)"FAIL: Could not create " + id
);
3568 status
= U_ZERO_ERROR
;
3569 CheckIncrementalAux(t
, test
);
3572 _trans(*t
, test
, rev
);
3573 Transliterator
*inv
= t
->createInverse(status
);
3574 if (U_FAILURE(status
)) {
3575 // The following are forward-only, it is OK that creating an inverse will not work:
3576 // 1. Devanagari-Arabic
3578 // 2a. Any-*/BGN_1981
3581 // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3582 if ( id
.compare((UnicodeString
)"Devanagari-Arabic/") != 0
3583 && !(id
.startsWith((UnicodeString
)"Any-") &&
3584 (id
.endsWith((UnicodeString
)"/BGN") || id
.endsWith((UnicodeString
)"/BGN_1981") || id
.endsWith((UnicodeString
)"/UNGEGN") || id
.endsWith((UnicodeString
)"/MNS"))
3586 #if UCONFIG_NO_BREAK_ITERATION
3587 && id
.compare((UnicodeString
)"Latin-Thai/") != 0
3591 errln((UnicodeString
)"FAIL: Could not create inverse of " + id
);
3597 CheckIncrementalAux(inv
, rev
);
// Runs one incremental transliteration of `input` through `t` and checks progress:
//   - a position `pos` is set up so that context and limit both cover all of `input`;
//   - after transliterate(test, pos, ec), pos.start still at 0 with a non-zero
//     limit is reported as "No Progress" (the ID "Hex-Any/Unicode" is exempt);
//     otherwise a PASS line is logged (the `else` itself is not in the visible lines);
//   - after finishTransliteration(), pos.start must equal pos.limit, else "Incomplete".
// `gotError` is only assigned and then cast to void to silence a compiler warning.
// NOTE(review): the declaration of `pos` and the assignment of pos.start are not
// in the visible lines.
3605 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,
3606 const UnicodeString
& input
) {
3607 UErrorCode ec
= U_ZERO_ERROR
;
3609 UnicodeString test
= input
;
3611 pos
.contextStart
= 0;
3612 pos
.contextLimit
= input
.length();
3614 pos
.limit
= input
.length();
3616 t
->transliterate(test
, pos
, ec
);
3617 if (U_FAILURE(ec
)) {
3618 errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
));
3621 UBool gotError
= FALSE
;
3622 (void)gotError
; // Suppress set but not used warning.
3624 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3626 if (pos
.start
== 0 && pos
.limit
!= 0 && t
->getID() != "Hex-Any/Unicode") {
3627 errln((UnicodeString
)"No Progress, " +
3628 t
->getID() + ": " + formatInput(test
, input
, pos
));
3631 logln((UnicodeString
)"PASS Progress, " +
3632 t
->getID() + ": " + formatInput(test
, input
, pos
));
3634 t
->finishTransliteration(test
, pos
);
3635 if (pos
.start
!= pos
.limit
) {
3636 errln((UnicodeString
)"Incomplete, " +
3637 t
->getID() + ": " + formatInput(test
, input
, pos
));
// Exercises the "&Function( ... )" rule syntax: builds a transliterator from one
// rule that appends "(lower=hex-of-lower)" after every uppercase letter, checks
// that toRules(escapeUnprintable = TRUE) gives back the text in `rule`, and then
// checks the output for "The Quick Brown Fox".
// NOTE(review): declarations of `pe` and `r`, and the conditions guarding the
// OK/FAIL messages, are not in the visible lines.
3642 void TransliteratorTest::TestFunction() {
3643 // Careful with spacing and ';' here: Phrase this exactly
3644 // as toRules() is going to return it. If toRules() changes
3645 // with regard to spacing or ';', then adjust this string.
3646 UnicodeString rule
=
3647 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3650 UErrorCode ec
= U_ZERO_ERROR
;
3651 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3653 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec
));
3658 t
->toRules(r
, TRUE
);
3660 logln((UnicodeString
)"OK: toRules() => " + r
);
3662 errln((UnicodeString
)"FAIL: toRules() => " + r
+
3663 ", expected " + rule
);
3666 expect(*t
, "The Quick Brown Fox",
3667 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3672 void TransliteratorTest::TestInvalidBackRef(void) {
3673 UnicodeString rule
= ". > $1;";
3674 UnicodeString rule2
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3676 UErrorCode ec
= U_ZERO_ERROR
;
3677 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3678 Transliterator
*t2
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
);
3681 errln("FAIL: createFromRules should have returned NULL");
3686 errln("FAIL: createFromRules should have returned NULL");
3690 if (U_SUCCESS(ec
)) {
3691 errln("FAIL: Ok: . > $1; => no error");
3693 logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
));
// Tests rules whose UnicodeSets contain multi-character strings ({..} elements).
// Two rule sets are built with createFromRules() and checked with expect():
// the first mixes single characters and strings, the second ("overlapped string
// test") checks that the longest matching string wins.
// NOTE(review): most of both rule texts and the declarations of `rule` and `pe`
// are not in the visible lines.
3697 void TransliteratorTest::TestMulticharStringSet() {
3704 " e } [{fg}] > r;" ;
3707 UErrorCode ec
= U_ZERO_ERROR
;
3708 Transliterator
* t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3709 if (t
== NULL
|| U_FAILURE(ec
)) {
3711 errln("FAIL: createFromRules failed");
3715 expect(*t
, "a aa ab bc d gd de gde gdefg ddefg",
3716 "y x yz z d gd de gdq gdqfg ddrfg");
3719 // Overlapped string test. Make sure that when multiple
3720 // strings can match that the longest one is matched.
3722 " [a {ab} {abc}] > x;"
3725 " q [t {st} {rst}] { e > p;" ;
3727 t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3728 if (t
== NULL
|| U_FAILURE(ec
)) {
3730 errln("FAIL: createFromRules failed");
3734 expect(*t
, "a ab abc qte qste qrste",
3735 "x x x qtp qstp qrstp");
// Slot tables shared by the TestUserFunction support factory: _TUFF[n] holds the
// prototype transliterator that _TUFFactory() clones for slot n, and _TUFID[n]
// holds the heap-allocated ID under which slot n was registered.
// Both arrays have 4 slots, matching the 0..3 loops in TestUserFunction().
3739 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3740 // BEGIN TestUserFunction support factory
3742 Transliterator
* _TUFF
[4];
3743 UnicodeString
* _TUFID
[4];
// Factory callback handed to Transliterator::registerFactory(): ignores the ID
// and returns a clone of the prototype stored in _TUFF at the index carried in
// `context.integer`.
3745 static Transliterator
* U_EXPORT2
_TUFFactory(const UnicodeString
& /*ID*/,
3746 Transliterator::Token context
) {
3747 return _TUFF
[context
.integer
]->clone();
// Registers `ID` with the transliterator registry so that it is served by
// _TUFFactory() using slot `n`: a heap copy of the ID is stored in _TUFID[n] and
// `n` is passed to the factory as an integer token.
// NOTE(review): the statement that stores `t` into _TUFF[n] is not in the
// visible lines -- confirm. No bounds check on `n` is visible (arrays have 4 slots).
3750 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) {
3752 _TUFID
[n
] = new UnicodeString(ID
);
3753 Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
));
// Undoes _TUFReg() for slot `n`: if a prototype is present in _TUFF[n], the ID
// saved in _TUFID[n] is unregistered.
// NOTE(review): release of _TUFF[n] / _TUFID[n] is not in the visible lines --
// confirm both are deleted so the slot does not leak.
3756 static void _TUFUnreg(int32_t n
) {
3757 if (_TUFF
[n
] != NULL
) {
3758 Transliterator::unregister(*_TUFID
[n
]);
3764 // END TestUserFunction support factory
3765 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// Visible flow:
//   1. Reset the 4 factory slots (loop body not visible).
//   2. Build rule-based transliterators "gif", "RemoveCurly", "hex2" and "gif2",
//      registering each through _TUFReg() as Any-gif / Any-RemoveCurly /
//      Any-hex2 / Any-gif2 in slots 0..3.
//   3. Re-create Any-hex2 and Any-gif2 through createInstance() and check their
//      output with expect(); "gif2" and the final "test" rule call the registered
//      transliterators through the &Name(...) function syntax.
//   4. A final 0..3 loop follows (body not visible; presumably _TUFUnreg(i)).
// NOTE(review): declarations of `t`, `i` and `pe`, the "hex2" rule text, and the
// early-return/cleanup statements after each failure are not in the visible lines.
3768 * Test that user-registered transliterators can be used under function
3771 void TransliteratorTest::TestUserFunction() {
3775 UErrorCode ec
= U_ZERO_ERROR
;
3777 // Setup our factory
3779 for (i
=0; i
<4; ++i
) {
3783 // There's no need to register inverses if we don't use them
3784 t
= Transliterator::createFromRules("gif",
3785 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3786 UTRANS_FORWARD
, pe
, ec
);
3787 if (t
== NULL
|| U_FAILURE(ec
)) {
3788 dataerrln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
));
3791 _TUFReg("Any-gif", t
, 0);
3793 t
= Transliterator::createFromRules("RemoveCurly",
3794 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3795 UTRANS_FORWARD
, pe
, ec
);
3796 if (t
== NULL
|| U_FAILURE(ec
)) {
3797 errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
));
3800 expect(*t
, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3801 _TUFReg("Any-RemoveCurly", t
, 1);
3803 logln("Trying &hex");
3804 t
= Transliterator::createFromRules("hex2",
3806 UTRANS_FORWARD
, pe
, ec
);
3807 if (t
== NULL
|| U_FAILURE(ec
)) {
3808 errln("FAIL: createFromRules");
3811 logln("Registering");
3812 _TUFReg("Any-hex2", t
, 2);
3813 t
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
);
3814 if (t
== NULL
|| U_FAILURE(ec
)) {
3815 errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
));
3818 expect(*t
, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3821 logln("Trying &gif");
3822 t
= Transliterator::createFromRules("gif2",
3823 "(.) > &Gif(&Hex2($1));",
3824 UTRANS_FORWARD
, pe
, ec
);
3825 if (t
== NULL
|| U_FAILURE(ec
)) {
3826 errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
));
3829 logln("Registering");
3830 _TUFReg("Any-gif2", t
, 3);
3831 t
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
);
3832 if (t
== NULL
|| U_FAILURE(ec
)) {
3833 errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
));
3836 expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3837 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3840 // Test that filters are allowed after &
3841 t
= Transliterator::createFromRules("test",
3842 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3843 UTRANS_FORWARD
, pe
, ec
);
3844 if (t
== NULL
|| U_FAILURE(ec
)) {
3845 errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
));
3849 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3853 for (i
=0; i
<4; ++i
) {
// Creates "Any-Latin" and checks it on mixed Greek / Hiragana / Cyrillic text,
// then creates the compound "Any-Latin;Latin-ASCII" and checks that Arabic-Indic
// and Persian digits come out as ASCII digits. `status` is reset to U_ZERO_ERROR
// between the two createInstance() calls.
// NOTE(review): the expect() call heads, the first failure condition and the
// cleanup statements are not in the visible lines.
3859 * Test the Any-X transliterators.
3861 void TransliteratorTest::TestAnyX(void) {
3862 UParseError parseError
;
3863 UErrorCode status
= U_ZERO_ERROR
;
3864 Transliterator
* anyLatin
=
3865 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3867 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status
));
3873 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3874 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3878 status
= U_ZERO_ERROR
;
3879 Transliterator
* anyASCII
=
3880 Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD
, parseError
, status
);
3881 if (U_FAILURE(status
) || anyASCII
==0) {
3882 dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status
));
3888 CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
3889 CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
// Smoke test for "Any-Latin" over sample letters from every script:
//   - `alphabetic` = [:alphabetic:], frozen after construction;
//   - for each script code below USCRIPT_CODE_LIMIT, the set for that script is
//     intersected with `alphabetic` and up to 5 of its characters (indexes 0..4)
//     are appended to `testString`;
//   - "Any-Latin" is then run over the accumulated string; input and output are
//     only logged, no expected value is compared in the visible lines.
// NOTE(review): the declaration of `sample` and the handling of
// sample.charAt(count) when the set has fewer than 5 characters are not in the
// visible lines -- confirm.
3895 * Test Any-X transliterators with sample letters from all scripts.
3897 void TransliteratorTest::TestAny(void) {
3898 UErrorCode status
= U_ZERO_ERROR
;
3899 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3900 // function call parameters going on in this test.
3901 UnicodeSet
alphabetic("[:alphabetic:]", status
);
3902 if (U_FAILURE(status
)) {
3903 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3906 alphabetic
.freeze();
3908 UnicodeString testString
;
3909 for (int32_t i
= 0; i
< USCRIPT_CODE_LIMIT
; i
++) {
3910 const char *scriptName
= uscript_getShortName((UScriptCode
)i
);
3911 if (scriptName
== NULL
) {
3912 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__
, __LINE__
, i
);
3917 sample
.applyPropertyAlias("script", scriptName
, status
);
3918 if (U_FAILURE(status
)) {
3919 errln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3922 sample
.retainAll(alphabetic
);
3923 for (int32_t count
=0; count
<5; count
++) {
3924 UChar32 c
= sample
.charAt(count
);
3928 testString
.append(c
);
3932 UParseError parseError
;
3933 Transliterator
* anyLatin
=
3934 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3935 if (U_FAILURE(status
)) {
3936 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3940 logln(UnicodeString("Sample set for Any-Latin: ") + testString
);
3941 anyLatin
->transliterate(testString
);
3942 logln(UnicodeString("Sample result for Any-Latin: ") + testString
);
// Builds a transliterator from rule text `r` and compares getSourceSet() and
// getTargetSet() with the expected sets [arx{lu}] and [bq]. On a match the sets
// are logged; otherwise actual and expected patterns are reported with errln().
// toPattern(..., TRUE) escapes unprintable characters.
// NOTE(review): the rule text `r`, the declaration of `pe`, and the `a`/`b`
// scratch strings used in the "Ok" branch are not in the visible lines.
3948 * Test the source and target set API. These are only implemented
3949 * for RBT and CompoundTransliterator at this time.
3951 void TransliteratorTest::TestSourceTargetSet() {
3952 UErrorCode ec
= U_ZERO_ERROR
;
3960 UnicodeSet
expSrc("[arx{lu}]", ec
);
3963 UnicodeSet
expTrg("[bq]", ec
);
3966 Transliterator
* t
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
);
3968 if (U_FAILURE(ec
)) {
3970 errln("FAIL: Couldn't set up test");
3974 UnicodeSet src
; t
->getSourceSet(src
);
3975 UnicodeSet trg
; t
->getTargetSet(trg
);
3977 if (src
== expSrc
&& trg
== expTrg
) {
3979 logln((UnicodeString
)"Ok: " +
3980 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3981 ", target = " + trg
.toPattern(b
, TRUE
));
3983 UnicodeString a
, b
, c
, d
;
3984 errln((UnicodeString
)"FAIL: " +
3985 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3986 ", expected " + expSrc
.toPattern(b
, TRUE
) +
3987 "; target = " + trg
.toPattern(c
, TRUE
) +
3988 ", expected " + expTrg
.toPattern(d
, TRUE
));
// Checks that U+200E (LEFT-TO-RIGHT MARK, a Pattern_White_Space character) is
// ignored as syntax in two places:
//   - in rule text: "a > <U+200E> b;" must behave like "a > b;" (expect "a" -> "b");
//   - in a UnicodeSet pattern: "[a <U+200E>]" must not contain U+200E.
// NOTE(review): the declaration of `pe` and the return/cleanup statements are
// not in the visible lines.
3995 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3997 void TransliteratorTest::TestPatternWhiteSpace() {
3999 const char* r
= "a > \\u200E b;";
4001 UErrorCode ec
= U_ZERO_ERROR
;
4003 Transliterator
* t
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
);
4005 if (U_FAILURE(ec
)) {
4006 errln("FAIL: Couldn't set up test");
4008 expect(*t
, "a", "b");
4014 UnicodeSet
set(CharsToUnicodeString("[a \\u200E]"), ec
);
4016 if (U_FAILURE(ec
)) {
4017 errln("FAIL: Couldn't set up test");
4019 if (set
.contains(0x200E)) {
4020 errln("FAIL: U+200E not being ignored by UnicodeSet");
// For every code point 0..0x10FFFF: look up its script, build the filter IDs
// "[:<long name>:];NFD" and "[:<short name>:];NFD", and -- only when an ID
// differs from the one built for the previous code point -- check that
// Transliterator::createInstance() accepts it. oldId/oldAbbrId remember the
// previous IDs so each run of same-script code points is tested once.
// NOTE(review): `status` is declared once outside the loop and no reset is
// visible, so after the first failure later createInstance() calls would see a
// failing status -- confirm whether that is intended.
// NOTE(review): the failure message for the abbreviated ID prints `id` (the
// long script name), not the abbreviated ID that failed.
// NOTE(review): the fixed 256-byte buffers are filled with unbounded
// uprv_strcpy/uprv_strcat; the declaration of `pe`, the NULL check on `myId`
// and any release of `t` are not in the visible lines.
4024 //======================================================================
4025 // this method is in TestUScript.java
4026 //======================================================================
4027 void TransliteratorTest::TestAllCodepoints(){
4028 UScriptCode code
= USCRIPT_INVALID_CODE
;
4029 char id
[256]={'\0'};
4030 char abbr
[256]={'\0'};
4031 char newId
[256]={'\0'};
4032 char newAbbrId
[256]={'\0'};
4033 char oldId
[256]={'\0'};
4034 char oldAbbrId
[256]={'\0'};
4036 UErrorCode status
=U_ZERO_ERROR
;
4039 for(uint32_t i
= 0; i
<=0x10ffff; i
++){
4040 code
= uscript_getScript(i
,&status
);
4041 if(code
== USCRIPT_INVALID_CODE
){
4042 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i
);
4044 const char* myId
= uscript_getName(code
);
4046 dataerrln("Valid script code returned NULL name. Check your data!");
4049 uprv_strcpy(id
,myId
);
4050 uprv_strcpy(abbr
,uscript_getShortName(code
));
4052 uprv_strcpy(newId
,"[:");
4053 uprv_strcat(newId
,id
);
4054 uprv_strcat(newId
,":];NFD");
4056 uprv_strcpy(newAbbrId
,"[:");
4057 uprv_strcat(newAbbrId
,abbr
);
4058 uprv_strcat(newAbbrId
,":];NFD");
4060 if(uprv_strcmp(newId
,oldId
)!=0){
4061 Transliterator
* t
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
);
4062 if(t
==NULL
|| U_FAILURE(status
)){
4063 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
4067 if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){
4068 Transliterator
* t
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
);
4069 if(t
==NULL
|| U_FAILURE(status
)){
4070 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
4074 uprv_strcpy(oldId
,newId
);
4075 uprv_strcpy(oldAbbrId
, newAbbrId
);
// TEST_TRANSLIT_ID(id, cls): boilerplate check used by TestBoilerplate().
// Creates the transliterator named by the string literal `id` and verifies that
// its dynamic class ID equals cls::getStaticClassID(). `id` is also passed to a
// "%s" format, so it must be a C string. The macro relies on the enclosing test
// class for dataerrln()/errln().
// NOTE(review): several continuation lines are not in the visible text.
4081 #define TEST_TRANSLIT_ID(id, cls) UPRV_BLOCK_MACRO_BEGIN { \
4082 UErrorCode ec = U_ZERO_ERROR; \
4083 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
4084 if (U_FAILURE(ec)) { \
4085 dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
4087 if (t->getDynamicClassID() != cls::getStaticClassID()) { \
4088 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4090 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
4093 } UPRV_BLOCK_MACRO_END
// TEST_TRANSLIT_RULE(rule, cls): same check as TEST_TRANSLIT_ID, but the
// transliterator is built from rule text with createFromRules("_", rule, ...).
// `rule` must be a string literal: it is concatenated with the message literal
// in the errln() call. `pe` is used here, but its declaration is not in the
// visible continuation lines.
4095 #define TEST_TRANSLIT_RULE(rule, cls) UPRV_BLOCK_MACRO_BEGIN { \
4096 UErrorCode ec = U_ZERO_ERROR; \
4098 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
4099 if (U_FAILURE(ec)) { \
4100 errln("FAIL: Couldn't create " rule); \
4102 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
4103 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4105 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
4108 } UPRV_BLOCK_MACRO_END
// Runs the class-ID boilerplate check (TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE)
// once for each listed transliterator: every ID, and the rule "a>b;", must
// produce an object whose dynamic class ID matches the named class.
4110 void TransliteratorTest::TestBoilerplate() {
4111 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
);
4112 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
);
4113 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
);
4114 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
);
4115 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
);
4116 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
);
4117 TEST_TRANSLIT_ID("Null", NullTransliterator
);
4118 TEST_TRANSLIT_ID("Remove", RemoveTransliterator
);
4119 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
);
4120 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
);
4121 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
);
4122 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
);
4123 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
);
// Tests the alternate rule operators written with arrow characters:
// U+2192 for '>', U+2190 for '<', U+2194 for '<>' and U+2206 for '&'
// (the mapping is spelled out in the input/expected strings of the second
// expect() call).
// NOTE(review): the remaining arguments of the first expect() call are not in
// the visible lines.
4126 void TransliteratorTest::TestAlternateSyntax() {
4131 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4134 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4135 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4136 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
// Rule sets for TestBeginEnd() / TestBeginEndToRules(), addressed by index from
// BEGIN_END_TEST_CASES (and directly as BEGIN_END_RULES[17] for the reverse test).
// Each entry is one C string made of adjacent literals; the empty-string entries
// are placeholders for disabled cases so that the indexes of later entries do
// not shift. The BOGUS_BEGIN_END_RULES text at the end is commented out.
// NOTE(review): many literal lines of the individual entries are not in the
// visible text, so entry boundaries cannot be confirmed from here.
4139 static const char* BEGIN_END_RULES
[] = {
4153 "", // test case commented out below, this is here to keep from messing up the indexes
4162 "", // test case commented out below, this is here to keep from messing up the indexes
4171 "", // test case commented out below, this is here to keep from messing up the indexes
4190 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4191 "$delim = [\\-$ws];"
4192 "$ws $delim* > ' ';"
4193 "'-' $delim* > '-';",
4197 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4198 "$delim = [\\-$ws];"
4199 "$ws $delim* > ' ';"
4200 "'-' $delim* > '-';",
4203 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4204 "$delim = [\\-$ws];"
4205 "$ws $delim* > ' ';"
4206 "'-' $delim* > '-';"
4210 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4211 "$delim = [\\-$ws];"
4213 "$ws $delim* > ' ';"
4214 "'-' $delim* > '-';",
4219 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4220 "$delim = [\\-$ws];"
4222 "$ws $delim* > ' ';"
4223 "'-' $delim* > '-';",
4225 "", // test case commented out below, this is here to keep from messing up the indexes
4229 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4230 "$delim = [\\-$ws];"
4232 "$ws $delim* > ' ';"
4233 "'-' $delim* > '-';"
4236 "", // test case commented out below, this is here to keep from messing up the indexes
4240 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4241 "$delim = [\\-$ws];"
4244 "$ws $delim* > ' ';"
4245 "'-' $delim* > '-';"
4248 "$ab { ' ' } $ab > '-';"
4255 "", // test case commented out below, this is here to keep from messing up the indexes
4258 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4259 "$delim = [\\-$ws];"
4262 "$ws $delim* > ' ';"
4263 "'-' $delim* > '-';"
4265 "$ab { ' ' } $ab > '-';"
4281 "", // test case commented out below, this is here to keep from messing up the indexes
4302 "", // test case commented out below, this is here to keep from messing up the indexes
4314 (This entire test is commented out below and will need some heavy revision when we re-add
4315 the ::BEGIN/::END stuff)
4316 static const char* BOGUS_BEGIN_END_RULES[] = {
4335 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
// Test table for TestBeginEnd() / TestBeginEndToRules(): consecutive triples of
// (rule set, input text, expected output). The test loops step through it three
// entries at a time, so every active row must contribute exactly three strings.
// Rows that are commented out correspond to placeholder entries in
// BEGIN_END_RULES. BEGIN_END_TEST_CASES_length is the total number of strings,
// not the number of rows.
4338 static const char* BEGIN_END_TEST_CASES
[] = {
4339 // rules input expected output
4340 BEGIN_END_RULES
[0], "abc ababc aba", "xy zbc z",
4341 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4342 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4343 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4344 BEGIN_END_RULES
[4], "abc ababc aba", "xy abxy z",
4345 BEGIN_END_RULES
[5], "abccabaacababcbc", "PXAARXQBR",
4347 BEGIN_END_RULES
[6], "e e - e---e- e", "e e e-e-e",
4348 BEGIN_END_RULES
[7], "e e - e---e- e", "e e e-e-e",
4349 BEGIN_END_RULES
[8], "e e - e---e- e", "e e e-e-e",
4350 BEGIN_END_RULES
[9], "e e - e---e- e", "e e e-e-e",
4351 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4352 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4353 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4354 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4355 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4356 BEGIN_END_RULES
[13], "e e - e---e- e", "e e e-e-e",
4357 BEGIN_END_RULES
[13], "a a a a", "a%a%a%a",
4358 BEGIN_END_RULES
[13], "a a-b c b a", "a%a-b cb-a",
4360 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4361 BEGIN_END_RULES
[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4362 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4363 BEGIN_END_RULES
[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4365 static const int32_t BEGIN_END_TEST_CASES_length
= UPRV_LENGTHOF(BEGIN_END_TEST_CASES
);
// Two visible parts:
//   1. For each (rules, input, expected) triple in BEGIN_END_TEST_CASES, call
//      expect() with the label "Test case #<row>"; the C strings are converted
//      with the invariant-character constructor (US_INV).
//   2. Build BEGIN_END_RULES[17] in the REVERSE direction and check its output on
//      a fixed string; a creation failure goes through reportParseError().
// The trailing loop over BOGUS_BEGIN_END_RULES is disabled (commented out).
// NOTE(review): the declaration of `i` and the else/cleanup statements of part 2
// are not in the visible lines.
4367 void TransliteratorTest::TestBeginEnd() {
4368 // run through the list of test cases above
4370 for (i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4371 expect((UnicodeString
)"Test case #" + (i
/ 3),
4372 UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4373 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4374 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4377 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4378 UParseError parseError
;
4379 UErrorCode status
= U_ZERO_ERROR
;
4380 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4381 UTRANS_REVERSE
, parseError
, status
);
4382 if (reversed
== 0 || U_FAILURE(status
)) {
4383 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4385 expect(*reversed
, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4389 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4390 // that all of them cause errors
4392 (commented out until we have the real ::BEGIN/::END stuff in place
4393 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4394 UParseError parseError;
4395 UErrorCode status = U_ZERO_ERROR;
4396 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4397 UTRANS_FORWARD, parseError, status);
4398 if (!U_FAILURE(status)) {
4400 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
// Round-trip variant of TestBeginEnd(): for each triple in BEGIN_END_TEST_CASES,
// build a transliterator from the rules, call toRules(escapeUnprintable = TRUE),
// build a second transliterator from the generated text, and check that second
// one against the same input/expected pair. The same round trip is then done for
// the REVERSE form of BEGIN_END_RULES[17]; note that here toRules() is called
// with FALSE and the generated rules are re-parsed in the FORWARD direction.
// NOTE(review): the expect() call heads and the statements that skip the rest of
// an iteration or release t/t2/reversed/reversed2 are not in the visible lines.
4406 void TransliteratorTest::TestBeginEndToRules() {
4407 // run through the same list of test cases we used above, but this time, instead of just
4408 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4409 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4410 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4411 // to (i.e., does the same thing as) the original rule set
4412 for (int32_t i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4413 UParseError parseError
;
4414 UErrorCode status
= U_ZERO_ERROR
;
4415 Transliterator
* t
= Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4416 UTRANS_FORWARD
, parseError
, status
);
4417 if (U_FAILURE(status
)) {
4418 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError
, status
);
4420 UnicodeString rules
;
4421 t
->toRules(rules
, TRUE
);
4422 Transliterator
* t2
= Transliterator::createFromRules((UnicodeString
)"Test case #" + (i
/ 3), rules
,
4423 UTRANS_FORWARD
, parseError
, status
);
4424 if (U_FAILURE(status
)) {
4425 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4426 parseError
, status
);
4430 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4431 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4438 // do the same thing for the reversible test case
4439 UParseError parseError
;
4440 UErrorCode status
= U_ZERO_ERROR
;
4441 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4442 UTRANS_REVERSE
, parseError
, status
);
4443 if (U_FAILURE(status
)) {
4444 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4446 UnicodeString rules
;
4447 reversed
->toRules(rules
, FALSE
);
4448 Transliterator
* reversed2
= Transliterator::createFromRules("Reversed", rules
, UTRANS_FORWARD
,
4449 parseError
, status
);
4450 if (U_FAILURE(status
)) {
4451 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4452 parseError
, status
);
4456 UnicodeString("xy XY XYZ yz YZ"),
4457 UnicodeString("xy abc xaba yz aba"));
4464 void TransliteratorTest::TestRegisterAlias() {
4465 UnicodeString
longID("Lower;[aeiou]Upper");
4466 UnicodeString
shortID("Any-CapVowels");
4467 UnicodeString
reallyShortID("CapVowels");
4469 Transliterator::registerAlias(shortID
, longID
);
4471 UErrorCode err
= U_ZERO_ERROR
;
4472 Transliterator
* t1
= Transliterator::createInstance(longID
, UTRANS_FORWARD
, err
);
4473 if (U_FAILURE(err
)) {
4474 errln("Failed to instantiate transliterator with long ID");
4475 Transliterator::unregister(shortID
);
4478 Transliterator
* t2
= Transliterator::createInstance(reallyShortID
, UTRANS_FORWARD
, err
);
4479 if (U_FAILURE(err
)) {
4480 errln("Failed to instantiate transliterator with short ID");
4482 Transliterator::unregister(shortID
);
4486 if (t1
->getID() != longID
)
4487 errln("Transliterator instantiated with long ID doesn't have long ID");
4488 if (t2
->getID() != reallyShortID
)
4489 errln("Transliterator instantiated with short ID doesn't have short ID");
4491 UnicodeString rules1
;
4492 UnicodeString rules2
;
4494 t1
->toRules(rules1
, TRUE
);
4495 t2
->toRules(rules2
, TRUE
);
4496 if (rules1
!= rules2
)
4497 errln("Alias transliterators aren't the same");
4501 Transliterator::unregister(shortID
);
4503 t1
= Transliterator::createInstance(shortID
, UTRANS_FORWARD
, err
);
4504 if (U_SUCCESS(err
)) {
4505 errln("Instantiation with short ID succeeded after short ID was unregistered");
4509 // try the same thing again, but this time with something other than
4510 // an instance of CompoundTransliterator
4511 UnicodeString
realID("Latin-Greek");
4512 UnicodeString
fakeID("Latin-dlgkjdflkjdl");
4513 Transliterator::registerAlias(fakeID
, realID
);
4516 t1
= Transliterator::createInstance(realID
, UTRANS_FORWARD
, err
);
4517 if (U_FAILURE(err
)) {
4518 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err
));
4519 Transliterator::unregister(realID
);
4522 t2
= Transliterator::createInstance(fakeID
, UTRANS_FORWARD
, err
);
4523 if (U_FAILURE(err
)) {
4524 errln("Failed to instantiate transliterator with fake ID");
4526 Transliterator::unregister(realID
);
4530 t1
->toRules(rules1
, TRUE
);
4531 t2
->toRules(rules2
, TRUE
);
4532 if (rules1
!= rules2
)
4533 errln("Alias transliterators aren't the same");
4537 Transliterator::unregister(fakeID
);
4540 void TransliteratorTest::TestRuleStripping() {
4543 \uE001>\u0C01; # SIGN
4545 static const UChar rule
[] = {
4546 0x0023,0x0020,0x000D,0x000A,
4547 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4549 static const UChar expectedRule
[] = {
4550 0xE001,0x003E,0x0C01,0x003B,0
4552 UChar result
[UPRV_LENGTHOF(rule
)];
4553 UErrorCode status
= U_ZERO_ERROR
;
4554 int32_t len
= utrans_stripRules(rule
, UPRV_LENGTHOF(rule
), result
, &status
);
4555 if (len
!= u_strlen(expectedRule
)) {
4556 errln("utrans_stripRules return len = %d", len
);
4558 if (u_strncmp(expectedRule
, result
, len
) != 0) {
4559 errln("utrans_stripRules did not return expected string");
4564 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
// Checks the "Halfwidth-Fullwidth" and "Fullwidth-Halfwidth" transliterators
// against a small table of string pairs.
//
// NOTE(review): the numbering gaps in this listing show that several lines of
// this function are not visible here (the first DATA entry, the switch header,
// the statements under each case label, and the cleanup). The comments below
// describe only what the visible lines establish - confirm the rest against
// the complete source.
4566 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4567 UParseError parseError
;
4568 UErrorCode status
= U_ZERO_ERROR
;
// One instance per direction; both calls share parseError and status.
4569 Transliterator
* hf
= Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD
, parseError
, status
);
4570 Transliterator
* fh
= Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD
, parseError
, status
);
// A missing instance is reported through dataerrln() together with the
// error name held in status.
4571 if (hf
== 0 || fh
== 0) {
4572 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
4578 // Array of 3n items (the loop below advances by 3 and reads DATA[i+1] and DATA[i+2])
4580 // "hf"|"fh"|"both",
4583 const char* DATA
[] = {
4585 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4586 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4588 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
// Walk the table one record (three entries) at a time.
4590 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// The table strings use \uXXXX escapes; CharsToUnicodeString() turns them
// into UnicodeString values. h comes from entry i+1 and f from entry i+2.
4591 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
4592 UnicodeString f
= CharsToUnicodeString(DATA
[i
+2]);
// The case labels are the character codes of 'h', 'f' and 'b'.
// NOTE(review): presumably the switch is on the first character of DATA[i]
// (the direction tag) - the switch expression is not visible; confirm.
4594 case 0x68: //'h': // Halfwidth-Fullwidth only
4597 case 0x66: //'f': // Fullwidth-Halfwidth only
4600 case 0x62: //'b': // both directions
4612 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4613 * TODO: confirm that the expected results are correct.
4614 * For now, test just confirms that C++ and Java give identical results.
4616 void TransliteratorTest::TestThai(void) {
4617 #if !UCONFIG_NO_BREAK_ITERATION
4618 UParseError parseError
;
4619 UErrorCode status
= U_ZERO_ERROR
;
4620 Transliterator
* tr
= Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
4622 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
4625 if (U_FAILURE(status
)) {
4626 errln("FAIL: createInstance failed with %s", u_errorName(status
));
4629 const char *thaiText
=
4630 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4631 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4632 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4633 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4634 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4635 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4636 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4637 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4638 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4639 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4640 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4641 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4642 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4643 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4644 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4645 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4646 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4647 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4648 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4649 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4650 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4651 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4652 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4653 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4654 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4655 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4656 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4657 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4658 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4659 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4661 const char *latinText
=
4662 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4663 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4664 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4665 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4666 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4667 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4668 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4669 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4670 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4671 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4672 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4673 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4674 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4675 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4676 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4677 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4678 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4679 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4682 UnicodeString
xlitText(thaiText
);
4683 xlitText
= xlitText
.unescape();
4684 tr
->transliterate(xlitText
);
4686 UnicodeString
expectedText(latinText
);
4687 expectedText
= expectedText
.unescape();
4688 expect(*tr
, xlitText
, expectedText
);
4695 * Test for rdar://problem/61817095 (and maybe eventually other Hans-Hant errors)
4699 void TransliteratorTest::TestHansHant(void) {
4700 UParseError parseError
;
4701 UErrorCode status
= U_ZERO_ERROR
;
4702 Transliterator
* tr
= Transliterator::createInstance("Hans-Hant", UTRANS_FORWARD
, parseError
, status
);
4703 if (U_FAILURE(status
)) {
4704 errln("FAIL: createInstance failed with %s", u_errorName(status
));
4708 const char* _sourceText
= "\\u810f \\u5185\\u810f \\u810f\\u5668 \\u4e94\\u810f \\u5fc3\\u810f \\u809d\\u810f \\u813e\\u810f \\u80c3\\u810f \\u80be\\u810f \\u80f0\\u810f \\u810f\\u8151 \\u80ba\\u810f";
4709 const char* _expectedResult
= "\\u9ad2 \\u5167\\u81df \\u81df\\u5668 \\u4e94\\u81df \\u5fc3\\u81df \\u809d\\u81df \\u813e\\u81df \\u80c3\\u81df \\u814e\\u81df \\u80f0\\u81df \\u81df\\u8151 \\u80ba\\u81df";
4711 UnicodeString
sourceText(_sourceText
);
4712 UnicodeString
expectedResult(_expectedResult
);
4713 sourceText
= sourceText
.unescape();
4714 expectedResult
= expectedResult
.unescape();
4716 expect(*tr
, sourceText
, expectedResult
);
4721 //======================================================================
4723 //======================================================================
4724 void TransliteratorTest::expectT(const UnicodeString
& id
,
4725 const UnicodeString
& source
,
4726 const UnicodeString
& expectedResult
) {
4727 UErrorCode ec
= U_ZERO_ERROR
;
4729 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
4730 if (U_FAILURE(ec
)) {
4731 errln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(ec
));
4735 expect(*t
, source
, expectedResult
);
// Builds one diagnostic line from a UParseError and a UErrorCode.
//
// message    - text that introduces the report (supplied by the caller)
// parseError - its line, offset, preContext and postContext fields are
//              appended; both contexts go through prettify(..., TRUE)
// status     - appended as its symbolic name via u_errorName()
//
// NOTE(review): the line that opens the reporting call is not visible in this
// listing, so which reporting function receives the text (and how `message` is
// joined to it) cannot be confirmed from here - check the complete source.
4739 void TransliteratorTest::reportParseError(const UnicodeString
& message
,
4740 const UParseError
& parseError
,
4741 const UErrorCode
& status
) {
4743 /*", parse error " + parseError.code +*/
4744 ", line " + parseError
.line
+
4745 ", offset " + parseError
.offset
+
4746 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
4747 ", post-context " + prettify(parseError
.postContext
,TRUE
) +
4748 ", Error: " + u_errorName(status
));
4751 void TransliteratorTest::expect(const UnicodeString
& rules
,
4752 const UnicodeString
& source
,
4753 const UnicodeString
& expectedResult
,
4754 UTransPosition
*pos
) {
4755 expect("<ID>", rules
, source
, expectedResult
, pos
);
4758 void TransliteratorTest::expect(const UnicodeString
& id
,
4759 const UnicodeString
& rules
,
4760 const UnicodeString
& source
,
4761 const UnicodeString
& expectedResult
,
4762 UTransPosition
*pos
) {
4763 UErrorCode status
= U_ZERO_ERROR
;
4764 UParseError parseError
;
4765 Transliterator
* t
= Transliterator::createFromRules(id
, rules
, UTRANS_FORWARD
, parseError
, status
);
4766 if (U_FAILURE(status
)) {
4767 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules
, parseError
, status
);
4769 expect(*t
, source
, expectedResult
, pos
);
4774 void TransliteratorTest::expect(const Transliterator
& t
,
4775 const UnicodeString
& source
,
4776 const UnicodeString
& expectedResult
,
4777 const Transliterator
& reverseTransliterator
) {
4778 expect(t
, source
, expectedResult
);
4779 expect(reverseTransliterator
, expectedResult
, source
);
// Runs transliterator `t` over `source` in several ways and reports each
// outcome through expectAux(), tagged with the transliterator's ID plus
// ":String", ":Replaceable" or ":Keyboard".
//
// t              - transliterator under test
// source         - input text
// expectedResult - text every mode is compared against
// pos            - caller-supplied position; dereferenced in the
//                  finishTransliteration() call below
//
// NOTE(review): the numbering gaps in this listing show that a number of lines
// of this function are not visible here - among them the declaration of `log`
// and, presumably, the conditions that select between the `pos`-based and the
// whole-string paths. The comments below describe only the visible statements;
// confirm the control flow against the complete source.
4782 void TransliteratorTest::expect(const Transliterator
& t
,
4783 const UnicodeString
& source
,
4784 const UnicodeString
& expectedResult
,
4785 UTransPosition
*pos
) {
// Whole-string mode: transliterate a copy of source in one call.
4787 UnicodeString
result(source
);
4788 t
.transliterate(result
);
4789 expectAux(t
.getID() + ":String", source
, result
, expectedResult
);
// Position used by the incremental calls further down; starts out all zero.
4791 UTransPosition index
={0, 0, 0, 0};
// Second working copy of source for the ":Replaceable" comparison.
4796 UnicodeString
rsource(source
);
4798 t
.transliterate(rsource
);
4800 // Do it all at once -- below we do it incrementally
4801 t
.finishTransliteration(rsource
, *pos
);
4803 expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
);
4805 // Test keyboard (incremental) transliteration -- this result
4806 // must be the same after we finalize (see below).
// formatInput() appends a rendering of rsource and index to log, once before
// and once after the incremental transliterate() call.
4811 formatInput(log
, rsource
, index
);
4813 UErrorCode status
= U_ZERO_ERROR
;
4814 t
.transliterate(rsource
, index
, status
);
4815 formatInput(log
, rsource
, index
);
// Feed source to the transliterator one code unit at a time, logging the
// state after each step.
4817 for (int32_t i
=0; i
<source
.length(); ++i
) {
4821 log
.append(source
.charAt(i
)).append(" -> ");
4822 UErrorCode status
= U_ZERO_ERROR
;
4823 t
.transliterate(rsource
, index
, source
.charAt(i
), status
);
4824 formatInput(log
, rsource
, index
);
4828 // As a final step in keyboard transliteration, we must call
4829 // transliterate to finish off any pending partial matches that
4830 // were waiting for more input.
4831 t
.finishTransliteration(rsource
, index
);
4832 log
.append(" => ").append(rsource
);
// The accumulated log is the summary; the check passes when the finished text
// equals expectedResult.
4834 expectAux(t
.getID() + ":Keyboard", log
,
4835 rsource
== expectedResult
,
4841 * @param appendTo result is appended to this param.
4842 * @param input the string being transliterated
4843 * @param pos the index struct
4845 UnicodeString
& TransliteratorTest::formatInput(UnicodeString
&appendTo
,
4846 const UnicodeString
& input
,
4847 const UTransPosition
& pos
) {
4848 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4849 // the {} indicate the context start and limit, and the ||
4850 // indicate the start and limit.
4851 if (0 <= pos
.contextStart
&&
4852 pos
.contextStart
<= pos
.start
&&
4853 pos
.start
<= pos
.limit
&&
4854 pos
.limit
<= pos
.contextLimit
&&
4855 pos
.contextLimit
<= input
.length()) {
4857 UnicodeString a
, b
, c
, d
, e
;
4858 input
.extractBetween(0, pos
.contextStart
, a
);
4859 input
.extractBetween(pos
.contextStart
, pos
.start
, b
);
4860 input
.extractBetween(pos
.start
, pos
.limit
, c
);
4861 input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
);
4862 input
.extractBetween(pos
.contextLimit
, input
.length(), e
);
4863 appendTo
.append(a
).append((UChar
)123/*{*/).append(b
).
4864 append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
).
4865 append((UChar
)125/*}*/).append(e
);
4867 appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" +
4868 pos
.contextStart
+ ", s=" + pos
.start
+ ", l=" +
4869 pos
.limit
+ ", cl=" + pos
.contextLimit
+ "} on " +
4875 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4876 const UnicodeString
& source
,
4877 const UnicodeString
& result
,
4878 const UnicodeString
& expectedResult
) {
4879 expectAux(tag
, source
+ " -> " + result
,
4880 result
== expectedResult
,
4884 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4885 const UnicodeString
& summary
, UBool pass
,
4886 const UnicodeString
& expectedResult
) {
4888 logln(UnicodeString("(")+tag
+") " + prettify(summary
));
4890 dataerrln(UnicodeString("FAIL: (")+tag
+") "
4892 + ", expected " + prettify(expectedResult
));
4896 #endif /* #if !UCONFIG_NO_TRANSLITERATION */