2 **********************************************************************
3 * Copyright (C) 1999-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "unicode/utf16.h"
45 /***********************************************************************
47 HOW TO USE THIS TEST FILE
49 How I developed on two platforms
50 without losing (too much of) my mind
53 1. Add new tests by copying/pasting/changing existing tests. On Java,
54 any public void method named Test...() taking no parameters becomes
55 a test. On C++, you need to modify the header and add a line to
56 the runIndexedTest() dispatch method.
58 2. Make liberal use of the expect() method; it is your friend.
60 3. The tests in this file exactly match those in a sister file on the
61 other side. The two files are:
63 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
64 icu4c: source/test/intltest/transtst.cpp
66 ==> THIS IS THE IMPORTANT PART <==
68 When you add a test in this file, add it in TransliteratorTest.java
69 too. Give it the same name and put it in the same relative place.
70 This makes maintenance a lot simpler for any poor soul who ends up
71 trying to synchronize the tests between icu4j and icu4c.
73 4. If you MUST enter a test that is NOT paralleled in the sister file,
74 then add it in the special non-mirrored section. These are
83 Make sure you document the reason the test is here and not there.
88 ***********************************************************************/
90 // Define character constants thusly to be EBCDIC-friendly
92 LEFT_BRACE
=((UChar
)0x007B), /*{*/
93 PIPE
=((UChar
)0x007C), /*|*/
94 ZERO
=((UChar
)0x0030), /*0*/
95 UPPER_A
=((UChar
)0x0041) /*A*/
98 TransliteratorTest::TransliteratorTest()
99 : DESERET_DEE((UChar32
)0x10414),
100 DESERET_dee((UChar32
)0x1043C)
// Destructor: the body is empty, so no explicit cleanup is performed here.
104 TransliteratorTest::~TransliteratorTest() {}
107 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
,
108 const char* &name
, char* /*par*/) {
110 TESTCASE(0,TestInstantiation
);
111 TESTCASE(1,TestSimpleRules
);
112 TESTCASE(2,TestRuleBasedInverse
);
113 TESTCASE(3,TestKeyboard
);
114 TESTCASE(4,TestKeyboard2
);
115 TESTCASE(5,TestKeyboard3
);
116 TESTCASE(6,TestArabic
);
117 TESTCASE(7,TestCompoundKana
);
118 TESTCASE(8,TestCompoundHex
);
119 TESTCASE(9,TestFiltering
);
120 TESTCASE(10,TestInlineSet
);
121 TESTCASE(11,TestPatternQuoting
);
122 TESTCASE(12,TestJ277
);
123 TESTCASE(13,TestJ243
);
124 TESTCASE(14,TestJ329
);
125 TESTCASE(15,TestSegments
);
126 TESTCASE(16,TestCursorOffset
);
127 TESTCASE(17,TestArbitraryVariableValues
);
128 TESTCASE(18,TestPositionHandling
);
129 TESTCASE(19,TestHiraganaKatakana
);
130 TESTCASE(20,TestCopyJ476
);
131 TESTCASE(21,TestAnchors
);
132 TESTCASE(22,TestInterIndic
);
133 TESTCASE(23,TestFilterIDs
);
134 TESTCASE(24,TestCaseMap
);
135 TESTCASE(25,TestNameMap
);
136 TESTCASE(26,TestLiberalizedID
);
137 TESTCASE(27,TestCreateInstance
);
138 TESTCASE(28,TestNormalizationTransliterator
);
139 TESTCASE(29,TestCompoundRBT
);
140 TESTCASE(30,TestCompoundFilter
);
141 TESTCASE(31,TestRemove
);
142 TESTCASE(32,TestToRules
);
143 TESTCASE(33,TestContext
);
144 TESTCASE(34,TestSupplemental
);
145 TESTCASE(35,TestQuantifier
);
146 TESTCASE(36,TestSTV
);
147 TESTCASE(37,TestCompoundInverse
);
148 TESTCASE(38,TestNFDChainRBT
);
149 TESTCASE(39,TestNullInverse
);
150 TESTCASE(40,TestAliasInverseID
);
151 TESTCASE(41,TestCompoundInverseID
);
152 TESTCASE(42,TestUndefinedVariable
);
153 TESTCASE(43,TestEmptyContext
);
154 TESTCASE(44,TestCompoundFilterID
);
155 TESTCASE(45,TestPropertySet
);
156 TESTCASE(46,TestNewEngine
);
157 TESTCASE(47,TestQuantifiedSegment
);
158 TESTCASE(48,TestDevanagariLatinRT
);
159 TESTCASE(49,TestTeluguLatinRT
);
160 TESTCASE(50,TestCompoundLatinRT
);
161 TESTCASE(51,TestSanskritLatinRT
);
162 TESTCASE(52,TestLocaleInstantiation
);
163 TESTCASE(53,TestTitleAccents
);
164 TESTCASE(54,TestLocaleResource
);
165 TESTCASE(55,TestParseError
);
166 TESTCASE(56,TestOutputSet
);
167 TESTCASE(57,TestVariableRange
);
168 TESTCASE(58,TestInvalidPostContext
);
169 TESTCASE(59,TestIDForms
);
170 TESTCASE(60,TestToRulesMark
);
171 TESTCASE(61,TestEscape
);
172 TESTCASE(62,TestAnchorMasking
);
173 TESTCASE(63,TestDisplayName
);
174 TESTCASE(64,TestSpecialCases
);
175 #if !UCONFIG_NO_FILE_IO
176 TESTCASE(65,TestIncrementalProgress
);
178 TESTCASE(66,TestSurrogateCasing
);
179 TESTCASE(67,TestFunction
);
180 TESTCASE(68,TestInvalidBackRef
);
181 TESTCASE(69,TestMulticharStringSet
);
182 TESTCASE(70,TestUserFunction
);
183 TESTCASE(71,TestAnyX
);
184 TESTCASE(72,TestSourceTargetSet
);
185 TESTCASE(73,TestGurmukhiDevanagari
);
186 TESTCASE(74,TestPatternWhiteSpace
);
187 TESTCASE(75,TestAllCodepoints
);
188 TESTCASE(76,TestBoilerplate
);
189 TESTCASE(77,TestAlternateSyntax
);
190 TESTCASE(78,TestBeginEnd
);
191 TESTCASE(79,TestBeginEndToRules
);
192 TESTCASE(80,TestRegisterAlias
);
193 TESTCASE(81,TestRuleStripping
);
194 TESTCASE(82,TestHalfwidthFullwidth
);
195 TESTCASE(83,TestThai
);
196 TESTCASE(84,TestAny
);
197 default: name
= ""; break;
// File-local version tuple {3,9,4,0}.
// NOTE(review): not referenced in the visible part of this file; presumably
// used to gate version-dependent test expectations -- confirm before removing.
201 static const UVersionInfo ICU_39
= {3,9,4,0};
203 * Make sure every system transliterator can be instantiated.
205 * ALSO test that the result of toRules() for each rule is a valid
206 * rule. Do this here so we don't have to have another test that
207 * instantiates everything as well.
209 void TransliteratorTest::TestInstantiation() {
210 UErrorCode ec
= U_ZERO_ERROR
;
211 StringEnumeration
* avail
= Transliterator::getAvailableIDs(ec
);
212 assertSuccess("getAvailableIDs()", ec
);
213 assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
);
214 int32_t n
= Transliterator::countAvailableIDs();
215 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
216 avail
->count(ec
) == n
);
217 assertSuccess("count()", ec
);
219 for (int32_t i
=0; i
<n
; ++i
) {
220 const UnicodeString
& id
= *avail
->snext(ec
);
221 if (!assertSuccess("snext()", ec
) ||
222 !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) {
225 UnicodeString id2
= Transliterator::getAvailableID(i
);
226 if (id
.length() < 1) {
227 errln(UnicodeString("FAIL: getAvailableID(") +
228 i
+ ") returned empty string");
232 errln(UnicodeString("FAIL: getAvailableID(") +
233 i
+ ") != getAvailableIDs().snext()");
236 UParseError parseError
;
237 UErrorCode status
= U_ZERO_ERROR
;
238 Transliterator
* t
= Transliterator::createInstance(id
,
239 UTRANS_FORWARD
, parseError
,status
);
241 Transliterator::getDisplayName(id
, name
);
243 #if UCONFIG_NO_BREAK_ITERATION
244 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
245 if (id
.compare((UnicodeString
)"Thai-Latin") != 0)
247 dataerrln(UnicodeString("FAIL: Couldn't create ") + id
+
248 /*", parse error " + parseError.code +*/
249 ", line " + parseError
.line
+
250 ", offset " + parseError
.offset
+
251 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
252 ", post-context " +prettify(parseError
.postContext
,TRUE
) +
253 ", Error: " + u_errorName(status
));
254 // When createInstance fails, it deletes the failing
255 // entry from the available ID list. We detect this
256 // here by looking for a change in countAvailableIDs.
257 int32_t nn
= Transliterator::countAvailableIDs();
260 --i
; // Compensate for deleted entry
263 logln(UnicodeString("OK: ") + name
+ " (" + id
+ ")");
267 t
->toRules(rules
, TRUE
);
268 Transliterator
*u
= Transliterator::createFromRules("x",
269 rules
, UTRANS_FORWARD
, parseError
,status
);
271 errln(UnicodeString("FAIL: ") + id
+
272 ".createFromRules() => bad rules" +
273 /*", parse error " + parseError.code +*/
274 ", line " + parseError
.line
+
275 ", offset " + parseError
.offset
+
276 ", context " + prettify(parseError
.preContext
, TRUE
) +
277 ", rules: " + prettify(rules
, TRUE
));
284 assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
);
285 assertSuccess("snext()", ec
);
288 // Now test the failure path
289 UParseError parseError
;
290 UErrorCode status
= U_ZERO_ERROR
;
291 UnicodeString
id("<Not a valid Transliterator ID>");
292 Transliterator
* t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
294 errln("FAIL: " + id
+ " returned a transliterator");
297 logln("OK: Bogus ID handled properly");
301 void TransliteratorTest::TestSimpleRules(void) {
302 /* Example: rules 1. ab>x|y
305 * []|eabcd start - no match, copy e to translated buffer
306 * [e]|abcd match rule 1 - copy output & adjust cursor
307 * [ex|y]cd match rule 2 - copy output & adjust cursor
308 * [exz]|d no match, copy d to transliterated buffer
311 expect(UnicodeString("ab>x|y;", "") +
315 /* Another set of rules:
327 expect(UnicodeString("ab>x|yzacw;") +
335 UErrorCode status
= U_ZERO_ERROR
;
336 UParseError parseError
;
337 Transliterator
*t
= Transliterator::createFromRules(
339 UnicodeString("$dummy=").append((UChar
)0xE100) +
341 "$vowel=[aeiouAEIOU];"
343 "$vowel } $lu > '!';"
348 UTRANS_FORWARD
, parseError
,
350 if (U_FAILURE(status
)) {
351 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status
));
354 expect(*t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
359 * Test inline set syntax and set variable syntax.
361 void TransliteratorTest::TestInlineSet(void) {
362 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
363 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
365 expect(UnicodeString(
368 "$alphanumeric = [$digit $alpha];" // ***
369 "$special = [^$alphanumeric];" // ***
370 "$alphanumeric > '-';"
371 "$special > '*';", ""),
373 "thx-1138", "---*----");
377 * Create some inverses and confirm that they work. We have to be
378 * careful how we do this, since the inverses will not be true
379 * inverses -- we can't throw any random string at the composition
380 * of the transliterators and expect the identity function. F x
381 * F' != I. However, if we are careful about the input, we will
382 * get the expected results.
384 void TransliteratorTest::TestRuleBasedInverse(void) {
385 UnicodeString RULES
=
386 UnicodeString("abc>zyx;") +
404 const char* DATA
[] = {
405 // Careful here -- random strings will not work. If we keep
406 // the left side to the domain and the right side to the range
407 // we will be okay though (left, abc; right xyz).
409 "abcacab", "zyxxxyy",
413 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
415 UErrorCode status
= U_ZERO_ERROR
;
416 UParseError parseError
;
417 Transliterator
*fwd
= Transliterator::createFromRules("<ID>", RULES
,
418 UTRANS_FORWARD
, parseError
, status
);
419 Transliterator
*rev
= Transliterator::createFromRules("<ID>", RULES
,
420 UTRANS_REVERSE
, parseError
, status
);
421 if (U_FAILURE(status
)) {
422 errln("FAIL: RBT constructor failed");
425 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
426 expect(*fwd
, DATA
[i
], DATA
[i
+1]);
427 expect(*rev
, DATA
[i
+1], DATA
[i
]);
434 * Basic test of keyboard.
436 void TransliteratorTest::TestKeyboard(void) {
437 UParseError parseError
;
438 UErrorCode status
= U_ZERO_ERROR
;
439 Transliterator
*t
= Transliterator::createFromRules("<ID>",
440 UnicodeString("psch>Y;")
444 UTRANS_FORWARD
, parseError
,
446 if (U_FAILURE(status
)) {
447 errln("FAIL: RBT constructor failed");
450 const char* DATA
[] = {
458 0, "AycAY", // null means finishKeyboardTransliteration
461 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
466 * Basic test of keyboard with cursor.
468 void TransliteratorTest::TestKeyboard2(void) {
469 UParseError parseError
;
470 UErrorCode status
= U_ZERO_ERROR
;
471 Transliterator
*t
= Transliterator::createFromRules("<ID>",
472 UnicodeString("ych>Y;")
476 UTRANS_FORWARD
, parseError
,
478 if (U_FAILURE(status
)) {
479 errln("FAIL: RBT constructor failed");
482 const char* DATA
[] = {
486 "s", "Aps", // modified for rollback - "Ay",
487 "c", "Apsc", // modified for rollback - "Ayc",
490 "s", "AycAps", // modified for rollback - "AycAy",
491 "c", "AycApsc", // modified for rollback - "AycAyc",
493 0, "AycAY", // null means finishKeyboardTransliteration
496 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
501 * Test keyboard transliteration with back-replacement.
503 void TransliteratorTest::TestKeyboard3(void) {
504 // We want th>z but t>y. Furthermore, during keyboard
505 // transliteration we want t>y then yh>z if t, then h are
507 UnicodeString
RULES("t>|y;"
510 const char* DATA
[] = {
511 // Column 1: characters to add to buffer (as if typed)
512 // Column 2: expected appearance of buffer after
513 // keyboard xliteration.
516 "t", "abt", // modified for rollback - "aby",
518 "t", "abyct", // modified for rollback - "abycy",
520 0, "abycz", // null means finishKeyboardTransliteration
523 UParseError parseError
;
524 UErrorCode status
= U_ZERO_ERROR
;
525 Transliterator
*t
= Transliterator::createFromRules("<ID>", RULES
, UTRANS_FORWARD
, parseError
, status
);
526 if (U_FAILURE(status
)) {
527 errln("FAIL: RBT constructor failed");
530 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
534 void TransliteratorTest::keyboardAux(const Transliterator
& t
,
535 const char* DATA
[], int32_t DATA_length
) {
536 UErrorCode status
= U_ZERO_ERROR
;
537 UTransPosition index
={0, 0, 0, 0};
539 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
545 t
.transliterate(s
, index
, DATA
[i
], status
);
548 t
.finishTransliteration(s
, index
);
550 // Show the start index '{' and the cursor '|'
551 UnicodeString a
, b
, c
;
552 s
.extractBetween(0, index
.contextStart
, a
);
553 s
.extractBetween(index
.contextStart
, index
.start
, b
);
554 s
.extractBetween(index
.start
, s
.length(), c
);
556 append((UChar
)LEFT_BRACE
).
560 if (s
== DATA
[i
+1] && U_SUCCESS(status
)) {
563 errln(UnicodeString("FAIL: ") + log
+ ", expected " + DATA
[i
+1]);
568 void TransliteratorTest::TestArabic(void) {
569 // Test disabled for 2.0 until new Arabic transliterator can be written.
571 // const char* DATA[] = {
572 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
573 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
574 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
575 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
576 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
577 // "\u062c\u0645\u064a\u0644\u0629",
581 // UChar ar_raw[] = {
582 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
583 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
584 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
585 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
586 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
589 // UnicodeString ar(ar_raw);
590 // UErrorCode status=U_ZERO_ERROR;
591 // UParseError parseError;
592 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
594 // errln("FAIL: createInstance failed");
597 // expect(*t, "Arabic", ar);
602 * Compose the Kana transliterator forward and reverse and try
603 * some strings that should come out unchanged.
605 void TransliteratorTest::TestCompoundKana(void) {
606 UParseError parseError
;
607 UErrorCode status
= U_ZERO_ERROR
;
608 Transliterator
* t
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
);
610 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status
));
612 expect(*t
, "aaaaa", "aaaaa");
618 * Compose the hex transliterators forward and reverse.
620 void TransliteratorTest::TestCompoundHex(void) {
621 UParseError parseError
;
622 UErrorCode status
= U_ZERO_ERROR
;
623 Transliterator
* a
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
624 Transliterator
* b
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
);
625 Transliterator
* transab
[] = { a
, b
};
626 Transliterator
* transba
[] = { b
, a
};
627 if (a
== 0 || b
== 0) {
628 errln("FAIL: construction failed");
633 // Do some basic tests of a
634 expect(*a
, "01", UnicodeString("\\u0030\\u0031", ""));
635 // Do some basic tests of b
636 expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01");
638 Transliterator
* ab
= new CompoundTransliterator(transab
, 2);
639 UnicodeString
s("abcde", "");
642 UnicodeString
str(s
);
643 a
->transliterate(str
);
644 Transliterator
* ba
= new CompoundTransliterator(transba
, 2);
645 expect(*ba
, str
, str
);
// Tag object for TestFilter: TestFilter::getDynamicClassID() returns the
// address of this variable as its UClassID. Only the address is used in the
// visible code; the stored value 0 is not read.
653 int gTestFilterClassID
= 0;
655 * Used by TestFiltering().
657 class TestFilter
: public UnicodeFilter
{
658 virtual UnicodeFunctor
* clone() const {
659 return new TestFilter(*this);
661 virtual UBool
contains(UChar32 c
) const {
662 return c
!= (UChar
)0x0063 /*c*/;
665 virtual UnicodeString
& toPattern(UnicodeString
& result
,
666 UBool
/*escapeUnprintable*/) const {
669 virtual UBool
matchesIndexValue(uint8_t /*v*/) const {
672 virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {}
674 UClassID
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; }
678 * Do some basic tests of filtering.
680 void TransliteratorTest::TestFiltering(void) {
681 UParseError parseError
;
682 UErrorCode status
= U_ZERO_ERROR
;
683 Transliterator
* hex
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
685 errln("FAIL: createInstance(Any-Hex) failed");
688 hex
->adoptFilter(new TestFilter());
689 UnicodeString
s("abcde");
690 hex
->transliterate(s
);
691 UnicodeString
exp("\\u0061\\u0062c\\u0064\\u0065", "");
693 logln(UnicodeString("Ok: \"") + exp
+ "\"");
695 logln(UnicodeString("FAIL: \"") + s
+ "\", wanted \"" + exp
+ "\"");
698 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
699 UnicodeFilter
*f
= hex
->orphanFilter();
701 errln("FAIL: orphanFilter() should get a UnicodeFilter");
711 void TransliteratorTest::TestAnchors(void) {
712 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
715 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
718 expect(UnicodeString("^ab > 01 ;"
726 expect(UnicodeString("$s = [z$] ;"
733 "abzababbabxzabxabx",
738 * Test pattern quoting and escape mechanisms.
740 void TransliteratorTest::TestPatternQuoting(void) {
742 // Each item is <rules>, <input>, <expected output>
743 const UnicodeString DATA
[] = {
744 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
745 UnicodeString(UChar(0x4E01)),
749 for (int32_t i
=0; i
<3; i
+=3) {
750 logln(UnicodeString("Pattern: ") + prettify(DATA
[i
]));
751 UParseError parseError
;
752 UErrorCode status
= U_ZERO_ERROR
;
753 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
754 if (U_FAILURE(status
)) {
755 errln("RBT constructor failed");
757 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
764 * Regression test for bugs found in Greek transliteration.
766 void TransliteratorTest::TestJ277(void) {
767 UErrorCode status
= U_ZERO_ERROR
;
768 UParseError parseError
;
769 Transliterator
*gl
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
);
771 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status
));
776 UChar upsilon
= 0x3C5;
778 // UChar PHI = 0x3A6;
780 // UChar omega = 0x3C9;
781 // UChar omicron = 0x3BF;
782 // UChar epsilon = 0x3B5;
784 // sigma upsilon nu -> syn
786 syn
.append(sigma
).append(upsilon
).append(nu
);
787 expect(*gl
, syn
, "syn");
789 // sigma alpha upsilon nu -> saun
791 sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
);
792 expect(*gl
, sayn
, "saun");
794 // Again, using a smaller rule set
799 "$ypsilon = \\u03C5;"
800 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
803 "u <> $vowel { $ypsilon;"
807 Transliterator
*mini
= Transliterator::createFromRules("mini", rules
, UTRANS_REVERSE
, parseError
, status
);
808 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
809 expect(*mini
, syn
, "syn");
810 expect(*mini
, sayn
, "saun");
814 #if !UCONFIG_NO_FORMATTING
815 // Transliterate the Greek locale data
817 DateFormatSymbols
syms(el
, status
);
818 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
820 const UnicodeString
* data
= syms
.getMonths(count
);
821 for (i
=0; i
<count
; ++i
) {
822 if (data
[i
].length() == 0) {
825 UnicodeString
out(data
[i
]);
826 gl
->transliterate(out
);
828 if (data
[i
].length() >= 2 && out
.length() >= 2 &&
829 u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) {
830 if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) {
835 logln(prettify(data
[i
] + " -> " + out
));
837 errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
));
846 * Prefix, suffix support in hex transliterators
848 void TransliteratorTest::TestJ243(void) {
849 UErrorCode ec
= U_ZERO_ERROR
;
851 // Test default Hex-Any, which should handle
852 // \u, \U, u+, and U+
853 Transliterator
*hex
=
854 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
);
855 if (assertSuccess("getInstance", ec
)) {
856 expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
860 // // Try a custom Hex-Unicode
861 // // \uXXXX and &#xXXXX;
862 // ec = U_ZERO_ERROR;
863 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
864 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
865 // "abcd5fx0123");
866 // // Try custom Any-Hex (default is tested elsewhere)
867 // ec = U_ZERO_ERROR;
868 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
869 // expect(hex3, "012", "012");
873 * Parsers need better syntax error messages.
875 void TransliteratorTest::TestJ329(void) {
877 struct { UBool containsErrors
; const char* rule
; } DATA
[] = {
878 { FALSE
, "a > b; c > d" },
879 { TRUE
, "a > b; no operator; c > d" },
881 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
883 for (int32_t i
=0; i
<DATA_length
; ++i
) {
884 UErrorCode status
= U_ZERO_ERROR
;
885 UParseError parseError
;
886 Transliterator
*rbt
= Transliterator::createFromRules("<ID>",
891 UBool gotError
= U_FAILURE(status
);
892 UnicodeString
desc(DATA
[i
].rule
);
893 desc
.append(gotError
? " -> error" : " -> no error");
895 desc
= desc
+ ", ParseError code=" + u_errorName(status
) +
896 " line=" + parseError
.line
+
897 " offset=" + parseError
.offset
+
898 " context=" + parseError
.preContext
;
900 if (gotError
== DATA
[i
].containsErrors
) {
901 logln(UnicodeString("Ok: ") + desc
);
903 errln(UnicodeString("FAIL: ") + desc
);
910 * Test segments and segment references.
912 void TransliteratorTest::TestSegments(void) {
914 // Each item is <rules>, <input>, <expected output>
915 UnicodeString DATA
[] = {
916 "([a-z]) '.' ([0-9]) > $2 '-' $1",
921 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
925 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
927 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
928 logln("Pattern: " + prettify(DATA
[i
]));
929 UParseError parseError
;
930 UErrorCode status
= U_ZERO_ERROR
;
931 Transliterator
*t
= Transliterator::createFromRules("ID", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
932 if (U_FAILURE(status
)) {
933 errln("FAIL: RBT constructor");
935 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
942 * Test cursor positioning outside of the key
944 void TransliteratorTest::TestCursorOffset(void) {
946 // Each item is <rules>, <input>, <expected output>
947 UnicodeString DATA
[] = {
948 "pre {alpha} post > | @ ALPHA ;"
950 "pre {beta} post > BETA @@ | ;"
953 "prealphapost prebetapost",
955 "prbetaxyz preBETApost",
957 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
959 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
960 logln("Pattern: " + prettify(DATA
[i
]));
961 UParseError parseError
;
962 UErrorCode status
= U_ZERO_ERROR
;
963 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
964 if (U_FAILURE(status
)) {
965 errln("FAIL: RBT constructor");
967 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
974 * Test zero length and > 1 char length variable values. Test
975 * use of variable refs in UnicodeSets.
977 void TransliteratorTest::TestArbitraryVariableValues(void) {
979 // Each item is <rules>, <input>, <expected output>
980 UnicodeString DATA
[] = {
998 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
1000 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1001 logln("Pattern: " + prettify(DATA
[i
]));
1002 UParseError parseError
;
1003 UErrorCode status
= U_ZERO_ERROR
;
1004 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1005 if (U_FAILURE(status
)) {
1006 errln("FAIL: RBT constructor");
1008 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
1015 * Confirm that the contextStart, contextLimit, start, and limit
1016 * behave correctly. J474.
1018 void TransliteratorTest::TestPositionHandling(void) {
1019 // Array of 3n items
1020 // Each item is <rules>, <input>, <expected output>
1021 const char* DATA
[] = {
1022 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1023 "xtat txtb", // pos 0,9,0,9
1026 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1027 "xtat txtb", // pos 2,9,3,8
1030 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1031 "xtat txtb", // pos 3,8,3,8
1035 // Array of 4n positions -- these go with the DATA array
1036 // They are: contextStart, contextLimit, start, limit
1043 int32_t n
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0])) / 3;
1044 for (int32_t i
=0; i
<n
; i
++) {
1045 UErrorCode status
= U_ZERO_ERROR
;
1046 UParseError parseError
;
1047 Transliterator
*t
= Transliterator::createFromRules("<ID>",
1048 DATA
[3*i
], UTRANS_FORWARD
, parseError
, status
);
1049 if (U_FAILURE(status
)) {
1051 errln("FAIL: RBT constructor");
1055 pos
.contextStart
= POS
[4*i
];
1056 pos
.contextLimit
= POS
[4*i
+1];
1057 pos
.start
= POS
[4*i
+2];
1058 pos
.limit
= POS
[4*i
+3];
1059 UnicodeString
rsource(DATA
[3*i
+1]);
1060 t
->transliterate(rsource
, pos
, status
);
1061 if (U_FAILURE(status
)) {
1063 errln("FAIL: transliterate");
1066 t
->finishTransliteration(rsource
, pos
);
1067 expectAux(DATA
[3*i
],
1076 * Test the Hiragana-Katakana transliterator.
1078 void TransliteratorTest::TestHiraganaKatakana(void) {
1079 UParseError parseError
;
1080 UErrorCode status
= U_ZERO_ERROR
;
1081 Transliterator
* hk
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
);
1082 Transliterator
* kh
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
);
1083 if (hk
== 0 || kh
== 0) {
1084 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1090 // Array of 3n items
1091 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1092 const char* DATA
[] = {
1094 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1095 "\\u30A2\\u30F8\\u30F2\\u30B0",
1098 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1099 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1101 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1103 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1104 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
1105 UnicodeString k
= CharsToUnicodeString(DATA
[i
+2]);
1107 case 0x68: //'h': // Hiragana-Katakana
1110 case 0x6B: //'k': // Katakana-Hiragana
1113 case 0x62: //'b': // both
1124 * Test cloning / copy constructor of RBT.
1126 void TransliteratorTest::TestCopyJ476(void) {
1127 // The real test here is what happens when the destructors are
1128 // called. So we let one object get destructed, and check to
1129 // see that its copy still works.
1130 Transliterator
*t2
= 0;
1132 UParseError parseError
;
1133 UErrorCode status
= U_ZERO_ERROR
;
1134 Transliterator
*t1
= Transliterator::createFromRules("t1",
1135 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD
, parseError
, status
);
1136 if (U_FAILURE(status
)) {
1137 errln("FAIL: RBT constructor");
1140 t2
= t1
->clone(); // Call copy constructor under the covers.
1141 expect(*t1
, "abcfoofoo", "ABcbar");
1144 expect(*t2
, "abcfoofoo", "ABcbar");
1149 * Test inter-Indic transliterators. These are composed.
1150 * ICU4C Jitterbug 483.
1152 void TransliteratorTest::TestInterIndic(void) {
1153 UnicodeString
ID("Devanagari-Gujarati", "");
1154 UErrorCode status
= U_ZERO_ERROR
;
1155 UParseError parseError
;
1156 Transliterator
* dg
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1158 dataerrln("FAIL: createInstance(" + ID
+ ") returned NULL - " + u_errorName(status
));
1161 UnicodeString id
= dg
->getID();
1163 errln("FAIL: createInstance(" + ID
+ ")->getID() => " + id
);
1165 UnicodeString dev
= CharsToUnicodeString("\\u0901\\u090B\\u0925");
1166 UnicodeString guj
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1167 expect(*dg
, dev
, guj
);
1172 * Test filter syntax in IDs. (J918)
1174 void TransliteratorTest::TestFilterIDs(void) {
1175 // Array of 4n strings:
1176 // <id>, <inverse id>, <input>, <expected output>
1177 const char* DATA
[] = {
1178 "[aeiou]Any-Hex", // ID
1179 "[aeiou]Hex-Any", // expected inverse ID
1181 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1183 "[aeiou]Any-Hex;[^5]Hex-Any",
1184 "[^5]Any-Hex;[aeiou]Hex-Any",
1193 enum { DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]) };
1195 for (int i
=0; i
<DATA_length
; i
+=4) {
1196 UnicodeString
ID(DATA
[i
], "");
1197 UnicodeString
uID(DATA
[i
+1], "");
1198 UnicodeString
data2(DATA
[i
+2], "");
1199 UnicodeString
data3(DATA
[i
+3], "");
1200 UParseError parseError
;
1201 UErrorCode status
= U_ZERO_ERROR
;
1202 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1204 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1207 expect(*t
, data2
, data3
);
1210 if (ID
!= t
->getID()) {
1211 errln("FAIL: createInstance(" + ID
+ ").getID() => " +
1215 // Check the inverse
1216 Transliterator
*u
= t
->createInverse(status
);
1218 errln("FAIL: " + ID
+ ".createInverse() returned NULL");
1219 } else if (u
->getID() != uID
) {
1220 errln("FAIL: " + ID
+ ".createInverse().getID() => " +
1221 u
->getID() + ", expected " + uID
);
1230 * Test the case mapping transliterators.
1232 void TransliteratorTest::TestCaseMap(void) {
1233 UParseError parseError
;
1234 UErrorCode status
= U_ZERO_ERROR
;
1235 Transliterator
* toUpper
=
1236 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1237 Transliterator
* toLower
=
1238 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1239 Transliterator
* toTitle
=
1240 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1241 if (toUpper
==0 || toLower
==0 || toTitle
==0) {
1242 errln("FAIL: createInstance returned NULL");
1249 expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.",
1250 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1251 expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1252 "the quick brown foX jumped over the lazY dogs.");
1253 expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.",
1254 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1262 * Test the name mapping transliterators.
// Checks the character-name transliterators: Any-Name restricted by the
// inline filter [^abc], plain Name-Any, and the compound "Any-Name;Name-Any".
1264 void TransliteratorTest::TestNameMap(void) {
1265 UParseError parseError
;
1266 UErrorCode status
= U_ZERO_ERROR
;
1267 Transliterator
* uni2name
=
1268 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
);
1269 Transliterator
* name2uni
=
1270 Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
);
1271 if (uni2name
==0 || name2uni
==0) {
1272 errln("FAIL: createInstance returned NULL");
// Code point -> name.  "abc" is excluded by the filter and is expected to
// stay literal; the expected output spells control characters and U+FFFF
// with the <control-XXXX> and <noncharacter-XXXX> forms.
1278 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1279 expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1280 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
// Name -> code point.  The input mixes well-formed names (two of them padded
// with spaces) with the fragment "\N{x" and a dangling "\N{" at the end; the
// expected string keeps both fragments verbatim and converts everything else.
1281 expect(*name2uni
, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1282 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
// Compound Any-Name followed by Name-Any.
// NOTE(review): the declaration that receives this result and the statement
// that consumes `s` are on lines not visible in this view -- confirm there.
1289 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
);
1291 errln("FAIL: createInstance returned NULL");
1296 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1297 UnicodeString s
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1303 * Test liberalized ID syntax. 1006c
// Verifies that createInstance() accepts loosely written IDs (different
// letter case, surrounding whitespace, inline filters, compound IDs) and,
// where an expected value is given, that getID() returns it.
1305 void TransliteratorTest::TestLiberalizedID(void) {
1306 // Some test cases have an expected getID() value of NULL. This
1307 // means I have disabled the test case for now. This stuff is
1308 // still under development, and I haven't decided whether to make
1309 // getID() return canonical case yet. It will all get rewritten
1310 // with the move to Source-Target/Variant IDs anyway. [aliu]
// Rows are triples: ID handed to createInstance(), expected getID() (NULL
// means "not checked"), and a short description used in the log messages.
1311 const char* DATA
[] = {
1312 "latin-greek", NULL
/*"Latin-Greek"*/, "case insensitivity",
1313 " Null ", "Null", "whitespace",
1314 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1315 " null ; latin-greek ", NULL
/*"Null;Latin-Greek"*/, "compound whitespace",
1317 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
1318 UParseError parseError
;
1319 UErrorCode status
= U_ZERO_ERROR
;
// Step by 3 because each table row occupies three array slots.
1320 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1321 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
// Reported through dataerrln, together with the ICU error name.
1323 dataerrln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1324 " cannot create ID \"" + DATA
[i
] + "\" - " + u_errorName(status
));
// NOTE(review): DATA[i+1] is NULL for two rows; the guard that keeps this
// assignment from running on NULL is presumably on a line not visible here.
1328 exp
= UnicodeString(DATA
[i
+1], "");
1330 // Don't worry about getID() if the expected char*
1331 // is NULL -- see above.
1332 if (exp
.length() == 0 || exp
== t
->getID()) {
1333 logln(UnicodeString("Ok: ") + DATA
[i
+2] +
1334 " create ID \"" + DATA
[i
] + "\" => \"" +
1337 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1338 " create ID \"" + DATA
[i
] + "\" => \"" +
1339 t
->getID() + "\", exp \"" + exp
+ "\"");
1346 /* test for Jitterbug 912 */
// Table-driven check of Transliterator::createInstance(): each row gives an
// ID, a direction and the ID expected from the created object, or "" when
// creation is expected to fail.
1347 void TransliteratorTest::TestCreateInstance(){
// Direction markers.  The table stores these very pointers, and the loop
// below tells the directions apart by comparing pointers, not characters.
1348 const char* FORWARD
= "F";
1349 const char* REVERSE
= "R";
1350 const char* DATA
[] = {
1352 // Column 2: direction
1353 // Column 3: expected ID, or "" if expect failure
1354 "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912
1356 // JB#2689: bad compound causes crash
1357 "InvalidSource-InvalidTarget", FORWARD
, "",
1358 "InvalidSource-InvalidTarget", REVERSE
, "",
1359 "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "",
1360 "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "",
1361 "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "",
1362 "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "",
// The loop stops at the first null ID.
// NOTE(review): the terminating NULL entry of DATA is not visible here.
1367 for (int32_t i
=0; DATA
[i
]; i
+=3) {
// A fresh error code for every row, so one failure does not leak into the next.
1369 UErrorCode ec
= U_ZERO_ERROR
;
1370 UnicodeString
id(DATA
[i
]);
1371 UTransDirection dir
= (DATA
[i
+1]==FORWARD
)?
1372 UTRANS_FORWARD
:UTRANS_REVERSE
;
1373 UnicodeString
expID(DATA
[i
+2]);
// NOTE(review): `err` and the variable receiving this result are declared on
// lines not visible in this view.
1375 Transliterator::createInstance(id
,dir
,err
,ec
);
1376 UnicodeString newID
;
1380 UBool ok
= (newID
== expID
);
// The ICU error name stands in for the ID in the messages below.
1382 newID
= u_errorName(ec
);
1385 logln((UnicodeString
)"Ok: createInstance(" +
1386 id
+ "," + DATA
[i
+1] + ") => " + newID
);
1388 dataerrln((UnicodeString
)"FAIL: createInstance(" +
1389 id
+ "," + DATA
[i
+1] + ") => " + newID
+
1390 ", expected " + expID
);
1397 * Test the normalization transliterator.
// Runs the NFD/NFC and NFKD/NFKC transliterators over two tables of
// (input, decomposed, composed) strings, then checks a compound that
// normalizes and removes a character.
1399 void TransliteratorTest::TestNormalizationTransliterator() {
1400 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1401 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
// Canonical table: column 2 is checked against NFD, column 3 against NFC.
1402 const char* CANON
[] = {
1403 // Input Decomposed Composed
1404 "cat", "cat", "cat" ,
1405 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1407 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1408 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1410 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1411 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1412 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1414 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1415 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1417 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1418 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1419 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1421 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1422 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1424 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1425 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1427 "Henry IV", "Henry IV", "Henry IV" ,
1428 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1430 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1431 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1432 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1433 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1434 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1436 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
// Compatibility table: column 2 is checked against NFKD, column 3 against NFKC.
1440 const char* COMPAT
[] = {
1441 // Input Decomposed Composed
1442 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1444 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1445 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1447 "Henry IV", "Henry IV", "Henry IV" ,
1448 "Henry \\u2163", "Henry IV", "Henry IV" ,
1450 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1451 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1453 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1458 UParseError parseError
;
1459 UErrorCode status
= U_ZERO_ERROR
;
1460 Transliterator
* NFD
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
);
1461 Transliterator
* NFC
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
);
1463 dataerrln("FAIL: createInstance failed: %s", u_errorName(status
));
// Walk the canonical table three slots at a time; the loop stops at the
// first null entry.
// NOTE(review): the NULL terminators of both tables and the declaration of
// `i` are on lines not visible in this view.
1468 for (i
=0; CANON
[i
]; i
+=3) {
1469 UnicodeString in
= CharsToUnicodeString(CANON
[i
]);
1470 UnicodeString expd
= CharsToUnicodeString(CANON
[i
+1]);
1471 UnicodeString expc
= CharsToUnicodeString(CANON
[i
+2]);
1472 expect(*NFD
, in
, expd
);
1473 expect(*NFC
, in
, expc
);
1478 Transliterator
* NFKD
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
);
1479 Transliterator
* NFKC
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
);
1480 if (!NFKD
|| !NFKC
) {
1481 errln("FAIL: createInstance failed");
// Same walk for the compatibility table.
1486 for (i
=0; COMPAT
[i
]; i
+=3) {
1487 UnicodeString in
= CharsToUnicodeString(COMPAT
[i
]);
1488 UnicodeString expkd
= CharsToUnicodeString(COMPAT
[i
+1]);
1489 UnicodeString expkc
= CharsToUnicodeString(COMPAT
[i
+2]);
1490 expect(*NFKD
, in
, expkd
);
1491 expect(*NFKC
, in
, expkc
);
// Compound "NFD; [x]Remove": the status is reset first, and the expected
// result is the decomposed c + U+030C with the trailing "x" removed.
1497 status
= U_ZERO_ERROR
;
1498 Transliterator
*t
= Transliterator::createInstance("NFD; [x]Remove",
1502 errln("FAIL: createInstance failed");
1504 expect(*t
, CharsToUnicodeString("\\u010dx"),
1505 CharsToUnicodeString("c\\u030C"));
1510 * Test compound RBT rules.
// Covers compound rule-based transliterators in four steps: build one from
// rules containing "::" ID lines and compare toRules() with the source rules;
// take toRules() of a compound created by ID; feed that output back into
// createFromRules(); finally check Foo(Bar) style IDs and their inverse.
1512 void TransliteratorTest::TestCompoundRBT(void) {
1513 // Careful with spacing and ';' here: Phrase this exactly
1514 // as toRules() is going to return it. If toRules() changes
1515 // with regard to spacing or ';', then adjust this string.
// NOTE(review): the middle lines of this rule string are not visible here.
1516 UnicodeString
rule("::Hex-Any;\n"
1520 "::[^t]Any-Upper;", "");
1521 UParseError parseError
;
1522 UErrorCode status
= U_ZERO_ERROR
;
1523 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
);
1525 errln("FAIL: createFromRules failed");
// The input starts with the escape \u0043, which the Hex-Any step is
// expected to turn into "C"; every "t" stays lowercase in the expected text.
1528 expect(*t
, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1529 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
// toRules() with escaping enabled is compared against the original rule text.
1531 t
->toRules(r
, TRUE
);
1533 logln((UnicodeString
)"OK: toRules() => " + r
);
1535 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1536 ", expected " + rule
);
// Step 2: a compound created from IDs must print as one "::" line per element.
1541 t
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
);
1543 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1546 UnicodeString
exp("::Greek-Latin;\n::Latin-Cyrillic;");
1547 t
->toRules(r
, TRUE
);
1549 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1550 ", expected " + exp
);
1552 logln((UnicodeString
)"OK: toRules() => " + r
);
1556 // Round trip the result of toRules
1557 t
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
);
1559 errln("FAIL: createFromRules #2 failed");
1562 logln((UnicodeString
)"OK: createFromRules(" + r
+ ") succeeded");
1565 // Test toRules again
1566 t
->toRules(r
, TRUE
);
1568 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1569 ", expected " + exp
);
1571 logln((UnicodeString
)"OK: toRules() => " + r
);
1576 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1577 // to what the regenerated ID will look like.
1578 UnicodeString
id("Upper(Lower);(NFKC)", "");
1579 t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
1581 errln("FAIL: createInstance #2 failed");
1584 if (t
->getID() == id
) {
1585 logln((UnicodeString
)"OK: created " + id
);
1587 errln((UnicodeString
)"FAIL: createInstance(" + id
+
1588 ").getID() => " + t
->getID());
1591 Transliterator
*u
= t
->createInverse(status
);
1593 errln("FAIL: createInverse failed");
// The expected inverse ID reverses the element order and swaps the two
// halves of each Foo(Bar) pair.
1597 exp
= "NFKC();Lower(Upper)";
1598 if (u
->getID() == exp
) {
1599 logln((UnicodeString
)"OK: createInverse(" + id
+ ") => " +
1602 errln((UnicodeString
)"FAIL: createInverse(" + id
+ ") => " +
1610 * Compound filter semantics were originally not implemented
1611 * correctly. Originally, each component filter f(i) is replaced by
1612 * f'(i) = f(i) && g, where g is the filter for the compound
1617 * Suppose I have a transliterator X. Internally X is
1618 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1620 * The compound should convert all greek characters (through latin) to
1621 * cyrillic, then lowercase the result. The filter should say "don't
1622 * touch 'A' in the original". But because an intermediate result
1623 * happens to go through "A", the Greek Alpha gets hung up.
// Creates the compound "Greek-Latin; Latin-Greek; Lower", installs the
// filter [^A] on it, and checks the result on a string that contains a
// Latin "A" as well as Greek letters.
1625 void TransliteratorTest::TestCompoundFilter(void) {
1626 UParseError parseError
;
1627 UErrorCode status
= U_ZERO_ERROR
;
1628 Transliterator
*t
= Transliterator::createInstance
1629 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
);
1631 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
// The filter is handed over with adoptFilter(); `status` here reports
// whether the UnicodeSet pattern could be parsed.
1634 t
->adoptFilter(new UnicodeSet("[^A]", status
));
1635 if (U_FAILURE(status
)) {
1636 errln("FAIL: UnicodeSet ct failed");
1641 // Only the 'A' at index 1 should remain unchanged
// NOTE(review): the call these two arguments belong to starts on a line not
// visible here.  In the expected text the Latin B and the Greek capitals come
// out as lowercase Greek while the Latin A is untouched.
1643 CharsToUnicodeString("BA\\u039A\\u0391"),
1644 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
// Checks the "Remove" transliterator with the inline filter [abc]: the
// expected text has every lowercase a, b and c deleted while the uppercase
// "A" survives.  The same check is then repeated on a clone.
1648 void TransliteratorTest::TestRemove(void) {
1649 UParseError parseError
;
1650 UErrorCode status
= U_ZERO_ERROR
;
1651 Transliterator
*t
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
);
1653 errln("FAIL: createInstance failed");
1657 expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts");
1659 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1660 // duplicating the filter
// If the clone had lost the filter, it could not produce the same filtered
// output as the original.
1661 Transliterator
* t2
= t
->clone();
1662 expect(*t2
, "Able bodied baker's cats", "Ale odied ker's ts");
// Table-driven check of two "print back" APIs.  Rows tagged RBT build a
// transliterator from rules and compare Transliterator::toRules() (both
// unescaped and escaped) with the expected text; the other rows build a
// UnicodeSet from a pattern and compare UnicodeSet::toPattern().
1668 void TransliteratorTest::TestToRules(void) {
// Row tags.  The loop below compares DATA[d] with RBT by pointer.
// NOTE(review): the table rows that carry these tags are on lines not
// visible in this view.
1669 const char* RBT
= "rbt";
1670 const char* SET
= "set";
1671 static const char* DATA
[] = {
1673 "$a=\\u4E61; [$a] > A;",
1677 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1678 "[[:Zs:][:Zl:]]{a} > A;",
1705 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1706 "[^[:Zs:]]{a} > A;",
1709 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1710 "[[a-z]-[:Zs:]]{a} > A;",
1713 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1714 "[[:Zs:]&[a-z]]{a} > A;",
1717 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1718 "[x[:Zs:]]{a} > A;",
1721 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1722 "$macron = \\u0304 ;"
1723 "$evowel = [aeiouyAEIOUY] ;"
1724 "$iotasub = \\u0345 ;"
1725 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1726 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1729 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732 static const int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
// Each row is a triple: tag, input rules or pattern, expected output.
1734 for (int32_t d
=0; d
< DATA_length
; d
+=3) {
1735 if (DATA
[d
] == RBT
) {
1736 // Transliterator test
1737 UParseError parseError
;
1738 UErrorCode status
= U_ZERO_ERROR
;
1739 Transliterator
*t
= Transliterator::createFromRules("ID",
1740 UnicodeString(DATA
[d
+1], -1, US_INV
), UTRANS_FORWARD
, parseError
, status
);
1742 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status
));
// The rules are fetched twice: once unescaped and once escaped.
1745 UnicodeString rules
, escapedRules
;
1746 t
->toRules(rules
, FALSE
);
1747 t
->toRules(escapedRules
, TRUE
);
// The same expected text serves both comparisons: unescaped through
// CharsToUnicodeString for the plain rules, taken literally for the
// escaped rules.
1748 UnicodeString expRules
= CharsToUnicodeString(DATA
[d
+2]);
1749 UnicodeString
expEscapedRules(DATA
[d
+2], -1, US_INV
);
1750 if (rules
== expRules
) {
1751 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1754 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1755 " => " + rules
+ ", exp " + expRules
);
1757 if (escapedRules
== expEscapedRules
) {
1758 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1759 " => " + escapedRules
);
1761 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1762 " => " + escapedRules
+ ", exp " + expEscapedRules
);
// UnicodeSet branch: build the set from the pattern and compare toPattern().
1768 UErrorCode status
= U_ZERO_ERROR
;
1769 UnicodeString
pat(DATA
[d
+1], -1, US_INV
);
1770 UnicodeString
expToPat(DATA
[d
+2], -1, US_INV
);
1771 UnicodeSet
set(pat
, status
);
1772 if (U_FAILURE(status
)) {
1773 errln("FAIL: UnicodeSet ct failed");
1776 // Adjust spacing etc. as necessary.
1777 UnicodeString toPat
;
1778 set
.toPattern(toPat
);
1779 if (expToPat
== toPat
) {
1780 logln((UnicodeString
)"Ok: " + pat
+
// The value reported after "exp" is expToPat, the string the comparison above
// actually uses; this message previously printed the input pattern instead.
1783 errln((UnicodeString
)"FAIL: " + pat
+
1784 " => " + prettify(toPat
, TRUE
) +
1785 ", exp " + prettify(expToPat
, TRUE
));
// Exercises rules that use context braces ({...}).
// NOTE(review): only the rule strings of the two expect() calls are visible
// in this view; their remaining arguments are on lines not shown here.
1791 void TransliteratorTest::TestContext() {
// Field order, per the trailing comment: contextStart, contextLimit, start, limit.
1792 UTransPosition pos
= {0, 2, 0, 1}; // cs cl s l
1793 expect("de > x; {d}e > y;",
1798 expect("ab{c} > z;",
// Checks that rules and system transliterators handle supplementary code
// points (above U+FFFF): variables and sets containing them, segment
// references, cursor movement with '@', and the Any-Hex variants.
1803 void TransliteratorTest::TestSupplemental() {
// NOTE(review): the second line of this rule string is not visible here.
1805 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1807 CharsToUnicodeString("ab\\U0001030Fx"),
1808 CharsToUnicodeString("\\U00010300bix"));
// Swap adjacent ($a)($b) pairs; both sets mix BMP letters with supplementary
// ranges.
1810 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1811 "$b=[A-Z\\U00010400-\\U0001044D];"
1812 "($a)($b) > $2 $1;"),
1813 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1814 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
// The trace below shows the text after successive rule applications;
// '|' marks the cursor.
1816 // k|ax\\U00010300xm
1818 // k|a\\U00010400\\U00010300xm
1819 // ky|\\U00010400\\U00010300xm
1820 // ky\\U00010400|\\U00010300xm
1822 // ky\\U00010400|\\U00010300\\U00010400m
1823 // ky\\U00010400y|\\U00010400m
1824 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1825 "$a {x} > | @ \\U00010400;"
1826 "{$a} [^\\u0000-\\uFFFF] > y;"),
1827 CharsToUnicodeString("kax\\U00010300xm"),
1828 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
// NOTE(review): the opening line of the call that takes these two arguments
// is not visible here; the expected text is in \N{...} name form.
1831 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1832 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
// Each Any-Hex variant below is given the same four code points:
// U+10330, U+10FF00, U+E0061 and U+00A0.
1834 expectT("Any-Hex/Unicode",
1835 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1838 expectT("Any-Hex/C",
1839 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1842 expectT("Any-Hex/Perl",
1843 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
// The Java form is expected to spell supplementary code points as surrogate pairs.
1846 expectT("Any-Hex/Java",
1847 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
// XML form: hexadecimal numeric character references, written out as ASCII.
// The expected text must be the references themselves, not the characters
// they stand for.
1850 expectT("Any-Hex/XML",
1851 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852 "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
// XML10 form: the same code points as decimal numeric character references
// (0x10330 = 66352, 0x10FF00 = 1113856, 0xE0061 = 917601, 0xA0 = 160).
1854 expectT("Any-Hex/XML10",
1855 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1856 "&#66352;&#1113856;&#917601;&#160;");
// Remove limited to U+E0000..U+E0FFF: only U+E0061 is missing from the
// expected text.
1858 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1859 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1860 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
// Exercises the quantifiers ?, * and + in rules: combined with cursor
// offsets ('@'), with segment references ($1), with sets, with quoted
// strings and with variable references.
// NOTE(review): several expect() calls below are only partly visible in this
// view (input and/or expected strings are on lines not shown here).
1863 void TransliteratorTest::TestQuantifier() {
1865 // Make sure @ in a quantified anteContext works
1866 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1870 // Make sure @ in a quantified postContext works
1871 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1875 // Make sure @ in a quantified postContext with seg ref works
1876 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1880 // Make sure @ past ante context doesn't enter ante context
1881 UTransPosition pos
= {0, 5, 3, 5};
1882 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1887 // Make sure @ past post context doesn't pass limit
1888 UTransPosition pos2
= {0, 4, 0, 2};
1889 expect("{b} a+ > c @@ |; x > y; a > A;",
1894 // Make sure @ past post context doesn't enter post context
1895 expect("{b} a+ > c @@ |; x > y; a > A;",
1899 expect("(ab)? c > d;",
1903 // NOTE: The (ab)+ when referenced just yields a single "ab",
1904 // not the full sequence of them. This accords with perl behavior.
1905 expect("(ab)+ {x} > '(' $1 ')';",
1907 "x ab(ab) abab(ab)y");
1910 "ac abc abbc abbbc",
1913 expect("[abc]+ > x;",
1914 "qac abrc abbcs abtbbc",
1917 expect("q{(ab)+} > x;",
1918 "qa qab qaba qababc qaba",
1919 "qa qx qxa qxc qxa");
1921 expect("q(ab)* > x;",
1922 "qa qab qaba qababc",
1925 // NOTE: The (ab)* when referenced just yields a single "ab",
1926 // not the full sequence of them. This accords with perl behavior.
1927 expect("q(ab)* > '(' $1 ')';",
1928 "qa qab qaba qababc",
1929 "()a (ab) (ab)a (ab)c");
1931 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1933 expect("'ab'+ > x;",
1937 // $foo+ and $foo* -- the quantifier should apply to the entire
1938 // variable reference
1939 expect("$var = ab; $var+ > x;",
// Minimal Transliterator subclass.  It is instantiated by the registration
// part of TestSTV (new TestTrans(IDS[i])) so that arbitrary IDs can be
// registered and unregistered.
1944 class TestTrans
: public Transliterator
{
// Forwards the ID to the base class.
// NOTE(review): the second base-constructor argument (0) is presumably a
// null filter -- confirm against the Transliterator constructor.
1946 TestTrans(const UnicodeString
& id
) : Transliterator(id
, 0) {
// A clone is a new TestTrans carrying the same ID.
1948 virtual Transliterator
* clone(void) const {
1949 return new TestTrans(getID());
// The text is never touched (the parameter is unnamed); the only effect is
// to move offsets.start up to offsets.limit.
1951 virtual void handleTransliterate(Replaceable
& /*text*/, UTransPosition
& offsets
,
1952 UBool
/*isIncremental*/) const
1954 offsets
.start
= offsets
.limit
;
// Class-ID accessors, declared here; the macro invoked after the class is
// what refers to them.
1956 virtual UClassID
getDynamicClassID() const;
1957 static UClassID U_EXPORT2
getStaticClassID();
1959 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans
)
1962 * Test Source-Target/Variant.
// Source-Target/Variant test in two parts.  Part 1 walks the available
// sources, their targets and their variants, logging each and checking that
// the counts are in range and the names non-empty.  Part 2 registers three
// TestTrans instances, creates them by ID, unregisters them, and then checks
// that none of the enumeration APIs still lists them.
// NOTE(review): the declarations of `i`, `j`, and of the UnicodeString
// variables `t` and `s` used in the last two loops are on lines not visible
// in this view.
1964 void TransliteratorTest::TestSTV(void) {
1965 int32_t ns
= Transliterator::countAvailableSources();
// Counts outside 0..255 are treated as implausible and reported.
1966 if (ns
< 0 || ns
> 255) {
1967 errln((UnicodeString
)"FAIL: Bad source count: " + ns
);
1971 for (i
=0; i
<ns
; ++i
) {
1972 UnicodeString source
;
1973 Transliterator::getAvailableSource(i
, source
);
1974 logln((UnicodeString
)"" + i
+ ": " + source
);
1975 if (source
.length() == 0) {
1976 errln("FAIL: empty source");
1979 int32_t nt
= Transliterator::countAvailableTargets(source
);
1980 if (nt
< 0 || nt
> 255) {
1981 errln((UnicodeString
)"FAIL: Bad target count: " + nt
);
1984 for (int32_t j
=0; j
<nt
; ++j
) {
1985 UnicodeString target
;
1986 Transliterator::getAvailableTarget(j
, source
, target
);
1987 logln((UnicodeString
)" " + j
+ ": " + target
);
1988 if (target
.length() == 0) {
1989 errln("FAIL: empty target");
1992 int32_t nv
= Transliterator::countAvailableVariants(source
, target
);
1993 if (nv
< 0 || nv
> 255) {
1994 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
);
1997 for (int32_t k
=0; k
<nv
; ++k
) {
1998 UnicodeString variant
;
1999 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
// Unlike sources and targets, an empty variant is only logged, not reported
// as a failure.
2000 if (variant
.length() == 0) {
2001 logln((UnicodeString
)" " + k
+ ": <empty>");
2003 logln((UnicodeString
)" " + k
+ ": " + variant
);
2009 // Test registration
// Three parallel arrays: the ID as registered, the full Source-Target form
// looked for in getAvailableID(), and the source part (NULL for the first ID,
// whose full form starts with "Any").
2010 const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011 const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2012 const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" };
2013 for (i
=0; i
<3; ++i
) {
2014 Transliterator
*t
= new TestTrans(IDS
[i
]);
2016 errln("FAIL: out of memory");
2019 if (t
->getID() != IDS
[i
]) {
2020 errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]);
// After registration the same pointer variable is reused for the instance
// obtained back through createInstance().
2024 Transliterator::registerInstance(t
);
2025 UErrorCode status
= U_ZERO_ERROR
;
2026 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2028 errln((UnicodeString
)"FAIL: Registration/creation failed for ID " +
2031 logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " +
// After unregister(), creating the same ID again is attempted and the
// outcome checked.
2035 Transliterator::unregister(IDS
[i
]);
2036 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2038 errln((UnicodeString
)"FAIL: Unregistration failed for ID " +
2044 // Make sure getAvailable API reflects removal
2045 int32_t n
= Transliterator::countAvailableIDs();
2046 for (i
=0; i
<n
; ++i
) {
2047 UnicodeString id
= Transliterator::getAvailableID(i
);
2048 for (j
=0; j
<3; ++j
) {
// Case-insensitive comparison against the full IDs.
2049 if (id
.caseCompare(FULL_IDS
[j
],0)==0) {
2050 errln((UnicodeString
)"FAIL: unregister(" + id
+ ") failed");
// The first test ID has no explicit source, so it is looked for among the
// targets of "Any".
2054 n
= Transliterator::countAvailableTargets("Any");
2055 for (i
=0; i
<n
; ++i
) {
2057 Transliterator::getAvailableTarget(i
, "Any", t
);
2058 if (t
.caseCompare(IDS
[0],0)==0) {
2059 errln((UnicodeString
)"FAIL: unregister(Any-" + t
+ ") failed");
2062 n
= Transliterator::countAvailableSources();
2063 for (i
=0; i
<n
; ++i
) {
2065 Transliterator::getAvailableSource(i
, s
);
2066 for (j
=0; j
<3; ++j
) {
// Skip the entry that has no source of its own.
2067 if (SOURCES
[j
] == NULL
) continue;
2068 if (s
.caseCompare(SOURCES
[j
],0)==0) {
2069 errln((UnicodeString
)"FAIL: unregister(" + s
+ "-*) failed");
2076 * Test inverse of Greek-Latin; Title()
// Creates "Greek-Latin; Title()" in the REVERSE direction and checks that
// the resulting ID is "(Title);Latin-Greek".
2078 void TransliteratorTest::TestCompoundInverse(void) {
2079 UParseError parseError
;
2080 UErrorCode status
= U_ZERO_ERROR
;
2081 Transliterator
*t
= Transliterator::createInstance
2082 ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
);
2084 dataerrln("FAIL: createInstance - %s", u_errorName(status
));
// Expected ID: element order reversed, each element replaced by its inverse.
2087 UnicodeString
exp("(Title);Latin-Greek");
2088 if (t
->getID() == exp
) {
2089 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2092 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2093 t
->getID() + "\", expected \"" + exp
+ "\"");
2099 * Test NFD chaining with RBT
// Builds a rule-based transliterator whose rules start with "::NFD" and
// checks that the rule "aa > Q" applies to the input "aa".
// NOTE(review): the declaration of `pe` is on a line not visible here.
2101 void TransliteratorTest::TestNFDChainRBT() {
2103 UErrorCode ec
= U_ZERO_ERROR
;
2104 Transliterator
* t
= Transliterator::createFromRules(
2105 "TEST", "::NFD; aa > Q; a > q;",
2106 UTRANS_FORWARD
, pe
, ec
);
// Either a null result or a failing error code counts as a creation failure.
2107 if (t
== NULL
|| U_FAILURE(ec
)) {
2108 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
));
// "aa" must come out as "Q" (the longer rule), not as "qq".
2111 expect(*t
, "aa", "Q");
2114 // TEMPORARY TESTS -- BEING DEBUGGED
2115 //=- UnicodeString s, s2;
2116 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2117 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2118 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2119 //=- expect(*t, s, s2);
2122 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2123 //=- expect(*t, s2, s);
2126 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2127 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2128 //=- expect(*t, s, s);
2131 // const char* source[] = {
2133 // "\\u015Br\\u012Bmad",
2134 // "bhagavadg\\u012Bt\\u0101",
2137 // "vi\\u1E63\\u0101da",
2139 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2140 // "uv\\u0101cr\\u0325",
2142 // "rmk\\u1E63\\u0113t",
2143 // //"dharmak\\u1E63\\u0113tr\\u0113",
2145 // "kuruk\\u1E63\\u0113tr\\u0113",
2146 // "samav\\u0113t\\u0101",
2147 // "yuyutsava-\\u1E25",
2148 // "m\\u0101mak\\u0101-\\u1E25",
2149 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2151 // "san\\u0304java",
2156 // const char* expected[] = {
2158 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2159 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2160 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2161 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2162 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2163 // "\\u092f\\u094b\\u0917",
2164 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2165 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2168 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2170 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2172 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2173 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2174 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2175 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2176 // "\\u0938\\u0902\\u091c\\u0935",
2180 // UErrorCode status = U_ZERO_ERROR;
2181 // UParseError parseError;
2182 // UnicodeString message;
2183 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2184 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2185 // if(U_FAILURE(status)){
2186 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2187 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2188 // delete latinToDevToLatin;
2189 // delete devToLatinToDev;
2192 // UnicodeString gotResult;
2193 // for(int i= 0; source[i] != 0; i++){
2194 // gotResult = source[i];
2195 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2196 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2198 // delete latinToDevToLatin;
2199 // delete devToLatinToDev;
2203 * Inverse of "Null" should be "Null". (J21)
// Creates the "Null" transliterator, takes its inverse, and checks that the
// inverse's ID is again "Null".
// NOTE(review): the declaration of `pe` is on a line not visible here.
2205 void TransliteratorTest::TestNullInverse() {
2207 UErrorCode ec
= U_ZERO_ERROR
;
2208 Transliterator
*t
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
);
2209 if (t
== 0 || U_FAILURE(ec
)) {
2210 errln("FAIL: createInstance");
// The same error code is reused for createInverse().
2213 Transliterator
*u
= t
->createInverse(ec
);
2214 if (u
== 0 || U_FAILURE(ec
)) {
2215 errln("FAIL: createInverse");
2219 if (u
->getID() != "Null") {
2220 errln("FAIL: Inverse of Null should be Null");
2227 * Check ID of inverse of alias. (J22)
// Creates "Latin-Hangul", takes its inverse, and compares the inverse's ID
// with "Hangul-Latin".
// NOTE(review): the declaration of `pe` and the comparison that guards the
// final errln are on lines not visible here.
2229 void TransliteratorTest::TestAliasInverseID() {
2230 UnicodeString
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2232 UErrorCode ec
= U_ZERO_ERROR
;
2233 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
// Creation failure is reported through dataerrln with the ICU error name.
2234 if (t
== 0 || U_FAILURE(ec
)) {
2235 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2238 Transliterator
*u
= t
->createInverse(ec
);
2239 if (u
== 0 || U_FAILURE(ec
)) {
2240 errln("FAIL: createInverse");
// Expected and actual inverse IDs, both used in the failure message below.
2244 UnicodeString exp
= "Hangul-Latin";
2245 UnicodeString got
= u
->getID();
2247 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2248 ", expected " + exp
);
2255 * Test IDs of inverses of compound transliterators. (J20)
// Creates the compound "Latin-Jamo;NFC(NFD)", takes its inverse, and
// compares the inverse's ID with "NFD(NFC);Jamo-Latin".
// NOTE(review): the declaration of `pe` and the comparison that guards the
// final errln are on lines not visible here.
2257 void TransliteratorTest::TestCompoundInverseID() {
2258 UnicodeString ID
= "Latin-Jamo;NFC(NFD)";
2260 UErrorCode ec
= U_ZERO_ERROR
;
2261 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2262 if (t
== 0 || U_FAILURE(ec
)) {
2263 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2266 Transliterator
*u
= t
->createInverse(ec
);
2267 if (u
== 0 || U_FAILURE(ec
)) {
2268 errln("FAIL: createInverse");
// Expected inverse: elements in reverse order, with the two halves of the
// Foo(Bar) pair swapped and Latin-Jamo turned into Jamo-Latin.
2272 UnicodeString exp
= "NFD(NFC);Jamo-Latin";
2273 UnicodeString got
= u
->getID();
2275 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2276 ", expected " + exp
);
2283 * Test undefined variable.
// Compiles a rule that uses the variable $initial without defining it.
// A failing error code is the expected outcome and is only logged.
// NOTE(review): the declaration of `pe`, the tails of both messages, and
// (presumably) the `else` that separates logln from errln are on lines not
// visible here.
2286 void TransliteratorTest::TestUndefinedVariable() {
2287 UnicodeString rule
= "$initial } a <> \\u1161;";
2289 UErrorCode ec
= U_ZERO_ERROR
;
2290 Transliterator
*t
= Transliterator::createFromRules("<ID>", rule
, UTRANS_FORWARD
, pe
, ec
);
2292 if (U_FAILURE(ec
)) {
2293 logln((UnicodeString
)"OK: Got exception for " + rule
+ ", as expected: " +
2297 errln((UnicodeString
)"Fail: bogus rule " + rule
+ " compiled with error " +
2302 * Test empty context.
// Rule whose key "a" is wrapped in context braces with nothing before or
// after them; every "a" in the input is expected to become "b".
2304 void TransliteratorTest::TestEmptyContext() {
2305 expect(" { a } > b;", "xay a ", "xby b ");
2309 * Test compound filter ID syntax
// Table-driven test of filters written inside compound IDs and inside "::"
// rule lines.  Rows with NULL in columns 2-4 are expected to be rejected;
// the other rows are created and run on a source string.
// NOTE(review): the declarations of `exp` and `pe`, the table's NULL
// terminator, and the branch structure around the final logln/dataerrln are
// on lines not visible in this view.
2311 void TransliteratorTest::TestCompoundFilterID(void) {
2312 static const char* DATA
[] = {
2313 // Col. 1 = ID or rule set (latter must start with #)
2315 // = columns > 1 are null if expect col. 1 to be illegal =
2317 // Col. 2 = direction, "F..." or "R..."
2318 // Col. 3 = source string
2319 // Col. 4 = exp result
2321 "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters
2322 "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter
2323 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2324 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2325 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2326 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
// Four slots per row; the loop stops at the first null ID.
2330 for (int32_t i
=0; DATA
[i
]; i
+=4) {
2331 UnicodeString id
= CharsToUnicodeString(DATA
[i
]);
// Reverse only when the direction string starts with 'R'; a NULL direction
// (illegal row) falls back to forward.
2332 UTransDirection direction
= (DATA
[i
+1] != NULL
&& DATA
[i
+1][0] == 'R') ?
2333 UTRANS_REVERSE
: UTRANS_FORWARD
;
2334 UnicodeString source
;
2336 if (DATA
[i
+2] != NULL
) {
2337 source
= CharsToUnicodeString(DATA
[i
+2]);
2338 exp
= CharsToUnicodeString(DATA
[i
+3]);
// A row is expected to succeed exactly when it has a direction.
2340 UBool expOk
= (DATA
[i
+1] != NULL
);
2341 Transliterator
* t
= NULL
;
2343 UErrorCode ec
= U_ZERO_ERROR
;
// Entries starting with '#' (0x23) are rule sets; everything else is an ID.
2344 if (id
.charAt(0) == 0x23/*#*/) {
2345 t
= Transliterator::createFromRules("ID", id
, direction
, pe
, ec
);
2347 t
= Transliterator::createInstance(id
, direction
, pe
, ec
);
2349 UBool ok
= (t
!= NULL
&& U_SUCCESS(ec
));
2350 UnicodeString transID
;
2352 transID
= t
->getID();
2355 transID
= UnicodeString("NULL", "");
2358 logln((UnicodeString
)"Ok: " + id
+ " => " + transID
+ ", " +
// Rows without a source string are not run through expect().
2360 if (source
.length() != 0) {
2361 expect(*t
, source
, exp
);
2365 dataerrln((UnicodeString
)"FAIL: " + id
+ " => " + transID
+ ", " +
2372 * Test new property set syntax
2374 void TransliteratorTest::TestPropertySet() {
2375 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2376 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2377 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2381 * Test various failure points of the new 2.0 engine.
2383 void TransliteratorTest::TestNewEngine() {
2385 UErrorCode ec
= U_ZERO_ERROR
;
2386 Transliterator
*t
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
);
2387 if (t
== 0 || U_FAILURE(ec
)) {
2388 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec
));
2391 // Katakana should be untouched
2392 expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"),
2393 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2398 // This test will only work if Transliterator.ROLLBACK is
2399 // true. Otherwise, this test will fail, revealing a
2400 // limitation of global filters in incremental mode.
2402 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
);
2404 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
);
2405 if (U_FAILURE(ec
)) {
2411 Transliterator
* array
[3];
2413 array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
);
2415 if (U_FAILURE(ec
)) {
2416 errln("FAIL: createInstance NFD");
2423 t
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
));
2424 if (U_FAILURE(ec
)) {
2425 errln("FAIL: UnicodeSet constructor");
2433 expect(*t
, "aAaA", "bAbA");
2435 assertTrue("countElements", t
->countElements() == 3);
2436 assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A");
2437 assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD");
2438 assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b");
2439 assertSuccess("getElement", ec
);
2447 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2451 UnicodeString gr
= CharsToUnicodeString(
2453 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2454 "$rough = \\u0314 ;"
2455 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2459 expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2463 * Test quantified segment behavior. We want:
2464 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2466 void TransliteratorTest::TestQuantifiedSegment(void) {
2468 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2470 // The tricky case; the quantifier is around the segment
2471 expect("([abc])+ > x $1 x;", "cba", "xax");
2473 // Tricky case in reverse direction
2474 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2476 // Check post-context segment
2477 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2479 // Test toRule/toPattern for non-quantified segment.
2480 // Careful with spacing here.
2481 UnicodeString
r("([a-c]){q} > x $1 x;");
2483 UErrorCode ec
= U_ZERO_ERROR
;
2484 Transliterator
* t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2485 if (U_FAILURE(ec
)) {
2486 errln("FAIL: createFromRules");
2491 t
->toRules(rr
, TRUE
);
2493 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2495 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2499 // Test toRule/toPattern for quantified segment.
2500 // Careful with spacing here.
2501 r
= "([a-c])+{q} > x $1 x;";
2502 t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2503 if (U_FAILURE(ec
)) {
2504 errln("FAIL: createFromRules");
2508 t
->toRules(rr
, TRUE
);
2510 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2512 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2517 //======================================================================
2519 //======================================================================
2520 void TransliteratorTest::TestDevanagariLatinRT(){
2521 const int MAX_LEN
= 52;
2522 const char* const source
[MAX_LEN
] = {
2537 //"r\\u0323ya", // \u095c is not valid in Devanagari
2563 "\\u1E6Dh\\u1E6Dha",
2570 // Not roundtrippable --
2571 // \\u0939\\u094d\\u094d\\u092E - hma
2572 // \\u0939\\u094d\\u092E - hma
2573 // CharsToUnicodeString("hma"),
2578 "san\\u0304j\\u012Bb s\\u0113nagupta",
2579 "\\u0101nand vaddir\\u0101ju",
2583 const char* const expected
[MAX_LEN
] = {
2584 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2585 "\\u0915\\u094D\\u0930", /* kra */
2586 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2587 "\\u0916\\u094D\\u0930", /* khra */
2588 "\\u0917\\u094D\\u0930", /* gra */
2589 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2590 "\\u091A\\u094D\\u0930", /* cra */
2591 "\\u091B\\u094D\\u0930", /* chra */
2592 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2593 "\\u091D\\u094D\\u0930", /* jhra */
2594 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2595 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2596 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2597 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2598 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2599 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2600 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2601 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2602 "\\u0924\\u094D\\u0924", /* tta */
2603 "\\u0925\\u094D\\u0930", /* thra */
2604 "\\u0926\\u094D\\u0926", /* dda */
2605 "\\u0927\\u094D\\u0930", /* dhra */
2606 "\\u0928\\u094D\\u0928", /* nna */
2607 "\\u092A\\u094D\\u0930", /* pra */
2608 "\\u092B\\u094D\\u0930", /* phra */
2609 "\\u092C\\u094D\\u0930", /* bra */
2610 "\\u092D\\u094D\\u0930", /* bhra */
2611 "\\u092E\\u094D\\u0930", /* mra */
2612 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2613 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2614 "\\u092F\\u094D\\u0930", /* yra */
2615 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2617 "\\u0935\\u094D\\u0930", /* vra */
2618 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2619 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2620 "\\u0938\\u094D\\u0930", /* sra */
2621 "\\u0939\\u094d\\u092E", /* hma */
2622 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2623 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2624 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2625 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2626 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2627 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2628 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2629 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2630 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2632 "\\u0939\\u094D\\u092F", /* hya */
2633 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2634 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2635 "\\u090d", /* e\\u0306 */
2636 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2637 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2641 UErrorCode status
= U_ZERO_ERROR
;
2642 UParseError parseError
;
2643 UnicodeString message
;
2644 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2645 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2646 if(U_FAILURE(status
)){
2647 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2648 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2651 UnicodeString gotResult
;
2652 for(int i
= 0; i
<MAX_LEN
; i
++){
2653 gotResult
= source
[i
];
2654 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2655 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2661 void TransliteratorTest::TestTeluguLatinRT(){
2662 const int MAX_LEN
=10;
2663 const char* const source
[MAX_LEN
] = {
2664 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2665 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2666 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2667 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2668 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2669 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2670 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2671 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2672 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2673 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2676 const char* const expected
[MAX_LEN
] = {
2677 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2679 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2682 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2683 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2684 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2685 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689 UErrorCode status
= U_ZERO_ERROR
;
2690 UParseError parseError
;
2691 UnicodeString message
;
2692 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
);
2693 Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2694 if(U_FAILURE(status
)){
2695 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2696 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2699 UnicodeString gotResult
;
2700 for(int i
= 0; i
<MAX_LEN
; i
++){
2701 gotResult
= source
[i
];
2702 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2703 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2709 void TransliteratorTest::TestSanskritLatinRT(){
2710 const int MAX_LEN
=16;
2711 const char* const source
[MAX_LEN
] = {
2712 "rmk\\u1E63\\u0113t",
2713 "\\u015Br\\u012Bmad",
2714 "bhagavadg\\u012Bt\\u0101",
2717 "vi\\u1E63\\u0101da",
2719 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2720 "uv\\u0101cr\\u0325",
2721 "dharmak\\u1E63\\u0113tr\\u0113",
2722 "kuruk\\u1E63\\u0113tr\\u0113",
2723 "samav\\u0113t\\u0101",
2725 "m\\u0101mak\\u0101\\u1E25",
2726 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2730 const char* const expected
[MAX_LEN
] = {
2731 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2732 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2733 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2734 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2735 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2736 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2737 "\\u092f\\u094b\\u0917",
2738 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2739 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2740 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2743 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2744 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2745 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2746 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2747 "\\u0938\\u0902\\u091c\\u0935",
2749 UErrorCode status
= U_ZERO_ERROR
;
2750 UParseError parseError
;
2751 UnicodeString message
;
2752 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2753 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2754 if(U_FAILURE(status
)){
2755 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2756 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2759 UnicodeString gotResult
;
2760 for(int i
= 0; i
<MAX_LEN
; i
++){
2761 gotResult
= source
[i
];
2762 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2763 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2770 void TransliteratorTest::TestCompoundLatinRT(){
2771 const char* const source
[] = {
2772 "rmk\\u1E63\\u0113t",
2773 "\\u015Br\\u012Bmad",
2774 "bhagavadg\\u012Bt\\u0101",
2777 "vi\\u1E63\\u0101da",
2779 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2780 "uv\\u0101cr\\u0325",
2781 "dharmak\\u1E63\\u0113tr\\u0113",
2782 "kuruk\\u1E63\\u0113tr\\u0113",
2783 "samav\\u0113t\\u0101",
2785 "m\\u0101mak\\u0101\\u1E25",
2786 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2790 const int MAX_LEN
= sizeof(source
)/sizeof(source
[0]);
2791 const char* const expected
[MAX_LEN
] = {
2792 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2793 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2794 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2795 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2796 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2797 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2798 "\\u092f\\u094b\\u0917",
2799 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2800 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2801 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2804 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2805 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2806 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2807 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2808 "\\u0938\\u0902\\u091c\\u0935"
2810 if(MAX_LEN
!= sizeof(expected
)/sizeof(expected
[0])) {
2811 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2815 UErrorCode status
= U_ZERO_ERROR
;
2816 UParseError parseError
;
2817 UnicodeString message
;
2818 Transliterator
* devToLatinToDev
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2819 Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2820 Transliterator
* devToTelToDev
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2821 Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2823 if(U_FAILURE(status
)){
2824 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2825 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2828 UnicodeString gotResult
;
2829 for(int i
= 0; i
<MAX_LEN
; i
++){
2830 gotResult
= source
[i
];
2831 expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
]));
2832 expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2833 expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2836 delete(latinToDevToLatin
);
2837 delete(devToLatinToDev
);
2838 delete(devToTelToDev
);
2839 delete(latinToTelToLatin
);
2843 * Test Gurmukhi-Devanagari Tippi and Bindi
2845 void TransliteratorTest::TestGurmukhiDevanagari(){
2847 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2848 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2849 UErrorCode status
= U_ZERO_ERROR
;
2850 UnicodeSet
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV
).unescape(), status
);
2851 UnicodeSet
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV
).unescape(), status
);
2852 UParseError parseError
;
2854 UnicodeSetIterator
vIter(vowel
);
2855 UnicodeSetIterator
nvIter(non_vowel
);
2856 Transliterator
* trans
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
);
2857 if(U_FAILURE(status
)) {
2858 dataerrln("Error creating transliterator %s", u_errorName(status
));
2862 UnicodeString
src (" \\u0902", -1, US_INV
);
2863 UnicodeString
expected(" \\u0A02", -1, US_INV
);
2864 src
= src
.unescape();
2865 expected
= expected
.unescape();
2867 while(vIter
.next()){
2868 src
.setCharAt(0,(UChar
) vIter
.getCodepoint());
2869 expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100));
2870 expect(*trans
,src
,expected
);
2873 expected
.setCharAt(1,0x0A70);
2874 while(nvIter
.next()){
2875 //src.setCharAt(0,(char) nvIter.codepoint);
2876 src
.setCharAt(0,(UChar
)nvIter
.getCodepoint());
2877 expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100));
2878 expect(*trans
,src
,expected
);
2883 * Test instantiation from a locale.
2885 void TransliteratorTest::TestLocaleInstantiation(void) {
2887 UErrorCode ec
= U_ZERO_ERROR
;
2888 Transliterator
*t
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
);
2889 if (U_FAILURE(ec
)) {
2890 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec
));
2894 expect(*t
, CharsToUnicodeString("\\u0430"), "a");
2897 t
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
);
2898 if (U_FAILURE(ec
)) {
2899 errln("FAIL: createInstance(en-el)");
2903 expect(*t
, "a", CharsToUnicodeString("\\u03B1"));
2908 * Test title case handling of accent (should ignore accents)
2910 void TransliteratorTest::TestTitleAccents(void) {
2912 UErrorCode ec
= U_ZERO_ERROR
;
2913 Transliterator
*t
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
);
2914 if (U_FAILURE(ec
)) {
2915 errln("FAIL: createInstance(Title)");
2919 expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2924 * Basic test of a locale resource based rule.
2926 void TransliteratorTest::TestLocaleResource() {
2927 const char* DATA
[] = {
2929 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2930 "Latin-el", "b", "\\u03bc\\u03c0",
2931 "Latin-Greek", "b", "\\u03B2",
2932 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2933 "el-Latin", "\\u03B2", "v",
2934 "Greek-Latin", "\\u03B2", "b",
2936 const int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
2937 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
2939 UErrorCode ec
= U_ZERO_ERROR
;
2940 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
);
2941 if (U_FAILURE(ec
)) {
2942 dataerrln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ") - " + u_errorName(ec
));
2946 expect(*t
, CharsToUnicodeString(DATA
[i
+1]),
2947 CharsToUnicodeString(DATA
[i
+2]));
2953 * Make sure parse errors reference the right line.
2955 void TransliteratorTest::TestParseError() {
2956 static const char* rule
=
2960 UErrorCode ec
= U_ZERO_ERROR
;
2962 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2964 if (U_FAILURE(ec
)) {
2965 UnicodeString
err(pe
.preContext
);
2966 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2967 if (err
.indexOf("d << b") >= 0) {
2968 logln("Ok: " + err
);
2970 errln("FAIL: " + err
);
2974 errln("FAIL: no syntax error");
2976 static const char* maskingRule
=
2981 delete Transliterator::createFromRules("ID", maskingRule
, UTRANS_FORWARD
, pe
, ec
);
2982 if (ec
!= U_RULE_MASK_ERROR
) {
2983 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec
));
2985 else if (UnicodeString("a > x;") != UnicodeString(pe
.preContext
)) {
2986 errln("FAIL: did not get expected precontext");
2988 else if (UnicodeString("ab > y;") != UnicodeString(pe
.postContext
)) {
2989 errln("FAIL: did not get expected postcontext");
2994 * Make sure sets on output are disallowed.
2996 void TransliteratorTest::TestOutputSet() {
2997 UnicodeString rule
= "$set = [a-cm-n]; b > $set;";
2998 UErrorCode ec
= U_ZERO_ERROR
;
3000 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3002 if (U_FAILURE(ec
)) {
3003 UnicodeString
err(pe
.preContext
);
3004 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3005 logln("Ok: " + err
);
3008 errln("FAIL: No syntax error");
3012 * Test the use variable range pragma, making sure that use of
3013 * variable range characters is detected and flagged as an error.
3015 void TransliteratorTest::TestVariableRange() {
3016 UnicodeString rule
= "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3017 UErrorCode ec
= U_ZERO_ERROR
;
3019 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3021 if (U_FAILURE(ec
)) {
3022 UnicodeString
err(pe
.preContext
);
3023 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3024 logln("Ok: " + err
);
3027 errln("FAIL: No syntax error");
3031 * Test invalid post context error handling
3033 void TransliteratorTest::TestInvalidPostContext() {
3034 UnicodeString rule
= "a}b{c>d;";
3035 UErrorCode ec
= U_ZERO_ERROR
;
3037 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3039 if (U_FAILURE(ec
)) {
3040 UnicodeString
err(pe
.preContext
);
3041 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3042 if (err
.indexOf("a}b{c") >= 0) {
3043 logln("Ok: " + err
);
3045 errln("FAIL: " + err
);
3049 errln("FAIL: No syntax error");
3053 * Test ID form variants
3055 void TransliteratorTest::TestIDForms() {
3056 const char* DATA
[] = {
3058 "nfd", NULL
, "NFC", // make sure case is ignored
3059 "Any-NFKD", NULL
, "Any-NFKC",
3060 "Null", NULL
, "Null",
3061 "-nfkc", "nfkc", "NFKD",
3062 "-nfkc/", "nfkc", "NFKD",
3063 "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN",
3064 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3065 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3066 "Source-", NULL
, NULL
,
3067 "Source/Variant-", NULL
, NULL
,
3068 "Source-/Variant", NULL
, NULL
,
3069 "/Variant", NULL
, NULL
,
3070 "/Variant-", NULL
, NULL
,
3071 "-/Variant", NULL
, NULL
,
3076 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
3078 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3079 const char* ID
= DATA
[i
];
3080 const char* expID
= DATA
[i
+1];
3081 const char* expInvID
= DATA
[i
+2];
3082 UBool expValid
= (expInvID
!= NULL
);
3083 if (expID
== NULL
) {
3087 UErrorCode ec
= U_ZERO_ERROR
;
3089 Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
3090 if (U_FAILURE(ec
)) {
3092 logln((UnicodeString
)"Ok: getInstance(" + ID
+") => " + u_errorName(ec
));
3094 dataerrln((UnicodeString
)"FAIL: Couldn't create " + ID
+ " - " + u_errorName(ec
));
3099 Transliterator
*u
= t
->createInverse(ec
);
3100 if (U_FAILURE(ec
)) {
3101 errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
);
3106 if (t
->getID() == expID
&&
3107 u
->getID() == expInvID
) {
3108 logln((UnicodeString
)"Ok: " + ID
+ ".getInverse() => " + expInvID
);
3110 errln((UnicodeString
)"FAIL: getInstance(" + ID
+ ") => " +
3111 t
->getID() + " x getInverse() => " + u
->getID() +
3112 ", expected " + expInvID
);
3119 static const UChar SPACE
[] = {32,0};
3120 static const UChar NEWLINE
[] = {10,0};
3121 static const UChar RETURN
[] = {13,0};
3122 static const UChar EMPTY
[] = {0};
3124 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
,
3125 const UnicodeString
& testRulesForward
) {
3126 UnicodeString rules2
; t2
.toRules(rules2
, TRUE
);
3127 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3128 rules2
.findAndReplace(SPACE
, EMPTY
);
3129 rules2
.findAndReplace(NEWLINE
, EMPTY
);
3130 rules2
.findAndReplace(RETURN
, EMPTY
);
3132 UnicodeString
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
);
3134 if (rules2
!= testRules
) {
3136 logln((UnicodeString
)"GENERATED RULES: " + rules2
);
3137 logln((UnicodeString
)"SHOULD BE: " + testRulesForward
);
3142 * Mark's toRules test.
3144 void TransliteratorTest::TestToRulesMark() {
3145 const char* testRules
=
3146 "::[[:Latin:][:Mark:]];"
3149 "a <> \\u03B1;" // alpha
3153 "::([[:Greek:][:Mark:]]);"
3155 const char* testRulesForward
=
3156 "::[[:Latin:][:Mark:]];"
3164 const char* testRulesBackward
=
3165 "::[[:Greek:][:Mark:]];"
3172 UnicodeString source
= CharsToUnicodeString("\\u00E1"); // a-acute
3173 UnicodeString target
= CharsToUnicodeString("\\u03AC"); // alpha-acute
3176 UErrorCode ec
= U_ZERO_ERROR
;
3177 Transliterator
*t2
= Transliterator::createFromRules("source-target", UnicodeString(testRules
, -1, US_INV
), UTRANS_FORWARD
, pe
, ec
);
3178 Transliterator
*t3
= Transliterator::createFromRules("target-source", UnicodeString(testRules
, -1, US_INV
), UTRANS_REVERSE
, pe
, ec
);
3180 if (U_FAILURE(ec
)) {
3183 dataerrln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
));
3187 expect(*t2
, source
, target
);
3188 expect(*t3
, target
, source
);
3190 checkRules("Failed toRules FORWARD", *t2
, UnicodeString(testRulesForward
, -1, US_INV
));
3191 checkRules("Failed toRules BACKWARD", *t3
, UnicodeString(testRulesBackward
, -1, US_INV
));
3198 * Test Escape and Unescape transliterators.
3200 void TransliteratorTest::TestEscape() {
3206 t
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
);
3207 if (U_FAILURE(ec
)) {
3208 errln((UnicodeString
)"FAIL: createInstance");
3211 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
3217 t
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
);
3218 if (U_FAILURE(ec
)) {
3219 errln((UnicodeString
)"FAIL: createInstance");
3222 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3223 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3228 t
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
);
3229 if (U_FAILURE(ec
)) {
3230 errln((UnicodeString
)"FAIL: createInstance");
3233 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3234 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3239 t
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
);
3240 if (U_FAILURE(ec
)) {
3241 errln((UnicodeString
)"FAIL: createInstance");
3244 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3245 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3251 void TransliteratorTest::TestAnchorMasking(){
3252 UnicodeString
rule ("^a > Q; a > q;");
3253 UErrorCode status
= U_ZERO_ERROR
;
3254 UParseError parseError
;
3256 Transliterator
* t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
);
3257 if(U_FAILURE(status
)){
3258 errln(UnicodeString("FAIL: ") + "ID" +
3259 ".createFromRules() => bad rules" +
3260 /*", parse error " + parseError.code +*/
3261 ", line " + parseError
.line
+
3262 ", offset " + parseError
.offset
+
3263 ", context " + prettify(parseError
.preContext
, TRUE
) +
3264 ", rules: " + prettify(rule
, TRUE
));
3270 * Make sure display names of variants look reasonable.
3272 void TransliteratorTest::TestDisplayName() {
3273 #if UCONFIG_NO_FORMATTING
3274 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3277 static const char* DATA
[] = {
3278 // ID, forward name, reverse name
3279 // Update the text as necessary -- the important thing is
3280 // not the text itself, but how various cases are handled.
3283 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3286 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3289 "NFC", "Any to NFC", "Any to NFD",
3292 int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
3294 Locale
US("en", "US");
3296 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3298 Transliterator::getDisplayName(DATA
[i
], US
, name
);
3299 if (name
!= DATA
[i
+1]) {
3300 dataerrln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " +
3301 name
+ ", expected " + DATA
[i
+1]);
3303 logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
);
3305 UErrorCode ec
= U_ZERO_ERROR
;
3307 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
);
3308 if (U_FAILURE(ec
)) {
3310 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec
));
3313 name
= Transliterator::getDisplayName(t
->getID(), US
, name
);
3314 if (name
!= DATA
[i
+2]) {
3315 dataerrln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " +
3316 name
+ ", expected " + DATA
[i
+2]);
3318 logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
);
3325 void TransliteratorTest::TestSpecialCases(void) {
3326 const UnicodeString registerRules
[] = {
3327 "Any-Dev1", "x > X; y > Y;",
3328 "Any-Dev2", "XY > Z",
3330 CharsToUnicodeString
3331 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3335 const UnicodeString testCases
[] = {
3337 // should add more test cases
3338 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3344 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347 // check for devanagari bug
3348 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3350 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3351 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3352 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE
+ DESERET_dee
,
3354 //TODO: enable this test once Titlecase works right
3356 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3357 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3359 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3360 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE
+ DESERET_DEE
,
3361 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3362 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee
+ DESERET_dee
,
3364 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3365 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3368 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3370 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3371 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3372 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3373 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3374 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3375 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3377 // Upper: TAT\\u02B9\\u00C2NA
3378 // Lower: tat\\u02B9\\u00E2na
3379 // Title: Tat\\u02B9\\u00E2na
3380 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3381 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3383 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3385 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3392 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3393 UErrorCode status
= U_ZERO_ERROR
;
3395 Transliterator
*t
= Transliterator::createFromRules(registerRules
[0+i
],
3396 registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
);
3397 if (U_FAILURE(status
)) {
3398 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status
));
3400 Transliterator::registerInstance(t
);
3403 for (i
= 0; testCases
[i
].length()!=0; i
+=3) {
3404 UErrorCode ec
= U_ZERO_ERROR
;
3406 const UnicodeString
& name
= testCases
[i
];
3407 Transliterator
*t
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
);
3408 if (U_FAILURE(ec
)) {
3409 dataerrln((UnicodeString
)"FAIL: Couldn't create " + name
+ " - " + u_errorName(ec
));
3413 const UnicodeString
& id
= t
->getID();
3414 const UnicodeString
& source
= testCases
[i
+1];
3415 UnicodeString target
;
3417 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3419 if (testCases
[i
+2].length() > 0) {
3420 target
= testCases
[i
+2];
3421 } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) {
3422 Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
);
3423 } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) {
3424 Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
);
3425 } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) {
3426 Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
);
3427 } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) {
3428 Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
);
3429 } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) {
3431 target
.toLower(Locale::getUS());
3432 } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) {
3434 target
.toUpper(Locale::getUS());
3436 if (U_FAILURE(ec
)) {
3437 errln((UnicodeString
)"FAIL: Internal error normalizing " + source
);
3441 expect(*t
, source
, target
);
3444 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3445 Transliterator::unregister(registerRules
[i
]);
// Formats the code point `ch` into `buffer` as an escape sequence using
// sprintf. Two formats appear below: backslash-u followed by the value as
// at least four lowercase hex digits, and backslash-U followed by at least
// eight lowercase hex digits.
// No buffer size is passed, so the caller must supply a buffer large enough
// for the formatted text plus the terminating NUL.
// NOTE(review): presumably the 4-digit form is chosen for BMP code points and
// the 8-digit form otherwise -- confirm against the selecting condition.
3449 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) {
3451 sprintf(buffer
, "\\u%04x", (int)ch
);
3453 sprintf(buffer
, "\\U%08x", (int)ch
);
// Checks case mapping of the Deseret letters held in DESERET_dee /
// DESERET_DEE:
//   1. u_totitle() of the code point read from DESERET_dee must equal
//      DESERET_DEE; on mismatch both code points are printed in escaped form.
//   2. u_strToUpper() of dee+DEE must yield DEE+DEE.
//   3. u_strToLower() of dee+DEE must yield dee+dee.
// NOTE(review): 20 is passed as the destination capacity of buffer2 in both
// u_strTo* calls -- confirm it matches buffer2's declared size.
3458 void TransliteratorTest::TestSurrogateCasing (void) {
3459 // check that casing handles surrogates
3460 // titlecase is currently defective
3464 U16_GET(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
);
3465 UnicodeString
DEE(u_totitle(dee
));
3466 if (DEE
!= DESERET_DEE
) {
3467 err("Fails titlecase of surrogates");
3468 err(Char32ToEscapedChars(dee
, buffer
));
3470 errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
));
3473 UnicodeString deeDEETest
=DESERET_dee
+ DESERET_DEE
;
3474 UnicodeString deedeeTest
= DESERET_dee
+ DESERET_dee
;
3475 UnicodeString DEEDEETest
= DESERET_DEE
+ DESERET_DEE
;
3476 UErrorCode status
= U_ZERO_ERROR
;
3478 u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3479 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) {
3480 errln("Fails: Can't uppercase surrogates.");
// Reset the error code so the lowercase check is evaluated independently.
3483 status
= U_ZERO_ERROR
;
3484 u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3485 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) {
3486 errln("Fails: Can't lowercase surrogates.");
// Helper: runs transliterator `t` and leaves the output in `result`.
// The visible call is the in-place t.transliterate(result).
// NOTE(review): presumably `result` is first assigned from `src` -- confirm.
3490 static void _trans(Transliterator
& t
, const UnicodeString
& src
,
3491 UnicodeString
& result
) {
3493 t
.transliterate(result
);
// Helper: creates a forward transliterator for `id` and, if creation
// succeeded, applies it to `src` via the Transliterator& overload of _trans.
// NOTE(review): `ec` is taken by value, so a creation failure recorded here is
// not propagated to the caller's error code; callers that test their own `ec`
// after calling this helper will never see a failure. Confirm whether the
// parameter was meant to be `UErrorCode&`.
3496 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
,
3497 UnicodeString
& result
, UErrorCode ec
) {
3499 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3500 if (U_SUCCESS(ec
)) {
3501 _trans(*t
, src
, result
);
// Helper: searches `pairs`, a flat array laid out as key, value, key, value,
// ... and terminated by an empty key, for a key equal to `source` under
// default case folding. The array is walked two entries at a time.
// NOTE(review): presumably returns the value following the matching key, or
// the local `empty` string when nothing matches -- confirm.
3506 static UnicodeString
_findMatch(const UnicodeString
& source
,
3507 const UnicodeString
* pairs
) {
3508 UnicodeString empty
;
3509 for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) {
3510 if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) {
3517 // Check to see that incremental gets at least part way through a reasonable string.
// Walks every registered source/target/variant combination reported by
// Transliterator::countAvailable*/getAvailable*, and for each source that has
// sample text in `tests` creates the transliterator "<source>-<target>/<variant>"
// and runs CheckIncrementalAux() on it; the transliterated output is then fed
// through CheckIncrementalAux() on the inverse transliterator.
// Sample text for Devanagari and Katakana is produced by transliterating the
// Latin sample with Latin-Devanagari and Latin-Katakana.
// NOTE(review): the _trans(id, ...) helper takes its UErrorCode by value, so
// the U_FAILURE(ec) check after the two _trans calls cannot observe a failure.
// NOTE(review): the function-scope `test` string declared before the loops is
// shadowed by the `test` declared inside the source loop.
3519 void TransliteratorTest::TestIncrementalProgress(void) {
3520 UErrorCode ec
= U_ZERO_ERROR
;
3521 UnicodeString latinTest
= "The Quick Brown Fox.";
3522 UnicodeString devaTest
;
3523 _trans("Latin-Devanagari", latinTest
, devaTest
, ec
);
3524 UnicodeString kataTest
;
3525 _trans("Latin-Katakana", latinTest
, kataTest
, ec
);
3526 if (U_FAILURE(ec
)) {
3527 errln("FAIL: Internal error");
// Flat (source name, sample text) pairs consumed by _findMatch().
3530 const UnicodeString tests
[] = {
3533 "Halfwidth", latinTest
,
3534 "Devanagari", devaTest
,
3535 "Katakana", kataTest
,
3539 UnicodeString
test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3540 int32_t i
= 0, j
=0, k
=0;
3541 int32_t sources
= Transliterator::countAvailableSources();
3542 for (i
= 0; i
< sources
; i
++) {
3543 UnicodeString source
;
3544 Transliterator::getAvailableSource(i
, source
);
3545 UnicodeString test
= _findMatch(source
, tests
);
3546 if (test
.length() == 0) {
3547 logln((UnicodeString
)"Skipping " + source
+ "-X");
3550 int32_t targets
= Transliterator::countAvailableTargets(source
);
3551 for (j
= 0; j
< targets
; j
++) {
3552 UnicodeString target
;
3553 Transliterator::getAvailableTarget(j
, source
, target
);
3554 int32_t variants
= Transliterator::countAvailableVariants(source
, target
);
3555 for (k
=0; k
< variants
; k
++) {
3556 UnicodeString variant
;
3558 UErrorCode status
= U_ZERO_ERROR
;
3560 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
3561 UnicodeString id
= source
+ "-" + target
+ "/" + variant
;
3563 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
);
3564 if (U_FAILURE(status
)) {
3565 dataerrln((UnicodeString
)"FAIL: Could not create " + id
);
3569 status
= U_ZERO_ERROR
;
3570 CheckIncrementalAux(t
, test
);
3573 _trans(*t
, test
, rev
);
3574 Transliterator
*inv
= t
->createInverse(status
);
3575 if (U_FAILURE(status
)) {
3576 #if UCONFIG_NO_BREAK_ITERATION
3577 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3578 if (id
.compare((UnicodeString
)"Latin-Thai/") != 0)
3580 errln((UnicodeString
)"FAIL: Could not create inverse of " + id
);
3586 CheckIncrementalAux(inv
, rev
);
// Runs one incremental transliteration of `input` through `t` and checks that
// it makes progress and can be completed.
//   - A copy of `input` is transliterated with a position whose context and
//     limit span the whole string.
//   - "No Progress" is reported when pos.start is still 0 while pos.limit is
//     non-zero, except for the transliterator with ID "Hex-Any/Unicode".
//   - After finishTransliteration(), "Incomplete" is reported when pos.start
//     has not reached pos.limit.
// t:     transliterator under test (dereferenced without a visible null check).
// input: text to transliterate; it is not modified.
3594 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,
3595 const UnicodeString
& input
) {
3596 UErrorCode ec
= U_ZERO_ERROR
;
3598 UnicodeString test
= input
;
3600 pos
.contextStart
= 0;
3601 pos
.contextLimit
= input
.length();
3603 pos
.limit
= input
.length();
3605 t
->transliterate(test
, pos
, ec
);
3606 if (U_FAILURE(ec
)) {
3607 errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
));
3610 UBool gotError
= FALSE
;
3612 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3614 if (pos
.start
== 0 && pos
.limit
!= 0 && t
->getID() != "Hex-Any/Unicode") {
3615 errln((UnicodeString
)"No Progress, " +
3616 t
->getID() + ": " + formatInput(test
, input
, pos
));
3619 logln((UnicodeString
)"PASS Progress, " +
3620 t
->getID() + ": " + formatInput(test
, input
, pos
));
3622 t
->finishTransliteration(test
, pos
);
3623 if (pos
.start
!= pos
.limit
) {
3624 errln((UnicodeString
)"Incomplete, " +
3625 t
->getID() + ": " + formatInput(test
, input
, pos
));
// Tests function-call syntax (&Name( ... )) inside a rule: builds a
// transliterator from a rule that wraps each uppercase letter with nested
// &Lower / &Hex / &Any-Lower calls, logs or reports the text produced by
// toRules() relative to the original rule, and checks the output for
// "The Quick Brown Fox".
3630 void TransliteratorTest::TestFunction() {
3631 // Careful with spacing and ';' here: Phrase this exactly
3632 // as toRules() is going to return it. If toRules() changes
3633 // with regard to spacing or ';', then adjust this string.
3634 UnicodeString rule
=
3635 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3638 UErrorCode ec
= U_ZERO_ERROR
;
3639 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3641 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec
));
3646 t
->toRules(r
, TRUE
);
3648 logln((UnicodeString
)"OK: toRules() => " + r
);
3650 errln((UnicodeString
)"FAIL: toRules() => " + r
+
3651 ", expected " + rule
);
3654 expect(*t
, "The Quick Brown Fox",
3655 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
// Tests that rules which use the back reference $1 without a matching capture
// group (". > $1;") are rejected: createFromRules() is expected to return NULL
// and to leave a failure code in `ec`.
// NOTE(review): both createFromRules() calls share `ec` and no reset is
// visible between them -- confirm the second call is really exercised.
3660 void TransliteratorTest::TestInvalidBackRef(void) {
3661 UnicodeString rule
= ". > $1;";
3662 UnicodeString rule2
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3664 UErrorCode ec
= U_ZERO_ERROR
;
3665 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3666 Transliterator
*t2
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
);
3669 errln("FAIL: createFromRules should have returned NULL");
3674 errln("FAIL: createFromRules should have returned NULL");
3678 if (U_SUCCESS(ec
)) {
3679 errln("FAIL: Ok: . > $1; => no error");
3681 logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
));
// Tests rules whose UnicodeSets contain multi-character strings such as
// [{fg}] or [a {ab} {abc}]. Two rule sets are built with createFromRules()
// and each is checked against a fixed input/output pair with expect(); the
// second rule set checks that the longest of several overlapping strings is
// the one matched.
3685 void TransliteratorTest::TestMulticharStringSet() {
3692 " e } [{fg}] > r;" ;
3695 UErrorCode ec
= U_ZERO_ERROR
;
3696 Transliterator
* t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3697 if (t
== NULL
|| U_FAILURE(ec
)) {
3699 errln("FAIL: createFromRules failed");
3703 expect(*t
, "a aa ab bc d gd de gde gdefg ddefg",
3704 "y x yz z d gd de gdq gdqfg ddrfg");
3707 // Overlapped string test. Make sure that when multiple
3708 // strings can match that the longest one is matched.
3710 " [a {ab} {abc}] > x;"
3713 " q [t {st} {rst}] { e > p;" ;
3715 t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3716 if (t
== NULL
|| U_FAILURE(ec
)) {
3718 errln("FAIL: createFromRules failed");
3722 expect(*t
, "a ab abc qte qste qrste",
3723 "x x x qtp qstp qrstp");
3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3728 // BEGIN TestUserFunction support factory
// Slot tables used by the TestUserFunction factory support below, indexed by
// slot number 0..3: _TUFF[n] is the transliterator that _TUFFactory clones for
// slot n, and _TUFID[n] is the ID under which slot n was registered.
// NOTE(review): these are non-static globals, and identifiers that begin with
// an underscore followed by an uppercase letter are reserved in C++ --
// consider file-local linkage and non-reserved names.
3730 Transliterator
* _TUFF
[4];
3731 UnicodeString
* _TUFID
[4];
// Factory callback given to Transliterator::registerFactory(). The requested
// ID is ignored; the integer stored in `context` selects the slot, and a clone
// of the transliterator in that slot of _TUFF is returned.
3733 static Transliterator
* U_EXPORT2
_TUFFactory(const UnicodeString
& /*ID*/,
3734 Transliterator::Token context
) {
3735 return _TUFF
[context
.integer
]->clone();
// Registers `ID` with _TUFFactory for slot `n`: a heap-allocated copy of the
// ID is stored in _TUFID[n] and the factory is registered with an integer
// token carrying `n`.
// NOTE(review): presumably `t` is also stored into _TUFF[n] so the factory can
// clone it -- confirm.
3738 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) {
3740 _TUFID
[n
] = new UnicodeString(ID
);
3741 Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
));
// Undoes _TUFReg for slot `n`: if the slot holds a transliterator, the ID
// stored in _TUFID[n] is unregistered.
// NOTE(review): release of the objects held in _TUFF[n] / _TUFID[n] is not
// shown here -- confirm they are freed.
3744 static void _TUFUnreg(int32_t n
) {
3745 if (_TUFF
[n
] != NULL
) {
3746 Transliterator::unregister(*_TUFID
[n
]);
3752 // END TestUserFunction support factory
3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3756 * Test that user-registered transliterators can be used under function
// Registers four rule-based transliterators through the _TUFReg factory
// support (slots 0..3: "Any-gif", "Any-RemoveCurly", "Any-hex2", "Any-gif2")
// and checks that they can be invoked with the &Name( ... ) function syntax
// from other rules, including nested calls (&Gif(&Hex2($1))) and calls placed
// next to built-in functions (&Hex, &Name). Each step creates the
// transliterator, reports a failure if creation did not succeed, and checks
// sample text with expect().
3759 void TransliteratorTest::TestUserFunction() {
3763 UErrorCode ec
= U_ZERO_ERROR
;
3765 // Setup our factory
3767 for (i
=0; i
<4; ++i
) {
3771 // There's no need to register inverses if we don't use them
3772 t
= Transliterator::createFromRules("gif",
3773 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3774 UTRANS_FORWARD
, pe
, ec
);
3775 if (t
== NULL
|| U_FAILURE(ec
)) {
3776 dataerrln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
));
3779 _TUFReg("Any-gif", t
, 0);
3781 t
= Transliterator::createFromRules("RemoveCurly",
3782 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3783 UTRANS_FORWARD
, pe
, ec
);
3784 if (t
== NULL
|| U_FAILURE(ec
)) {
3785 errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
));
3788 expect(*t
, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3789 _TUFReg("Any-RemoveCurly", t
, 1);
3791 logln("Trying &hex");
3792 t
= Transliterator::createFromRules("hex2",
3794 UTRANS_FORWARD
, pe
, ec
);
3795 if (t
== NULL
|| U_FAILURE(ec
)) {
3796 errln("FAIL: createFromRules");
3799 logln("Registering");
3800 _TUFReg("Any-hex2", t
, 2);
3801 t
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
);
3802 if (t
== NULL
|| U_FAILURE(ec
)) {
3803 errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
));
3806 expect(*t
, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3809 logln("Trying &gif");
3810 t
= Transliterator::createFromRules("gif2",
3811 "(.) > &Gif(&Hex2($1));",
3812 UTRANS_FORWARD
, pe
, ec
);
3813 if (t
== NULL
|| U_FAILURE(ec
)) {
3814 errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
));
3817 logln("Registering");
3818 _TUFReg("Any-gif2", t
, 3);
3819 t
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
);
3820 if (t
== NULL
|| U_FAILURE(ec
)) {
3821 errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
));
3824 expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3825 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3828 // Test that filters are allowed after &
3829 t
= Transliterator::createFromRules("test",
3830 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3831 UTRANS_FORWARD
, pe
, ec
);
3832 if (t
== NULL
|| U_FAILURE(ec
)) {
3833 errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
));
3837 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3841 for (i
=0; i
<4; ++i
) {
3847 * Test the Any-X transliterators.
// Creates the "Any-Latin" transliterator and checks it against a single string
// that mixes labelled Greek, Hiragana and Cyrillic runs; each run is expected
// to come out in Latin letters while the ASCII labels stay unchanged.
3849 void TransliteratorTest::TestAnyX(void) {
3850 UParseError parseError
;
3851 UErrorCode status
= U_ZERO_ERROR
;
3852 Transliterator
* anyLatin
=
3853 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3855 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status
));
3861 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3862 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3868 * Test Any-X transliterators with sample letters from all scripts.
// Builds a sample string from alphabetic characters of every script code below
// USCRIPT_CODE_LIMIT (the first five entries of [script] intersected with
// [:alphabetic:] are visited per script) and runs it through "Any-Latin". The
// input and output are only logged; failures are reported when a script code
// has no short name, when the script property cannot be applied, or when
// "Any-Latin" cannot be created.
3870 void TransliteratorTest::TestAny(void) {
3871 UErrorCode status
= U_ZERO_ERROR
;
3872 // Note: there is a lot of implicit construction of UnicodeStrings from (char *) in
3873 // function call parameters going on in this test.
3874 UnicodeSet
alphabetic("[:alphabetic:]", status
);
3875 if (U_FAILURE(status
)) {
3876 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3879 alphabetic
.freeze();
3881 UnicodeString testString
;
3882 for (int32_t i
= 0; i
< USCRIPT_CODE_LIMIT
; i
++) {
3883 const char *scriptName
= uscript_getShortName((UScriptCode
)i
);
3884 if (scriptName
== NULL
) {
3885 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__
, __LINE__
, i
);
3890 sample
.applyPropertyAlias("script", scriptName
, status
);
3891 if (U_FAILURE(status
)) {
3892 errln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3895 sample
.retainAll(alphabetic
);
3896 for (int32_t count
=0; count
<5; count
++) {
3897 UChar32 c
= sample
.charAt(count
);
3901 testString
.append(c
);
3905 UParseError parseError
;
3906 Transliterator
* anyLatin
=
3907 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3908 if (U_FAILURE(status
)) {
3909 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3913 logln(UnicodeString("Sample set for Any-Latin: ") + testString
);
3914 anyLatin
->transliterate(testString
);
3915 logln(UnicodeString("Sample result for Any-Latin: ") + testString
);
3921 * Test the source and target set API. These are only implemented
3922 * for RBT and CompoundTransliterator at this time.
// Builds a transliterator from the rule text `r` and compares the sets
// returned by getSourceSet() and getTargetSet() with the expected sets
// [arx{lu}] and [bq]. A match is logged; a mismatch is reported together with
// the actual and expected set patterns.
3924 void TransliteratorTest::TestSourceTargetSet() {
3925 UErrorCode ec
= U_ZERO_ERROR
;
3933 UnicodeSet
expSrc("[arx{lu}]", ec
);
3936 UnicodeSet
expTrg("[bq]", ec
);
3939 Transliterator
* t
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
);
3941 if (U_FAILURE(ec
)) {
3943 errln("FAIL: Couldn't set up test");
3947 UnicodeSet src
; t
->getSourceSet(src
);
3948 UnicodeSet trg
; t
->getTargetSet(trg
);
3950 if (src
== expSrc
&& trg
== expTrg
) {
3952 logln((UnicodeString
)"Ok: " +
3953 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3954 ", target = " + trg
.toPattern(b
, TRUE
));
3956 UnicodeString a
, b
, c
, d
;
3957 errln((UnicodeString
)"FAIL: " +
3958 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3959 ", expected " + expSrc
.toPattern(b
, TRUE
) +
3960 "; target = " + trg
.toPattern(c
, TRUE
) +
3961 ", expected " + expTrg
.toPattern(d
, TRUE
));
3968 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
// Checks that U+200E is skipped as pattern white space in two places:
//   - in rule text: "a > <U+200E> b;" must still map "a" to "b";
//   - in a UnicodeSet pattern: "[a <U+200E>]" must not contain U+200E.
3970 void TransliteratorTest::TestPatternWhiteSpace() {
3972 const char* r
= "a > \\u200E b;";
3974 UErrorCode ec
= U_ZERO_ERROR
;
3976 Transliterator
* t
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
);
3978 if (U_FAILURE(ec
)) {
3979 errln("FAIL: Couldn't set up test");
3981 expect(*t
, "a", "b");
3987 UnicodeSet
set(CharsToUnicodeString("[a \\u200E]"), ec
);
3989 if (U_FAILURE(ec
)) {
3990 errln("FAIL: Couldn't set up test");
3992 if (set
.contains(0x200E)) {
3993 errln("FAIL: U+200E not being ignored by UnicodeSet");
3997 //======================================================================
3998 // this method is in TestUScript.java
3999 //======================================================================
// For every code point 0..0x10FFFF: looks up its script, builds the compound
// IDs "[:<long script name>:];NFD" and "[:<short script name>:];NFD", and
// creates a transliterator for each ID. oldId / oldAbbrId remember the IDs
// from the previous iteration so creation is only attempted when the ID
// differs from the previous code point's.
// NOTE(review): the names are copied into fixed 256-byte buffers with
// uprv_strcpy / uprv_strcat and no length check is visible.
// NOTE(review): the failure message for the abbreviated ID prints `id` (the
// long name) rather than the abbreviated ID that failed -- confirm intent.
4000 void TransliteratorTest::TestAllCodepoints(){
4001 UScriptCode code
= USCRIPT_INVALID_CODE
;
4002 char id
[256]={'\0'};
4003 char abbr
[256]={'\0'};
4004 char newId
[256]={'\0'};
4005 char newAbbrId
[256]={'\0'};
4006 char oldId
[256]={'\0'};
4007 char oldAbbrId
[256]={'\0'};
4009 UErrorCode status
=U_ZERO_ERROR
;
4012 for(uint32_t i
= 0; i
<=0x10ffff; i
++){
4013 code
= uscript_getScript(i
,&status
);
4014 if(code
== USCRIPT_INVALID_CODE
){
4015 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i
);
4017 const char* myId
= uscript_getName(code
);
4019 dataerrln("Valid script code returned NULL name. Check your data!");
4022 uprv_strcpy(id
,myId
);
4023 uprv_strcpy(abbr
,uscript_getShortName(code
));
4025 uprv_strcpy(newId
,"[:");
4026 uprv_strcat(newId
,id
);
4027 uprv_strcat(newId
,":];NFD");
4029 uprv_strcpy(newAbbrId
,"[:");
4030 uprv_strcat(newAbbrId
,abbr
);
4031 uprv_strcat(newAbbrId
,":];NFD");
4033 if(uprv_strcmp(newId
,oldId
)!=0){
4034 Transliterator
* t
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
);
4035 if(t
==NULL
|| U_FAILURE(status
)){
4036 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
4040 if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){
4041 Transliterator
* t
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
);
4042 if(t
==NULL
|| U_FAILURE(status
)){
4043 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
// Remember this iteration's IDs for the comparison on the next code point.
4047 uprv_strcpy(oldId
,newId
);
4048 uprv_strcpy(oldAbbrId
, newAbbrId
);
// TEST_TRANSLIT_ID(id, cls): creates the forward transliterator for `id`
// (a string literal, since it is passed to a "%s" format on failure) and
// reports an error unless its getDynamicClassID() equals
// cls::getStaticClassID().
4054 #define TEST_TRANSLIT_ID(id, cls) { \
4055 UErrorCode ec = U_ZERO_ERROR; \
4056 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
4057 if (U_FAILURE(ec)) { \
4058 dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
4060 if (t->getDynamicClassID() != cls::getStaticClassID()) { \
4061 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4063 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
// TEST_TRANSLIT_RULE(rule, cls): same check as TEST_TRANSLIT_ID, but the
// transliterator is built from rule text with createFromRules(). `rule` must
// be a string literal because it is concatenated into the failure message.
4068 #define TEST_TRANSLIT_RULE(rule, cls) { \
4069 UErrorCode ec = U_ZERO_ERROR; \
4071 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
4072 if (U_FAILURE(ec)) { \
4073 errln("FAIL: Couldn't create " rule); \
4075 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
4076 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4078 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
// Class-ID check: for each listed transliterator ID (and one rule), verifies
// through TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE that the created object's
// dynamic class ID matches the static class ID of the named implementation
// class.
4083 void TransliteratorTest::TestBoilerplate() {
4084 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
);
4085 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
);
4086 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
);
4087 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
);
4088 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
);
4089 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
);
4090 TEST_TRANSLIT_ID("Null", NullTransliterator
);
4091 TEST_TRANSLIT_ID("Remove", RemoveTransliterator
);
4092 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
);
4093 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
);
4094 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
);
4095 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
);
4096 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
);
// Tests rules written with the alternate operator characters: U+2192 in place
// of '>', U+2190 in place of '<', U+2194 in place of '<>', and U+2206 in place
// of '&' (the pairing is spelled out in the sample text of the last case).
4099 void TransliteratorTest::TestAlternateSyntax() {
4104 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4107 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4108 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4109 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
// Rule sets used by TestBeginEnd() and TestBeginEndToRules(). Entries are
// referenced by index from BEGIN_END_TEST_CASES, so rule sets whose test
// cases are commented out are kept as empty-string placeholders to leave the
// remaining indexes unchanged. BEGIN_END_RULES_length is the element count.
4112 static const char* BEGIN_END_RULES
[] = {
4126 "", // test case commented out below, this is here to keep from messing up the indexes
4135 "", // test case commented out below, this is here to keep from messing up the indexes
4144 "", // test case commented out below, this is here to keep from messing up the indexes
4163 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4164 "$delim = [\\-$ws];"
4165 "$ws $delim* > ' ';"
4166 "'-' $delim* > '-';",
4170 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4171 "$delim = [\\-$ws];"
4172 "$ws $delim* > ' ';"
4173 "'-' $delim* > '-';",
4176 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4177 "$delim = [\\-$ws];"
4178 "$ws $delim* > ' ';"
4179 "'-' $delim* > '-';"
4183 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4184 "$delim = [\\-$ws];"
4186 "$ws $delim* > ' ';"
4187 "'-' $delim* > '-';",
4192 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4193 "$delim = [\\-$ws];"
4195 "$ws $delim* > ' ';"
4196 "'-' $delim* > '-';",
4198 "", // test case commented out below, this is here to keep from messing up the indexes
4202 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4203 "$delim = [\\-$ws];"
4205 "$ws $delim* > ' ';"
4206 "'-' $delim* > '-';"
4209 "", // test case commented out below, this is here to keep from messing up the indexes
4213 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4214 "$delim = [\\-$ws];"
4217 "$ws $delim* > ' ';"
4218 "'-' $delim* > '-';"
4221 "$ab { ' ' } $ab > '-';"
4228 "", // test case commented out below, this is here to keep from messing up the indexes
4231 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4232 "$delim = [\\-$ws];"
4235 "$ws $delim* > ' ';"
4236 "'-' $delim* > '-';"
4238 "$ab { ' ' } $ab > '-';"
4254 "", // test case commented out below, this is here to keep from messing up the indexes
4275 "", // test case commented out below, this is here to keep from messing up the indexes
4285 static const int32_t BEGIN_END_RULES_length
= (int32_t)(sizeof(BEGIN_END_RULES
) / sizeof(BEGIN_END_RULES
[0]));
4288 (This entire test is commented out below and will need some heavy revision when we re-add
4289 the ::BEGIN/::END stuff)
4290 static const char* BOGUS_BEGIN_END_RULES[] = {
4309 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
// Test data for TestBeginEnd() and TestBeginEndToRules(): a flat array of
// triples (rule set, input text, expected output), consumed three entries at
// a time. The rule set of each triple is an entry of BEGIN_END_RULES selected
// by index; disabled cases are left in place as comments.
// BEGIN_END_TEST_CASES_length is the element count (not the triple count).
4312 static const char* BEGIN_END_TEST_CASES
[] = {
4313 // rules input expected output
4314 BEGIN_END_RULES
[0], "abc ababc aba", "xy zbc z",
4315 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4316 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4317 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4318 BEGIN_END_RULES
[4], "abc ababc aba", "xy abxy z",
4319 BEGIN_END_RULES
[5], "abccabaacababcbc", "PXAARXQBR",
4321 BEGIN_END_RULES
[6], "e e - e---e- e", "e e e-e-e",
4322 BEGIN_END_RULES
[7], "e e - e---e- e", "e e e-e-e",
4323 BEGIN_END_RULES
[8], "e e - e---e- e", "e e e-e-e",
4324 BEGIN_END_RULES
[9], "e e - e---e- e", "e e e-e-e",
4325 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4326 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4327 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4328 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4329 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4330 BEGIN_END_RULES
[13], "e e - e---e- e", "e e e-e-e",
4331 BEGIN_END_RULES
[13], "a a a a", "a%a%a%a",
4332 BEGIN_END_RULES
[13], "a a-b c b a", "a%a-b cb-a",
4334 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4335 BEGIN_END_RULES
[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4336 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4337 BEGIN_END_RULES
[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4339 static const int32_t BEGIN_END_TEST_CASES_length
= (int32_t)(sizeof(BEGIN_END_TEST_CASES
) / sizeof(BEGIN_END_TEST_CASES
[0]));
// Runs every (rules, input, expected) triple of BEGIN_END_TEST_CASES through
// expect(), labelling each as "Test case #<n>", then builds rule set 17 in the
// reverse direction and checks it against a fixed input/output pair.
// NOTE(review): the reversed case constructs its rule string with the
// single-argument UnicodeString constructor, whereas the loop above uses the
// (-1, US_INV) invariant-character form -- confirm the difference is intended.
4341 void TransliteratorTest::TestBeginEnd() {
4342 // run through the list of test cases above
4344 for (i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4345 expect((UnicodeString
)"Test case #" + (i
/ 3),
4346 UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4347 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4348 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4351 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4352 UParseError parseError
;
4353 UErrorCode status
= U_ZERO_ERROR
;
4354 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4355 UTRANS_REVERSE
, parseError
, status
);
4356 if (reversed
== 0 || U_FAILURE(status
)) {
4357 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4359 expect(*reversed
, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4363 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4364 // that all of them cause errors
4366 (commented out until we have the real ::BEGIN/::END stuff in place
4367 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4368 UParseError parseError;
4369 UErrorCode status = U_ZERO_ERROR;
4370 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4371 UTRANS_FORWARD, parseError, status);
4372 if (!U_FAILURE(status)) {
4374 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
// Round-trip variant of TestBeginEnd(): each rule set in BEGIN_END_TEST_CASES
// is compiled, converted back to text with toRules(), recompiled from that
// text, and the recompiled transliterator is checked against the same
// input/expected pair. The reversed form of rule set 17 gets the same
// treatment at the end.
// NOTE(review): `status` is not reset between the first and second
// createFromRules() call of each pair in the code shown here.
4380 void TransliteratorTest::TestBeginEndToRules() {
4381 // run through the same list of test cases we used above, but this time, instead of just
4382 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4383 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4384 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4385 // to (i.e., does the same thing as) the original rule set
4386 for (int32_t i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4387 UParseError parseError
;
4388 UErrorCode status
= U_ZERO_ERROR
;
4389 Transliterator
* t
= Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4390 UTRANS_FORWARD
, parseError
, status
);
4391 if (U_FAILURE(status
)) {
4392 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError
, status
);
4394 UnicodeString rules
;
4395 t
->toRules(rules
, TRUE
);
4396 Transliterator
* t2
= Transliterator::createFromRules((UnicodeString
)"Test case #" + (i
/ 3), rules
,
4397 UTRANS_FORWARD
, parseError
, status
);
4398 if (U_FAILURE(status
)) {
4399 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4400 parseError
, status
);
4404 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4405 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4412 // do the same thing for the reversible test case
4413 UParseError parseError
;
4414 UErrorCode status
= U_ZERO_ERROR
;
4415 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4416 UTRANS_REVERSE
, parseError
, status
);
4417 if (U_FAILURE(status
)) {
4418 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4420 UnicodeString rules
;
4421 reversed
->toRules(rules
, FALSE
);
4422 Transliterator
* reversed2
= Transliterator::createFromRules("Reversed", rules
, UTRANS_FORWARD
,
4423 parseError
, status
);
4424 if (U_FAILURE(status
)) {
4425 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4426 parseError
, status
);
4430 UnicodeString("xy XY XYZ yz YZ"),
4431 UnicodeString("xy abc xaba yz aba"));
// Tests Transliterator::registerAlias() in two rounds.
//   Round 1: "Any-CapVowels" is registered as an alias for the compound ID
//   "Lower;[aeiou]Upper". Instances created from the long ID and from
//   "CapVowels" must report the ID they were created with and must produce
//   identical toRules() text. After unregistering the alias, creating
//   "Any-CapVowels" must fail.
//   Round 2: the same rules comparison with an alias ("Latin-dlgkjdflkjdl")
//   for the existing ID "Latin-Greek".
// NOTE(review): in round 2 the failure paths call unregister(realID) although
// the alias that was registered is fakeID -- confirm which ID should be
// unregistered there.
// NOTE(review): no reset of `err` is visible before round 2, although round 1
// ends by expecting a failure code in it.
4438 void TransliteratorTest::TestRegisterAlias() {
4439 UnicodeString
longID("Lower;[aeiou]Upper");
4440 UnicodeString
shortID("Any-CapVowels");
4441 UnicodeString
reallyShortID("CapVowels");
4443 Transliterator::registerAlias(shortID
, longID
);
4445 UErrorCode err
= U_ZERO_ERROR
;
4446 Transliterator
* t1
= Transliterator::createInstance(longID
, UTRANS_FORWARD
, err
);
4447 if (U_FAILURE(err
)) {
4448 errln("Failed to instantiate transliterator with long ID");
4449 Transliterator::unregister(shortID
);
4452 Transliterator
* t2
= Transliterator::createInstance(reallyShortID
, UTRANS_FORWARD
, err
);
4453 if (U_FAILURE(err
)) {
4454 errln("Failed to instantiate transliterator with short ID");
4456 Transliterator::unregister(shortID
);
4460 if (t1
->getID() != longID
)
4461 errln("Transliterator instantiated with long ID doesn't have long ID");
4462 if (t2
->getID() != reallyShortID
)
4463 errln("Transliterator instantiated with short ID doesn't have short ID");
4465 UnicodeString rules1
;
4466 UnicodeString rules2
;
4468 t1
->toRules(rules1
, TRUE
);
4469 t2
->toRules(rules2
, TRUE
);
4470 if (rules1
!= rules2
)
4471 errln("Alias transliterators aren't the same");
4475 Transliterator::unregister(shortID
);
4477 t1
= Transliterator::createInstance(shortID
, UTRANS_FORWARD
, err
);
4478 if (U_SUCCESS(err
)) {
4479 errln("Instantiation with short ID succeeded after short ID was unregistered");
4483 // try the same thing again, but this time with something other than
4484 // an instance of CompoundTransliterator
4485 UnicodeString
realID("Latin-Greek");
4486 UnicodeString
fakeID("Latin-dlgkjdflkjdl");
4487 Transliterator::registerAlias(fakeID
, realID
);
4490 t1
= Transliterator::createInstance(realID
, UTRANS_FORWARD
, err
);
4491 if (U_FAILURE(err
)) {
4492 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err
));
4493 Transliterator::unregister(realID
);
4496 t2
= Transliterator::createInstance(fakeID
, UTRANS_FORWARD
, err
);
4497 if (U_FAILURE(err
)) {
4498 errln("Failed to instantiate transliterator with fake ID");
4500 Transliterator::unregister(realID
);
4504 t1
->toRules(rules1
, TRUE
);
4505 t2
->toRules(rules2
, TRUE
);
4506 if (rules1
!= rules2
)
4507 errln("Alias transliterators aren't the same");
4511 Transliterator::unregister(fakeID
);
// Tests utrans_stripRules(): the input is a '#' comment line ending in CR LF
// followed by one rule with a trailing '#' comment; the expected output is
// just the rule text (U+E001 '>' U+0C01 ';'). Both the returned length and
// the stripped characters are compared with the expectation.
// The result buffer has the same number of UChars as the input array.
// NOTE(review): `status` is not examined in the checks shown here.
4514 void TransliteratorTest::TestRuleStripping() {
4517 \uE001>\u0C01; # SIGN
4519 static const UChar rule
[] = {
4520 0x0023,0x0020,0x000D,0x000A,
4521 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4523 static const UChar expectedRule
[] = {
4524 0xE001,0x003E,0x0C01,0x003B,0
4526 UChar result
[sizeof(rule
)/sizeof(rule
[0])];
4527 UErrorCode status
= U_ZERO_ERROR
;
4528 int32_t len
= utrans_stripRules(rule
, (int32_t)(sizeof(rule
)/sizeof(rule
[0])), result
, &status
);
4529 if (len
!= u_strlen(expectedRule
)) {
4530 errln("utrans_stripRules return len = %d", len
);
4532 if (u_strncmp(expectedRule
, result
, len
) != 0) {
4533 errln("utrans_stripRules did not return expected string");
4538 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
// Creates the "Halfwidth-Fullwidth" and "Fullwidth-Halfwidth" transliterators
// and walks a table of (direction tag, halfwidth text, fullwidth text)
// triples. A creation failure is reported through dataerrln().
4540 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4541 UParseError parseError
;
4542 UErrorCode status
= U_ZERO_ERROR
;
4543 Transliterator
* hf
= Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD
, parseError
, status
);
4544 Transliterator
* fh
= Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD
, parseError
, status
);
// Either pointer being null counts as a (data) failure.
4545 if (hf
== 0 || fh
== 0) {
4546 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
4552 // Array of 3n items (the loop below strides by 3: tag, halfwidth, fullwidth)
4554 // "hf"|"fh"|"both",
4557 const char* DATA
[] = {
4559 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4560 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4562 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
// DATA[i+1] is the halfwidth form and DATA[i+2] the fullwidth form; both are
// stored with backslash-u escapes and converted by CharsToUnicodeString().
4564 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
4565 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
4566 UnicodeString f
= CharsToUnicodeString(DATA
[i
+2]);
// The case labels are the code points of 'h', 'f' and 'b'.
4568 case 0x68: //'h': // Halfwidth-Fullwidth only
4571 case 0x66: //'f': // Fullwidth-Halfwidth only
4574 case 0x62: //'b': // both directions
4586 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4587 * TODO: confirm that the expected results are correct.
4588 * For now, test just confirms that C++ and Java give identical results.
// Transliterates a paragraph of Thai text with the "Any-Latin" transliterator
// and compares the outcome with a stored Latin rendering via expect().
// The body is guarded by #if !UCONFIG_NO_BREAK_ITERATION.
4590 void TransliteratorTest::TestThai(void) {
4591 #if !UCONFIG_NO_BREAK_ITERATION
4592 UParseError parseError
;
4593 UErrorCode status
= U_ZERO_ERROR
;
4594 Transliterator
* tr
= Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
// Two failure reports exist: one through dataerrln() and one, on a failing
// status code, through errln().
4596 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
4599 if (U_FAILURE(status
)) {
4600 errln("FAIL: createInstance failed with %s", u_errorName(status
));
// Thai input, stored with backslash-u escapes; unescape() is applied below.
4603 const char *thaiText
=
4604 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4605 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4606 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4607 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4608 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4609 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4610 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4611 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4612 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4613 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4614 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4615 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4616 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4617 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4618 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4619 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4620 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4621 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4622 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4623 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4624 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4625 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4626 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4627 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4628 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4629 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4630 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4631 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4632 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4633 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
// Expected Latin rendering, also stored with backslash-u escapes.
4635 const char *latinText
=
4636 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4637 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4638 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4639 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4640 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4641 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4642 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4643 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4644 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4645 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4646 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4647 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4648 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4649 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4650 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4651 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4652 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4653 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
// Decode the escapes, then run the transliterator over the Thai text.
4656 UnicodeString
xlitText(thaiText
);
4657 xlitText
= xlitText
.unescape();
4658 tr
->transliterate(xlitText
);
4660 UnicodeString
expectedText(latinText
);
4661 expectedText
= expectedText
.unescape();
// NOTE(review): xlitText has already been passed through tr->transliterate()
// above when it is handed to expect() as the source text -- confirm that
// checking the transliterated text (rather than the raw Thai) is intended.
4662 expect(*tr
, xlitText
, expectedText
);
4669 //======================================================================
4671 //======================================================================
// Convenience check driven by a transliterator ID: instantiates the
// transliterator named by id in the forward direction and verifies, via
// expect(), that source is transformed into expectedResult.
//   id             - ID handed to Transliterator::createInstance()
//   source         - text to transliterate
//   expectedResult - text the transliteration must produce
4672 void TransliteratorTest::expectT(const UnicodeString
& id
,
4673 const UnicodeString
& source
,
4674 const UnicodeString
& expectedResult
) {
4675 UErrorCode ec
= U_ZERO_ERROR
;
4677 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
// A creation failure is reported with the ID and the ICU error name.
4678 if (U_FAILURE(ec
)) {
4679 errln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(ec
));
4683 expect(*t
, source
, expectedResult
);
// Formats a rule-parsing failure for the test log. The diagnostic is built
// from the caller's message plus the UParseError details (line, offset,
// prettified pre- and post-context) and the name of the ICU error code.
//   message    - caller-supplied description of what was being attempted
//   parseError - parse error details filled in by the failing call
//   status     - ICU error code of the failing call
4687 void TransliteratorTest::reportParseError(const UnicodeString
& message
,
4688 const UParseError
& parseError
,
4689 const UErrorCode
& status
) {
4691 /*", parse error " + parseError.code +*/
4692 ", line " + parseError
.line
+
4693 ", offset " + parseError
.offset
+
4694 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
4695 ", post-context " + prettify(parseError
.postContext
,TRUE
) +
4696 ", Error: " + u_errorName(status
));
// Rule-based check without an explicit ID: forwards to the overload that
// takes an ID, using the placeholder ID "<ID>".
//   rules          - transliteration rules to build the transliterator from
//   source         - text to transliterate
//   expectedResult - text the transliteration must produce
//   pos            - position struct passed through unchanged
4699 void TransliteratorTest::expect(const UnicodeString
& rules
,
4700 const UnicodeString
& source
,
4701 const UnicodeString
& expectedResult
,
4702 UTransPosition
*pos
) {
4703 expect("<ID>", rules
, source
, expectedResult
, pos
);
// Rule-based check: builds a forward transliterator from rules under the
// given id and verifies, via the Transliterator overload of expect(), that
// source is transformed into expectedResult.
//   id             - ID to assign to the rule-based transliterator
//   rules          - transliteration rules
//   source         - text to transliterate
//   expectedResult - text the transliteration must produce
//   pos            - position struct passed through unchanged
4706 void TransliteratorTest::expect(const UnicodeString
& id
,
4707 const UnicodeString
& rules
,
4708 const UnicodeString
& source
,
4709 const UnicodeString
& expectedResult
,
4710 UTransPosition
*pos
) {
4711 UErrorCode status
= U_ZERO_ERROR
;
4712 UParseError parseError
;
4713 Transliterator
* t
= Transliterator::createFromRules(id
, rules
, UTRANS_FORWARD
, parseError
, status
);
// A rule compilation failure is reported together with the offending rules
// and the parse error details.
4714 if (U_FAILURE(status
)) {
4715 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules
, parseError
, status
);
4717 expect(*t
, source
, expectedResult
, pos
);
// Round-trip check with a pair of transliterators: t must turn source into
// expectedResult, and reverseTransliterator must turn expectedResult back
// into source.
4722 void TransliteratorTest::expect(const Transliterator
& t
,
4723 const UnicodeString
& source
,
4724 const UnicodeString
& expectedResult
,
4725 const Transliterator
& reverseTransliterator
) {
// Forward direction.
4726 expect(t
, source
, expectedResult
);
// Reverse direction: the roles of source and expectedResult are swapped.
4727 expect(reverseTransliterator
, expectedResult
, source
);
// Core check used by the other expect() overloads. It runs the
// transliterator t over source in several ways and reports each outcome
// through expectAux(), tagged with the transliterator ID plus one of
// ":String", ":Replaceable" or ":Keyboard".
//   t              - transliterator under test
//   source         - text to transliterate
//   expectedResult - text every mode is compared against
//   pos            - position struct; dereferenced for finishTransliteration()
4730 void TransliteratorTest::expect(const Transliterator
& t
,
4731 const UnicodeString
& source
,
4732 const UnicodeString
& expectedResult
,
4733 UTransPosition
*pos
) {
// ":String" mode: transliterate a copy of the source in a single call.
4735 UnicodeString
result(source
);
4736 t
.transliterate(result
);
4737 expectAux(t
.getID() + ":String", source
, result
, expectedResult
);
// Zero-initialised position used by the incremental calls below.
4739 UTransPosition index
={0, 0, 0, 0};
// ":Replaceable" mode works on a second copy of the source.
4744 UnicodeString
rsource(source
);
4746 t
.transliterate(rsource
);
4748 // Do it all at once -- below we do it incrementally
4749 t
.finishTransliteration(rsource
, *pos
);
4751 expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
);
4753 // Test keyboard (incremental) transliteration -- this result
4754 // must be the same after we finalize (see below).
// log accumulates a trace of the incremental steps; formatInput() appends the
// current text together with the position markers.
4759 formatInput(log
, rsource
, index
);
// NOTE(review): status is not inspected after transliterate() in the
// visible code -- confirm that ignoring it is intended.
4761 UErrorCode status
= U_ZERO_ERROR
;
4762 t
.transliterate(rsource
, index
, status
);
4763 formatInput(log
, rsource
, index
);
// Feed the source one code unit at a time, logging the state after each.
4765 for (int32_t i
=0; i
<source
.length(); ++i
) {
4769 log
.append(source
.charAt(i
)).append(" -> ");
4770 UErrorCode status
= U_ZERO_ERROR
;
4771 t
.transliterate(rsource
, index
, source
.charAt(i
), status
);
4772 formatInput(log
, rsource
, index
);
4776 // As a final step in keyboard transliteration, we must call
4777 // transliterate to finish off any pending partial matches that
4778 // were waiting for more input.
4779 t
.finishTransliteration(rsource
, index
);
4780 log
.append(" => ").append(rsource
);
// ":Keyboard" mode passes when the incrementally built text equals the
// expected result; the accumulated log serves as the summary.
4782 expectAux(t
.getID() + ":Keyboard", log
,
4783 rsource
== expectedResult
,
4789 * @param appendTo result is appended to this param.
4790 * @param input the string being transliterated
4791 * @param pos the index struct
// Appends a readable rendering of input and the position pos to appendTo.
// For a consistent position the text is split at the four offsets and joined
// with '{', PIPE, PIPE and '}' markers; otherwise an
// "INVALID UTransPosition {...}" message carrying the raw offsets is appended.
4793 UnicodeString
& TransliteratorTest::formatInput(UnicodeString
&appendTo
,
4794 const UnicodeString
& input
,
4795 const UTransPosition
& pos
) {
4796 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4797 // the {} indicate the context start and limit, and the ||
4798 // indicate the start and limit.
// Validity check: 0 <= contextStart <= start <= limit <= contextLimit
// <= input.length().
4799 if (0 <= pos
.contextStart
&&
4800 pos
.contextStart
<= pos
.start
&&
4801 pos
.start
<= pos
.limit
&&
4802 pos
.limit
<= pos
.contextLimit
&&
4803 pos
.contextLimit
<= input
.length()) {
// a..e are the five consecutive segments of input delimited by the offsets.
4805 UnicodeString a
, b
, c
, d
, e
;
4806 input
.extractBetween(0, pos
.contextStart
, a
);
4807 input
.extractBetween(pos
.contextStart
, pos
.start
, b
);
4808 input
.extractBetween(pos
.start
, pos
.limit
, c
);
4809 input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
);
4810 input
.extractBetween(pos
.contextLimit
, input
.length(), e
);
// 123 and 125 are the code points of '{' and '}'.
4811 appendTo
.append(a
).append((UChar
)123/*{*/).append(b
).
4812 append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
).
4813 append((UChar
)125/*}*/).append(e
);
// Inconsistent position: report the raw offsets instead.
4815 appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" +
4816 pos
.contextStart
+ ", s=" + pos
.start
+ ", l=" +
4817 pos
.limit
+ ", cl=" + pos
.contextLimit
+ "} on " +
// Compares result with expectedResult and forwards to the summary-based
// expectAux() overload, using "<source> -> <result>" as the summary and
// (result == expectedResult) as the pass flag.
//   tag            - label identifying the check in the log
//   source         - original text
//   result         - text actually produced
//   expectedResult - text that should have been produced
4823 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4824 const UnicodeString
& source
,
4825 const UnicodeString
& result
,
4826 const UnicodeString
& expectedResult
) {
4827 expectAux(tag
, source
+ " -> " + result
,
4828 result
== expectedResult
,
// Reports the outcome of one check. The visible code logs
// "(<tag>) <summary>" through logln() and reports
// "FAIL: (<tag>) ... , expected <expectedResult>" through dataerrln();
// summary and expectedResult are passed through prettify() first.
// NOTE(review): presumably pass selects between the two reports -- confirm.
//   tag            - label identifying the check
//   summary        - description of what was transliterated and the outcome
//   pass           - whether the check succeeded
//   expectedResult - text that should have been produced
4832 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4833 const UnicodeString
& summary
, UBool pass
,
4834 const UnicodeString
& expectedResult
) {
4836 logln(UnicodeString("(")+tag
+") " + prettify(summary
));
4838 dataerrln(UnicodeString("FAIL: (")+tag
+") "
4840 + ", expected " + prettify(expectedResult
));
4844 #endif /* #if !UCONFIG_NO_TRANSLITERATION */