1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 1999-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/10/99 aliu Creation.
10 **********************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION
18 #include "unicode/locid.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/translit.h"
22 #include "unicode/uchar.h"
23 #include "unicode/unifilt.h"
24 #include "unicode/uniset.h"
25 #include "unicode/ustring.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/uscript.h"
28 #include "unicode/utf16.h"
47 /***********************************************************************
49 HOW TO USE THIS TEST FILE
51 How I developed on two platforms
52 without losing (too much of) my mind
55 1. Add new tests by copying/pasting/changing existing tests. On Java,
56 any public void method named Test...() taking no parameters becomes
57 a test. On C++, you need to modify the header and add a line to
58 the runIndexedTest() dispatch method.
60 2. Make liberal use of the expect() method; it is your friend.
62 3. The tests in this file exactly match those in a sister file on the
63 other side. The two files are:
65 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
66 icu4c: source/test/intltest/transtst.cpp
68 ==> THIS IS THE IMPORTANT PART <==
70 When you add a test in this file, add it in TransliteratorTest.java
71 too. Give it the same name and put it in the same relative place.
72 This makes maintenance a lot simpler for any poor soul who ends up
73 trying to synchronize the tests between icu4j and icu4c.
75 4. If you MUST enter a test that is NOT paralleled in the sister file,
76 then add it in the special non-mirrored section. These are labeled "icu4j ONLY" or "icu4c ONLY".
85 Make sure you document the reason the test is here and not there.
90 ***********************************************************************/
92 // Define character constants thusly to be EBCDIC-friendly
94 LEFT_BRACE
=((UChar
)0x007B), /*{*/
95 PIPE
=((UChar
)0x007C), /*|*/
96 ZERO
=((UChar
)0x0030), /*0*/
97 UPPER_A
=((UChar
)0x0041) /*A*/
100 TransliteratorTest::TransliteratorTest()
101 : DESERET_DEE((UChar32
)0x10414),
102 DESERET_dee((UChar32
)0x1043C)
106 TransliteratorTest::~TransliteratorTest() {}
109 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
,
110 const char* &name
, char* /*par*/) {
112 TESTCASE(0,TestInstantiation
);
113 TESTCASE(1,TestSimpleRules
);
114 TESTCASE(2,TestRuleBasedInverse
);
115 TESTCASE(3,TestKeyboard
);
116 TESTCASE(4,TestKeyboard2
);
117 TESTCASE(5,TestKeyboard3
);
118 TESTCASE(6,TestArabic
);
119 TESTCASE(7,TestCompoundKana
);
120 TESTCASE(8,TestCompoundHex
);
121 TESTCASE(9,TestFiltering
);
122 TESTCASE(10,TestInlineSet
);
123 TESTCASE(11,TestPatternQuoting
);
124 TESTCASE(12,TestJ277
);
125 TESTCASE(13,TestJ243
);
126 TESTCASE(14,TestJ329
);
127 TESTCASE(15,TestSegments
);
128 TESTCASE(16,TestCursorOffset
);
129 TESTCASE(17,TestArbitraryVariableValues
);
130 TESTCASE(18,TestPositionHandling
);
131 TESTCASE(19,TestHiraganaKatakana
);
132 TESTCASE(20,TestCopyJ476
);
133 TESTCASE(21,TestAnchors
);
134 TESTCASE(22,TestInterIndic
);
135 TESTCASE(23,TestFilterIDs
);
136 TESTCASE(24,TestCaseMap
);
137 TESTCASE(25,TestNameMap
);
138 TESTCASE(26,TestLiberalizedID
);
139 TESTCASE(27,TestCreateInstance
);
140 TESTCASE(28,TestNormalizationTransliterator
);
141 TESTCASE(29,TestCompoundRBT
);
142 TESTCASE(30,TestCompoundFilter
);
143 TESTCASE(31,TestRemove
);
144 TESTCASE(32,TestToRules
);
145 TESTCASE(33,TestContext
);
146 TESTCASE(34,TestSupplemental
);
147 TESTCASE(35,TestQuantifier
);
148 TESTCASE(36,TestSTV
);
149 TESTCASE(37,TestCompoundInverse
);
150 TESTCASE(38,TestNFDChainRBT
);
151 TESTCASE(39,TestNullInverse
);
152 TESTCASE(40,TestAliasInverseID
);
153 TESTCASE(41,TestCompoundInverseID
);
154 TESTCASE(42,TestUndefinedVariable
);
155 TESTCASE(43,TestEmptyContext
);
156 TESTCASE(44,TestCompoundFilterID
);
157 TESTCASE(45,TestPropertySet
);
158 TESTCASE(46,TestNewEngine
);
159 TESTCASE(47,TestQuantifiedSegment
);
160 TESTCASE(48,TestDevanagariLatinRT
);
161 TESTCASE(49,TestTeluguLatinRT
);
162 TESTCASE(50,TestCompoundLatinRT
);
163 TESTCASE(51,TestSanskritLatinRT
);
164 TESTCASE(52,TestLocaleInstantiation
);
165 TESTCASE(53,TestTitleAccents
);
166 TESTCASE(54,TestLocaleResource
);
167 TESTCASE(55,TestParseError
);
168 TESTCASE(56,TestOutputSet
);
169 TESTCASE(57,TestVariableRange
);
170 TESTCASE(58,TestInvalidPostContext
);
171 TESTCASE(59,TestIDForms
);
172 TESTCASE(60,TestToRulesMark
);
173 TESTCASE(61,TestEscape
);
174 TESTCASE(62,TestAnchorMasking
);
175 TESTCASE(63,TestDisplayName
);
176 TESTCASE(64,TestSpecialCases
);
177 #if !UCONFIG_NO_FILE_IO
178 TESTCASE(65,TestIncrementalProgress
);
180 TESTCASE(66,TestSurrogateCasing
);
181 TESTCASE(67,TestFunction
);
182 TESTCASE(68,TestInvalidBackRef
);
183 TESTCASE(69,TestMulticharStringSet
);
184 TESTCASE(70,TestUserFunction
);
185 TESTCASE(71,TestAnyX
);
186 TESTCASE(72,TestSourceTargetSet
);
187 TESTCASE(73,TestGurmukhiDevanagari
);
188 TESTCASE(74,TestPatternWhiteSpace
);
189 TESTCASE(75,TestAllCodepoints
);
190 TESTCASE(76,TestBoilerplate
);
191 TESTCASE(77,TestAlternateSyntax
);
192 TESTCASE(78,TestBeginEnd
);
193 TESTCASE(79,TestBeginEndToRules
);
194 TESTCASE(80,TestRegisterAlias
);
195 TESTCASE(81,TestRuleStripping
);
196 TESTCASE(82,TestHalfwidthFullwidth
);
197 TESTCASE(83,TestThai
);
198 TESTCASE(84,TestAny
);
199 default: name
= ""; break;
204 * Make sure every system transliterator can be instantiated.
206 * ALSO test that the result of toRules() for each transliterator is a
207 * valid rule set. Do this here so we don't have to have another test that
208 * instantiates everything as well.
210 void TransliteratorTest::TestInstantiation() {
211 UErrorCode ec
= U_ZERO_ERROR
;
212 StringEnumeration
* avail
= Transliterator::getAvailableIDs(ec
);
213 assertSuccess("getAvailableIDs()", ec
);
214 assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
);
215 int32_t n
= Transliterator::countAvailableIDs();
216 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
217 avail
->count(ec
) == n
);
218 assertSuccess("count()", ec
);
220 for (int32_t i
=0; i
<n
; ++i
) {
221 const UnicodeString
& id
= *avail
->snext(ec
);
222 if (!assertSuccess("snext()", ec
) ||
223 !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) {
226 UnicodeString id2
= Transliterator::getAvailableID(i
);
227 if (id
.length() < 1) {
228 errln(UnicodeString("FAIL: getAvailableID(") +
229 i
+ ") returned empty string");
233 errln(UnicodeString("FAIL: getAvailableID(") +
234 i
+ ") != getAvailableIDs().snext()");
237 UParseError parseError
;
238 UErrorCode status
= U_ZERO_ERROR
;
239 Transliterator
* t
= Transliterator::createInstance(id
,
240 UTRANS_FORWARD
, parseError
,status
);
242 Transliterator::getDisplayName(id
, name
);
244 #if UCONFIG_NO_BREAK_ITERATION
245 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
246 if (id
.compare((UnicodeString
)"Thai-Latn") != 0 &&
247 id
.compare((UnicodeString
)"Thai-Latin") != 0)
249 dataerrln(UnicodeString("FAIL: Couldn't create ") + id
+
250 /*", parse error " + parseError.code +*/
251 ", line " + parseError
.line
+
252 ", offset " + parseError
.offset
+
253 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
254 ", post-context " +prettify(parseError
.postContext
,TRUE
) +
255 ", Error: " + u_errorName(status
));
256 // When createInstance fails, it deletes the failing
257 // entry from the available ID list. We detect this
258 // here by looking for a change in countAvailableIDs.
259 int32_t nn
= Transliterator::countAvailableIDs();
262 --i
; // Compensate for deleted entry
265 logln(UnicodeString("OK: ") + name
+ " (" + id
+ ")");
269 t
->toRules(rules
, TRUE
);
270 Transliterator
*u
= Transliterator::createFromRules("x",
271 rules
, UTRANS_FORWARD
, parseError
,status
);
273 errln(UnicodeString("FAIL: ") + id
+
274 ".createFromRules() => bad rules" +
275 /*", parse error " + parseError.code +*/
276 ", line " + parseError
.line
+
277 ", offset " + parseError
.offset
+
278 ", context " + prettify(parseError
.preContext
, TRUE
) +
279 ", rules: " + prettify(rules
, TRUE
));
286 assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
);
287 assertSuccess("snext()", ec
);
290 // Now test the failure path
291 UParseError parseError
;
292 UErrorCode status
= U_ZERO_ERROR
;
293 UnicodeString
id("<Not a valid Transliterator ID>");
294 Transliterator
* t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
296 errln("FAIL: " + id
+ " returned a transliterator");
299 logln("OK: Bogus ID handled properly");
303 void TransliteratorTest::TestSimpleRules(void) {
304 /* Example: rules 1. ab>x|y
307 * []|eabcd start - no match, copy e to translated buffer
308 * [e]|abcd match rule 1 - copy output & adjust cursor
309 * [ex|y]cd match rule 2 - copy output & adjust cursor
310 * [exz]|d no match, copy d to transliterated buffer
313 expect(UnicodeString("ab>x|y;", "") +
317 /* Another set of rules:
329 expect(UnicodeString("ab>x|yzacw;") +
337 UErrorCode status
= U_ZERO_ERROR
;
338 UParseError parseError
;
339 Transliterator
*t
= Transliterator::createFromRules(
341 UnicodeString("$dummy=").append((UChar
)0xE100) +
343 "$vowel=[aeiouAEIOU];"
345 "$vowel } $lu > '!';"
350 UTRANS_FORWARD
, parseError
,
352 if (U_FAILURE(status
)) {
353 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status
));
356 expect(*t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
361 * Test inline set syntax and set variable syntax.
363 void TransliteratorTest::TestInlineSet(void) {
364 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
365 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
367 expect(UnicodeString(
370 "$alphanumeric = [$digit $alpha];" // ***
371 "$special = [^$alphanumeric];" // ***
372 "$alphanumeric > '-';"
373 "$special > '*';", ""),
375 "thx-1138", "---*----");
379 * Create some inverses and confirm that they work. We have to be
380 * careful how we do this, since the inverses will not be true
381 * inverses -- we can't throw any random string at the composition
382 * of the transliterators and expect the identity function. F x
383 * F' != I. However, if we are careful about the input, we will
384 * get the expected results.
386 void TransliteratorTest::TestRuleBasedInverse(void) {
387 UnicodeString RULES
=
388 UnicodeString("abc>zyx;") +
406 const char* DATA
[] = {
407 // Careful here -- random strings will not work. If we keep
408 // the left side to the domain and the right side to the range
409 // we will be okay though (left, abc; right xyz).
411 "abcacab", "zyxxxyy",
415 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
417 UErrorCode status
= U_ZERO_ERROR
;
418 UParseError parseError
;
419 Transliterator
*fwd
= Transliterator::createFromRules("<ID>", RULES
,
420 UTRANS_FORWARD
, parseError
, status
);
421 Transliterator
*rev
= Transliterator::createFromRules("<ID>", RULES
,
422 UTRANS_REVERSE
, parseError
, status
);
423 if (U_FAILURE(status
)) {
424 errln("FAIL: RBT constructor failed");
427 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
428 expect(*fwd
, DATA
[i
], DATA
[i
+1]);
429 expect(*rev
, DATA
[i
+1], DATA
[i
]);
436 * Basic test of keyboard.
438 void TransliteratorTest::TestKeyboard(void) {
439 UParseError parseError
;
440 UErrorCode status
= U_ZERO_ERROR
;
441 Transliterator
*t
= Transliterator::createFromRules("<ID>",
442 UnicodeString("psch>Y;")
446 UTRANS_FORWARD
, parseError
,
448 if (U_FAILURE(status
)) {
449 errln("FAIL: RBT constructor failed");
452 const char* DATA
[] = {
460 0, "AycAY", // null means finishKeyboardTransliteration
463 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
468 * Basic test of keyboard with cursor.
470 void TransliteratorTest::TestKeyboard2(void) {
471 UParseError parseError
;
472 UErrorCode status
= U_ZERO_ERROR
;
473 Transliterator
*t
= Transliterator::createFromRules("<ID>",
474 UnicodeString("ych>Y;")
478 UTRANS_FORWARD
, parseError
,
480 if (U_FAILURE(status
)) {
481 errln("FAIL: RBT constructor failed");
484 const char* DATA
[] = {
488 "s", "Aps", // modified for rollback - "Ay",
489 "c", "Apsc", // modified for rollback - "Ayc",
492 "s", "AycAps", // modified for rollback - "AycAy",
493 "c", "AycApsc", // modified for rollback - "AycAyc",
495 0, "AycAY", // null means finishKeyboardTransliteration
498 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
503 * Test keyboard transliteration with back-replacement.
505 void TransliteratorTest::TestKeyboard3(void) {
506 // We want th>z but t>y. Furthermore, during keyboard
507 // transliteration we want t>y then yh>z if t, then h are typed.
509 UnicodeString
RULES("t>|y;"
512 const char* DATA
[] = {
513 // Column 1: characters to add to buffer (as if typed)
514 // Column 2: expected appearance of buffer after
515 // keyboard transliteration.
518 "t", "abt", // modified for rollback - "aby",
520 "t", "abyct", // modified for rollback - "abycy",
522 0, "abycz", // null means finishKeyboardTransliteration
525 UParseError parseError
;
526 UErrorCode status
= U_ZERO_ERROR
;
527 Transliterator
*t
= Transliterator::createFromRules("<ID>", RULES
, UTRANS_FORWARD
, parseError
, status
);
528 if (U_FAILURE(status
)) {
529 errln("FAIL: RBT constructor failed");
532 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
536 void TransliteratorTest::keyboardAux(const Transliterator
& t
,
537 const char* DATA
[], int32_t DATA_length
) {
538 UErrorCode status
= U_ZERO_ERROR
;
539 UTransPosition index
={0, 0, 0, 0};
541 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
547 t
.transliterate(s
, index
, DATA
[i
], status
);
550 t
.finishTransliteration(s
, index
);
552 // Show the start index '{' and the cursor '|'
553 UnicodeString a
, b
, c
;
554 s
.extractBetween(0, index
.contextStart
, a
);
555 s
.extractBetween(index
.contextStart
, index
.start
, b
);
556 s
.extractBetween(index
.start
, s
.length(), c
);
558 append((UChar
)LEFT_BRACE
).
562 if (s
== DATA
[i
+1] && U_SUCCESS(status
)) {
565 errln(UnicodeString("FAIL: ") + log
+ ", expected " + DATA
[i
+1]);
570 void TransliteratorTest::TestArabic(void) {
571 // Test disabled for 2.0 until new Arabic transliterator can be written.
573 // const char* DATA[] = {
574 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
575 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
576 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
577 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
578 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
579 // "\u062c\u0645\u064a\u0644\u0629",
583 // UChar ar_raw[] = {
584 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
585 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
586 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
588 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
589 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
591 // UnicodeString ar(ar_raw);
592 // UErrorCode status=U_ZERO_ERROR;
593 // UParseError parseError;
594 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
596 // errln("FAIL: createInstance failed");
599 // expect(*t, "Arabic", ar);
604 * Compose the Kana transliterator forward and reverse and try
605 * some strings that should come out unchanged.
607 void TransliteratorTest::TestCompoundKana(void) {
608 UParseError parseError
;
609 UErrorCode status
= U_ZERO_ERROR
;
610 Transliterator
* t
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
);
612 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status
));
614 expect(*t
, "aaaaa", "aaaaa");
620 * Compose the hex transliterators forward and reverse.
622 void TransliteratorTest::TestCompoundHex(void) {
623 UParseError parseError
;
624 UErrorCode status
= U_ZERO_ERROR
;
625 Transliterator
* a
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
626 Transliterator
* b
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
);
627 Transliterator
* transab
[] = { a
, b
};
628 Transliterator
* transba
[] = { b
, a
};
629 if (a
== 0 || b
== 0) {
630 errln("FAIL: construction failed");
635 // Do some basic tests of a
636 expect(*a
, "01", UnicodeString("\\u0030\\u0031", ""));
637 // Do some basic tests of b
638 expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01");
640 Transliterator
* ab
= new CompoundTransliterator(transab
, 2);
641 UnicodeString
s("abcde", "");
644 UnicodeString
str(s
);
645 a
->transliterate(str
);
646 Transliterator
* ba
= new CompoundTransliterator(transba
, 2);
647 expect(*ba
, str
, str
);
655 int gTestFilterClassID
= 0;
657 * Used by TestFiltering().
659 class TestFilter
: public UnicodeFilter
{
660 virtual UnicodeFunctor
* clone() const {
661 return new TestFilter(*this);
663 virtual UBool
contains(UChar32 c
) const {
664 return c
!= (UChar
)0x0063 /*c*/;
667 virtual UnicodeString
& toPattern(UnicodeString
& result
,
668 UBool
/*escapeUnprintable*/) const {
671 virtual UBool
matchesIndexValue(uint8_t /*v*/) const {
674 virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {}
676 UClassID
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; }
680 * Do some basic tests of filtering.
682 void TransliteratorTest::TestFiltering(void) {
683 UParseError parseError
;
684 UErrorCode status
= U_ZERO_ERROR
;
685 Transliterator
* hex
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
687 errln("FAIL: createInstance(Any-Hex) failed");
690 hex
->adoptFilter(new TestFilter());
691 UnicodeString
s("abcde");
692 hex
->transliterate(s
);
693 UnicodeString
exp("\\u0061\\u0062c\\u0064\\u0065", "");
695 logln(UnicodeString("Ok: \"") + exp
+ "\"");
697 logln(UnicodeString("FAIL: \"") + s
+ "\", wanted \"" + exp
+ "\"");
700 // ICU4C ONLY. Transliterator.orphanFilter() does not exist in ICU4J.
701 UnicodeFilter
*f
= hex
->orphanFilter();
703 errln("FAIL: orphanFilter() should get a UnicodeFilter");
713 void TransliteratorTest::TestAnchors(void) {
714 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
717 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
720 expect(UnicodeString("^ab > 01 ;"
728 expect(UnicodeString("$s = [z$] ;"
735 "abzababbabxzabxabx",
740 * Test pattern quoting and escape mechanisms.
742 void TransliteratorTest::TestPatternQuoting(void) {
744 // Each item is <rules>, <input>, <expected output>
745 const UnicodeString DATA
[] = {
746 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
747 UnicodeString(UChar(0x4E01)),
751 for (int32_t i
=0; i
<3; i
+=3) {
752 logln(UnicodeString("Pattern: ") + prettify(DATA
[i
]));
753 UParseError parseError
;
754 UErrorCode status
= U_ZERO_ERROR
;
755 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
756 if (U_FAILURE(status
)) {
757 errln("RBT constructor failed");
759 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
766 * Regression test for bugs found in Greek transliteration.
768 void TransliteratorTest::TestJ277(void) {
769 UErrorCode status
= U_ZERO_ERROR
;
770 UParseError parseError
;
771 Transliterator
*gl
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
);
773 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status
));
778 UChar upsilon
= 0x3C5;
780 // UChar PHI = 0x3A6;
782 // UChar omega = 0x3C9;
783 // UChar omicron = 0x3BF;
784 // UChar epsilon = 0x3B5;
786 // sigma upsilon nu -> syn
788 syn
.append(sigma
).append(upsilon
).append(nu
);
789 expect(*gl
, syn
, "syn");
791 // sigma alpha upsilon nu -> saun
793 sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
);
794 expect(*gl
, sayn
, "saun");
796 // Again, using a smaller rule set
801 "$ypsilon = \\u03C5;"
802 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
805 "u <> $vowel { $ypsilon;"
809 Transliterator
*mini
= Transliterator::createFromRules("mini", rules
, UTRANS_REVERSE
, parseError
, status
);
810 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
811 expect(*mini
, syn
, "syn");
812 expect(*mini
, sayn
, "saun");
816 #if !UCONFIG_NO_FORMATTING
817 // Transliterate the Greek locale data
819 DateFormatSymbols
syms(el
, status
);
820 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
822 const UnicodeString
* data
= syms
.getMonths(count
);
823 for (i
=0; i
<count
; ++i
) {
824 if (data
[i
].length() == 0) {
827 UnicodeString
out(data
[i
]);
828 gl
->transliterate(out
);
830 if (data
[i
].length() >= 2 && out
.length() >= 2 &&
831 u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) {
832 if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) {
837 logln(prettify(data
[i
] + " -> " + out
));
839 errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
));
848 * Prefix, suffix support in hex transliterators
850 void TransliteratorTest::TestJ243(void) {
851 UErrorCode ec
= U_ZERO_ERROR
;
853 // Test default Hex-Any, which should handle
854 // \u, \U, u+, and U+
855 Transliterator
*hex
=
856 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
);
857 if (assertSuccess("getInstance", ec
)) {
858 expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
862 // // Try a custom Hex-Unicode
863 // // \uXXXX and &#xXXXX;
864 // ec = U_ZERO_ERROR;
865 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
866 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
867 // "abcd5fx0123");
868 // // Try custom Any-Hex (default is tested elsewhere)
869 // ec = U_ZERO_ERROR;
870 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
871 // expect(hex3, "012", "012");
875 * Parsers need better syntax error messages.
877 void TransliteratorTest::TestJ329(void) {
879 struct { UBool containsErrors
; const char* rule
; } DATA
[] = {
880 { FALSE
, "a > b; c > d" },
881 { TRUE
, "a > b; no operator; c > d" },
883 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
885 for (int32_t i
=0; i
<DATA_length
; ++i
) {
886 UErrorCode status
= U_ZERO_ERROR
;
887 UParseError parseError
;
888 Transliterator
*rbt
= Transliterator::createFromRules("<ID>",
893 UBool gotError
= U_FAILURE(status
);
894 UnicodeString
desc(DATA
[i
].rule
);
895 desc
.append(gotError
? " -> error" : " -> no error");
897 desc
= desc
+ ", ParseError code=" + u_errorName(status
) +
898 " line=" + parseError
.line
+
899 " offset=" + parseError
.offset
+
900 " context=" + parseError
.preContext
;
902 if (gotError
== DATA
[i
].containsErrors
) {
903 logln(UnicodeString("Ok: ") + desc
);
905 errln(UnicodeString("FAIL: ") + desc
);
912 * Test segments and segment references.
914 void TransliteratorTest::TestSegments(void) {
916 // Each item is <rules>, <input>, <expected output>
917 UnicodeString DATA
[] = {
918 "([a-z]) '.' ([0-9]) > $2 '-' $1",
923 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
927 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
929 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
930 logln("Pattern: " + prettify(DATA
[i
]));
931 UParseError parseError
;
932 UErrorCode status
= U_ZERO_ERROR
;
933 Transliterator
*t
= Transliterator::createFromRules("ID", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
934 if (U_FAILURE(status
)) {
935 errln("FAIL: RBT constructor");
937 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
944 * Test cursor positioning outside of the key
946 void TransliteratorTest::TestCursorOffset(void) {
948 // Each item is <rules>, <input>, <expected output>
949 UnicodeString DATA
[] = {
950 "pre {alpha} post > | @ ALPHA ;"
952 "pre {beta} post > BETA @@ | ;"
955 "prealphapost prebetapost",
957 "prbetaxyz preBETApost",
959 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
961 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
962 logln("Pattern: " + prettify(DATA
[i
]));
963 UParseError parseError
;
964 UErrorCode status
= U_ZERO_ERROR
;
965 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
966 if (U_FAILURE(status
)) {
967 errln("FAIL: RBT constructor");
969 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
976 * Test zero length and > 1 char length variable values. Test
977 * use of variable refs in UnicodeSets.
979 void TransliteratorTest::TestArbitraryVariableValues(void) {
981 // Each item is <rules>, <input>, <expected output>
982 UnicodeString DATA
[] = {
1000 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1002 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1003 logln("Pattern: " + prettify(DATA
[i
]));
1004 UParseError parseError
;
1005 UErrorCode status
= U_ZERO_ERROR
;
1006 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1007 if (U_FAILURE(status
)) {
1008 errln("FAIL: RBT constructor");
1010 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
1017 * Confirm that the contextStart, contextLimit, start, and limit
1018 * behave correctly. J474.
1020 void TransliteratorTest::TestPositionHandling(void) {
1021 // Array of 3n items
1022 // Each item is <rules>, <input>, <expected output>
1023 const char* DATA
[] = {
1024 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1025 "xtat txtb", // pos 0,9,0,9
1028 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1029 "xtat txtb", // pos 2,9,3,8
1032 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1033 "xtat txtb", // pos 3,8,3,8
1037 // Array of 4n positions -- these go with the DATA array
1038 // They are: contextStart, contextLimit, start, limit
1045 int32_t n
= UPRV_LENGTHOF(DATA
) / 3;
1046 for (int32_t i
=0; i
<n
; i
++) {
1047 UErrorCode status
= U_ZERO_ERROR
;
1048 UParseError parseError
;
1049 Transliterator
*t
= Transliterator::createFromRules("<ID>",
1050 DATA
[3*i
], UTRANS_FORWARD
, parseError
, status
);
1051 if (U_FAILURE(status
)) {
1053 errln("FAIL: RBT constructor");
1057 pos
.contextStart
= POS
[4*i
];
1058 pos
.contextLimit
= POS
[4*i
+1];
1059 pos
.start
= POS
[4*i
+2];
1060 pos
.limit
= POS
[4*i
+3];
1061 UnicodeString
rsource(DATA
[3*i
+1]);
1062 t
->transliterate(rsource
, pos
, status
);
1063 if (U_FAILURE(status
)) {
1065 errln("FAIL: transliterate");
1068 t
->finishTransliteration(rsource
, pos
);
1069 expectAux(DATA
[3*i
],
1078 * Test the Hiragana-Katakana transliterator.
1080 void TransliteratorTest::TestHiraganaKatakana(void) {
1081 UParseError parseError
;
1082 UErrorCode status
= U_ZERO_ERROR
;
1083 Transliterator
* hk
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
);
1084 Transliterator
* kh
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
);
1085 if (hk
== 0 || kh
== 0) {
1086 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1092 // Array of 3n items
1093 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1094 const char* DATA
[] = {
1096 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1097 "\\u30A2\\u30F8\\u30F2\\u30B0",
1100 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1101 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1103 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1105 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1106 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
1107 UnicodeString k
= CharsToUnicodeString(DATA
[i
+2]);
1109 case 0x68: //'h': // Hiragana-Katakana
1112 case 0x6B: //'k': // Katakana-Hiragana
1115 case 0x62: //'b': // both
1126 * Test cloning / copy constructor of RBT.
1128 void TransliteratorTest::TestCopyJ476(void) {
1129 // The real test here is what happens when the destructors are
1130 // called. So we let one object get destructed, and check to
1131 // see that its copy still works.
1132 Transliterator
*t2
= 0;
1134 UParseError parseError
;
1135 UErrorCode status
= U_ZERO_ERROR
;
1136 Transliterator
*t1
= Transliterator::createFromRules("t1",
1137 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD
, parseError
, status
);
1138 if (U_FAILURE(status
)) {
1139 errln("FAIL: RBT constructor");
1142 t2
= t1
->clone(); // Call copy constructor under the covers.
1143 expect(*t1
, "abcfoofoo", "ABcbar");
1146 expect(*t2
, "abcfoofoo", "ABcbar");
1151 * Test inter-Indic transliterators. These are composed.
1152 * ICU4C Jitterbug 483.
1154 void TransliteratorTest::TestInterIndic(void) {
1155 UnicodeString
ID("Devanagari-Gujarati", "");
1156 UErrorCode status
= U_ZERO_ERROR
;
1157 UParseError parseError
;
1158 Transliterator
* dg
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1160 dataerrln("FAIL: createInstance(" + ID
+ ") returned NULL - " + u_errorName(status
));
1163 UnicodeString id
= dg
->getID();
1165 errln("FAIL: createInstance(" + ID
+ ")->getID() => " + id
);
1167 UnicodeString dev
= CharsToUnicodeString("\\u0901\\u090B\\u0925");
1168 UnicodeString guj
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1169 expect(*dg
, dev
, guj
);
1174 * Test filter syntax in IDs. (J918)
1176 void TransliteratorTest::TestFilterIDs(void) {
1177 // Array of 4n strings:
1178 // <id>, <inverse id>, <input>, <expected output>
1179 const char* DATA
[] = {
1180 "[aeiou]Any-Hex", // ID
1181 "[aeiou]Hex-Any", // expected inverse ID
1183 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1185 "[aeiou]Any-Hex;[^5]Hex-Any",
1186 "[^5]Any-Hex;[aeiou]Hex-Any",
1195 enum { DATA_length
= UPRV_LENGTHOF(DATA
) };
1197 for (int i
=0; i
<DATA_length
; i
+=4) {
1198 UnicodeString
ID(DATA
[i
], "");
1199 UnicodeString
uID(DATA
[i
+1], "");
1200 UnicodeString
data2(DATA
[i
+2], "");
1201 UnicodeString
data3(DATA
[i
+3], "");
1202 UParseError parseError
;
1203 UErrorCode status
= U_ZERO_ERROR
;
1204 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1206 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1209 expect(*t
, data2
, data3
);
1212 if (ID
!= t
->getID()) {
1213 errln("FAIL: createInstance(" + ID
+ ").getID() => " +
1217 // Check the inverse
1218 Transliterator
*u
= t
->createInverse(status
);
1220 errln("FAIL: " + ID
+ ".createInverse() returned NULL");
1221 } else if (u
->getID() != uID
) {
1222 errln("FAIL: " + ID
+ ".createInverse().getID() => " +
1223 u
->getID() + ", expected " + uID
);
1232 * Test the case mapping transliterators.
1234 void TransliteratorTest::TestCaseMap(void) {
1235 UParseError parseError
;
1236 UErrorCode status
= U_ZERO_ERROR
;
1237 Transliterator
* toUpper
=
1238 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1239 Transliterator
* toLower
=
1240 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1241 Transliterator
* toTitle
=
1242 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1243 if (toUpper
==0 || toLower
==0 || toTitle
==0) {
1244 errln("FAIL: createInstance returned NULL");
1251 expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.",
1252 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1253 expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1254 "the quick brown foX jumped over the lazY dogs.");
1255 expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.",
1256 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1264 * Test the name mapping transliterators.
1266 void TransliteratorTest::TestNameMap(void) {
// Exercises the character-name transliterators: "Any-Name[^abc]" (characters
// to \N{...} names), "Name-Any" (names back to characters) and the compound
// "Any-Name;Name-Any".
1267 UParseError parseError
;
1268 UErrorCode status
= U_ZERO_ERROR
;
1269 Transliterator
* uni2name
=
1270 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
);
1271 Transliterator
* name2uni
=
1272 Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
);
1273 if (uni2name
==0 || name2uni
==0) {
1274 errln("FAIL: createInstance returned NULL");
// Characters to names: 'a', 'b' and 'c' are excluded by the filter and stay
// literal in the expected output.
1280 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1281 expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1282 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
// Names to characters: names padded with spaces are still expected to convert,
// while the unterminated "\N{x" and the trailing "\N{" are expected to be left
// in the output as literal text.
1283 expect(*name2uni
, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1284 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
// Compound transliterator: Any-Name followed by Name-Any.
1291 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
);
1293 errln("FAIL: createInstance returned NULL");
1298 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1299 UnicodeString s
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1305 * Test liberalized ID syntax. 1006c
1307 void TransliteratorTest::TestLiberalizedID(void) {
// Table-driven check that createInstance() accepts loosely written IDs
// (lower-case names, surrounding whitespace, inline filters, compound IDs).
1308 // Some test cases have an expected getID() value of NULL. This
1309 // means I have disabled the test case for now. This stuff is
1310 // still under development, and I haven't decided whether to make
1311 // getID() return canonical case yet. It will all get rewritten
1312 // with the move to Source-Target/Variant IDs anyway. [aliu]
// Each row is three entries: the ID passed to createInstance(), the expected
// getID() result (NULL disables that check), and a short description that is
// used in the log messages.
1313 const char* DATA
[] = {
1314 "latin-greek", NULL
/*"Latin-Greek"*/, "case insensitivity",
1315 " Null ", "Null", "whitespace",
1316 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1317 " null ; latin-greek ", NULL
/*"Null;Latin-Greek"*/, "compound whitespace",
1319 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1320 UParseError parseError
;
1321 UErrorCode status
= U_ZERO_ERROR
;
// Step through the table one row (three entries) at a time.
1322 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1323 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1325 dataerrln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1326 " cannot create ID \"" + DATA
[i
] + "\" - " + u_errorName(status
));
// NOTE(review): presumably a NULL table entry leaves exp empty, which is what
// the length() == 0 test below relies on -- confirm.
1330 exp
= UnicodeString(DATA
[i
+1], "");
1332 // Don't worry about getID() if the expected char*
1333 // is NULL -- see above.
1334 if (exp
.length() == 0 || exp
== t
->getID()) {
1335 logln(UnicodeString("Ok: ") + DATA
[i
+2] +
1336 " create ID \"" + DATA
[i
] + "\" => \"" +
1339 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1340 " create ID \"" + DATA
[i
] + "\" => \"" +
1341 t
->getID() + "\", exp \"" + exp
+ "\"");
1348 /* test for Jitterbug 912 */
1349 void TransliteratorTest::TestCreateInstance(){
// Table-driven test of createInstance() with an explicit direction, including
// invalid IDs for which creation is expected to fail.
1350 const char* FORWARD
= "F";
1351 const char* REVERSE
= "R";
1352 const char* DATA
[] = {
1354 // Column 2: direction
1355 // Column 3: expected ID, or "" if expect failure
1356 "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912
1358 // JB#2689: bad compound causes crash
1359 "InvalidSource-InvalidTarget", FORWARD
, "",
1360 "InvalidSource-InvalidTarget", REVERSE
, "",
1361 "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "",
1362 "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "",
1363 "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "",
1364 "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "",
// Walk the table three entries at a time until a null ID entry is reached.
1369 for (int32_t i
=0; DATA
[i
]; i
+=3) {
1371 UErrorCode ec
= U_ZERO_ERROR
;
1372 UnicodeString
id(DATA
[i
]);
// The direction is decided by pointer identity with FORWARD; the table stores
// the FORWARD/REVERSE pointers themselves, so no string comparison is needed.
1373 UTransDirection dir
= (DATA
[i
+1]==FORWARD
)?
1374 UTRANS_FORWARD
:UTRANS_REVERSE
;
1375 UnicodeString
expID(DATA
[i
+2]);
1377 Transliterator::createInstance(id
,dir
,err
,ec
);
1378 UnicodeString newID
;
// A row passes when the resulting ID equals the expected ID.
1382 UBool ok
= (newID
== expID
);
// NOTE(review): newID is replaced by the error name here so it can be shown in
// the messages below -- presumably only on the failure path; confirm.
1384 newID
= u_errorName(ec
);
1387 logln((UnicodeString
)"Ok: createInstance(" +
1388 id
+ "," + DATA
[i
+1] + ") => " + newID
);
1390 dataerrln((UnicodeString
)"FAIL: createInstance(" +
1391 id
+ "," + DATA
[i
+1] + ") => " + newID
+
1392 ", expected " + expID
);
1399 * Test the normalization transliterator.
1401 void TransliteratorTest::TestNormalizationTransliterator() {
// Checks the NFD and NFC transliterators against the CANON table, the NFKD and
// NFKC transliterators against the COMPAT table, and finally the compound
// "NFD; [x]Remove".
1402 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1403 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1404 const char* CANON
[] = {
1405 // Input Decomposed Composed
1406 "cat", "cat", "cat" ,
1407 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1409 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1410 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1412 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1413 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1414 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1416 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1417 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1419 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1420 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1421 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1423 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1424 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1426 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1427 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1429 "Henry IV", "Henry IV", "Henry IV" ,
1430 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1432 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1433 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1434 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1435 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1436 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1438 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1442 const char* COMPAT
[] = {
1443 // Input Decomposed Composed
1444 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1446 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1447 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1449 "Henry IV", "Henry IV", "Henry IV" ,
1450 "Henry \\u2163", "Henry IV", "Henry IV" ,
1452 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1453 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1455 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1460 UParseError parseError
;
1461 UErrorCode status
= U_ZERO_ERROR
;
1462 Transliterator
* NFD
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
);
1463 Transliterator
* NFC
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
);
1465 dataerrln("FAIL: createInstance failed: %s", u_errorName(status
));
// Canonical forms: each CANON row supplies the input, the expected NFD result
// and the expected NFC result. The loop stops at a null input entry.
1470 for (i
=0; CANON
[i
]; i
+=3) {
1471 UnicodeString in
= CharsToUnicodeString(CANON
[i
]);
1472 UnicodeString expd
= CharsToUnicodeString(CANON
[i
+1]);
1473 UnicodeString expc
= CharsToUnicodeString(CANON
[i
+2]);
1474 expect(*NFD
, in
, expd
);
1475 expect(*NFC
, in
, expc
);
1480 Transliterator
* NFKD
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
);
1481 Transliterator
* NFKC
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
);
1482 if (!NFKD
|| !NFKC
) {
1483 dataerrln("FAIL: createInstance failed");
// Compatibility forms: same row layout, checked with NFKD and NFKC.
1488 for (i
=0; COMPAT
[i
]; i
+=3) {
1489 UnicodeString in
= CharsToUnicodeString(COMPAT
[i
]);
1490 UnicodeString expkd
= CharsToUnicodeString(COMPAT
[i
+1]);
1491 UnicodeString expkc
= CharsToUnicodeString(COMPAT
[i
+2]);
1492 expect(*NFKD
, in
, expkd
);
1493 expect(*NFKC
, in
, expkc
);
// Normalization chained with a filtered Remove: the expected output is the
// decomposed c + U+030C with the 'x' gone.
1499 status
= U_ZERO_ERROR
;
1500 Transliterator
*t
= Transliterator::createInstance("NFD; [x]Remove",
1504 errln("FAIL: createInstance failed");
1506 expect(*t
, CharsToUnicodeString("\\u010dx"),
1507 CharsToUnicodeString("c\\u030C"));
1512 * Test compound RBT rules.
1514 void TransliteratorTest::TestCompoundRBT(void) {
// Tests rule sets that contain ::ID elements: creation from rules, the
// toRules() output and its round trip through createFromRules(), and finally
// Foo(Bar)-style IDs together with createInverse().
1515 // Careful with spacing and ';' here: Phrase this exactly
1516 // as toRules() is going to return it. If toRules() changes
1517 // with regard to spacing or ';', then adjust this string.
1518 UnicodeString
rule("::Hex-Any;\n"
1522 "::[^t]Any-Upper;", "");
1523 UParseError parseError
;
1524 UErrorCode status
= U_ZERO_ERROR
;
1525 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
);
1527 errln("FAIL: createFromRules failed");
1530 expect(*t
, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1531 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
// Regenerate the rules (escaped form) and report them against the source
// rule string.
1533 t
->toRules(r
, TRUE
);
1535 logln((UnicodeString
)"OK: toRules() => " + r
);
1537 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1538 ", expected " + rule
);
// A compound built from an ID string is expected to produce one ::ID line per
// element from toRules().
1543 t
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
);
1545 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1548 UnicodeString
exp("::Greek-Latin;\n::Latin-Cyrillic;");
1549 t
->toRules(r
, TRUE
);
1551 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1552 ", expected " + exp
);
1554 logln((UnicodeString
)"OK: toRules() => " + r
);
1558 // Round trip the result of toRules
1559 t
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
);
1561 errln("FAIL: createFromRules #2 failed");
1564 logln((UnicodeString
)"OK: createFromRules(" + r
+ ") succeeded");
1567 // Test toRules again
1568 t
->toRules(r
, TRUE
);
1570 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1571 ", expected " + exp
);
1573 logln((UnicodeString
)"OK: toRules() => " + r
);
1578 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1579 // to what the regenerated ID will look like.
1580 UnicodeString
id("Upper(Lower);(NFKC)", "");
1581 t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
1583 errln("FAIL: createInstance #2 failed");
1586 if (t
->getID() == id
) {
1587 logln((UnicodeString
)"OK: created " + id
);
1589 errln((UnicodeString
)"FAIL: createInstance(" + id
+
1590 ").getID() => " + t
->getID());
// The inverse ID is expected to list the elements in reverse order, with each
// element's forward and parenthesized parts exchanged.
1593 Transliterator
*u
= t
->createInverse(status
);
1595 errln("FAIL: createInverse failed");
1599 exp
= "NFKC();Lower(Upper)";
1600 if (u
->getID() == exp
) {
1601 logln((UnicodeString
)"OK: createInverse(" + id
+ ") => " +
1604 errln((UnicodeString
)"FAIL: createInverse(" + id
+ ") => " +
1612 * Compound filter semantics were originally not implemented
1613 * correctly. Originally, each component filter f(i) is replaced by
1614 * f'(i) = f(i) && g, where g is the filter for the compound
1619 * Suppose I have a transliterator X. Internally X is
1620 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1622 * The compound should convert all greek characters (through latin) to
1623 * cyrillic, then lowercase the result. The filter should say "don't
1624 * touch 'A' in the original". But because an intermediate result
1625 * happens to go through "A", the Greek Alpha gets hung up.
1627 void TransliteratorTest::TestCompoundFilter(void) {
// Creates the compound "Greek-Latin; Latin-Greek; Lower", gives it the filter
// [^A], and checks the result on a string that mixes Latin and Greek letters.
1628 UParseError parseError
;
1629 UErrorCode status
= U_ZERO_ERROR
;
1630 Transliterator
*t
= Transliterator::createInstance
1631 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
);
1633 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
// The filter set is allocated here and handed to adoptFilter().
1636 t
->adoptFilter(new UnicodeSet("[^A]", status
));
1637 if (U_FAILURE(status
)) {
1638 errln("FAIL: UnicodeSet ct failed");
1643 // Only the 'A' at index 1 should remain unchanged
1645 CharsToUnicodeString("BA\\u039A\\u0391"),
1646 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1650 void TransliteratorTest::TestRemove(void) {
// Checks that "Remove[abc]" deletes the lowercase letters a, b and c, and that
// a clone() of the transliterator produces the same result.
1651 UParseError parseError
;
1652 UErrorCode status
= U_ZERO_ERROR
;
1653 Transliterator
*t
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
);
1655 errln("FAIL: createInstance failed");
// The capital 'A' is outside the filter and is kept in the expected output.
1659 expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts");
1661 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1662 // duplicating the filter
1663 Transliterator
* t2
= t
->clone();
1664 expect(*t2
, "Able bodied baker's cats", "Ale odied ker's ts");
1670 void TransliteratorTest::TestToRules(void) {
// Table-driven test of Transliterator::toRules() (escaped and unescaped) and
// of UnicodeSet::toPattern().
// The table is read three entries at a time: a kind marker, the source rules
// or pattern, and the expected regenerated text.
1671 const char* RBT
= "rbt";
1672 const char* SET
= "set";
1673 static const char* DATA
[] = {
1675 "$a=\\u4E61; [$a] > A;",
1679 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1680 "[[:Zs:][:Zl:]]{a} > A;",
1707 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1708 "[^[:Zs:]]{a} > A;",
1711 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1712 "[[a-z]-[:Zs:]]{a} > A;",
1715 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1716 "[[:Zs:]&[a-z]]{a} > A;",
1719 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1720 "[x[:Zs:]]{a} > A;",
1723 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1724 "$macron = \\u0304 ;"
1725 "$evowel = [aeiouyAEIOUY] ;"
1726 "$iotasub = \\u0345 ;"
1727 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1728 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1731 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1734 static const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1736 for (int32_t d
=0; d
< DATA_length
; d
+=3) {
// The row kind is detected by pointer identity with RBT, not by comparing
// string contents.
1737 if (DATA
[d
] == RBT
) {
1738 // Transliterator test
1739 UParseError parseError
;
1740 UErrorCode status
= U_ZERO_ERROR
;
1741 Transliterator
*t
= Transliterator::createFromRules("ID",
1742 UnicodeString(DATA
[d
+1], -1, US_INV
), UTRANS_FORWARD
, parseError
, status
);
1744 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status
));
// Generate the rules twice: once unescaped (FALSE) and once escaped (TRUE).
1747 UnicodeString rules
, escapedRules
;
1748 t
->toRules(rules
, FALSE
);
1749 t
->toRules(escapedRules
, TRUE
);
// One table entry supplies both expectations: it is passed through
// CharsToUnicodeString() for the unescaped comparison and taken verbatim for
// the escaped comparison.
1750 UnicodeString expRules
= CharsToUnicodeString(DATA
[d
+2]);
1751 UnicodeString
expEscapedRules(DATA
[d
+2], -1, US_INV
);
1752 if (rules
== expRules
) {
1753 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1756 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1757 " => " + rules
+ ", exp " + expRules
);
1759 if (escapedRules
== expEscapedRules
) {
1760 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1761 " => " + escapedRules
);
1763 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1764 " => " + escapedRules
+ ", exp " + expEscapedRules
);
// UnicodeSet rows: build a set from the pattern and compare its toPattern()
// output with the expected text.
1770 UErrorCode status
= U_ZERO_ERROR
;
1771 UnicodeString
pat(DATA
[d
+1], -1, US_INV
);
1772 UnicodeString
expToPat(DATA
[d
+2], -1, US_INV
);
1773 UnicodeSet
set(pat
, status
);
1774 if (U_FAILURE(status
)) {
1775 errln("FAIL: UnicodeSet ct failed");
1778 // Adjust spacing etc. as necessary.
1779 UnicodeString toPat
;
1780 set
.toPattern(toPat
);
1781 if (expToPat
== toPat
) {
1782 logln((UnicodeString
)"Ok: " + pat
+
// NOTE(review): the "exp" part of this failure message prints pat (the input
// pattern), not expToPat, which is the value compared above -- confirm intent.
1785 errln((UnicodeString
)"FAIL: " + pat
+
1786 " => " + prettify(toPat
, TRUE
) +
1787 ", exp " + prettify(pat
, TRUE
));
1793 void TransliteratorTest::TestContext() {
// Runs rules that use context braces through expect(), together with an
// explicit UTransPosition.
// NOTE(review): "cs cl s l" presumably abbreviates contextStart, contextLimit,
// start and limit -- confirm against the UTransPosition declaration.
1794 UTransPosition pos
= {0, 2, 0, 1}; // cs cl s l
1795 expect("de > x; {d}e > y;",
1800 expect("ab{c} > z;",
1805 void TransliteratorTest::TestSupplemental() {
// Exercises supplementary code points (written as \UXXXXXXXX escapes) in rule
// text and variables, in the Any-Hex variants, and in a filtered Remove.
1807 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1809 CharsToUnicodeString("ab\\U0001030Fx"),
1810 CharsToUnicodeString("\\U00010300bix"));
// Segment references ($1, $2) where each segment may match a supplementary
// character.
1812 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1813 "$b=[A-Z\\U00010400-\\U0001044D];"
1814 "($a)($b) > $2 $1;"),
1815 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1816 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1818 // k|ax\\U00010300xm
1820 // k|a\\U00010400\\U00010300xm
1821 // ky|\\U00010400\\U00010300xm
1822 // ky\\U00010400|\\U00010300xm
1824 // ky\\U00010400|\\U00010300\\U00010400m
1825 // ky\\U00010400y|\\U00010400m
1826 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1827 "$a {x} > | @ \\U00010400;"
1828 "{$a} [^\\u0000-\\uFFFF] > y;"),
1829 CharsToUnicodeString("kax\\U00010300xm"),
1830 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1833 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1834 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
// The same four-character input is run through each Any-Hex variant; the
// expected strings show each variant's escape notation.
1836 expectT("Any-Hex/Unicode",
1837 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1840 expectT("Any-Hex/C",
1841 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1844 expectT("Any-Hex/Perl",
1845 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1848 expectT("Any-Hex/Java",
1849 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1852 expectT("Any-Hex/XML",
1853 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854 "𐌰􏼀󠁡 ");
1856 expectT("Any-Hex/XML10",
1857 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1858 "𐌰􏼀󠁡 ");
// A Remove restricted to U+E0000..U+E0FFF deletes only U+E0061 from the input.
1860 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1861 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1862 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1865 void TransliteratorTest::TestQuantifier() {
// Exercises the rule quantifiers ?, * and + on segments, sets, quoted strings
// and variable references, including their interaction with the @ cursor
// offsets and with explicit UTransPosition limits.
1867 // Make sure @ in a quantified anteContext works
1868 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1872 // Make sure @ in a quantified postContext works
1873 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1877 // Make sure @ in a quantified postContext with seg ref works
1878 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1882 // Make sure @ past ante context doesn't enter ante context
1883 UTransPosition pos
= {0, 5, 3, 5};
1884 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1889 // Make sure @ past post context doesn't pass limit
1890 UTransPosition pos2
= {0, 4, 0, 2};
1891 expect("{b} a+ > c @@ |; x > y; a > A;",
1896 // Make sure @ past post context doesn't enter post context
1897 expect("{b} a+ > c @@ |; x > y; a > A;",
1901 expect("(ab)? c > d;",
1905 // NOTE: The (ab)+ when referenced just yields a single "ab",
1906 // not the full sequence of them. This accords with perl behavior.
1907 expect("(ab)+ {x} > '(' $1 ')';",
1909 "x ab(ab) abab(ab)y");
1912 "ac abc abbc abbbc",
1915 expect("[abc]+ > x;",
1916 "qac abrc abbcs abtbbc",
1919 expect("q{(ab)+} > x;",
1920 "qa qab qaba qababc qaba",
1921 "qa qx qxa qxc qxa");
1923 expect("q(ab)* > x;",
1924 "qa qab qaba qababc",
1927 // NOTE: The (ab)* when referenced just yields a single "ab",
1928 // not the full sequence of them. This accords with perl behavior.
1929 expect("q(ab)* > '(' $1 ')';",
1930 "qa qab qaba qababc",
1931 "()a (ab) (ab)a (ab)c");
1933 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1935 expect("'ab'+ > x;",
1939 // $foo+ and $foo* -- the quantifier should apply to the entire
1940 // variable reference
1941 expect("$var = ab; $var+ > x;",
1946 class TestTrans
: public Transliterator
{
// Minimal Transliterator subclass that TestSTV instantiates for its
// registration checks. It never modifies the text it is given.
// The second base-constructor argument is 0.
// NOTE(review): presumably that means "no filter" -- confirm.
1948 TestTrans(const UnicodeString
& id
) : Transliterator(id
, 0) {
// clone() builds a fresh TestTrans that carries the same ID.
1950 virtual Transliterator
* clone(void) const {
1951 return new TestTrans(getID());
// The text parameter is unused; the only effect is to move offsets.start up to
// offsets.limit.
1953 virtual void handleTransliterate(Replaceable
& /*text*/, UTransPosition
& offsets
,
1954 UBool
/*isIncremental*/) const
1956 offsets
.start
= offsets
.limit
;
1958 virtual UClassID
getDynamicClassID() const;
1959 static UClassID U_EXPORT2
getStaticClassID();
1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans
)
1964 * Test Source-Target/Variant.
1966 void TransliteratorTest::TestSTV(void) {
// Part 1: enumerates every available source, its targets and their variants,
// checking that each count lies in 0..255 and that source and target names
// are not empty.
// Part 2: registers TestTrans instances under made-up IDs, creates and
// unregisters them, then checks that the getAvailable* APIs no longer list
// those IDs.
1967 int32_t ns
= Transliterator::countAvailableSources();
1968 if (ns
< 0 || ns
> 255) {
1969 errln((UnicodeString
)"FAIL: Bad source count: " + ns
);
1973 for (i
=0; i
<ns
; ++i
) {
1974 UnicodeString source
;
1975 Transliterator::getAvailableSource(i
, source
);
1976 logln((UnicodeString
)"" + i
+ ": " + source
);
1977 if (source
.length() == 0) {
1978 errln("FAIL: empty source");
1981 int32_t nt
= Transliterator::countAvailableTargets(source
);
1982 if (nt
< 0 || nt
> 255) {
1983 errln((UnicodeString
)"FAIL: Bad target count: " + nt
);
1986 for (int32_t j
=0; j
<nt
; ++j
) {
1987 UnicodeString target
;
1988 Transliterator::getAvailableTarget(j
, source
, target
);
1989 logln((UnicodeString
)" " + j
+ ": " + target
);
1990 if (target
.length() == 0) {
1991 errln("FAIL: empty target");
1994 int32_t nv
= Transliterator::countAvailableVariants(source
, target
);
1995 if (nv
< 0 || nv
> 255) {
1996 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
);
1999 for (int32_t k
=0; k
<nv
; ++k
) {
2000 UnicodeString variant
;
2001 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
// An empty variant name is only logged, not treated as a failure.
2002 if (variant
.length() == 0) {
2003 logln((UnicodeString
)" " + k
+ ": <empty>");
2005 logln((UnicodeString
)" " + k
+ ": " + variant
);
2011 // Test registration
// IDS holds the IDs as registered; FULL_IDS holds the forms compared against
// getAvailableID() below (the first entry gains an "Any-" source); SOURCES
// holds the source part of each ID, with NULL for the first entry.
2012 const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013 const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014 const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" };
2015 for (i
=0; i
<3; ++i
) {
2016 Transliterator
*t
= new TestTrans(IDS
[i
]);
2018 errln("FAIL: out of memory");
2021 if (t
->getID() != IDS
[i
]) {
2022 errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]);
// Register the instance, then create a transliterator by the same ID.
2026 Transliterator::registerInstance(t
);
2027 UErrorCode status
= U_ZERO_ERROR
;
2028 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2030 errln((UnicodeString
)"FAIL: Registration/creation failed for ID " +
2033 logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " +
// Unregister the ID and try to create it again.
2037 Transliterator::unregister(IDS
[i
]);
2038 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2040 errln((UnicodeString
)"FAIL: Unregistration failed for ID " +
2046 // Make sure getAvailable API reflects removal
2047 int32_t n
= Transliterator::countAvailableIDs();
2048 for (i
=0; i
<n
; ++i
) {
2049 UnicodeString id
= Transliterator::getAvailableID(i
);
2050 for (j
=0; j
<3; ++j
) {
2051 if (id
.caseCompare(FULL_IDS
[j
],0)==0) {
2052 errln((UnicodeString
)"FAIL: unregister(" + id
+ ") failed");
// The first test ID has no explicit source, so look for it among the targets
// of "Any".
2056 n
= Transliterator::countAvailableTargets("Any");
2057 for (i
=0; i
<n
; ++i
) {
2059 Transliterator::getAvailableTarget(i
, "Any", t
);
2060 if (t
.caseCompare(IDS
[0],0)==0) {
2061 errln((UnicodeString
)"FAIL: unregister(Any-" + t
+ ") failed");
// None of the test sources may remain in the list of available sources.
2064 n
= Transliterator::countAvailableSources();
2065 for (i
=0; i
<n
; ++i
) {
2067 Transliterator::getAvailableSource(i
, s
);
2068 for (j
=0; j
<3; ++j
) {
2069 if (SOURCES
[j
] == NULL
) continue;
2070 if (s
.caseCompare(SOURCES
[j
],0)==0) {
2071 errln((UnicodeString
)"FAIL: unregister(" + s
+ "-*) failed");
2078 * Test inverse of Greek-Latin; Title()
2080 void TransliteratorTest::TestCompoundInverse(void) {
// Creates "Greek-Latin; Title()" in the reverse direction and checks that the
// resulting ID is "(Title);Latin-Greek".
2081 UParseError parseError
;
2082 UErrorCode status
= U_ZERO_ERROR
;
2083 Transliterator
*t
= Transliterator::createInstance
2084 ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
);
2086 dataerrln("FAIL: createInstance - %s", u_errorName(status
));
2089 UnicodeString
exp("(Title);Latin-Greek");
2090 if (t
->getID() == exp
) {
2091 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2094 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2095 t
->getID() + "\", expected \"" + exp
+ "\"");
2101 * Test NFD chaining with RBT
2103 void TransliteratorTest::TestNFDChainRBT() {
// Builds a rule-based transliterator whose rules start with ::NFD and checks
// that "aa" maps to "Q". Everything after that check is commented-out
// Latin/Devanagari debugging code.
2105 UErrorCode ec
= U_ZERO_ERROR
;
2106 Transliterator
* t
= Transliterator::createFromRules(
2107 "TEST", "::NFD; aa > Q; a > q;",
2108 UTRANS_FORWARD
, pe
, ec
);
2109 if (t
== NULL
|| U_FAILURE(ec
)) {
2110 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
));
2113 expect(*t
, "aa", "Q");
2116 // TEMPORARY TESTS -- BEING DEBUGGED
2117 //=- UnicodeString s, s2;
2118 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2119 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2121 //=- expect(*t, s, s2);
2124 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2125 //=- expect(*t, s2, s);
2128 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2129 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2130 //=- expect(*t, s, s);
2133 // const char* source[] = {
2135 // "\\u015Br\\u012Bmad",
2136 // "bhagavadg\\u012Bt\\u0101",
2139 // "vi\\u1E63\\u0101da",
2141 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2142 // "uv\\u0101cr\\u0325",
2144 // "rmk\\u1E63\\u0113t",
2145 // //"dharmak\\u1E63\\u0113tr\\u0113",
2147 // "kuruk\\u1E63\\u0113tr\\u0113",
2148 // "samav\\u0113t\\u0101",
2149 // "yuyutsava-\\u1E25",
2150 // "m\\u0101mak\\u0101-\\u1E25",
2151 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2153 // "san\\u0304java",
2158 // const char* expected[] = {
2160 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2161 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2162 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2163 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2164 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2165 // "\\u092f\\u094b\\u0917",
2166 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2167 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2170 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2172 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2174 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2175 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2176 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2177 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2178 // "\\u0938\\u0902\\u091c\\u0935",
2182 // UErrorCode status = U_ZERO_ERROR;
2183 // UParseError parseError;
2184 // UnicodeString message;
2185 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2186 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2187 // if(U_FAILURE(status)){
2188 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2189 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2190 // delete latinToDevToLatin;
2191 // delete devToLatinToDev;
2194 // UnicodeString gotResult;
2195 // for(int i= 0; source[i] != 0; i++){
2196 // gotResult = source[i];
2197 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2198 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2200 // delete latinToDevToLatin;
2201 // delete devToLatinToDev;
2205 * Inverse of "Null" should be "Null". (J21)
2207 void TransliteratorTest::TestNullInverse() {
// Creates the "Null" transliterator, takes its inverse, and checks that the
// inverse's ID is also "Null".
2209 UErrorCode ec
= U_ZERO_ERROR
;
2210 Transliterator
*t
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
);
// Both a null result and a failing error code count as failure.
2211 if (t
== 0 || U_FAILURE(ec
)) {
2212 errln("FAIL: createInstance");
2215 Transliterator
*u
= t
->createInverse(ec
);
2216 if (u
== 0 || U_FAILURE(ec
)) {
2217 errln("FAIL: createInverse");
2221 if (u
->getID() != "Null") {
2222 errln("FAIL: Inverse of Null should be Null");
2229 * Check ID of inverse of alias. (J22)
2231 void TransliteratorTest::TestAliasInverseID() {
// Creates "Latin-Hangul", takes its inverse, and reports a failure naming
// "Hangul-Latin" as the expected inverse ID.
2232 UnicodeString
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2234 UErrorCode ec
= U_ZERO_ERROR
;
2235 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
// Creation failure is reported through dataerrln().
2236 if (t
== 0 || U_FAILURE(ec
)) {
2237 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2240 Transliterator
*u
= t
->createInverse(ec
);
2241 if (u
== 0 || U_FAILURE(ec
)) {
2242 errln("FAIL: createInverse");
2246 UnicodeString exp
= "Hangul-Latin";
2247 UnicodeString got
= u
->getID();
2249 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2250 ", expected " + exp
);
2257 * Test IDs of inverses of compound transliterators. (J20)
2259 void TransliteratorTest::TestCompoundInverseID() {
// Creates the compound "Latin-Jamo;NFC(NFD)", takes its inverse, and reports a
// failure naming "NFD(NFC);Jamo-Latin" as the expected inverse ID.
2260 UnicodeString ID
= "Latin-Jamo;NFC(NFD)";
2262 UErrorCode ec
= U_ZERO_ERROR
;
2263 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2264 if (t
== 0 || U_FAILURE(ec
)) {
2265 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2268 Transliterator
*u
= t
->createInverse(ec
);
2269 if (u
== 0 || U_FAILURE(ec
)) {
2270 errln("FAIL: createInverse");
// In the expected ID the elements are in reverse order and the forward and
// parenthesized parts of the NFC(NFD) element are exchanged.
2274 UnicodeString exp
= "NFD(NFC);Jamo-Latin";
2275 UnicodeString got
= u
->getID();
2277 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2278 ", expected " + exp
);
2285 * Test undefined variable.
2288 void TransliteratorTest::TestUndefinedVariable() {
// Compiles a rule that references $initial without defining it anywhere in
// the rule text, and expects createFromRules() to report an error.
2289 UnicodeString rule
= "$initial } a <> \\u1161;";
2291 UErrorCode ec
= U_ZERO_ERROR
;
2292 Transliterator
*t
= Transliterator::createFromRules("<ID>", rule
, UTRANS_FORWARD
, pe
, ec
);
// A failing error code is the passing outcome for this test.
2294 if (U_FAILURE(ec
)) {
2295 logln((UnicodeString
)"OK: Got exception for " + rule
+ ", as expected: " +
2299 errln((UnicodeString
)"Fail: bogus rule " + rule
+ " compiled with error " +
2304 * Test empty context.
2306 void TransliteratorTest::TestEmptyContext() {
// The braces enclose the whole key 'a', leaving nothing outside them as
// context; every 'a' in the input is expected to become 'b'.
2307 expect(" { a } > b;", "xay a ", "xby b ");
2311 * Test compound filter ID syntax
2313 void TransliteratorTest::TestCompoundFilterID(void) {
// Table-driven test of compound IDs and rule sets that carry global filters,
// in both directions. Rows with NULL in columns 2-4 are expected to be
// rejected.
2314 static const char* DATA
[] = {
2315 // Col. 1 = ID or rule set (latter must start with #)
2317 // = columns > 1 are null if expect col. 1 to be illegal =
2319 // Col. 2 = direction, "F..." or "R..."
2320 // Col. 3 = source string
2321 // Col. 4 = exp result
2323 "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters
2324 "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter
2325 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2326 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2328 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
// Walk the table four entries at a time until a null first column is reached.
2332 for (int32_t i
=0; DATA
[i
]; i
+=4) {
2333 UnicodeString id
= CharsToUnicodeString(DATA
[i
]);
// Only a direction entry starting with 'R' selects reverse; NULL or anything
// else means forward.
2334 UTransDirection direction
= (DATA
[i
+1] != NULL
&& DATA
[i
+1][0] == 'R') ?
2335 UTRANS_REVERSE
: UTRANS_FORWARD
;
2336 UnicodeString source
;
2338 if (DATA
[i
+2] != NULL
) {
2339 source
= CharsToUnicodeString(DATA
[i
+2]);
2340 exp
= CharsToUnicodeString(DATA
[i
+3]);
// A row is expected to succeed exactly when its direction column is non-null.
2342 UBool expOk
= (DATA
[i
+1] != NULL
);
2343 Transliterator
* t
= NULL
;
2345 UErrorCode ec
= U_ZERO_ERROR
;
// A leading '#' marks column 1 as a rule set rather than an ID.
2346 if (id
.charAt(0) == 0x23/*#*/) {
2347 t
= Transliterator::createFromRules("ID", id
, direction
, pe
, ec
);
2349 t
= Transliterator::createInstance(id
, direction
, pe
, ec
);
2351 UBool ok
= (t
!= NULL
&& U_SUCCESS(ec
));
2352 UnicodeString transID
;
2354 transID
= t
->getID();
2357 transID
= UnicodeString("NULL", "");
2360 logln((UnicodeString
)"Ok: " + id
+ " => " + transID
+ ", " +
// Rows that supply a source string are also checked for their output.
2362 if (source
.length() != 0) {
2363 expect(*t
, source
, exp
);
2367 dataerrln((UnicodeString
)"FAIL: " + id
+ " => " + transID
+ ", " +
2374 * Test new property set syntax
2376 void TransliteratorTest::TestPropertySet() {
// Checks \p{...} property sets inside rules, and a rule using '.', whose
// expected output leaves the \n and \r characters outside the brackets.
2377 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2383 * Test various failure points of the new 2.0 engine.
// Exercises several transliterator engine paths:
//  - creates "Latin-Hiragana" and checks that the Katakana character in the
//    input is left untouched;
//  - builds the rule-based transliterators "a_to_A" and "A_to_b" plus an "NFD"
//    instance, wraps three elements in a CompoundTransliterator filtered by
//    [:Ll:], expects "aAaA" => "bAbA", and checks countElements() and the ID
//    returned by getElement() for each index;
//  - runs two further rule sets through expect().
// NOTE(review): this listing is incomplete (declarations, early returns,
// deletes and some string arguments are not visible), so ownership and the
// remaining expected values cannot be confirmed from here.
2385 void TransliteratorTest::TestNewEngine() {
2387 UErrorCode ec
= U_ZERO_ERROR
;
2388 Transliterator
*t
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
);
2389 if (t
== 0 || U_FAILURE(ec
)) {
2390 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec
));
2393 // Katakana should be untouched
2394 expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"),
2395 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2400 // This test will only work if Transliterator.ROLLBACK is
2401 // true. Otherwise, this test will fail, revealing a
2402 // limitation of global filters in incremental mode.
2404 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
);
2406 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
);
2407 if (U_FAILURE(ec
)) {
2413 Transliterator
* array
[3];
2415 array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
);
2417 if (U_FAILURE(ec
)) {
2418 errln("FAIL: createInstance NFD");
2425 t
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
));
2426 if (U_FAILURE(ec
)) {
2427 errln("FAIL: UnicodeSet constructor");
2435 expect(*t
, "aAaA", "bAbA");
2437 assertTrue("countElements", t
->countElements() == 3);
2438 assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A");
2439 assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD");
2440 assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b");
2441 assertSuccess("getElement", ec
);
2449 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2453 UnicodeString gr
= CharsToUnicodeString(
2455 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456 "$rough = \\u0314 ;"
2457 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2461 expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2465 * Test quantified segment behavior. We want:
2466 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2470 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2472 // The tricky case; the quantifier is around the segment
2473 expect("([abc])+ > x $1 x;", "cba", "xax");
2475 // Tricky case in reverse direction
2476 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2478 // Check post-context segment
2479 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2481 // Test toRule/toPattern for non-quantified segment.
2482 // Careful with spacing here.
2483 UnicodeString
r("([a-c]){q} > x $1 x;");
2485 UErrorCode ec
= U_ZERO_ERROR
;
2486 Transliterator
* t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2487 if (U_FAILURE(ec
)) {
2488 errln("FAIL: createFromRules");
2493 t
->toRules(rr
, TRUE
);
2495 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2497 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2501 // Test toRule/toPattern for quantified segment.
2502 // Careful with spacing here.
2503 r
= "([a-c])+{q} > x $1 x;";
2504 t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2505 if (U_FAILURE(ec
)) {
2506 errln("FAIL: createFromRules");
2510 t
->toRules(rr
, TRUE
);
2512 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2514 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2519 //======================================================================
2521 //======================================================================
// Round-trip test for the Latin <-> Devanagari transliterators.
// For each index i below MAX_LEN, source[i] is run through "Latin-Devanagari"
// and compared with expected[i]; expected[i] is then run through
// "Devanagari-Latin" and compared with source[i].  Construction failures are
// reported through dataerrln() together with the parse-error context.
// NOTE(review): this listing is incomplete -- most source[] rows and the
// closing lines of the tables and of the function are not visible here, so
// the table sizes cannot be checked against MAX_LEN from this view.
2522 void TransliteratorTest::TestDevanagariLatinRT(){
2523 const int MAX_LEN
= 52;
2524 const char* const source
[MAX_LEN
] = {
2539 //"r\\u0323ya", // \u095c is not valid in Devanagari
2565 "\\u1E6Dh\\u1E6Dha",
2572 // Not roundtrippable --
2573 // \\u0939\\u094d\\u094d\\u092E - hma
2574 // \\u0939\\u094d\\u092E - hma
2575 // CharsToUnicodeString("hma"),
2580 "san\\u0304j\\u012Bb s\\u0113nagupta",
2581 "\\u0101nand vaddir\\u0101ju",
2585 const char* const expected
[MAX_LEN
] = {
2586 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2587 "\\u0915\\u094D\\u0930", /* kra */
2588 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2589 "\\u0916\\u094D\\u0930", /* khra */
2590 "\\u0917\\u094D\\u0930", /* gra */
2591 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2592 "\\u091A\\u094D\\u0930", /* cra */
2593 "\\u091B\\u094D\\u0930", /* chra */
2594 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2595 "\\u091D\\u094D\\u0930", /* jhra */
2596 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2597 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2598 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2599 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2600 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2601 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2602 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2603 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2604 "\\u0924\\u094D\\u0924", /* tta */
2605 "\\u0925\\u094D\\u0930", /* thra */
2606 "\\u0926\\u094D\\u0926", /* dda */
2607 "\\u0927\\u094D\\u0930", /* dhra */
2608 "\\u0928\\u094D\\u0928", /* nna */
2609 "\\u092A\\u094D\\u0930", /* pra */
2610 "\\u092B\\u094D\\u0930", /* phra */
2611 "\\u092C\\u094D\\u0930", /* bra */
2612 "\\u092D\\u094D\\u0930", /* bhra */
2613 "\\u092E\\u094D\\u0930", /* mra */
2614 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2615 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2616 "\\u092F\\u094D\\u0930", /* yra */
2617 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2619 "\\u0935\\u094D\\u0930", /* vra */
2620 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2621 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2622 "\\u0938\\u094D\\u0930", /* sra */
2623 "\\u0939\\u094d\\u092E", /* hma */
2624 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2625 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2626 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2627 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2628 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2629 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2630 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2631 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2632 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2634 "\\u0939\\u094D\\u092F", /* hya */
2635 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2636 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2637 "\\u090d", /* e\\u0306 */
2638 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2643 UErrorCode status
= U_ZERO_ERROR
;
2644 UParseError parseError
;
2645 UnicodeString message
;
2646 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2647 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2648 if(U_FAILURE(status
)){
2649 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2650 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2653 UnicodeString gotResult
;
2654 for(int i
= 0; i
<MAX_LEN
; i
++){
2655 gotResult
= source
[i
];
2656 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2657 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2663 void TransliteratorTest::TestTeluguLatinRT(){
2664 const int MAX_LEN
=10;
2665 const char* const source
[MAX_LEN
] = {
2666 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2667 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2668 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2669 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2670 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2671 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2672 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2673 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2674 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2675 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2678 const char* const expected
[MAX_LEN
] = {
2679 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2691 UErrorCode status
= U_ZERO_ERROR
;
2692 UParseError parseError
;
2693 UnicodeString message
;
2694 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
);
2695 Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2696 if(U_FAILURE(status
)){
2697 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2698 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2701 UnicodeString gotResult
;
2702 for(int i
= 0; i
<MAX_LEN
; i
++){
2703 gotResult
= source
[i
];
2704 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2705 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Round-trip test using Sanskrit words with the Latin <-> Devanagari
// transliterators.  For each index i below MAX_LEN, source[i] is run through
// "Latin-Devanagari" and compared with expected[i]; expected[i] is then run
// through "Devanagari-Latin" and compared with source[i].
// NOTE(review): this listing is incomplete -- several source[] rows and the
// closing lines of the tables and of the function are not visible here.
2711 void TransliteratorTest::TestSanskritLatinRT(){
2712 const int MAX_LEN
=16;
2713 const char* const source
[MAX_LEN
] = {
2714 "rmk\\u1E63\\u0113t",
2715 "\\u015Br\\u012Bmad",
2716 "bhagavadg\\u012Bt\\u0101",
2719 "vi\\u1E63\\u0101da",
2721 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722 "uv\\u0101cr\\u0325",
2723 "dharmak\\u1E63\\u0113tr\\u0113",
2724 "kuruk\\u1E63\\u0113tr\\u0113",
2725 "samav\\u0113t\\u0101",
2727 "m\\u0101mak\\u0101\\u1E25",
2728 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2732 const char* const expected
[MAX_LEN
] = {
2733 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739 "\\u092f\\u094b\\u0917",
2740 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749 "\\u0938\\u0902\\u091c\\u0935",
2751 UErrorCode status
= U_ZERO_ERROR
;
2752 UParseError parseError
;
2753 UnicodeString message
;
2754 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2755 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2756 if(U_FAILURE(status
)){
2757 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2758 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2761 UnicodeString gotResult
;
2762 for(int i
= 0; i
<MAX_LEN
; i
++){
2763 gotResult
= source
[i
];
2764 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2765 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Round-trip test through compound IDs.  MAX_LEN is derived from source[]
// with UPRV_LENGTHOF and compared against the length of expected[].  In the
// loop, "Devanagari-Latin;Latin-Devanagari" must map expected[i] to itself,
// and both "Latin-Devanagari;Devanagari-Latin" and "Latin-Telugu;Telugu-Latin"
// must map source[i] to itself.  All four transliterators are deleted at the
// end.
// NOTE(review): devToTelToDev is created and deleted, but no use of it is
// visible in this listing.  The listing is also incomplete (several source[]
// rows and closing lines are not visible here).
2772 void TransliteratorTest::TestCompoundLatinRT(){
2773 const char* const source
[] = {
2774 "rmk\\u1E63\\u0113t",
2775 "\\u015Br\\u012Bmad",
2776 "bhagavadg\\u012Bt\\u0101",
2779 "vi\\u1E63\\u0101da",
2781 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782 "uv\\u0101cr\\u0325",
2783 "dharmak\\u1E63\\u0113tr\\u0113",
2784 "kuruk\\u1E63\\u0113tr\\u0113",
2785 "samav\\u0113t\\u0101",
2787 "m\\u0101mak\\u0101\\u1E25",
2788 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2792 const int MAX_LEN
= UPRV_LENGTHOF(source
);
2793 const char* const expected
[MAX_LEN
] = {
2794 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800 "\\u092f\\u094b\\u0917",
2801 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810 "\\u0938\\u0902\\u091c\\u0935"
2812 if(MAX_LEN
!= UPRV_LENGTHOF(expected
)) {
2813 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2817 UErrorCode status
= U_ZERO_ERROR
;
2818 UParseError parseError
;
2819 UnicodeString message
;
2820 Transliterator
* devToLatinToDev
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2821 Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2822 Transliterator
* devToTelToDev
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2823 Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2825 if(U_FAILURE(status
)){
2826 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2827 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2830 UnicodeString gotResult
;
2831 for(int i
= 0; i
<MAX_LEN
; i
++){
2832 gotResult
= source
[i
];
2833 expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
]));
2834 expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2835 expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2838 delete(latinToDevToLatin
);
2839 delete(devToLatinToDev
);
2840 delete(devToTelToDev
);
2841 delete(latinToTelToLatin
);
2845 * Test Gurmukhi-Devanagari Tippi and Bindi
2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2849 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2850 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2851 UErrorCode status
= U_ZERO_ERROR
;
2852 UnicodeSet
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV
).unescape(), status
);
2853 UnicodeSet
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV
).unescape(), status
);
2854 UParseError parseError
;
2856 UnicodeSetIterator
vIter(vowel
);
2857 UnicodeSetIterator
nvIter(non_vowel
);
2858 Transliterator
* trans
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
);
2859 if(U_FAILURE(status
)) {
2860 dataerrln("Error creating transliterator %s", u_errorName(status
));
2864 UnicodeString
src (" \\u0902", -1, US_INV
);
2865 UnicodeString
expected(" \\u0A02", -1, US_INV
);
2866 src
= src
.unescape();
2867 expected
= expected
.unescape();
2869 while(vIter
.next()){
2870 src
.setCharAt(0,(UChar
) vIter
.getCodepoint());
2871 expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100));
2872 expect(*trans
,src
,expected
);
2875 expected
.setCharAt(1,0x0A70);
2876 while(nvIter
.next()){
2877 //src.setCharAt(0,(char) nvIter.codepoint);
2878 src
.setCharAt(0,(UChar
)nvIter
.getCodepoint());
2879 expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100));
2880 expect(*trans
,src
,expected
);
2885 * Test instantiation from a locale.
2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2889 UErrorCode ec
= U_ZERO_ERROR
;
2890 Transliterator
*t
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
);
2891 if (U_FAILURE(ec
)) {
2892 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec
));
2896 expect(*t
, CharsToUnicodeString("\\u0430"), "a");
2899 t
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
);
2900 if (U_FAILURE(ec
)) {
2901 errln("FAIL: createInstance(en-el)");
2905 expect(*t
, "a", CharsToUnicodeString("\\u03B1"));
2910 * Test title case handling of accent (should ignore accents)
2912 void TransliteratorTest::TestTitleAccents(void) {
2914 UErrorCode ec
= U_ZERO_ERROR
;
2915 Transliterator
*t
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
);
2916 if (U_FAILURE(ec
)) {
2917 errln("FAIL: createInstance(Title)");
2921 expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2926 * Basic test of a locale resource based rule.
2928 void TransliteratorTest::TestLocaleResource() {
2929 const char* DATA
[] = {
2931 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2932 "Latin-el", "b", "\\u03bc\\u03c0",
2933 "Latin-Greek", "b", "\\u03B2",
2934 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2935 "el-Latin", "\\u03B2", "v",
2936 "Greek-Latin", "\\u03B2", "b",
2938 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
2939 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
2941 UErrorCode ec
= U_ZERO_ERROR
;
2942 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
);
2943 if (U_FAILURE(ec
)) {
2944 dataerrln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ") - " + u_errorName(ec
));
2948 expect(*t
, CharsToUnicodeString(DATA
[i
+1]),
2949 CharsToUnicodeString(DATA
[i
+2]));
2955 * Make sure parse errors reference the right line.
// Checks the context reported with rule parse errors.
//  - createFromRules() on `rule` is expected to fail, and the reported
//    preContext + '|' + postContext must contain "d << b".
//  - createFromRules() on `maskingRule` is expected to set U_RULE_MASK_ERROR
//    with preContext "a > x;" and postContext "ab > y;".
// NOTE(review): the text of both rule strings and several structural lines
// are not visible in this listing.
2957 void TransliteratorTest::TestParseError() {
2958 static const char* rule
=
2962 UErrorCode ec
= U_ZERO_ERROR
;
2964 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2966 if (U_FAILURE(ec
)) {
2967 UnicodeString
err(pe
.preContext
);
2968 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2969 if (err
.indexOf("d << b") >= 0) {
2970 logln("Ok: " + err
);
2972 errln("FAIL: " + err
);
2976 errln("FAIL: no syntax error");
2978 static const char* maskingRule
=
2983 delete Transliterator::createFromRules("ID", maskingRule
, UTRANS_FORWARD
, pe
, ec
);
2984 if (ec
!= U_RULE_MASK_ERROR
) {
2985 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec
));
2987 else if (UnicodeString("a > x;") != UnicodeString(pe
.preContext
)) {
2988 errln("FAIL: did not get expected precontext");
2990 else if (UnicodeString("ab > y;") != UnicodeString(pe
.postContext
)) {
2991 errln("FAIL: did not get expected postcontext");
2996 * Make sure sets on output are disallowed.
2998 void TransliteratorTest::TestOutputSet() {
2999 UnicodeString rule
= "$set = [a-cm-n]; b > $set;";
3000 UErrorCode ec
= U_ZERO_ERROR
;
3002 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3004 if (U_FAILURE(ec
)) {
3005 UnicodeString
err(pe
.preContext
);
3006 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3007 logln("Ok: " + err
);
3010 errln("FAIL: No syntax error");
3014 * Test the use variable range pragma, making sure that use of
3015 * variable range characters is detected and flagged as an error.
3017 void TransliteratorTest::TestVariableRange() {
3018 UnicodeString rule
= "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019 UErrorCode ec
= U_ZERO_ERROR
;
3021 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3023 if (U_FAILURE(ec
)) {
3024 UnicodeString
err(pe
.preContext
);
3025 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3026 logln("Ok: " + err
);
3029 errln("FAIL: No syntax error");
3033 * Test invalid post context error handling
3035 void TransliteratorTest::TestInvalidPostContext() {
3036 UnicodeString rule
= "a}b{c>d;";
3037 UErrorCode ec
= U_ZERO_ERROR
;
3039 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3041 if (U_FAILURE(ec
)) {
3042 UnicodeString
err(pe
.preContext
);
3043 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3044 if (err
.indexOf("a}b{c") >= 0) {
3045 logln("Ok: " + err
);
3047 errln("FAIL: " + err
);
3051 errln("FAIL: No syntax error");
3055 * Test ID form variants
// Tests variant spellings of transliterator IDs.
// DATA is read three entries at a time as (ID, expID, expInvID): the ID
// handed to createInstance(), the ID the resulting transliterator should
// report, and the ID its inverse should report.  A NULL expInvID makes
// expValid false for that row.
// NOTE(review): this listing is incomplete -- some DATA rows, the body of the
// `expID == NULL` branch, the declaration of `t` and the handling of
// expValid are not visible here.
3057 void TransliteratorTest::TestIDForms() {
3058 const char* DATA
[] = {
3060 "nfd", NULL
, "NFC", // make sure case is ignored
3061 "Any-NFKD", NULL
, "Any-NFKC",
3062 "Null", NULL
, "Null",
3063 "-nfkc", "nfkc", "NFKD",
3064 "-nfkc/", "nfkc", "NFKD",
3065 "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN",
3066 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068 "Source-", NULL
, NULL
,
3069 "Source/Variant-", NULL
, NULL
,
3070 "Source-/Variant", NULL
, NULL
,
3071 "/Variant", NULL
, NULL
,
3072 "/Variant-", NULL
, NULL
,
3073 "-/Variant", NULL
, NULL
,
3078 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
3080 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3081 const char* ID
= DATA
[i
];
3082 const char* expID
= DATA
[i
+1];
3083 const char* expInvID
= DATA
[i
+2];
3084 UBool expValid
= (expInvID
!= NULL
);
3085 if (expID
== NULL
) {
3089 UErrorCode ec
= U_ZERO_ERROR
;
3091 Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
3092 if (U_FAILURE(ec
)) {
3094 logln((UnicodeString
)"Ok: getInstance(" + ID
+") => " + u_errorName(ec
));
3096 dataerrln((UnicodeString
)"FAIL: Couldn't create " + ID
+ " - " + u_errorName(ec
));
3101 Transliterator
*u
= t
->createInverse(ec
);
3102 if (U_FAILURE(ec
)) {
3103 errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
);
3108 if (t
->getID() == expID
&&
3109 u
->getID() == expInvID
) {
3110 logln((UnicodeString
)"Ok: " + ID
+ ".getInverse() => " + expInvID
);
3112 errln((UnicodeString
)"FAIL: getInstance(" + ID
+ ") => " +
3113 t
->getID() + " x getInverse() => " + u
->getID() +
3114 ", expected " + expInvID
);
3121 static const UChar SPACE
[] = {32,0};
3122 static const UChar NEWLINE
[] = {10,0};
3123 static const UChar RETURN
[] = {13,0};
3124 static const UChar EMPTY
[] = {0};
3126 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
,
3127 const UnicodeString
& testRulesForward
) {
3128 UnicodeString rules2
; t2
.toRules(rules2
, TRUE
);
3129 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130 rules2
.findAndReplace(SPACE
, EMPTY
);
3131 rules2
.findAndReplace(NEWLINE
, EMPTY
);
3132 rules2
.findAndReplace(RETURN
, EMPTY
);
3134 UnicodeString
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
);
3136 if (rules2
!= testRules
) {
3138 logln((UnicodeString
)"GENERATED RULES: " + rules2
);
3139 logln((UnicodeString
)"SHOULD BE: " + testRulesForward
);
3144 * Mark's toRules test.
// Builds a forward ("source-target") and a reverse ("target-source")
// transliterator from the same testRules text, checks that a-acute maps to
// alpha-acute and back, and then uses checkRules() to compare each
// transliterator's generated rules with testRulesForward / testRulesBackward.
// NOTE(review): most lines of the three rule strings, and the structural
// lines around the failure branch, are not visible in this listing.
3146 void TransliteratorTest::TestToRulesMark() {
3147 const char* testRules
=
3148 "::[[:Latin:][:Mark:]];"
3151 "a <> \\u03B1;" // alpha
3155 "::([[:Greek:][:Mark:]]);"
3157 const char* testRulesForward
=
3158 "::[[:Latin:][:Mark:]];"
3166 const char* testRulesBackward
=
3167 "::[[:Greek:][:Mark:]];"
3174 UnicodeString source
= CharsToUnicodeString("\\u00E1"); // a-acute
3175 UnicodeString target
= CharsToUnicodeString("\\u03AC"); // alpha-acute
3178 UErrorCode ec
= U_ZERO_ERROR
;
3179 Transliterator
*t2
= Transliterator::createFromRules("source-target", UnicodeString(testRules
, -1, US_INV
), UTRANS_FORWARD
, pe
, ec
);
3180 Transliterator
*t3
= Transliterator::createFromRules("target-source", UnicodeString(testRules
, -1, US_INV
), UTRANS_REVERSE
, pe
, ec
);
3182 if (U_FAILURE(ec
)) {
3185 dataerrln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
));
3189 expect(*t2
, source
, target
);
3190 expect(*t3
, target
, source
);
3192 checkRules("Failed toRules FORWARD", *t2
, UnicodeString(testRulesForward
, -1, US_INV
));
3193 checkRules("Failed toRules BACKWARD", *t3
, UnicodeString(testRulesBackward
, -1, US_INV
));
3200 * Test Escape and Unescape transliterators.
// Creates the "Hex-Any", "Any-Hex/C", "Any-Hex/Java" and "Any-Hex/Perl"
// transliterators in turn and reports "FAIL: createInstance" when creation
// fails.  The visible string pairs show the escaped form expected for
// "A", U+10BEEF and U+FEED under the C, Java and Perl variants.
// NOTE(review): the declarations, the expect() call lines, the expected
// result for the Hex-Any case and the cleanup are not visible in this listing.
3202 void TransliteratorTest::TestEscape() {
3208 t
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
);
3209 if (U_FAILURE(ec
)) {
3210 errln((UnicodeString
)"FAIL: createInstance");
3213 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
3219 t
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
);
3220 if (U_FAILURE(ec
)) {
3221 errln((UnicodeString
)"FAIL: createInstance");
3224 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3225 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3230 t
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
);
3231 if (U_FAILURE(ec
)) {
3232 errln((UnicodeString
)"FAIL: createInstance");
3235 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3236 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3241 t
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
);
3242 if (U_FAILURE(ec
)) {
3243 errln((UnicodeString
)"FAIL: createInstance");
3246 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3247 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3253 void TransliteratorTest::TestAnchorMasking(){
3254 UnicodeString
rule ("^a > Q; a > q;");
3255 UErrorCode status
= U_ZERO_ERROR
;
3256 UParseError parseError
;
3258 Transliterator
* t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
);
3259 if(U_FAILURE(status
)){
3260 errln(UnicodeString("FAIL: ") + "ID" +
3261 ".createFromRules() => bad rules" +
3262 /*", parse error " + parseError.code +*/
3263 ", line " + parseError
.line
+
3264 ", offset " + parseError
.offset
+
3265 ", context " + prettify(parseError
.preContext
, TRUE
) +
3266 ", rules: " + prettify(rule
, TRUE
));
3272 * Make sure display names of variants look reasonable.
3274 void TransliteratorTest::TestDisplayName() {
3275 #if UCONFIG_NO_FORMATTING
3276 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3279 static const char* DATA
[] = {
3280 // ID, forward name, reverse name
3281 // Update the text as necessary -- the important thing is
3282 // not the text itself, but how various cases are handled.
3285 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3288 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3291 "NFC", "Any to NFC", "Any to NFD",
3294 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
3296 Locale
US("en", "US");
3298 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3300 Transliterator::getDisplayName(DATA
[i
], US
, name
);
3301 if (name
!= DATA
[i
+1]) {
3302 dataerrln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " +
3303 name
+ ", expected " + DATA
[i
+1]);
3305 logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
);
3307 UErrorCode ec
= U_ZERO_ERROR
;
3309 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
);
3310 if (U_FAILURE(ec
)) {
3312 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec
));
3315 name
= Transliterator::getDisplayName(t
->getID(), US
, name
);
3316 if (name
!= DATA
[i
+2]) {
3317 dataerrln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " +
3318 name
+ ", expected " + DATA
[i
+2]);
3320 logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
);
3327 void TransliteratorTest::TestSpecialCases(void) {
3328 const UnicodeString registerRules
[] = {
3329 "Any-Dev1", "x > X; y > Y;",
3330 "Any-Dev2", "XY > Z",
3332 CharsToUnicodeString
3333 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3337 const UnicodeString testCases
[] = {
3339 // should add more test cases
3340 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3346 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3349 // check for devanagari bug
3350 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3352 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3353 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3354 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE
+ DESERET_dee
,
3356 //TODO: enable this test once Titlecase works right
3358 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3361 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3362 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE
+ DESERET_DEE
,
3363 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3364 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee
+ DESERET_dee
,
3366 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3367 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3370 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3371 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3372 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3373 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3374 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3376 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3377 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3379 // Upper: TAT\\u02B9\\u00C2NA
3380 // Lower: tat\\u02B9\\u00E2na
3381 // Title: Tat\\u02B9\\u00E2na
3382 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3385 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3387 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3394 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3395 UErrorCode status
= U_ZERO_ERROR
;
3397 Transliterator
*t
= Transliterator::createFromRules(registerRules
[0+i
],
3398 registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
);
3399 if (U_FAILURE(status
)) {
3400 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status
));
3402 Transliterator::registerInstance(t
);
3405 for (i
= 0; testCases
[i
].length()!=0; i
+=3) {
3406 UErrorCode ec
= U_ZERO_ERROR
;
3408 const UnicodeString
& name
= testCases
[i
];
3409 Transliterator
*t
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
);
3410 if (U_FAILURE(ec
)) {
3411 dataerrln((UnicodeString
)"FAIL: Couldn't create " + name
+ " - " + u_errorName(ec
));
3415 const UnicodeString
& id
= t
->getID();
3416 const UnicodeString
& source
= testCases
[i
+1];
3417 UnicodeString target
;
3419 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3421 if (testCases
[i
+2].length() > 0) {
3422 target
= testCases
[i
+2];
3423 } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) {
3424 Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
);
3425 } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) {
3426 Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
);
3427 } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) {
3428 Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
);
3429 } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) {
3430 Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
);
3431 } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) {
3433 target
.toLower(Locale::getUS());
3434 } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) {
3436 target
.toUpper(Locale::getUS());
3438 if (U_FAILURE(ec
)) {
3439 errln((UnicodeString
)"FAIL: Internal error normalizing " + source
);
3443 expect(*t
, source
, target
);
3446 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3447 Transliterator::unregister(registerRules
[i
]);
// Formats code point `ch` into `buffer` with sprintf, as either a backslash-u
// escape padded to at least 4 hex digits or a backslash-U escape padded to at
// least 8 hex digits. The longest output is 10 characters plus the
// terminating NUL, so `buffer` must have room for at least 11 chars.
// NOTE(review): the condition that selects between the two formats and the
// return statement are not visible here; presumably the short form is used
// for BMP code points and `buffer` is returned -- confirm.
3451 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) {
3453 sprintf(buffer
, "\\u%04x", (int)ch
);
3455 sprintf(buffer
, "\\U%08x", (int)ch
);
// Verifies that the low-level casing APIs handle supplementary (surrogate
// pair) characters, using the Deseret letters DESERET_dee / DESERET_DEE:
//   1. u_totitle() of the code point read from DESERET_dee must equal
//      DESERET_DEE;
//   2. u_strToUpper() of dee+DEE must equal DEE+DEE;
//   3. u_strToLower() of dee+DEE must equal dee+dee.
// Mismatches are reported through err()/errln().
3460 void TransliteratorTest::TestSurrogateCasing (void) {
3461 // check that casing handles surrogates
3462 // titlecase is currently defective
// Read the code point at index 0 of DESERET_dee into `dee`.
3466 U16_GET(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
);
3467 UnicodeString
DEE(u_totitle(dee
));
3468 if (DEE
!= DESERET_DEE
) {
3469 err("Fails titlecase of surrogates");
3470 err(Char32ToEscapedChars(dee
, buffer
));
3472 errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
));
// Mixed-case, all-lowercase and all-uppercase two-letter strings.
3475 UnicodeString deeDEETest
=DESERET_dee
+ DESERET_DEE
;
3476 UnicodeString deedeeTest
= DESERET_dee
+ DESERET_dee
;
3477 UnicodeString DEEDEETest
= DESERET_DEE
+ DESERET_DEE
;
3478 UErrorCode status
= U_ZERO_ERROR
;
// Uppercase into buffer2 (capacity passed as 20) with a NULL locale.
3480 u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3481 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) {
3482 errln("Fails: Can't uppercase surrogates.");
// Reset the status so the lowercase check is independent of the one above.
3485 status
= U_ZERO_ERROR
;
3486 u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3487 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) {
3488 errln("Fails: Can't lowercase surrogates.");
// File-local helper: applies transliterator `t` to `result` in place via
// Transliterator::transliterate().
// NOTE(review): the statement that seeds `result` is not visible here;
// presumably it is assigned from `src` first -- confirm.
3492 static void _trans(Transliterator
& t
, const UnicodeString
& src
,
3493 UnicodeString
& result
) {
3495 t
.transliterate(result
);
// File-local helper: creates a forward transliterator for `id` and, if
// creation succeeded, delegates to the Transliterator& overload of _trans()
// to transliterate `src` into `result`.
// NOTE(review): `ec` is taken by value, so a failure set by createInstance()
// is not propagated to the caller's error code -- confirm whether callers
// that test U_FAILURE(ec) afterwards rely on it.
3498 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
,
3499 UnicodeString
& result
, UErrorCode ec
) {
3501 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3502 if (U_SUCCESS(ec
)) {
3503 _trans(*t
, src
, result
);
// File-local helper: walks `pairs` two entries at a time, stopping at the
// first entry with zero length, and compares `source` against pairs[i]
// case-insensitively (U_FOLD_CASE_DEFAULT).
// NOTE(review): the return statements are not visible here; presumably the
// paired entry pairs[i+1] is returned on a match and `empty` otherwise --
// confirm.
3508 static UnicodeString
_findMatch(const UnicodeString
& source
,
3509 const UnicodeString
* pairs
) {
3510 UnicodeString empty
;
3511 for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) {
3512 if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) {
3519 // Check to see that incremental gets at least part way through a reasonable string.
// Outline of what the visible code does:
//   - builds Devanagari and Katakana sample text by transliterating a Latin
//     sentence with "Latin-Devanagari" and "Latin-Katakana";
//   - enumerates every registered source/target/variant combination;
//   - for each source that has sample text in `tests`, creates the
//     transliterator and passes it to CheckIncrementalAux();
//   - then creates the inverse and checks it the same way on the forward
//     output, tolerating a small list of known forward-only IDs.
3521 void TransliteratorTest::TestIncrementalProgress(void) {
3522 UErrorCode ec
= U_ZERO_ERROR
;
3523 UnicodeString latinTest
= "The Quick Brown Fox.";
3524 UnicodeString devaTest
;
3525 _trans("Latin-Devanagari", latinTest
, devaTest
, ec
);
3526 UnicodeString kataTest
;
3527 _trans("Latin-Katakana", latinTest
, kataTest
, ec
);
3528 if (U_FAILURE(ec
)) {
3529 errln("FAIL: Internal error");
// Table of (source script name, sample text) pairs searched by _findMatch().
3532 const UnicodeString tests
[] = {
3535 "Halfwidth", latinTest
,
3536 "Devanagari", devaTest
,
3537 "Katakana", kataTest
,
// NOTE(review): this function-level `test` is shadowed by the loop-local
// `test` declared inside the source loop below.
3541 UnicodeString
test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3542 int32_t i
= 0, j
=0, k
=0;
3543 int32_t sources
= Transliterator::countAvailableSources();
3544 for (i
= 0; i
< sources
; i
++) {
3545 UnicodeString source
;
3546 Transliterator::getAvailableSource(i
, source
);
3547 UnicodeString test
= _findMatch(source
, tests
);
// An empty result means there is no sample text for this source.
3548 if (test
.length() == 0) {
3549 logln((UnicodeString
)"Skipping " + source
+ "-X");
3552 int32_t targets
= Transliterator::countAvailableTargets(source
);
3553 for (j
= 0; j
< targets
; j
++) {
3554 UnicodeString target
;
3555 Transliterator::getAvailableTarget(j
, source
, target
);
3556 int32_t variants
= Transliterator::countAvailableVariants(source
, target
);
3557 for (k
=0; k
< variants
; k
++) {
3558 UnicodeString variant
;
3560 UErrorCode status
= U_ZERO_ERROR
;
3562 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
// The ID always carries a trailing "/variant" part, even when `variant` is
// empty; the ID comparisons further down ("Devanagari-Arabic/",
// "Latin-Thai/") are written against that form.
3563 UnicodeString id
= source
+ "-" + target
+ "/" + variant
;
3565 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
);
3566 if (U_FAILURE(status
)) {
3567 dataerrln((UnicodeString
)"FAIL: Could not create " + id
);
3571 status
= U_ZERO_ERROR
;
3572 CheckIncrementalAux(t
, test
);
// Produce the forward output `rev`, used as input for the inverse check.
3575 _trans(*t
, test
, rev
);
3576 Transliterator
*inv
= t
->createInverse(status
);
3577 if (U_FAILURE(status
)) {
3578 // The following are forward-only, it is OK that creating an inverse will not work:
3579 // 1. Devanagari-Arabic
3582 // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3583 if ( id
.compare((UnicodeString
)"Devanagari-Arabic/") != 0
3584 && !(id
.startsWith((UnicodeString
)"Any-") &&
3585 (id
.endsWith((UnicodeString
)"/BGN") || id
.endsWith((UnicodeString
)"/UNGEGN") || id
.endsWith((UnicodeString
)"/MNS"))
3587 #if UCONFIG_NO_BREAK_ITERATION
3588 && id
.compare((UnicodeString
)"Latin-Thai/") != 0
3592 errln((UnicodeString
)"FAIL: Could not create inverse of " + id
);
3598 CheckIncrementalAux(inv
, rev
);
// Runs transliterator `t` incrementally over a copy of `input` and reports:
//   - an error if the incremental transliterate() call fails;
//   - "No Progress" if pos.start is still 0 while pos.limit is non-zero
//     (except for the ID "Hex-Any/Unicode"), otherwise "PASS Progress";
//   - "Incomplete" if, after finishTransliteration(), pos.start has not
//     reached pos.limit.
3606 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,
3607 const UnicodeString
& input
) {
3608 UErrorCode ec
= U_ZERO_ERROR
;
// Work on a copy so `input` stays available for the diagnostic messages.
3610 UnicodeString test
= input
;
// Context and limit cover the whole input string.
3612 pos
.contextStart
= 0;
3613 pos
.contextLimit
= input
.length();
3615 pos
.limit
= input
.length();
3617 t
->transliterate(test
, pos
, ec
);
3618 if (U_FAILURE(ec
)) {
3619 errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
));
3622 UBool gotError
= FALSE
;
3623 (void)gotError
; // Suppress set but not used warning.
3625 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3627 if (pos
.start
== 0 && pos
.limit
!= 0 && t
->getID() != "Hex-Any/Unicode") {
3628 errln((UnicodeString
)"No Progress, " +
3629 t
->getID() + ": " + formatInput(test
, input
, pos
));
3632 logln((UnicodeString
)"PASS Progress, " +
3633 t
->getID() + ": " + formatInput(test
, input
, pos
));
// After finishing, everything up to the limit must have been consumed.
3635 t
->finishTransliteration(test
, pos
);
3636 if (pos
.start
!= pos
.limit
) {
3637 errln((UnicodeString
)"Incomplete, " +
3638 t
->getID() + ": " + formatInput(test
, input
, pos
));
// Tests function-call syntax (&Lower(...), &Hex(...), &Any-Lower(...)) in
// rules: builds a transliterator from `rule`, compares the text produced by
// toRules() against `rule` (logging OK or reporting FAIL), and checks the
// output for a sample sentence with expect().
3643 void TransliteratorTest::TestFunction() {
3644 // Careful with spacing and ';' here: Phrase this exactly
3645 // as toRules() is going to return it. If toRules() changes
3646 // with regard to spacing or ';', then adjust this string.
3647 UnicodeString rule
=
3648 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3651 UErrorCode ec
= U_ZERO_ERROR
;
3652 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3654 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec
));
// Round-trip check: toRules() output `r` versus the original rule text.
3659 t
->toRules(r
, TRUE
);
3661 logln((UnicodeString
)"OK: toRules() => " + r
);
3663 errln((UnicodeString
)"FAIL: toRules() => " + r
+
3664 ", expected " + rule
);
3667 expect(*t
, "The Quick Brown Fox",
3668 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
// Negative test: both rule strings contain a rule of the form ". > $1"
// (a $1 reference with no capture group in that rule). The test expects
// createFromRules() to return NULL for each and `ec` to end up as a failure;
// the resulting error name is logged.
// NOTE(review): both createFromRules() calls share the same `ec` without a
// reset in between (in the visible lines). If the first call leaves `ec` in a
// failure state, the second call may return NULL without parsing `rule2` at
// all -- confirm.
3673 void TransliteratorTest::TestInvalidBackRef(void) {
3674 UnicodeString rule
= ". > $1;";
3675 UnicodeString rule2
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3677 UErrorCode ec
= U_ZERO_ERROR
;
3678 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3679 Transliterator
*t2
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
);
3682 errln("FAIL: createFromRules should have returned NULL");
3687 errln("FAIL: createFromRules should have returned NULL");
// Success here is the failure case for this test.
3691 if (U_SUCCESS(ec
)) {
3692 errln("FAIL: Ok: . > $1; => no error");
3694 logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
));
// Tests rules whose UnicodeSets contain multi-character strings ({..}
// elements). Two rule sets are built with createFromRules() and each is
// checked with expect(): the first on a general sample, the second on
// overlapping strings where the longest candidate must win.
3698 void TransliteratorTest::TestMulticharStringSet() {
3705 " e } [{fg}] > r;" ;
3708 UErrorCode ec
= U_ZERO_ERROR
;
3709 Transliterator
* t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3710 if (t
== NULL
|| U_FAILURE(ec
)) {
3712 errln("FAIL: createFromRules failed");
3716 expect(*t
, "a aa ab bc d gd de gde gdefg ddefg",
3717 "y x yz z d gd de gdq gdqfg ddrfg");
3720 // Overlapped string test. Make sure that when multiple
3721 // strings can match that the longest one is matched.
3723 " [a {ab} {abc}] > x;"
3726 " q [t {st} {rst}] { e > p;" ;
// `t` is reused for the second rule set.
3728 t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3729 if (t
== NULL
|| U_FAILURE(ec
)) {
3731 errln("FAIL: createFromRules failed");
3735 expect(*t
, "a ab abc qte qste qrste",
3736 "x x x qtp qstp qrstp");
3740 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3741 // BEGIN TestUserFunction support factory
// Parallel 4-slot tables used by the factory helpers below: _TUFF[n] holds
// the prototype transliterator that _TUFFactory clones, and _TUFID[n] holds
// a heap-allocated copy of the ID it was registered under.
3743 Transliterator
* _TUFF
[4];
3744 UnicodeString
* _TUFID
[4];
// Factory callback registered with Transliterator::registerFactory().
// Ignores the requested ID and returns a clone of the prototype stored in
// _TUFF at the slot given by context.integer.
3746 static Transliterator
* U_EXPORT2
_TUFFactory(const UnicodeString
& /*ID*/,
3747 Transliterator::Token context
) {
3748 return _TUFF
[context
.integer
]->clone();
// Registers `ID` so that it is served by _TUFFactory with slot `n` as the
// factory context, and records a heap copy of the ID in _TUFID[n] for later
// unregistration.
// NOTE(review): the store of `t` into _TUFF[n] is not visible here;
// presumably it happens just before these lines -- confirm.
3751 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) {
3753 _TUFID
[n
] = new UnicodeString(ID
);
3754 Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
));
// Undoes _TUFReg for slot `n`: if a prototype is present in _TUFF[n], the ID
// saved in _TUFID[n] is unregistered.
// NOTE(review): release of _TUFF[n] / _TUFID[n] is not visible here --
// confirm that both are deleted after unregistering.
3757 static void _TUFUnreg(int32_t n
) {
3758 if (_TUFF
[n
] != NULL
) {
3759 Transliterator::unregister(*_TUFID
[n
]);
3765 // END TestUserFunction support factory
3766 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3769 * Test that user-registered transliterators can be used under function
// Tests that transliterators registered by the user (through the _TUF*
// factory helpers) can be invoked with the &Name(...) function syntax inside
// rules. Four rule-based transliterators are created and registered as
// "Any-gif", "Any-RemoveCurly", "Any-hex2" and "Any-gif2"; later rules call
// earlier ones (&Gif(&Hex2($1)), &RemoveCurly(&Name($1))) and the results
// are checked with expect(). The loops over i = 0..3 at the start and end
// bracket the test (their bodies are not visible here; presumably they
// initialise and unregister the four factory slots -- confirm).
3772 void TransliteratorTest::TestUserFunction() {
3776 UErrorCode ec
= U_ZERO_ERROR
;
3778 // Setup our factory
3780 for (i
=0; i
<4; ++i
) {
3784 // There's no need to register inverses if we don't use them
// Slot 0: "gif" turns an escaped 4-hex-digit sequence into an <img> tag.
3785 t
= Transliterator::createFromRules("gif",
3786 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3787 UTRANS_FORWARD
, pe
, ec
);
3788 if (t
== NULL
|| U_FAILURE(ec
)) {
3789 dataerrln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
));
3792 _TUFReg("Any-gif", t
, 0);
// Slot 1: "RemoveCurly" deletes curly braces and the literal backslash-N.
3794 t
= Transliterator::createFromRules("RemoveCurly",
3795 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3796 UTRANS_FORWARD
, pe
, ec
);
3797 if (t
== NULL
|| U_FAILURE(ec
)) {
3798 errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
));
3801 expect(*t
, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3802 _TUFReg("Any-RemoveCurly", t
, 1);
// Slot 2: "hex2" (its rule text is not visible in these lines).
3804 logln("Trying &hex");
3805 t
= Transliterator::createFromRules("hex2",
3807 UTRANS_FORWARD
, pe
, ec
);
3808 if (t
== NULL
|| U_FAILURE(ec
)) {
3809 errln("FAIL: createFromRules");
3812 logln("Registering");
3813 _TUFReg("Any-hex2", t
, 2);
// Re-create through the registry to exercise the factory path.
3814 t
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
);
3815 if (t
== NULL
|| U_FAILURE(ec
)) {
3816 errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
));
3819 expect(*t
, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
// Slot 3: "gif2" composes the two user functions registered above.
3822 logln("Trying &gif");
3823 t
= Transliterator::createFromRules("gif2",
3824 "(.) > &Gif(&Hex2($1));",
3825 UTRANS_FORWARD
, pe
, ec
);
3826 if (t
== NULL
|| U_FAILURE(ec
)) {
3827 errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
));
3830 logln("Registering");
3831 _TUFReg("Any-gif2", t
, 3);
3832 t
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
);
3833 if (t
== NULL
|| U_FAILURE(ec
)) {
3834 errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
));
3837 expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3838 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3841 // Test that filters are allowed after &
3842 t
= Transliterator::createFromRules("test",
3843 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3844 UTRANS_FORWARD
, pe
, ec
);
3845 if (t
== NULL
|| U_FAILURE(ec
)) {
3846 errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
));
3850 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3854 for (i
=0; i
<4; ++i
) {
3860 * Test the Any-X transliterators.
// Checks two Any-X style transliterators against fixed samples:
//   - "Any-Latin" on mixed Greek, Hiragana and Cyrillic text;
//   - the compound "Any-Latin;Latin-ASCII" on Arabic-Indic and Persian
//     digits, which must come out as ASCII digits.
// Creation failures are reported with dataerrln().
3862 void TransliteratorTest::TestAnyX(void) {
3863 UParseError parseError
;
3864 UErrorCode status
= U_ZERO_ERROR
;
3865 Transliterator
* anyLatin
=
3866 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3868 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status
));
3874 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3875 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
// Reset before creating the second transliterator.
3879 status
= U_ZERO_ERROR
;
3880 Transliterator
* anyASCII
=
3881 Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD
, parseError
, status
);
3882 if (U_FAILURE(status
) || anyASCII
==0) {
3883 dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status
));
3889 CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
3890 CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
3896 * Test Any-X transliterators with sample letters from all scripts.
// Smoke test for "Any-Latin" over many scripts: for every script code below
// USCRIPT_CODE_LIMIT it builds the set of alphabetic characters of that
// script, appends characters taken from the first five positions of the set
// to `testString`, then runs Any-Latin over the accumulated string and logs
// the input and the result. No expected output is compared in the visible
// lines; failures are reported only for set-up problems.
3898 void TransliteratorTest::TestAny(void) {
3899 UErrorCode status
= U_ZERO_ERROR
;
3900 // Note: there is a lot of implicit construction of UnicodeStrings from (char *) in
3901 // function call parameters going on in this test.
3902 UnicodeSet
alphabetic("[:alphabetic:]", status
);
3903 if (U_FAILURE(status
)) {
3904 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3907 alphabetic
.freeze();
3909 UnicodeString testString
;
3910 for (int32_t i
= 0; i
< USCRIPT_CODE_LIMIT
; i
++) {
3911 const char *scriptName
= uscript_getShortName((UScriptCode
)i
);
3912 if (scriptName
== NULL
) {
3913 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__
, __LINE__
, i
);
// Restrict the script's characters to alphabetic ones.
3918 sample
.applyPropertyAlias("script", scriptName
, status
);
3919 if (U_FAILURE(status
)) {
3920 errln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3923 sample
.retainAll(alphabetic
);
3924 for (int32_t count
=0; count
<5; count
++) {
3925 UChar32 c
= sample
.charAt(count
);
3929 testString
.append(c
);
3933 UParseError parseError
;
3934 Transliterator
* anyLatin
=
3935 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3936 if (U_FAILURE(status
)) {
3937 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3941 logln(UnicodeString("Sample set for Any-Latin: ") + testString
);
3942 anyLatin
->transliterate(testString
);
3943 logln(UnicodeString("Sample result for Any-Latin: ") + testString
);
3949 * Test the source and target set API. These are only implemented
3950 * for RBT and CompoundTransliterator at this time.
// Tests Transliterator::getSourceSet() / getTargetSet() on a rule-based
// transliterator built from the rule text `r`: the reported source set must
// equal [arx{lu}] and the target set must equal [bq]. On success the sets
// are logged; on mismatch both actual and expected patterns are reported.
3952 void TransliteratorTest::TestSourceTargetSet() {
3953 UErrorCode ec
= U_ZERO_ERROR
;
// Expected source and target sets for the rules in `r`.
3961 UnicodeSet
expSrc("[arx{lu}]", ec
);
3964 UnicodeSet
expTrg("[bq]", ec
);
3967 Transliterator
* t
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
);
3969 if (U_FAILURE(ec
)) {
3971 errln("FAIL: Couldn't set up test");
3975 UnicodeSet src
; t
->getSourceSet(src
);
3976 UnicodeSet trg
; t
->getTargetSet(trg
);
3978 if (src
== expSrc
&& trg
== expTrg
) {
3980 logln((UnicodeString
)"Ok: " +
3981 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3982 ", target = " + trg
.toPattern(b
, TRUE
));
// Scratch strings that receive the four patterns used in the failure message.
3984 UnicodeString a
, b
, c
, d
;
3985 errln((UnicodeString
)"FAIL: " +
3986 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3987 ", expected " + expSrc
.toPattern(b
, TRUE
) +
3988 "; target = " + trg
.toPattern(c
, TRUE
) +
3989 ", expected " + expTrg
.toPattern(d
, TRUE
));
3996 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
// Checks that U+200E is ignored when it appears unquoted in patterns:
//   - in rules: "a > <U+200E> b;" must map "a" to exactly "b";
//   - in UnicodeSet: the set parsed from "[a <U+200E>]" must not contain
//     U+200E.
3998 void TransliteratorTest::TestPatternWhiteSpace() {
4000 const char* r
= "a > \\u200E b;";
4002 UErrorCode ec
= U_ZERO_ERROR
;
// CharsToUnicodeString() turns the escape in `r` into the real U+200E.
4004 Transliterator
* t
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
);
4006 if (U_FAILURE(ec
)) {
4007 errln("FAIL: Couldn't set up test");
4009 expect(*t
, "a", "b");
4015 UnicodeSet
set(CharsToUnicodeString("[a \\u200E]"), ec
);
4017 if (U_FAILURE(ec
)) {
4018 errln("FAIL: Couldn't set up test");
4020 if (set
.contains(0x200E)) {
4021 errln("FAIL: U+200E not being ignored by UnicodeSet");
4025 //======================================================================
4026 // this method is in TestUScript.java
4027 //======================================================================
// Walks every code point 0..0x10FFFF, looks up its script, and builds two
// transliterator IDs of the form "[:<script>:];NFD" -- one from the script's
// long name and one from its short name. A transliterator is created for an
// ID only when that ID differs from the one built for the previous code
// point (oldId / oldAbbrId remember the last values), so each run of
// same-script code points is tested once.
// NOTE(review): the failure message for the abbreviated ID prints `id` (the
// long name) rather than the abbreviated ID -- confirm whether intended.
4028 void TransliteratorTest::TestAllCodepoints(){
4029 UScriptCode code
= USCRIPT_INVALID_CODE
;
// Fixed 256-byte scratch buffers for names and IDs.
4030 char id
[256]={'\0'};
4031 char abbr
[256]={'\0'};
4032 char newId
[256]={'\0'};
4033 char newAbbrId
[256]={'\0'};
4034 char oldId
[256]={'\0'};
4035 char oldAbbrId
[256]={'\0'};
4037 UErrorCode status
=U_ZERO_ERROR
;
4040 for(uint32_t i
= 0; i
<=0x10ffff; i
++){
4041 code
= uscript_getScript(i
,&status
);
4042 if(code
== USCRIPT_INVALID_CODE
){
4043 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i
);
4045 const char* myId
= uscript_getName(code
);
4047 dataerrln("Valid script code returned NULL name. Check your data!");
4050 uprv_strcpy(id
,myId
);
4051 uprv_strcpy(abbr
,uscript_getShortName(code
));
// Filtered-NFD ID using the long script name.
4053 uprv_strcpy(newId
,"[:");
4054 uprv_strcat(newId
,id
);
4055 uprv_strcat(newId
,":];NFD");
// Same ID using the short script name.
4057 uprv_strcpy(newAbbrId
,"[:");
4058 uprv_strcat(newAbbrId
,abbr
);
4059 uprv_strcat(newAbbrId
,":];NFD");
4061 if(uprv_strcmp(newId
,oldId
)!=0){
4062 Transliterator
* t
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
);
4063 if(t
==NULL
|| U_FAILURE(status
)){
4064 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
4068 if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){
4069 Transliterator
* t
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
);
4070 if(t
==NULL
|| U_FAILURE(status
)){
4071 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
// Remember this iteration's IDs for the change detection above.
4075 uprv_strcpy(oldId
,newId
);
4076 uprv_strcpy(oldAbbrId
, newAbbrId
);
// TEST_TRANSLIT_ID(id, cls): creates a forward transliterator for the string
// literal `id` with createInstance(), reports a data error if creation
// fails, and otherwise reports an error when the instance's
// getDynamicClassID() differs from cls::getStaticClassID().
4082 #define TEST_TRANSLIT_ID(id, cls) { \
4083 UErrorCode ec = U_ZERO_ERROR; \
4084 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
4085 if (U_FAILURE(ec)) { \
4086 dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
4088 if (t->getDynamicClassID() != cls::getStaticClassID()) { \
4089 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4091 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
// TEST_TRANSLIT_RULE(rule, cls): like TEST_TRANSLIT_ID, but builds the
// transliterator from the rule string literal `rule` with createFromRules()
// (ID "_"). `rule` must be a string literal because it is concatenated into
// the failure message.
4096 #define TEST_TRANSLIT_RULE(rule, cls) { \
4097 UErrorCode ec = U_ZERO_ERROR; \
4099 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
4100 if (U_FAILURE(ec)) { \
4101 errln("FAIL: Couldn't create " rule); \
4103 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
4104 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4106 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
// Class-ID boilerplate check: for each built-in ID (and one rule string)
// verifies, via the TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE macros, that the
// created object's dynamic class ID matches the static class ID of the
// named implementation class.
4111 void TransliteratorTest::TestBoilerplate() {
4112 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
);
4113 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
);
4114 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
);
4115 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
);
4116 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
);
4117 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
);
4118 TEST_TRANSLIT_ID("Null", NullTransliterator
);
4119 TEST_TRANSLIT_ID("Remove", RemoveTransliterator
);
4120 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
);
4121 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
);
4122 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
);
4123 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
);
4124 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
);
// Tests the alternate rule operators written as the characters U+2192,
// U+2190, U+2194 and U+2206 (they appear in the rule strings below where
// ">", "<", "<>" and "&" would normally be used), checking the results
// with expect().
4127 void TransliteratorTest::TestAlternateSyntax() {
4132 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4135 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4136 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4137 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
// Table of rule sets referenced by index from BEGIN_END_TEST_CASES and from
// the TestBeginEnd* tests (entry 17 is the one instantiated in reverse).
// Entries whose test case is commented out are kept as empty strings so the
// indexes of the remaining entries do not shift. Adjacent string literals
// without a comma between them concatenate into a single entry.
4140 static const char* BEGIN_END_RULES
[] = {
4154 "", // test case commented out below, this is here to keep from messing up the indexes
4163 "", // test case commented out below, this is here to keep from messing up the indexes
4172 "", // test case commented out below, this is here to keep from messing up the indexes
4191 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4192 "$delim = [\\-$ws];"
4193 "$ws $delim* > ' ';"
4194 "'-' $delim* > '-';",
4198 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4199 "$delim = [\\-$ws];"
4200 "$ws $delim* > ' ';"
4201 "'-' $delim* > '-';",
4204 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4205 "$delim = [\\-$ws];"
4206 "$ws $delim* > ' ';"
4207 "'-' $delim* > '-';"
4211 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4212 "$delim = [\\-$ws];"
4214 "$ws $delim* > ' ';"
4215 "'-' $delim* > '-';",
4220 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4221 "$delim = [\\-$ws];"
4223 "$ws $delim* > ' ';"
4224 "'-' $delim* > '-';",
4226 "", // test case commented out below, this is here to keep from messing up the indexes
4230 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4231 "$delim = [\\-$ws];"
4233 "$ws $delim* > ' ';"
4234 "'-' $delim* > '-';"
4237 "", // test case commented out below, this is here to keep from messing up the indexes
4241 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4242 "$delim = [\\-$ws];"
4245 "$ws $delim* > ' ';"
4246 "'-' $delim* > '-';"
4249 "$ab { ' ' } $ab > '-';"
4256 "", // test case commented out below, this is here to keep from messing up the indexes
4259 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4260 "$delim = [\\-$ws];"
4263 "$ws $delim* > ' ';"
4264 "'-' $delim* > '-';"
4266 "$ab { ' ' } $ab > '-';"
4282 "", // test case commented out below, this is here to keep from messing up the indexes
4303 "", // test case commented out below, this is here to keep from messing up the indexes
4315 (This entire test is commented out below and will need some heavy revision when we re-add
4316 the ::BEGIN/::END stuff)
4317 static const char* BOGUS_BEGIN_END_RULES[] = {
4336 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
// Flat table of test triples: (rule set from BEGIN_END_RULES, input text,
// expected output). Consumers step through it three entries at a time, so
// the number of entries must stay a multiple of 3. Commented-out rows
// correspond to the empty placeholder entries in BEGIN_END_RULES.
4339 static const char* BEGIN_END_TEST_CASES
[] = {
4340 // rules input expected output
4341 BEGIN_END_RULES
[0], "abc ababc aba", "xy zbc z",
4342 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4343 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4344 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4345 BEGIN_END_RULES
[4], "abc ababc aba", "xy abxy z",
4346 BEGIN_END_RULES
[5], "abccabaacababcbc", "PXAARXQBR",
4348 BEGIN_END_RULES
[6], "e e - e---e- e", "e e e-e-e",
4349 BEGIN_END_RULES
[7], "e e - e---e- e", "e e e-e-e",
4350 BEGIN_END_RULES
[8], "e e - e---e- e", "e e e-e-e",
4351 BEGIN_END_RULES
[9], "e e - e---e- e", "e e e-e-e",
4352 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4353 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4354 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4355 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4356 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4357 BEGIN_END_RULES
[13], "e e - e---e- e", "e e e-e-e",
4358 BEGIN_END_RULES
[13], "a a a a", "a%a%a%a",
4359 BEGIN_END_RULES
[13], "a a-b c b a", "a%a-b cb-a",
4361 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4362 BEGIN_END_RULES
[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4363 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4364 BEGIN_END_RULES
[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
// Number of entries (not triples) in the table above.
4366 static const int32_t BEGIN_END_TEST_CASES_length
= UPRV_LENGTHOF(BEGIN_END_TEST_CASES
);
// Runs every (rules, input, expected) triple of BEGIN_END_TEST_CASES through
// expect(), labelling each as "Test case #<n>", then instantiates
// BEGIN_END_RULES[17] in the reverse direction and checks its output on a
// fixed sample. The trailing lines are the remains of a commented-out loop
// over BOGUS_BEGIN_END_RULES and are not active code.
4368 void TransliteratorTest::TestBeginEnd() {
4369 // run through the list of test cases above
4371 for (i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4372 expect((UnicodeString
)"Test case #" + (i
/ 3),
4373 UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4374 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4375 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4378 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4379 UParseError parseError
;
4380 UErrorCode status
= U_ZERO_ERROR
;
4381 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4382 UTRANS_REVERSE
, parseError
, status
);
4383 if (reversed
== 0 || U_FAILURE(status
)) {
4384 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4386 expect(*reversed
, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4390 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4391 // that all of them cause errors
4393 (commented out until we have the real ::BEGIN/::END stuff in place
4394 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4395 UParseError parseError;
4396 UErrorCode status = U_ZERO_ERROR;
4397 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4398 UTRANS_FORWARD, parseError, status);
4399 if (!U_FAILURE(status)) {
4401 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
// toRules() round-trip test over BEGIN_END_TEST_CASES: each rule set is
// compiled, converted back to rule text with toRules(), recompiled from that
// text, and checked against the same input/expected pair. The same
// round-trip is then done for BEGIN_END_RULES[17] compiled in reverse; its
// generated rules are recompiled in the forward direction.
4407 void TransliteratorTest::TestBeginEndToRules() {
4408 // run through the same list of test cases we used above, but this time, instead of just
4409 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4410 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4411 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4412 // to (i.e., does the same thing as) the original rule set
4413 for (int32_t i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4414 UParseError parseError
;
4415 UErrorCode status
= U_ZERO_ERROR
;
4416 Transliterator
* t
= Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4417 UTRANS_FORWARD
, parseError
, status
);
4418 if (U_FAILURE(status
)) {
4419 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError
, status
);
// Regenerate rule text from the compiled transliterator and recompile it.
4421 UnicodeString rules
;
4422 t
->toRules(rules
, TRUE
);
4423 Transliterator
* t2
= Transliterator::createFromRules((UnicodeString
)"Test case #" + (i
/ 3), rules
,
4424 UTRANS_FORWARD
, parseError
, status
);
4425 if (U_FAILURE(status
)) {
4426 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4427 parseError
, status
);
4431 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4432 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4439 // do the same thing for the reversible test case
4440 UParseError parseError
;
4441 UErrorCode status
= U_ZERO_ERROR
;
4442 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4443 UTRANS_REVERSE
, parseError
, status
);
4444 if (U_FAILURE(status
)) {
4445 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
// Here toRules() is called with FALSE (the loop above passes TRUE), and the
// generated text is recompiled forward, not in reverse.
4447 UnicodeString rules
;
4448 reversed
->toRules(rules
, FALSE
);
4449 Transliterator
* reversed2
= Transliterator::createFromRules("Reversed", rules
, UTRANS_FORWARD
,
4450 parseError
, status
);
4451 if (U_FAILURE(status
)) {
4452 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4453 parseError
, status
);
4457 UnicodeString("xy XY XYZ yz YZ"),
4458 UnicodeString("xy abc xaba yz aba"));
// Tests Transliterator::registerAlias() in two rounds:
//   1. alias "Any-CapVowels" for the compound ID "Lower;[aeiou]Upper";
//   2. alias "Latin-dlgkjdflkjdl" for the existing ID "Latin-Greek".
// In each round the transliterator created through the alias must produce
// the same toRules() text as the one created from the aliased ID; in round 1
// each instance must also report the ID it was requested with, and creation
// through the alias must fail once the alias is unregistered.
// NOTE(review): in round 2 the failure paths call unregister(realID), which
// names the built-in "Latin-Greek" rather than the alias `fakeID` that was
// registered -- confirm whether unregister(fakeID) was intended.
4465 void TransliteratorTest::TestRegisterAlias() {
4466 UnicodeString
longID("Lower;[aeiou]Upper");
4467 UnicodeString
shortID("Any-CapVowels");
4468 UnicodeString
reallyShortID("CapVowels");
4470 Transliterator::registerAlias(shortID
, longID
);
4472 UErrorCode err
= U_ZERO_ERROR
;
4473 Transliterator
* t1
= Transliterator::createInstance(longID
, UTRANS_FORWARD
, err
);
4474 if (U_FAILURE(err
)) {
4475 errln("Failed to instantiate transliterator with long ID");
4476 Transliterator::unregister(shortID
);
// The alias was registered as "Any-CapVowels" but is requested here without
// the "Any-" prefix.
4479 Transliterator
* t2
= Transliterator::createInstance(reallyShortID
, UTRANS_FORWARD
, err
);
4480 if (U_FAILURE(err
)) {
4481 errln("Failed to instantiate transliterator with short ID");
4483 Transliterator::unregister(shortID
);
4487 if (t1
->getID() != longID
)
4488 errln("Transliterator instantiated with long ID doesn't have long ID");
4489 if (t2
->getID() != reallyShortID
)
4490 errln("Transliterator instantiated with short ID doesn't have short ID");
4492 UnicodeString rules1
;
4493 UnicodeString rules2
;
4495 t1
->toRules(rules1
, TRUE
);
4496 t2
->toRules(rules2
, TRUE
);
4497 if (rules1
!= rules2
)
4498 errln("Alias transliterators aren't the same");
// After unregistering, creation through the alias is expected to fail.
4502 Transliterator::unregister(shortID
);
4504 t1
= Transliterator::createInstance(shortID
, UTRANS_FORWARD
, err
);
4505 if (U_SUCCESS(err
)) {
4506 errln("Instantiation with short ID succeeded after short ID was unregistered");
4510 // try the same thing again, but this time with something other than
4511 // an instance of CompoundTransliterator
4512 UnicodeString
realID("Latin-Greek");
4513 UnicodeString
fakeID("Latin-dlgkjdflkjdl");
4514 Transliterator::registerAlias(fakeID
, realID
);
4517 t1
= Transliterator::createInstance(realID
, UTRANS_FORWARD
, err
);
4518 if (U_FAILURE(err
)) {
4519 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err
));
4520 Transliterator::unregister(realID
);
4523 t2
= Transliterator::createInstance(fakeID
, UTRANS_FORWARD
, err
);
4524 if (U_FAILURE(err
)) {
4525 errln("Failed to instantiate transliterator with fake ID");
4527 Transliterator::unregister(realID
);
4531 t1
->toRules(rules1
, TRUE
);
4532 t2
->toRules(rules2
, TRUE
);
4533 if (rules1
!= rules2
)
4534 errln("Alias transliterators aren't the same");
4538 Transliterator::unregister(fakeID
);
4541 void TransliteratorTest::TestRuleStripping() {
4544 \uE001>\u0C01; # SIGN
4546 static const UChar rule
[] = {
4547 0x0023,0x0020,0x000D,0x000A,
4548 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4550 static const UChar expectedRule
[] = {
4551 0xE001,0x003E,0x0C01,0x003B,0
4553 UChar result
[UPRV_LENGTHOF(rule
)];
4554 UErrorCode status
= U_ZERO_ERROR
;
4555 int32_t len
= utrans_stripRules(rule
, UPRV_LENGTHOF(rule
), result
, &status
);
4556 if (len
!= u_strlen(expectedRule
)) {
4557 errln("utrans_stripRules return len = %d", len
);
4559 if (u_strncmp(expectedRule
, result
, len
) != 0) {
4560 errln("utrans_stripRules did not return expected string");
4565 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
// Creates the "Halfwidth-Fullwidth" and "Fullwidth-Halfwidth" transliterators
// and walks a table of \u-escaped strings, converting each pair with
// CharsToUnicodeString() before checking it.
// NOTE(review): several statements of this function (the rest of the failure
// branch, the first table entry, the switch header and the case bodies) are
// not visible in this chunk; the comments below cover only what is shown.
4567 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4568 UParseError parseError
;
4569 UErrorCode status
= U_ZERO_ERROR
;
// Both instances are created against the same parseError/status pair.
4570 Transliterator
* hf
= Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD
, parseError
, status
);
4571 Transliterator
* fh
= Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD
, parseError
, status
);
// A null result from either factory call is reported through dataerrln()
// together with the ICU error name for status.
4572 if (hf
== 0 || fh
== 0) {
4573 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
// NOTE(review): the comment below says "2n items", but the loop further down
// advances by 3 and reads the strings at i+1 and i+2; presumably the entry at
// i is the direction tag -- confirm against the full table.
4579 // Array of 2n items
4581 // "hf"|"fh"|"both",
4584 const char* DATA
[] = {
4586 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4587 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4589 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
// One iteration per table record (stride of three entries).
4591 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// h is built from entry i+1 and f from entry i+2.
4592 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
4593 UnicodeString f
= CharsToUnicodeString(DATA
[i
+2]);
// The case labels are ASCII code points written numerically: 0x68 'h',
// 0x66 'f', 0x62 'b'. The switch expression itself is not visible here.
4595 case 0x68: //'h': // Halfwidth-Fullwidth only
4598 case 0x66: //'f': // Fullwidth-Halfwidth only
4601 case 0x62: //'b': // both directions
4613 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4614 * TODO: confirm that the expected results are correct.
4615 * For now, test just confirms that C++ and Java give identical results.
// Transliterates a fixed Thai paragraph with the "Any-Latin" transliterator
// and checks it against a stored Latin rendering. The statements shown are
// compiled only when UCONFIG_NO_BREAK_ITERATION is not set.
// NOTE(review): the guards around the two failure reports and any cleanup of
// `tr` are not visible in this chunk.
4617 void TransliteratorTest::TestThai(void) {
4618 #if !UCONFIG_NO_BREAK_ITERATION
4619 UParseError parseError
;
4620 UErrorCode status
= U_ZERO_ERROR
;
4621 Transliterator
* tr
= Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
// Two separate failure reports follow: one through dataerrln() and one
// through errln() when status holds a failure code.
4623 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
4626 if (U_FAILURE(status
)) {
4627 errln("FAIL: createInstance failed with %s", u_errorName(status
));
// Input text, written with \u escapes so the literal stays ASCII; it is
// decoded with UnicodeString::unescape() further down.
4630 const char *thaiText
=
4631 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4632 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4633 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4634 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4635 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4636 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4637 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4638 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4639 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4640 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4641 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4642 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4643 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4644 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4645 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4646 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4647 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4648 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4649 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4650 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4651 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4652 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4653 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4654 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4655 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4656 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4657 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4658 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4659 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4660 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
// Expected output, escaped the same way as the input above.
4662 const char *latinText
=
4663 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4664 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4665 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4666 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4667 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4668 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4669 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4670 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4671 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4672 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4673 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4674 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4675 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4676 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4677 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4678 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4679 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4680 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
// Decode the escaped input, then transliterate it in place.
4683 UnicodeString
xlitText(thaiText
);
4684 xlitText
= xlitText
.unescape();
4685 tr
->transliterate(xlitText
);
// Decode the expected text the same way.
4687 UnicodeString
expectedText(latinText
);
4688 expectedText
= expectedText
.unescape();
// NOTE(review): xlitText was already transliterated in place above, so
// expect() is handed the transliterated text (not the Thai input) as its
// source argument -- confirm this is intended.
4689 expect(*tr
, xlitText
, expectedText
);
4696 //======================================================================
4698 //======================================================================
// Instantiates a forward transliterator from `id` and passes it, together
// with `source` and `expectedResult`, to expect().
// A creation failure is reported through errln() with the ID and the ICU
// error name.
// NOTE(review): `pe` is handed to createInstance() below, but its declaration
// and the end of the failure branch are not visible in this chunk.
4699 void TransliteratorTest::expectT(const UnicodeString
& id
,
4700 const UnicodeString
& source
,
4701 const UnicodeString
& expectedResult
) {
4702 UErrorCode ec
= U_ZERO_ERROR
;
4704 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
4705 if (U_FAILURE(ec
)) {
4706 errln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(ec
));
// Three-argument call: no UTransPosition is supplied here.
4710 expect(*t
, source
, expectedResult
);
// Builds a diagnostic string from a UParseError and a status code: line,
// offset, prettified pre- and post-context, and the ICU error name.
// NOTE(review): the start of the statement that consumes this concatenation
// (presumably the reporting call that also prepends `message`) is not visible
// in this chunk.
4714 void TransliteratorTest::reportParseError(const UnicodeString
& message
,
4715 const UParseError
& parseError
,
4716 const UErrorCode
& status
) {
// The parse-error code segment below is commented out in the source.
4718 /*", parse error " + parseError.code +*/
4719 ", line " + parseError
.line
+
4720 ", offset " + parseError
.offset
+
4721 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
4722 ", post-context " + prettify(parseError
.postContext
,TRUE
) +
4723 ", Error: " + u_errorName(status
));
4726 void TransliteratorTest::expect(const UnicodeString
& rules
,
4727 const UnicodeString
& source
,
4728 const UnicodeString
& expectedResult
,
4729 UTransPosition
*pos
) {
4730 expect("<ID>", rules
, source
, expectedResult
, pos
);
// Builds a forward transliterator from `rules` under the given `id` with
// Transliterator::createFromRules() and hands it to
// expect(*t, source, expectedResult, pos).
// A creation failure is reported through reportParseError(), including the
// rule text.
// NOTE(review): the end of the failure branch and any cleanup of `t` are not
// visible in this chunk; confirm that expect(*t, ...) is reached only when
// creation succeeded.
4733 void TransliteratorTest::expect(const UnicodeString
& id
,
4734 const UnicodeString
& rules
,
4735 const UnicodeString
& source
,
4736 const UnicodeString
& expectedResult
,
4737 UTransPosition
*pos
) {
4738 UErrorCode status
= U_ZERO_ERROR
;
4739 UParseError parseError
;
4740 Transliterator
* t
= Transliterator::createFromRules(id
, rules
, UTRANS_FORWARD
, parseError
, status
);
4741 if (U_FAILURE(status
)) {
4742 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules
, parseError
, status
);
4744 expect(*t
, source
, expectedResult
, pos
);
4749 void TransliteratorTest::expect(const Transliterator
& t
,
4750 const UnicodeString
& source
,
4751 const UnicodeString
& expectedResult
,
4752 const Transliterator
& reverseTransliterator
) {
4753 expect(t
, source
, expectedResult
);
4754 expect(reverseTransliterator
, expectedResult
, source
);
// Checks transliterator `t` on `source` and reports through expectAux() under
// three tags built from t.getID(): ":String", ":Replaceable" and ":Keyboard".
// NOTE(review): the conditionals that choose between the statements below
// (presumably on whether `pos` is null), the declaration of `log`, and the
// last argument of the final expectAux() call are not visible in this chunk.
4757 void TransliteratorTest::expect(const Transliterator
& t
,
4758 const UnicodeString
& source
,
4759 const UnicodeString
& expectedResult
,
4760 UTransPosition
*pos
) {
// ":String" check: transliterate a copy of source in one call.
4762 UnicodeString
result(source
);
4763 t
.transliterate(result
);
4764 expectAux(t
.getID() + ":String", source
, result
, expectedResult
);
// Position struct used by the incremental calls further down, zero-initialised.
4766 UTransPosition index
={0, 0, 0, 0};
// ":Replaceable" check on a second copy of source.
4771 UnicodeString
rsource(source
);
4773 t
.transliterate(rsource
);
4775 // Do it all at once -- below we do it incrementally
4776 t
.finishTransliteration(rsource
, *pos
);
4778 expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
);
4780 // Test keyboard (incremental) transliteration -- this result
4781 // must be the same after we finalize (see below).
// formatInput() appends a rendering of rsource and index to log, here before
// and after the incremental transliterate() call.
4786 formatInput(log
, rsource
, index
);
4788 UErrorCode status
= U_ZERO_ERROR
;
4789 t
.transliterate(rsource
, index
, status
);
4790 formatInput(log
, rsource
, index
);
// Feed source to the transliterator one code unit at a time, logging the
// state after each step.
4792 for (int32_t i
=0; i
<source
.length(); ++i
) {
4796 log
.append(source
.charAt(i
)).append(" -> ");
// A fresh status is used for every inserted code unit.
4797 UErrorCode status
= U_ZERO_ERROR
;
4798 t
.transliterate(rsource
, index
, source
.charAt(i
), status
);
4799 formatInput(log
, rsource
, index
);
4803 // As a final step in keyboard transliteration, we must call
4804 // transliterate to finish off any pending partial matches that
4805 // were waiting for more input.
4806 t
.finishTransliteration(rsource
, index
);
4807 log
.append(" => ").append(rsource
);
// ":Keyboard" check: the accumulated log is the summary, and the pass flag is
// the comparison of rsource with expectedResult.
4809 expectAux(t
.getID() + ":Keyboard", log
,
4810 rsource
== expectedResult
,
4816 * @param appendTo result is appended to this param.
4817 * @param input the string being transliterated
4818 * @param pos the index struct
// Appends to `appendTo` a rendering of `input` annotated with the four
// offsets in `pos`, or an "INVALID UTransPosition" message listing those
// offsets.
// NOTE(review): the header of the second branch, the tail of the invalid
// message and the return statement are not visible in this chunk.
4820 UnicodeString
& TransliteratorTest::formatInput(UnicodeString
&appendTo
,
4821 const UnicodeString
& input
,
4822 const UTransPosition
& pos
) {
4823 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4824 // the {} indicate the context start and limit, and the ||
4825 // indicate the start and limit.
// The offsets must be ordered
// 0 <= contextStart <= start <= limit <= contextLimit <= input.length().
4826 if (0 <= pos
.contextStart
&&
4827 pos
.contextStart
<= pos
.start
&&
4828 pos
.start
<= pos
.limit
&&
4829 pos
.limit
<= pos
.contextLimit
&&
4830 pos
.contextLimit
<= input
.length()) {
// Cut input into five consecutive pieces at the four offsets.
4832 UnicodeString a
, b
, c
, d
, e
;
4833 input
.extractBetween(0, pos
.contextStart
, a
);
4834 input
.extractBetween(pos
.contextStart
, pos
.start
, b
);
4835 input
.extractBetween(pos
.start
, pos
.limit
, c
);
4836 input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
);
4837 input
.extractBetween(pos
.contextLimit
, input
.length(), e
);
// Join the pieces with '{' (123), PIPE, PIPE and '}' (125) as separators.
4838 appendTo
.append(a
).append((UChar
)123/*{*/).append(b
).
4839 append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
).
4840 append((UChar
)125/*}*/).append(e
);
// Message form listing the four offsets; presumably the branch taken when the
// ordering check above fails.
4842 appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" +
4843 pos
.contextStart
+ ", s=" + pos
.start
+ ", l=" +
4844 pos
.limit
+ ", cl=" + pos
.contextLimit
+ "} on " +
// Convenience overload: builds the summary text "<source> -> <result>" and
// the pass flag (result == expectedResult), then delegates to the
// summary/pass overload of expectAux().
// NOTE(review): the final argument of the delegated call is not visible in
// this chunk; presumably it is expectedResult.
4850 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4851 const UnicodeString
& source
,
4852 const UnicodeString
& result
,
4853 const UnicodeString
& expectedResult
) {
4854 expectAux(tag
, source
+ " -> " + result
,
4855 result
== expectedResult
,
// Reports one check result: a log line "(<tag>) <summary>" through logln(),
// and a failure line starting "FAIL: (<tag>) " and ending with the prettified
// expected result through dataerrln().
// NOTE(review): the conditional that selects between the two reports
// (presumably on `pass`) and the middle of the failure message are not
// visible in this chunk.
4859 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4860 const UnicodeString
& summary
, UBool pass
,
4861 const UnicodeString
& expectedResult
) {
4863 logln(UnicodeString("(")+tag
+") " + prettify(summary
));
// Failure report; the message is assembled across several source lines.
4865 dataerrln(UnicodeString("FAIL: (")+tag
+") "
4867 + ", expected " + prettify(expectedResult
));
4871 #endif /* #if !UCONFIG_NO_TRANSLITERATION */