1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 1999-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/10/99 aliu Creation.
10 **********************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION
18 #include "unicode/locid.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/normlzr.h"
21 #include "unicode/translit.h"
22 #include "unicode/uchar.h"
23 #include "unicode/unifilt.h"
24 #include "unicode/uniset.h"
25 #include "unicode/ustring.h"
26 #include "unicode/usetiter.h"
27 #include "unicode/uscript.h"
28 #include "unicode/utf16.h"
47 /***********************************************************************
49 HOW TO USE THIS TEST FILE
51 How I developed on two platforms
52 without losing (too much of) my mind
55 1. Add new tests by copying/pasting/changing existing tests. On Java,
56 any public void method named Test...() taking no parameters becomes
57 a test. On C++, you need to modify the header and add a line to
58 the runIndexedTest() dispatch method.
60 2. Make liberal use of the expect() method; it is your friend.
62 3. The tests in this file exactly match those in a sister file on the
63 other side. The two files are:
65 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
66 icu4c: source/test/intltest/transtst.cpp
68 ==> THIS IS THE IMPORTANT PART <==
70 When you add a test in this file, add it in TransliteratorTest.java
71 too. Give it the same name and put it in the same relative place.
72 This makes maintenance a lot simpler for any poor soul who ends up
73 trying to synchronize the tests between icu4j and icu4c.
75 4. If you MUST enter a test that is NOT paralleled in the sister file,
76 then add it in the special non-mirrored section. These are
85 Make sure you document the reason the test is here and not there.
90 ***********************************************************************/
92 // Define character constants thusly to be EBCDIC-friendly
// Named code units for a few ASCII characters. Each value is written as a
// numeric code point instead of a character literal, which is what keeps
// the constants EBCDIC-friendly.
94 LEFT_BRACE
=((UChar
)0x007B), /*{*/
95 PIPE
=((UChar
)0x007C), /*|*/
96 ZERO
=((UChar
)0x0030), /*0*/
97 UPPER_A
=((UChar
)0x0041) /*A*/
// Constructor: initializes the two supplementary-plane code point members,
// DESERET_DEE (U+10414) and DESERET_dee (U+1043C).
100 TransliteratorTest::TransliteratorTest()
101 : DESERET_DEE((UChar32
)0x10414),
102 DESERET_dee((UChar32
)0x1043C)
// Destructor: the body is empty.
106 TransliteratorTest::~TransliteratorTest() {}
// Test dispatch table: each TESTCASE line binds one numeric index to one
// test method of this class. An index with no entry falls to the default
// label, which sets name to the empty string.
// When a test is added to this file it also needs a TESTCASE line here,
// using the next unused index.
109 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
,
110 const char* &name
, char* /*par*/) {
112 TESTCASE(0,TestInstantiation
);
113 TESTCASE(1,TestSimpleRules
);
114 TESTCASE(2,TestRuleBasedInverse
);
115 TESTCASE(3,TestKeyboard
);
116 TESTCASE(4,TestKeyboard2
);
117 TESTCASE(5,TestKeyboard3
);
118 TESTCASE(6,TestArabic
);
119 TESTCASE(7,TestCompoundKana
);
120 TESTCASE(8,TestCompoundHex
);
121 TESTCASE(9,TestFiltering
);
122 TESTCASE(10,TestInlineSet
);
123 TESTCASE(11,TestPatternQuoting
);
124 TESTCASE(12,TestJ277
);
125 TESTCASE(13,TestJ243
);
126 TESTCASE(14,TestJ329
);
127 TESTCASE(15,TestSegments
);
128 TESTCASE(16,TestCursorOffset
);
129 TESTCASE(17,TestArbitraryVariableValues
);
130 TESTCASE(18,TestPositionHandling
);
131 TESTCASE(19,TestHiraganaKatakana
);
132 TESTCASE(20,TestCopyJ476
);
133 TESTCASE(21,TestAnchors
);
134 TESTCASE(22,TestInterIndic
);
135 TESTCASE(23,TestFilterIDs
);
136 TESTCASE(24,TestCaseMap
);
137 TESTCASE(25,TestNameMap
);
138 TESTCASE(26,TestLiberalizedID
);
139 TESTCASE(27,TestCreateInstance
);
140 TESTCASE(28,TestNormalizationTransliterator
);
141 TESTCASE(29,TestCompoundRBT
);
142 TESTCASE(30,TestCompoundFilter
);
143 TESTCASE(31,TestRemove
);
144 TESTCASE(32,TestToRules
);
145 TESTCASE(33,TestContext
);
146 TESTCASE(34,TestSupplemental
);
147 TESTCASE(35,TestQuantifier
);
148 TESTCASE(36,TestSTV
);
149 TESTCASE(37,TestCompoundInverse
);
150 TESTCASE(38,TestNFDChainRBT
);
151 TESTCASE(39,TestNullInverse
);
152 TESTCASE(40,TestAliasInverseID
);
153 TESTCASE(41,TestCompoundInverseID
);
154 TESTCASE(42,TestUndefinedVariable
);
155 TESTCASE(43,TestEmptyContext
);
156 TESTCASE(44,TestCompoundFilterID
);
157 TESTCASE(45,TestPropertySet
);
158 TESTCASE(46,TestNewEngine
);
159 TESTCASE(47,TestQuantifiedSegment
);
160 TESTCASE(48,TestDevanagariLatinRT
);
161 TESTCASE(49,TestTeluguLatinRT
);
162 TESTCASE(50,TestCompoundLatinRT
);
163 TESTCASE(51,TestSanskritLatinRT
);
164 TESTCASE(52,TestLocaleInstantiation
);
165 TESTCASE(53,TestTitleAccents
);
166 TESTCASE(54,TestLocaleResource
);
167 TESTCASE(55,TestParseError
);
168 TESTCASE(56,TestOutputSet
);
169 TESTCASE(57,TestVariableRange
);
170 TESTCASE(58,TestInvalidPostContext
);
171 TESTCASE(59,TestIDForms
);
172 TESTCASE(60,TestToRulesMark
);
173 TESTCASE(61,TestEscape
);
174 TESTCASE(62,TestAnchorMasking
);
175 TESTCASE(63,TestDisplayName
);
176 TESTCASE(64,TestSpecialCases
);
// Index 65 is registered only when the build has file I/O enabled.
177 #if !UCONFIG_NO_FILE_IO
178 TESTCASE(65,TestIncrementalProgress
);
180 TESTCASE(66,TestSurrogateCasing
);
181 TESTCASE(67,TestFunction
);
182 TESTCASE(68,TestInvalidBackRef
);
183 TESTCASE(69,TestMulticharStringSet
);
184 TESTCASE(70,TestUserFunction
);
185 TESTCASE(71,TestAnyX
);
186 TESTCASE(72,TestSourceTargetSet
);
187 TESTCASE(73,TestGurmukhiDevanagari
);
188 TESTCASE(74,TestPatternWhiteSpace
);
189 TESTCASE(75,TestAllCodepoints
);
190 TESTCASE(76,TestBoilerplate
);
191 TESTCASE(77,TestAlternateSyntax
);
192 TESTCASE(78,TestBeginEnd
);
193 TESTCASE(79,TestBeginEndToRules
);
194 TESTCASE(80,TestRegisterAlias
);
195 TESTCASE(81,TestRuleStripping
);
196 TESTCASE(82,TestHalfwidthFullwidth
);
197 TESTCASE(83,TestThai
);
198 TESTCASE(84,TestAny
);
199 default: name
= ""; break;
204 * Make sure every system transliterator can be instantiated.
206 * ALSO test that the result of toRules() for each rule is a valid
207 * rule. Do this here so we don't have to have another test that
208 * instantiates everything as well.
// Walks every ID reported by Transliterator::getAvailableIDs(), compares it
// with getAvailableID(i), instantiates it, and feeds its toRules() output
// back through createFromRules(). Afterwards a deliberately invalid ID is
// passed to createInstance() to exercise the failure path.
// NOTE(review): the embedded line numbering skips, so some source lines
// (branch heads, cleanup) appear to be missing from this listing.
210 void TransliteratorTest::TestInstantiation() {
211 UErrorCode ec
= U_ZERO_ERROR
;
212 StringEnumeration
* avail
= Transliterator::getAvailableIDs(ec
);
213 assertSuccess("getAvailableIDs()", ec
);
214 assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
);
215 int32_t n
= Transliterator::countAvailableIDs();
216 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
217 avail
->count(ec
) == n
);
218 assertSuccess("count()", ec
);
220 for (int32_t i
=0; i
<n
; ++i
) {
// NOTE(review): the snext() result is dereferenced here, before the
// NULL assertion on the address of id just below; confirm that snext()
// cannot return NULL while i < n.
221 const UnicodeString
& id
= *avail
->snext(ec
);
222 if (!assertSuccess("snext()", ec
) ||
223 !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) {
226 UnicodeString id2
= Transliterator::getAvailableID(i
);
227 if (id
.length() < 1) {
228 errln(UnicodeString("FAIL: getAvailableID(") +
229 i
+ ") returned empty string");
233 errln(UnicodeString("FAIL: getAvailableID(") +
234 i
+ ") != getAvailableIDs().snext()");
237 UParseError parseError
;
238 UErrorCode status
= U_ZERO_ERROR
;
239 Transliterator
* t
= Transliterator::createInstance(id
,
240 UTRANS_FORWARD
, parseError
,status
);
242 Transliterator::getDisplayName(id
, name
);
244 #if UCONFIG_NO_BREAK_ITERATION
245 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
246 if (id
.compare((UnicodeString
)"Thai-Latn") != 0 &&
247 id
.compare((UnicodeString
)"Thai-Latin") != 0)
249 dataerrln(UnicodeString("FAIL: Couldn't create ") + id
+
250 /*", parse error " + parseError.code +*/
251 ", line " + parseError
.line
+
252 ", offset " + parseError
.offset
+
253 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
254 ", post-context " +prettify(parseError
.postContext
,TRUE
) +
255 ", Error: " + u_errorName(status
));
256 // When createInstance fails, it deletes the failing
257 // entry from the available ID list. We detect this
258 // here by looking for a change in countAvailableIDs.
259 int32_t nn
= Transliterator::countAvailableIDs();
262 --i
; // Compensate for deleted entry
265 logln(UnicodeString("OK: ") + name
+ " (" + id
+ ")");
// Round-trip: the rules text produced by toRules() must itself be
// accepted by createFromRules().
269 t
->toRules(rules
, TRUE
);
270 Transliterator
*u
= Transliterator::createFromRules("x",
271 rules
, UTRANS_FORWARD
, parseError
,status
);
273 errln(UnicodeString("FAIL: ") + id
+
274 ".createFromRules() => bad rules" +
275 /*", parse error " + parseError.code +*/
276 ", line " + parseError
.line
+
277 ", offset " + parseError
.offset
+
278 ", context " + prettify(parseError
.preContext
, TRUE
) +
279 ", rules: " + prettify(rules
, TRUE
));
// After n iterations the enumeration is expected to be exhausted.
286 assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
);
287 assertSuccess("snext()", ec
);
290 // Now test the failure path
291 UParseError parseError
;
292 UErrorCode status
= U_ZERO_ERROR
;
293 UnicodeString
id("<Not a valid Transliterator ID>");
294 Transliterator
* t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
296 errln("FAIL: " + id
+ " returned a transliterator");
299 logln("OK: Bogus ID handled properly");
// Runs small literal rule sets through expect(), including rules whose
// output places the cursor with a pipe character, then builds a
// transliterator from rules that use variables and a set and checks one
// input/output pair with it.
// NOTE(review): the embedded line numbering skips, so parts of the rule
// strings and the closing lines are not visible in this listing.
303 void TransliteratorTest::TestSimpleRules(void) {
304 /* Example: rules 1. ab>x|y
307 * []|eabcd start - no match, copy e to tranlated buffer
308 * [e]|abcd match rule 1 - copy output & adjust cursor
309 * [ex|y]cd match rule 2 - copy output & adjust cursor
310 * [exz]|d no match, copy d to transliterated buffer
313 expect(UnicodeString("ab>x|y;", "") +
317 /* Another set of rules:
329 expect(UnicodeString("ab>x|yzacw;") +
337 UErrorCode status
= U_ZERO_ERROR
;
338 UParseError parseError
;
339 Transliterator
*t
= Transliterator::createFromRules(
341 UnicodeString("$dummy=").append((UChar
)0xE100) +
343 "$vowel=[aeiouAEIOU];"
345 "$vowel } $lu > '!';"
350 UTRANS_FORWARD
, parseError
,
352 if (U_FAILURE(status
)) {
353 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status
));
356 expect(*t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
361 * Test inline set syntax and set variable syntax.
// Checks rules that use sets written inline (a property set and a
// character range) and rules whose sets are built from variables
// ($alphanumeric, $special).
363 void TransliteratorTest::TestInlineSet(void) {
364 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
365 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
367 expect(UnicodeString(
370 "$alphanumeric = [$digit $alpha];" // ***
371 "$special = [^$alphanumeric];" // ***
372 "$alphanumeric > '-';"
373 "$special > '*';", ""),
375 "thx-1138", "---*----");
379 * Create some inverses and confirm that they work. We have to be
380 * careful how we do this, since the inverses will not be true
381 * inverses -- we can't throw any random string at the composition
382 * of the transliterators and expect the identity function. F x
383 * F' != I. However, if we are careful about the input, we will
384 * get the expected results.
// Builds a forward and a reverse transliterator from the same RULES text
// and checks every DATA pair in both directions: fwd maps DATA[i] to
// DATA[i+1], rev maps DATA[i+1] back to DATA[i].
386 void TransliteratorTest::TestRuleBasedInverse(void) {
387 UnicodeString RULES
=
388 UnicodeString("abc>zyx;") +
406 const char* DATA
[] = {
407 // Careful here -- random strings will not work. If we keep
408 // the left side to the domain and the right side to the range
409 // we will be okay though (left, abc; right xyz).
411 "abcacab", "zyxxxyy",
415 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
417 UErrorCode status
= U_ZERO_ERROR
;
418 UParseError parseError
;
// Both constructions share one status, so the single check below
// covers a failure in either direction.
419 Transliterator
*fwd
= Transliterator::createFromRules("<ID>", RULES
,
420 UTRANS_FORWARD
, parseError
, status
);
421 Transliterator
*rev
= Transliterator::createFromRules("<ID>", RULES
,
422 UTRANS_REVERSE
, parseError
, status
);
423 if (U_FAILURE(status
)) {
424 errln("FAIL: RBT constructor failed");
427 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
428 expect(*fwd
, DATA
[i
], DATA
[i
+1]);
429 expect(*rev
, DATA
[i
+1], DATA
[i
]);
436 * Basic test of keyboard.
// Builds a rule-based transliterator and drives it through keyboardAux()
// with DATA, an array of (typed text, expected buffer) pairs.
438 void TransliteratorTest::TestKeyboard(void) {
439 UParseError parseError
;
440 UErrorCode status
= U_ZERO_ERROR
;
441 Transliterator
*t
= Transliterator::createFromRules("<ID>",
442 UnicodeString("psch>Y;")
446 UTRANS_FORWARD
, parseError
,
448 if (U_FAILURE(status
)) {
449 errln("FAIL: RBT constructor failed");
452 const char* DATA
[] = {
460 0, "AycAY", // null means finishKeyboardTransliteration
463 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
468 * Basic test of keyboard with cursor.
// Same shape as TestKeyboard(): builds a rule-based transliterator and
// passes (typed text, expected buffer) pairs to keyboardAux(). Several
// expected values carry a note that they were modified for rollback.
470 void TransliteratorTest::TestKeyboard2(void) {
471 UParseError parseError
;
472 UErrorCode status
= U_ZERO_ERROR
;
473 Transliterator
*t
= Transliterator::createFromRules("<ID>",
474 UnicodeString("ych>Y;")
478 UTRANS_FORWARD
, parseError
,
480 if (U_FAILURE(status
)) {
481 errln("FAIL: RBT constructor failed");
484 const char* DATA
[] = {
488 "s", "Aps", // modified for rollback - "Ay",
489 "c", "Apsc", // modified for rollback - "Ayc",
492 "s", "AycAps", // modified for rollback - "AycAy",
493 "c", "AycApsc", // modified for rollback - "AycAyc",
495 0, "AycAY", // null means finishKeyboardTransliteration
498 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
503 * Test keyboard transliteration with back-replacement.
// Keyboard test for rules that re-examine their own output: the first rule
// leaves the cursor before the replacement text so a later rule can match
// it again. DATA pairs are passed to keyboardAux().
505 void TransliteratorTest::TestKeyboard3(void) {
506 // We want th>z but t>y. Furthermore, during keyboard
507 // transliteration we want t>y then yh>z if t, then h are
509 UnicodeString
RULES("t>|y;"
512 const char* DATA
[] = {
513 // Column 1: characters to add to buffer (as if typed)
514 // Column 2: expected appearance of buffer after
515 // keyboard xliteration.
518 "t", "abt", // modified for rollback - "aby",
520 "t", "abyct", // modified for rollback - "abycy",
522 0, "abycz", // null means finishKeyboardTransliteration
525 UParseError parseError
;
526 UErrorCode status
= U_ZERO_ERROR
;
527 Transliterator
*t
= Transliterator::createFromRules("<ID>", RULES
, UTRANS_FORWARD
, parseError
, status
);
528 if (U_FAILURE(status
)) {
529 errln("FAIL: RBT constructor failed");
532 keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
));
// Shared driver for the keyboard tests.
// t           transliterator under test
// DATA        pairs of (text to insert, expected buffer contents); per the
//             comments in the callers, a null first element means the
//             pending transliteration is to be finished
// DATA_length number of elements in DATA (two per step)
// Each step either calls the incremental transliterate() overload with
// DATA[i] or calls finishTransliteration(). A step passes only when the
// buffer equals DATA[i+1] and status is still a success code.
// NOTE(review): the branch that chooses between the two calls is not
// visible in this listing (embedded numbering skips there).
536 void TransliteratorTest::keyboardAux(const Transliterator
& t
,
537 const char* DATA
[], int32_t DATA_length
) {
538 UErrorCode status
= U_ZERO_ERROR
;
539 UTransPosition index
={0, 0, 0, 0};
541 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
547 t
.transliterate(s
, index
, DATA
[i
], status
);
550 t
.finishTransliteration(s
, index
);
552 // Show the start index '{' and the cursor '|'
// The buffer is cut into three pieces at contextStart and start.
553 UnicodeString a
, b
, c
;
554 s
.extractBetween(0, index
.contextStart
, a
);
555 s
.extractBetween(index
.contextStart
, index
.start
, b
);
556 s
.extractBetween(index
.start
, s
.length(), c
);
558 append((UChar
)LEFT_BRACE
).
562 if (s
== DATA
[i
+1] && U_SUCCESS(status
)) {
565 errln(UnicodeString("FAIL: ") + log
+ ", expected " + DATA
[i
+1]);
// Disabled test: every body line visible in this listing is commented out
// (see the note on the first body line). The commented code would build
// an Arabic string from ar_raw and check Latin-Arabic against it.
570 void TransliteratorTest::TestArabic(void) {
571 // Test disabled for 2.0 until new Arabic transliterator can be written.
573 // const char* DATA[] = {
574 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
575 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
576 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
577 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
578 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
579 // "\u062c\u0645\u064a\u0644\u0629",
583 // UChar ar_raw[] = {
584 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
585 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
586 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
588 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
589 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
591 // UnicodeString ar(ar_raw);
592 // UErrorCode status=U_ZERO_ERROR;
593 // UParseError parseError;
594 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
596 // errln("FAIL: createInstance failed");
599 // expect(*t, "Arabic", ar);
604 * Compose the Kana transliterator forward and reverse and try
605 * some strings that should come out unchanged.
// Creates the compound ID Latin-Hiragana;Hiragana-Latin and checks that the
// input aaaaa comes back unchanged. A creation failure is reported through
// dataerrln() together with the error name.
607 void TransliteratorTest::TestCompoundKana(void) {
608 UParseError parseError
;
609 UErrorCode status
= U_ZERO_ERROR
;
610 Transliterator
* t
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
);
612 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status
));
614 expect(*t
, "aaaaa", "aaaaa");
620 * Compose the hex transliterators forward and reverse.
// Creates Any-Hex (a) and Hex-Any (b), checks each on its own, then wraps
// them in CompoundTransliterator objects in both orders (ab and ba).
// For ba the input is first escaped with a, and the compound is expected
// to return that escaped string unchanged.
622 void TransliteratorTest::TestCompoundHex(void) {
623 UParseError parseError
;
624 UErrorCode status
= U_ZERO_ERROR
;
625 Transliterator
* a
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
626 Transliterator
* b
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
);
// The arrays only store the two pointers; the null check follows below.
627 Transliterator
* transab
[] = { a
, b
};
628 Transliterator
* transba
[] = { b
, a
};
629 if (a
== 0 || b
== 0) {
630 errln("FAIL: construction failed");
635 // Do some basic tests of a
636 expect(*a
, "01", UnicodeString("\\u0030\\u0031", ""));
637 // Do some basic tests of b
638 expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01");
640 Transliterator
* ab
= new CompoundTransliterator(transab
, 2);
641 UnicodeString
s("abcde", "");
644 UnicodeString
str(s
);
645 a
->transliterate(str
);
646 Transliterator
* ba
= new CompoundTransliterator(transba
, 2);
647 expect(*ba
, str
, str
);
// Only the address of this variable is used: TestFilter::getDynamicClassID()
// returns it as the class ID.
655 int gTestFilterClassID
= 0;
657 * Used by TestFiltering().
// UnicodeFilter subclass for TestFiltering(). contains() accepts every code
// point except U+0063 (the letter c); clone() copy-constructs a new
// TestFilter; addMatchSetTo() has an empty body.
659 class TestFilter
: public UnicodeFilter
{
660 virtual UnicodeFunctor
* clone() const {
661 return new TestFilter(*this);
663 virtual UBool
contains(UChar32 c
) const {
664 return c
!= (UChar
)0x0063 /*c*/;
667 virtual UnicodeString
& toPattern(UnicodeString
& result
,
668 UBool
/*escapeUnprintable*/) const {
671 virtual UBool
matchesIndexValue(uint8_t /*v*/) const {
674 virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {}
// The class ID is the address of the file-level gTestFilterClassID.
676 UClassID
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; }
680 * Do some basic tests of filtering.
// Installs a TestFilter on an Any-Hex transliterator and transliterates
// abcde. The expected text has every letter escaped except c, which the
// filter excludes. Afterwards orphanFilter() is expected to hand back a
// filter object.
682 void TransliteratorTest::TestFiltering(void) {
683 UParseError parseError
;
684 UErrorCode status
= U_ZERO_ERROR
;
685 Transliterator
* hex
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
687 errln("FAIL: createInstance(Any-Hex) failed");
690 hex
->adoptFilter(new TestFilter());
691 UnicodeString
s("abcde");
692 hex
->transliterate(s
);
693 UnicodeString
exp("\\u0061\\u0062c\\u0064\\u0065", "");
695 logln(UnicodeString("Ok: \"") + exp
+ "\"");
// NOTE(review): the FAIL text below is emitted with logln(), the same
// call used for the Ok text, while other failures in this function use
// errln(); confirm whether a mismatch here is meant to fail the test.
697 logln(UnicodeString("FAIL: \"") + s
+ "\", wanted \"" + exp
+ "\"");
700 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
701 UnicodeFilter
*f
= hex
->orphanFilter();
703 errln("FAIL: orphanFilter() should get a UnicodeFilter");
// Checks the start (^) and end ($) anchors in rules, both written directly
// and reached through a set variable that contains the end anchor.
// NOTE(review): most input and expected strings are not visible in this
// listing (embedded numbering skips).
713 void TransliteratorTest::TestAnchors(void) {
714 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
717 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
720 expect(UnicodeString("^ab > 01 ;"
728 expect(UnicodeString("$s = [z$] ;"
735 "abzababbabxzabxabx",
740 * Test pattern quoting and escape mechanisms.
// Builds a transliterator from a rule whose output is a quoted literal and
// checks it with expect(). DATA is laid out as (rules, input, expected
// output) triples.
742 void TransliteratorTest::TestPatternQuoting(void) {
744 // Each item is <rules>, <input>, <expected output>
745 const UnicodeString DATA
[] = {
746 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
747 UnicodeString(UChar(0x4E01)),
// NOTE(review): the loop bound is the literal 3 with a stride of 3, so
// exactly one triple is processed; other tests in this file derive the
// bound from UPRV_LENGTHOF(DATA).
751 for (int32_t i
=0; i
<3; i
+=3) {
752 logln(UnicodeString("Pattern: ") + prettify(DATA
[i
]));
753 UParseError parseError
;
754 UErrorCode status
= U_ZERO_ERROR
;
755 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
756 if (U_FAILURE(status
)) {
757 errln("RBT constructor failed");
759 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
766 * Regression test for bugs found in Greek transliteration.
// Greek regression checks. Two short Greek strings (sigma upsilon nu, and
// sigma alpha upsilon nu) must come out as syn and saun, first with the
// compound Greek-Latin ID and then with a reduced rule set built in the
// reverse direction. When formatting support is compiled in, the month
// names from DateFormatSymbols are transliterated as well and their
// upper/lower pattern in the first two characters is compared.
768 void TransliteratorTest::TestJ277(void) {
769 UErrorCode status
= U_ZERO_ERROR
;
770 UParseError parseError
;
771 Transliterator
*gl
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
);
773 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status
));
778 UChar upsilon
= 0x3C5;
780 // UChar PHI = 0x3A6;
782 // UChar omega = 0x3C9;
783 // UChar omicron = 0x3BF;
784 // UChar epsilon = 0x3B5;
786 // sigma upsilon nu -> syn
788 syn
.append(sigma
).append(upsilon
).append(nu
);
789 expect(*gl
, syn
, "syn");
791 // sigma alpha upsilon nu -> saun
793 sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
);
794 expect(*gl
, sayn
, "saun");
796 // Again, using a smaller rule set
801 "$ypsilon = \\u03C5;"
802 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
805 "u <> $vowel { $ypsilon;"
809 Transliterator
*mini
= Transliterator::createFromRules("mini", rules
, UTRANS_REVERSE
, parseError
, status
);
810 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
811 expect(*mini
, syn
, "syn");
812 expect(*mini
, sayn
, "saun");
816 #if !UCONFIG_NO_FORMATTING
817 // Transliterate the Greek locale data
819 DateFormatSymbols
syms(el
, status
);
// NOTE(review): the message below repeats the transliterator wording,
// although the call that just ran is the DateFormatSymbols constructor.
820 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
822 const UnicodeString
* data
= syms
.getMonths(count
);
823 for (i
=0; i
<count
; ++i
) {
824 if (data
[i
].length() == 0) {
827 UnicodeString
out(data
[i
]);
828 gl
->transliterate(out
);
// If the source starts with an uppercase then a lowercase character,
// the transliterated text is expected to start the same way.
830 if (data
[i
].length() >= 2 && out
.length() >= 2 &&
831 u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) {
832 if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) {
837 logln(prettify(data
[i
] + " -> " + out
));
839 errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
));
848 * Prefix, suffix support in hex transliterators
// Checks that the default Hex-Any transliterator decodes several escape
// prefixes in one input string. The custom-pattern checks further down are
// commented out.
850 void TransliteratorTest::TestJ243(void) {
851 UErrorCode ec
= U_ZERO_ERROR
;
853 // Test default Hex-Any, which should handle
854 // \u, \U, u+, and U+
855 Transliterator
*hex
=
856 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
);
857 if (assertSuccess("getInstance", ec
)) {
858 expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
862 // // Try a custom Hex-Unicode
863 // // \uXXXX and &#xXXXX;
864 // ec = U_ZERO_ERROR;
865 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
866 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
867 // "abcd5fx0123");
868 // // Try custom Any-Hex (default is tested elsewhere)
869 // ec = U_ZERO_ERROR;
870 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
871 // expect(hex3, "012", "012");
875 * Parsers need better syntax error messages.
// Passes rule strings with and without a syntax error to createFromRules()
// and checks that a failure status is produced exactly when the table
// entry says the rule contains errors. The parse error line, offset and
// context are added to the description that gets logged.
877 void TransliteratorTest::TestJ329(void) {
879 struct { UBool containsErrors
; const char* rule
; } DATA
[] = {
880 { FALSE
, "a > b; c > d" },
881 { TRUE
, "a > b; no operator; c > d" },
883 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
885 for (int32_t i
=0; i
<DATA_length
; ++i
) {
886 UErrorCode status
= U_ZERO_ERROR
;
887 UParseError parseError
;
888 Transliterator
*rbt
= Transliterator::createFromRules("<ID>",
893 UBool gotError
= U_FAILURE(status
);
894 UnicodeString
desc(DATA
[i
].rule
);
895 desc
.append(gotError
? " -> error" : " -> no error");
897 desc
= desc
+ ", ParseError code=" + u_errorName(status
) +
898 " line=" + parseError
.line
+
899 " offset=" + parseError
.offset
+
900 " context=" + parseError
.preContext
;
// Pass when the observed outcome matches the expectation in the table.
902 if (gotError
== DATA
[i
].containsErrors
) {
903 logln(UnicodeString("Ok: ") + desc
);
905 errln(UnicodeString("FAIL: ") + desc
);
912 * Test segments and segment references.
// Checks rules that capture segments with parentheses and refer back to
// them as $1, $2, $3 in the output. DATA is a flat array of (rules, input,
// expected output) triples, walked with a stride of 3.
914 void TransliteratorTest::TestSegments(void) {
916 // Each item is <rules>, <input>, <expected output>
917 UnicodeString DATA
[] = {
918 "([a-z]) '.' ([0-9]) > $2 '-' $1",
923 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
927 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
929 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
930 logln("Pattern: " + prettify(DATA
[i
]));
931 UParseError parseError
;
932 UErrorCode status
= U_ZERO_ERROR
;
933 Transliterator
*t
= Transliterator::createFromRules("ID", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
934 if (U_FAILURE(status
)) {
935 errln("FAIL: RBT constructor");
937 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
944 * Test cursor positioning outside of the key
// Checks rules whose output positions the cursor with the pipe and at-sign
// markers. DATA is a flat array of (rules, input, expected output)
// triples, walked with a stride of 3.
946 void TransliteratorTest::TestCursorOffset(void) {
948 // Each item is <rules>, <input>, <expected output>
949 UnicodeString DATA
[] = {
950 "pre {alpha} post > | @ ALPHA ;"
952 "pre {beta} post > BETA @@ | ;"
955 "prealphapost prebetapost",
957 "prbetaxyz preBETApost",
959 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
961 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
962 logln("Pattern: " + prettify(DATA
[i
]));
963 UParseError parseError
;
964 UErrorCode status
= U_ZERO_ERROR
;
965 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
966 if (U_FAILURE(status
)) {
967 errln("FAIL: RBT constructor");
969 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
976 * Test zero length and > 1 char length variable values. Test
977 * use of variable refs in UnicodeSets.
// Table-driven check of rule variables. DATA is a flat array of (rules,
// input, expected output) triples, walked with a stride of 3; each triple
// gets its own transliterator built with createFromRules().
// NOTE(review): the table entries themselves are not visible in this
// listing (embedded numbering skips).
979 void TransliteratorTest::TestArbitraryVariableValues(void) {
981 // Each item is <rules>, <input>, <expected output>
982 UnicodeString DATA
[] = {
1000 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1002 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1003 logln("Pattern: " + prettify(DATA
[i
]));
1004 UParseError parseError
;
1005 UErrorCode status
= U_ZERO_ERROR
;
1006 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1007 if (U_FAILURE(status
)) {
1008 errln("FAIL: RBT constructor");
1010 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
1017 * Confirm that the contextStart, contextLimit, start, and limit
1018 * behave correctly. J474.
// For each (rules, input, expected output) triple in DATA, loads a
// UTransPosition from four consecutive POS entries (contextStart,
// contextLimit, start, limit), runs the incremental transliterate()
// overload followed by finishTransliteration(), and passes the outcome to
// expectAux().
1020 void TransliteratorTest::TestPositionHandling(void) {
1021 // Array of 3n items
1022 // Each item is <rules>, <input>, <expected output>
1023 const char* DATA
[] = {
1024 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1025 "xtat txtb", // pos 0,9,0,9
1028 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1029 "xtat txtb", // pos 2,9,3,8
1032 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1033 "xtat txtb", // pos 3,8,3,8
1037 // Array of 4n positions -- these go with the DATA array
1038 // They are: contextStart, contextLimit, start, limit
// n counts triples, so DATA is indexed with 3*i and POS with 4*i.
1045 int32_t n
= UPRV_LENGTHOF(DATA
) / 3;
1046 for (int32_t i
=0; i
<n
; i
++) {
1047 UErrorCode status
= U_ZERO_ERROR
;
1048 UParseError parseError
;
1049 Transliterator
*t
= Transliterator::createFromRules("<ID>",
1050 DATA
[3*i
], UTRANS_FORWARD
, parseError
, status
);
1051 if (U_FAILURE(status
)) {
1053 errln("FAIL: RBT constructor");
1057 pos
.contextStart
= POS
[4*i
];
1058 pos
.contextLimit
= POS
[4*i
+1];
1059 pos
.start
= POS
[4*i
+2];
1060 pos
.limit
= POS
[4*i
+3];
1061 UnicodeString
rsource(DATA
[3*i
+1]);
1062 t
->transliterate(rsource
, pos
, status
);
1063 if (U_FAILURE(status
)) {
1065 errln("FAIL: transliterate");
1068 t
->finishTransliteration(rsource
, pos
);
1069 expectAux(DATA
[3*i
],
1078 * Test the Hiragana-Katakana transliterator.
// Creates Hiragana-Katakana (hk) and Katakana-Hiragana (kh) and checks
// them against DATA, a flat array of (direction tag, Hiragana text,
// Katakana text) triples. The case labels below correspond to the tag
// letters h, k and b, which per their comments select hk, kh or both.
// The escaped text is decoded with CharsToUnicodeString().
1080 void TransliteratorTest::TestHiraganaKatakana(void) {
1081 UParseError parseError
;
1082 UErrorCode status
= U_ZERO_ERROR
;
1083 Transliterator
* hk
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
);
1084 Transliterator
* kh
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
);
1085 if (hk
== 0 || kh
== 0) {
1086 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1092 // Array of 3n items
1093 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1094 const char* DATA
[] = {
1096 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1097 "\\u30A2\\u30F8\\u30F2\\u30B0",
1100 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1101 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1103 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1105 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1106 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
1107 UnicodeString k
= CharsToUnicodeString(DATA
[i
+2]);
1109 case 0x68: //'h': // Hiragana-Katakana
1112 case 0x6B: //'k': // Katakana-Hiragana
1115 case 0x62: //'b': // both
1126 * Test cloning / copy constructor of RBT.
// Builds a rule-based transliterator t1, clones it into t2, and checks that
// both give the same result for the same input. Per the comment at the top
// of the body, the point is that the clone keeps working after the
// original is destroyed.
// NOTE(review): the destruction of t1 is not visible in this listing
// (embedded numbering skips between the two expect() calls).
1128 void TransliteratorTest::TestCopyJ476(void) {
1129 // The real test here is what happens when the destructors are
1130 // called. So we let one object get destructed, and check to
1131 // see that its copy still works.
1132 Transliterator
*t2
= 0;
1134 UParseError parseError
;
1135 UErrorCode status
= U_ZERO_ERROR
;
1136 Transliterator
*t1
= Transliterator::createFromRules("t1",
1137 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD
, parseError
, status
);
1138 if (U_FAILURE(status
)) {
1139 errln("FAIL: RBT constructor");
1142 t2
= t1
->clone(); // Call copy constructor under the covers.
1143 expect(*t1
, "abcfoofoo", "ABcbar");
1146 expect(*t2
, "abcfoofoo", "ABcbar");
1151 * Test inter-Indic transliterators. These are composed.
1152 * ICU4C Jitterbug 483.
// Creates the Devanagari-Gujarati transliterator, reads back its getID()
// value, and checks that three Devanagari characters map to the Gujarati
// characters in guj.
1154 void TransliteratorTest::TestInterIndic(void) {
1155 UnicodeString
ID("Devanagari-Gujarati", "");
1156 UErrorCode status
= U_ZERO_ERROR
;
1157 UParseError parseError
;
1158 Transliterator
* dg
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1160 dataerrln("FAIL: createInstance(" + ID
+ ") returned NULL - " + u_errorName(status
));
1163 UnicodeString id
= dg
->getID();
1165 errln("FAIL: createInstance(" + ID
+ ")->getID() => " + id
);
1167 UnicodeString dev
= CharsToUnicodeString("\\u0901\\u090B\\u0925");
1168 UnicodeString guj
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1169 expect(*dg
, dev
, guj
);
1174 * Test filter syntax in IDs. (J918)
// Checks IDs that carry a filter. For each table entry the transliterator
// is created from the ID, run on the sample input, and its getID() is
// compared with the ID it was created from. Then createInverse() is
// called and the ID of the inverse is compared with the expected one.
1176 void TransliteratorTest::TestFilterIDs(void) {
1177 // Array of 4n strings:
1178 // <id>, <inverse id>, <input>, <expected output>
1179 const char* DATA
[] = {
1180 "[aeiou]Any-Hex", // ID
1181 "[aeiou]Hex-Any", // expected inverse ID
1183 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1185 "[aeiou]Any-Hex;[^5]Hex-Any",
1186 "[^5]Any-Hex;[aeiou]Hex-Any",
1195 enum { DATA_length
= UPRV_LENGTHOF(DATA
) };
// Stride of 4: one entry per (id, inverse id, input, expected output).
1197 for (int i
=0; i
<DATA_length
; i
+=4) {
1198 UnicodeString
ID(DATA
[i
], "");
1199 UnicodeString
uID(DATA
[i
+1], "");
1200 UnicodeString
data2(DATA
[i
+2], "");
1201 UnicodeString
data3(DATA
[i
+3], "");
1202 UParseError parseError
;
1203 UErrorCode status
= U_ZERO_ERROR
;
1204 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1206 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1209 expect(*t
, data2
, data3
);
1212 if (ID
!= t
->getID()) {
1213 errln("FAIL: createInstance(" + ID
+ ").getID() => " +
1217 // Check the inverse
1218 Transliterator
*u
= t
->createInverse(status
);
1220 errln("FAIL: " + ID
+ ".createInverse() returned NULL");
1221 } else if (u
->getID() != uID
) {
1222 errln("FAIL: " + ID
+ ".createInverse().getID() => " +
1223 u
->getID() + ", expected " + uID
);
1232 * Test the case mapping transliterators.
// Creates Any-Upper, Any-Lower and Any-Title, each with a filter that
// excludes the letters x, y and z in both cases, and checks one sentence
// per transliterator. In the expected strings the excluded letters keep
// the case they had in the input.
1234 void TransliteratorTest::TestCaseMap(void) {
1235 UParseError parseError
;
1236 UErrorCode status
= U_ZERO_ERROR
;
1237 Transliterator
* toUpper
=
1238 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1239 Transliterator
* toLower
=
1240 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1241 Transliterator
* toTitle
=
1242 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1243 if (toUpper
==0 || toLower
==0 || toTitle
==0) {
1244 errln("FAIL: createInstance returned NULL");
1251 expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.",
1252 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1253 expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1254 "the quick brown foX jumped over the lazY dogs.");
1255 expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.",
1256 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1264 * Test the name mapping transliterators.
// Checks the character-name transliterators. Any-Name is created with a
// filter that excludes a, b and c, so those letters stay literal in the
// expected output. Name-Any is given input that mixes well-formed name
// escapes with malformed ones, and the expected output leaves the
// malformed ones as text. A compound Any-Name;Name-Any instance is created
// afterwards.
1266 void TransliteratorTest::TestNameMap(void) {
1267 UParseError parseError
;
1268 UErrorCode status
= U_ZERO_ERROR
;
1269 Transliterator
* uni2name
=
1270 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
);
1271 Transliterator
* name2uni
=
1272 Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
);
1273 if (uni2name
==0 || name2uni
==0) {
1274 errln("FAIL: createInstance returned NULL");
1280 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1281 expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1282 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1283 expect(*name2uni
, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1284 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1291 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
);
1293 errln("FAIL: createInstance returned NULL");
1298 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1299 UnicodeString s
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1305 * Test liberalized ID syntax. 1006c
// Creates transliterators from IDs written with unusual case, surrounding
// whitespace or an inline filter, and compares getID() of the result with the
// expected ID.
//
// DATA is read three entries at a time:
//   [i]   the ID passed to createInstance()
//   [i+1] the expected getID() value, or NULL to skip the comparison
//   [i+2] a short description used in the log and failure messages
1307 void TransliteratorTest::TestLiberalizedID(void) {
1308 // Some test cases have an expected getID() value of NULL. This
1309 // means I have disabled the test case for now. This stuff is
1310 // still under development, and I haven't decided whether to make
1311 // getID() return canonical case yet. It will all get rewritten
1312 // with the move to Source-Target/Variant IDs anyway. [aliu]
1313 const char* DATA
[] = {
1314 "latin-greek", NULL
/*"Latin-Greek"*/, "case insensitivity",
1315 " Null ", "Null", "whitespace",
1316 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1317 " null ; latin-greek ", NULL
/*"Null;Latin-Greek"*/, "compound whitespace",
1319 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1320 UParseError parseError
;
1321 UErrorCode status
= U_ZERO_ERROR
;
// Step by 3: one table row per iteration.
1322 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1323 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1325 dataerrln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1326 " cannot create ID \"" + DATA
[i
] + "\" - " + u_errorName(status
));
1330 exp
= UnicodeString(DATA
[i
+1], "");
1332 // Don't worry about getID() if the expected char*
1333 // is NULL -- see above.
// An empty exp therefore counts as a pass without looking at getID().
1334 if (exp
.length() == 0 || exp
== t
->getID()) {
1335 logln(UnicodeString("Ok: ") + DATA
[i
+2] +
1336 " create ID \"" + DATA
[i
] + "\" => \"" +
1339 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1340 " create ID \"" + DATA
[i
] + "\" => \"" +
1341 t
->getID() + "\", exp \"" + exp
+ "\"");
1348 /* test for Jitterbug 912 */
// Table-driven check of Transliterator::createInstance(id, direction, ...).
//
// DATA is read three entries at a time: the ID, the direction (FORWARD or
// REVERSE) and the expected ID of the created object, with "" meaning that
// creation is expected to fail. The loop stops at the first null ID entry.
1349 void TransliteratorTest::TestCreateInstance(){
// The direction column is compared with FORWARD by pointer value below, so
// rows must use these two constants rather than separate string literals.
1350 const char* FORWARD
= "F";
1351 const char* REVERSE
= "R";
1352 const char* DATA
[] = {
1354 // Column 2: direction
1355 // Column 3: expected ID, or "" if expect failure
1356 "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912
1358 // JB#2689: bad compound causes crash
1359 "InvalidSource-InvalidTarget", FORWARD
, "",
1360 "InvalidSource-InvalidTarget", REVERSE
, "",
1361 "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "",
1362 "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "",
1363 "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "",
1364 "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "",
1369 for (int32_t i
=0; DATA
[i
]; i
+=3) {
// A fresh error code per row keeps one failing row from affecting the next.
1371 UErrorCode ec
= U_ZERO_ERROR
;
1372 UnicodeString
id(DATA
[i
]);
1373 UTransDirection dir
= (DATA
[i
+1]==FORWARD
)?
1374 UTRANS_FORWARD
:UTRANS_REVERSE
;
1375 UnicodeString
expID(DATA
[i
+2]);
1377 Transliterator::createInstance(id
,dir
,err
,ec
);
1378 UnicodeString newID
;
1382 UBool ok
= (newID
== expID
);
1384 newID
= u_errorName(ec
);
1387 logln((UnicodeString
)"Ok: createInstance(" +
1388 id
+ "," + DATA
[i
+1] + ") => " + newID
);
1390 dataerrln((UnicodeString
)"FAIL: createInstance(" +
1391 id
+ "," + DATA
[i
+1] + ") => " + newID
+
1392 ", expected " + expID
);
1399 * Test the normalization transliterator.
// Runs the normalization transliterators over two tables of
// (input, decomposed, composed) triples:
//   CANON  is checked with NFD (column 2) and NFC (column 3);
//   COMPAT is checked with NFKD (column 2) and NFKC (column 3).
// Both loops stop at the first null input entry. A final case checks the
// compound "NFD; [x]Remove".
1401 void TransliteratorTest::TestNormalizationTransliterator() {
1402 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1403 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1404 const char* CANON
[] = {
1405 // Input Decomposed Composed
1406 "cat", "cat", "cat" ,
1407 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1409 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1410 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1412 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1413 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1414 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1416 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1417 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1419 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1420 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1421 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1423 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1424 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1426 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1427 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1429 "Henry IV", "Henry IV", "Henry IV" ,
1430 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1432 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1433 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1434 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1435 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1436 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1438 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1442 const char* COMPAT
[] = {
1443 // Input Decomposed Composed
1444 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1446 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1447 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1449 "Henry IV", "Henry IV", "Henry IV" ,
1450 "Henry \\u2163", "Henry IV", "Henry IV" ,
1452 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1453 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1455 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1460 UParseError parseError
;
1461 UErrorCode status
= U_ZERO_ERROR
;
1462 Transliterator
* NFD
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
);
1463 Transliterator
* NFC
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
);
1465 dataerrln("FAIL: createInstance failed: %s", u_errorName(status
));
// Canonical forms: each row is fed to both NFD and NFC.
1470 for (i
=0; CANON
[i
]; i
+=3) {
1471 UnicodeString in
= CharsToUnicodeString(CANON
[i
]);
1472 UnicodeString expd
= CharsToUnicodeString(CANON
[i
+1]);
1473 UnicodeString expc
= CharsToUnicodeString(CANON
[i
+2]);
1474 expect(*NFD
, in
, expd
);
1475 expect(*NFC
, in
, expc
);
// Compatibility forms: same table layout, checked with NFKD and NFKC.
1480 Transliterator
* NFKD
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
);
1481 Transliterator
* NFKC
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
);
1482 if (!NFKD
|| !NFKC
) {
1483 dataerrln("FAIL: createInstance failed");
1488 for (i
=0; COMPAT
[i
]; i
+=3) {
1489 UnicodeString in
= CharsToUnicodeString(COMPAT
[i
]);
1490 UnicodeString expkd
= CharsToUnicodeString(COMPAT
[i
+1]);
1491 UnicodeString expkc
= CharsToUnicodeString(COMPAT
[i
+2]);
1492 expect(*NFKD
, in
, expkd
);
1493 expect(*NFKC
, in
, expkc
);
// Compound case: U+010D followed by x must come out as c + U+030C, i.e. the
// first character decomposed and the x removed.
1499 status
= U_ZERO_ERROR
;
1500 Transliterator
*t
= Transliterator::createInstance("NFD; [x]Remove",
1504 errln("FAIL: createInstance failed");
1506 expect(*t
, CharsToUnicodeString("\\u010dx"),
1507 CharsToUnicodeString("c\\u030C"));
1512 * Test compound RBT rules.
// Tests rule sets that contain "::ID" blocks and compound IDs:
//  1. create a transliterator from such rules, transliterate a sample and
//     compare toRules() with the original rule text;
//  2. create "Greek-Latin; Latin-Cyrillic" by ID and check its toRules();
//  3. feed that toRules() output back into createFromRules() and check
//     toRules() again (round trip);
//  4. create an ID of the form Foo(Bar), check getID(), then check the ID of
//     its inverse.
1514 void TransliteratorTest::TestCompoundRBT(void) {
1515 // Careful with spacing and ';' here: Phrase this exactly
1516 // as toRules() is going to return it. If toRules() changes
1517 // with regard to spacing or ';', then adjust this string.
1518 UnicodeString
rule("::Hex-Any;\n"
1522 "::[^t]Any-Upper;", "");
1523 UParseError parseError
;
1524 UErrorCode status
= U_ZERO_ERROR
;
1525 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
);
1527 errln("FAIL: createFromRules failed");
1530 expect(*t
, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1531 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1533 t
->toRules(r
, TRUE
);
1535 logln((UnicodeString
)"OK: toRules() => " + r
);
1537 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1538 ", expected " + rule
);
// Step 2: a compound created by ID should report its parts as ::ID lines.
1543 t
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
);
1545 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1548 UnicodeString
exp("::Greek-Latin;\n::Latin-Cyrillic;");
1549 t
->toRules(r
, TRUE
);
1551 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1552 ", expected " + exp
);
1554 logln((UnicodeString
)"OK: toRules() => " + r
);
1558 // Round trip the result of toRules
1559 t
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
);
1561 errln("FAIL: createFromRules #2 failed");
1564 logln((UnicodeString
)"OK: createFromRules(" + r
+ ") succeeded");
1567 // Test toRules again
1568 t
->toRules(r
, TRUE
);
1570 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1571 ", expected " + exp
);
1573 logln((UnicodeString
)"OK: toRules() => " + r
);
1578 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1579 // to what the regenerated ID will look like.
1580 UnicodeString
id("Upper(Lower);(NFKC)", "");
1581 t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
1583 errln("FAIL: createInstance #2 failed");
1586 if (t
->getID() == id
) {
1587 logln((UnicodeString
)"OK: created " + id
);
1589 errln((UnicodeString
)"FAIL: createInstance(" + id
+
1590 ").getID() => " + t
->getID());
1593 Transliterator
*u
= t
->createInverse(status
);
1595 errln("FAIL: createInverse failed");
// Expected inverse ID: the elements appear in reverse order with the two
// halves of each Foo(Bar) element swapped.
1599 exp
= "NFKC();Lower(Upper)";
1600 if (u
->getID() == exp
) {
1601 logln((UnicodeString
)"OK: createInverse(" + id
+ ") => " +
1604 errln((UnicodeString
)"FAIL: createInverse(" + id
+ ") => " +
1612 * Compound filter semantics were originally not implemented
1613 * correctly. Originally, each component filter f(i) is replaced by
1614 * f'(i) = f(i) && g, where g is the filter for the compound
1619 * Suppose I have a transliterator X. Internally X is
1620 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1622 * The compound should convert all greek characters (through latin) to
1623 * cyrillic, then lowercase the result. The filter should say "don't
1624 * touch 'A' in the original". But because an intermediate result
1625 * happens to go through "A", the Greek Alpha gets hung up.
// Creates the compound "Greek-Latin; Latin-Greek; Lower", installs the filter
// [^A] on it, and transliterates a string that contains a Latin 'A'. Per the
// expected output only that 'A' stays as it is; the other three characters,
// including the Greek capital alpha, end up as lower-case Greek letters.
1627 void TransliteratorTest::TestCompoundFilter(void) {
1628 UParseError parseError
;
1629 UErrorCode status
= U_ZERO_ERROR
;
1630 Transliterator
*t
= Transliterator::createInstance
1631 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
);
1633 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
// The filter is set on the compound as a whole, after construction.
1636 t
->adoptFilter(new UnicodeSet("[^A]", status
));
1637 if (U_FAILURE(status
)) {
1638 errln("FAIL: UnicodeSet ct failed");
1643 // Only the 'A' at index 1 should remain unchanged
1645 CharsToUnicodeString("BA\\u039A\\u0391"),
1646 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
// Checks that "Remove[abc]" deletes the lower-case letters a, b and c from
// the input (the upper-case 'A' is kept), and that a clone() of the
// transliterator produces the same result.
1650 void TransliteratorTest::TestRemove(void) {
1651 UParseError parseError
;
1652 UErrorCode status
= U_ZERO_ERROR
;
1653 Transliterator
*t
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
);
1655 errln("FAIL: createInstance failed");
1659 expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts");
1661 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1662 // duplicating the filter
1663 Transliterator
* t2
= t
->clone();
1664 expect(*t2
, "Able bodied baker's cats", "Ale odied ker's ts");
// Table-driven test of Transliterator::toRules() and UnicodeSet::toPattern().
//
// DATA is read three entries at a time: a kind tag, the input text and the
// expected output. Rows whose tag is the RBT pointer are compiled with
// createFromRules(); toRules() is then checked twice, once unescaped against
// the expected text passed through CharsToUnicodeString(), and once escaped
// against the expected text taken literally. The remaining code builds a
// UnicodeSet from the input and checks toPattern().
1670 void TransliteratorTest::TestToRules(void) {
// The tag column is compared by pointer value (DATA[d] == RBT), so rows must
// use these constants rather than separate string literals.
1671 const char* RBT
= "rbt";
1672 const char* SET
= "set";
1673 static const char* DATA
[] = {
1675 "$a=\\u4E61; [$a] > A;",
1679 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1680 "[[:Zs:][:Zl:]]{a} > A;",
1707 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1708 "[^[:Zs:]]{a} > A;",
1711 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1712 "[[a-z]-[:Zs:]]{a} > A;",
1715 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1716 "[[:Zs:]&[a-z]]{a} > A;",
1719 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1720 "[x[:Zs:]]{a} > A;",
1723 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1724 "$macron = \\u0304 ;"
1725 "$evowel = [aeiouyAEIOUY] ;"
1726 "$iotasub = \\u0345 ;"
1727 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1728 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1731 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1734 static const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
1736 for (int32_t d
=0; d
< DATA_length
; d
+=3) {
1737 if (DATA
[d
] == RBT
) {
1738 // Transliterator test
1739 UParseError parseError
;
1740 UErrorCode status
= U_ZERO_ERROR
;
1741 Transliterator
*t
= Transliterator::createFromRules("ID",
1742 UnicodeString(DATA
[d
+1], -1, US_INV
), UTRANS_FORWARD
, parseError
, status
);
1744 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status
));
// Fetch the rules in both forms: second argument FALSE, then TRUE.
1747 UnicodeString rules
, escapedRules
;
1748 t
->toRules(rules
, FALSE
);
1749 t
->toRules(escapedRules
, TRUE
);
// The same expected text serves both checks: unescaped for the plain rules,
// taken literally for the escaped rules.
1750 UnicodeString expRules
= CharsToUnicodeString(DATA
[d
+2]);
1751 UnicodeString
expEscapedRules(DATA
[d
+2], -1, US_INV
);
1752 if (rules
== expRules
) {
1753 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1756 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1757 " => " + rules
+ ", exp " + expRules
);
1759 if (escapedRules
== expEscapedRules
) {
1760 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1761 " => " + escapedRules
);
1763 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1764 " => " + escapedRules
+ ", exp " + expEscapedRules
);
// UnicodeSet part: build a set from the input pattern and compare its
// toPattern() output with the expected pattern.
1770 UErrorCode status
= U_ZERO_ERROR
;
1771 UnicodeString
pat(DATA
[d
+1], -1, US_INV
);
1772 UnicodeString
expToPat(DATA
[d
+2], -1, US_INV
);
1773 UnicodeSet
set(pat
, status
);
1774 if (U_FAILURE(status
)) {
1775 errln("FAIL: UnicodeSet ct failed");
1778 // Adjust spacing etc. as necessary.
1779 UnicodeString toPat
;
1780 set
.toPattern(toPat
);
1781 if (expToPat
== toPat
) {
1782 logln((UnicodeString
)"Ok: " + pat
+
// NOTE(review): the failure message below prints pat after "exp", although
// the comparison above is against expToPat - looks like it should print
// expToPat; confirm before changing.
1785 errln((UnicodeString
)"FAIL: " + pat
+
1786 " => " + prettify(toPat
, TRUE
) +
1787 ", exp " + prettify(pat
, TRUE
));
// Runs expect() on two rule sets that use braces to mark part of the pattern:
// "de > x; {d}e > y;" and "ab{c} > z;". A UTransPosition is prepared first;
// its four initializers are, in order, contextStart, contextLimit, start and
// limit as abbreviated in the trailing comment.
1793 void TransliteratorTest::TestContext() {
1794 UTransPosition pos
= {0, 2, 0, 1}; // cs cl s l
1795 expect("de > x; {d}e > y;",
1800 expect("ab{c} > z;",
// Tests handling of supplementary code points (those written with eight-digit
// U escapes here) in three places: rule sets that use them in variables, sets
// and segments; conversion to character names; and the Any-Hex variants
// Unicode, C, Perl, Java, XML and XML10. A final case removes a range of
// supplementary characters with an inline-filtered Remove.
1805 void TransliteratorTest::TestSupplemental() {
1807 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1809 CharsToUnicodeString("ab\\U0001030Fx"),
1810 CharsToUnicodeString("\\U00010300bix"));
// Swap adjacent pairs made of one character from $a and one from $b.
1812 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1813 "$b=[A-Z\\U00010400-\\U0001044D];"
1814 "($a)($b) > $2 $1;"),
1815 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1816 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
// The trace below shows the text after successive rule applications, with
// '|' marking the cursor.
1818 // k|ax\\U00010300xm
1820 // k|a\\U00010400\\U00010300xm
1821 // ky|\\U00010400\\U00010300xm
1822 // ky\\U00010400|\\U00010300xm
1824 // ky\\U00010400|\\U00010300\\U00010400m
1825 // ky\\U00010400y|\\U00010400m
1826 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1827 "$a {x} > | @ \\U00010400;"
1828 "{$a} [^\\u0000-\\uFFFF] > y;"),
1829 CharsToUnicodeString("kax\\U00010300xm"),
1830 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1833 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1834 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1836 expectT("Any-Hex/Unicode",
1837 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1840 expectT("Any-Hex/C",
1841 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1844 expectT("Any-Hex/Perl",
1845 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
// Java form: each supplementary code point is written as two 4-digit escapes.
1848 expectT("Any-Hex/Java",
1849 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
// NOTE(review): the expected strings of the next two cases hold the
// characters themselves, whereas every other Any-Hex case in this function
// expects an escaped spelling. This looks like numeric character references
// that were decoded somewhere along the way - confirm against the upstream
// file before relying on these two cases.
1852 expectT("Any-Hex/XML",
1853 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854 "𐌰􏼀󠁡 ");
1856 expectT("Any-Hex/XML10",
1857 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1858 "𐌰􏼀󠁡 ");
// Only the character inside the filtered range is removed.
1860 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1861 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1862 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
// Tests the quantifiers +, * and ? in rules: combined with cursor offsets (@)
// in quantified contexts, with explicit UTransPosition limits, with segment
// references, with character sets, and applied to quoted strings and variable
// references.
1865 void TransliteratorTest::TestQuantifier() {
1867 // Make sure @ in a quantified anteContext works
1868 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1872 // Make sure @ in a quantified postContext works
1873 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1877 // Make sure @ in a quantified postContext with seg ref works
1878 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1882 // Make sure @ past ante context doesn't enter ante context
1883 UTransPosition pos
= {0, 5, 3, 5};
1884 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1889 // Make sure @ past post context doesn't pass limit
1890 UTransPosition pos2
= {0, 4, 0, 2};
1891 expect("{b} a+ > c @@ |; x > y; a > A;",
1896 // Make sure @ past post context doesn't enter post context
1897 expect("{b} a+ > c @@ |; x > y; a > A;",
1901 expect("(ab)? c > d;",
1905 // NOTE: The (ab)+ when referenced just yields a single "ab",
1906 // not the full sequence of them. This accords with perl behavior.
1907 expect("(ab)+ {x} > '(' $1 ')';",
1909 "x ab(ab) abab(ab)y");
1912 "ac abc abbc abbbc",
1915 expect("[abc]+ > x;",
1916 "qac abrc abbcs abtbbc",
1919 expect("q{(ab)+} > x;",
1920 "qa qab qaba qababc qaba",
1921 "qa qx qxa qxc qxa");
1923 expect("q(ab)* > x;",
1924 "qa qab qaba qababc",
1927 // NOTE: The (ab)* when referenced just yields a single "ab" (or
1928 // nothing when it matched zero times), not the full sequence of them.
1929 expect("q(ab)* > '(' $1 ')';",
1930 "qa qab qaba qababc",
1931 "()a (ab) (ab)a (ab)c");
1933 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1935 expect("'ab'+ > x;",
1939 // $foo+ and $foo* -- the quantifier should apply to the entire
1940 // variable reference
1941 expect("$var = ab; $var+ > x;",
// Minimal Transliterator subclass that TestSTV registers under made-up IDs.
// It is constructed from an ID with a null (0) filter argument, clone()
// returns a new TestTrans with the same ID, and handleTransliterate() leaves
// the text alone: it only moves offsets.start up to offsets.limit.
1946 class TestTrans
: public Transliterator
{
1948 TestTrans(const UnicodeString
& id
) : Transliterator(id
, 0) {
1950 virtual Transliterator
* clone(void) const {
1951 return new TestTrans(getID());
// The text and isIncremental parameters are unnamed because they are unused.
1953 virtual void handleTransliterate(Replaceable
& /*text*/, UTransPosition
& offsets
,
1954 UBool
/*isIncremental*/) const
1956 offsets
.start
= offsets
.limit
;
// Class-ID hooks; the macro after the class supplies their definitions.
1958 virtual UClassID
getDynamicClassID() const;
1959 static UClassID U_EXPORT2
getStaticClassID();
1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans
)
1964 * Test Source-Target/Variant.
// Tests the Source-Target/Variant enumeration API and registration.
//
// Part 1 walks countAvailableSources()/getAvailableSource(), then for each
// source its targets, then for each source/target pair its variants. Counts
// outside 0..255 and empty source or target names are failures; an empty
// variant is only logged as "<empty>".
//
// Part 2 registers a TestTrans under each of three made-up IDs, creates it
// through createInstance(), unregisters it and tries to create it again.
//
// Part 3 checks that none of the made-up names is still reported by
// getAvailableID(), getAvailableTarget("Any") or getAvailableSource().
1966 void TransliteratorTest::TestSTV(void) {
1967 int32_t ns
= Transliterator::countAvailableSources();
1968 if (ns
< 0 || ns
> 255) {
1969 errln((UnicodeString
)"FAIL: Bad source count: " + ns
);
1973 for (i
=0; i
<ns
; ++i
) {
1974 UnicodeString source
;
1975 Transliterator::getAvailableSource(i
, source
);
1976 logln((UnicodeString
)"" + i
+ ": " + source
);
1977 if (source
.length() == 0) {
1978 errln("FAIL: empty source");
1981 int32_t nt
= Transliterator::countAvailableTargets(source
);
1982 if (nt
< 0 || nt
> 255) {
1983 errln((UnicodeString
)"FAIL: Bad target count: " + nt
);
1986 for (int32_t j
=0; j
<nt
; ++j
) {
1987 UnicodeString target
;
1988 Transliterator::getAvailableTarget(j
, source
, target
);
1989 logln((UnicodeString
)" " + j
+ ": " + target
);
1990 if (target
.length() == 0) {
1991 errln("FAIL: empty target");
1994 int32_t nv
= Transliterator::countAvailableVariants(source
, target
);
1995 if (nv
< 0 || nv
> 255) {
1996 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
);
1999 for (int32_t k
=0; k
<nv
; ++k
) {
2000 UnicodeString variant
;
2001 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
2002 if (variant
.length() == 0) {
2003 logln((UnicodeString
)" " + k
+ ": <empty>");
2005 logln((UnicodeString
)" " + k
+ ": " + variant
);
2011 // Test registration
// Three parallel arrays indexed together: the ID used for registration, the
// full ID looked for in getAvailableID(), and the source name looked for in
// getAvailableSource() (NULL entries are skipped there).
2012 const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013 const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014 const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" };
2015 for (i
=0; i
<3; ++i
) {
2016 Transliterator
*t
= new TestTrans(IDS
[i
]);
2018 errln("FAIL: out of memory");
2021 if (t
->getID() != IDS
[i
]) {
2022 errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]);
2026 Transliterator::registerInstance(t
);
2027 UErrorCode status
= U_ZERO_ERROR
;
2028 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2030 errln((UnicodeString
)"FAIL: Registration/creation failed for ID " +
2033 logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " +
// After unregister() the same createInstance() call is made again.
2037 Transliterator::unregister(IDS
[i
]);
2038 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2040 errln((UnicodeString
)"FAIL: Unregistration failed for ID " +
2046 // Make sure getAvailable API reflects removal
2047 int32_t n
= Transliterator::countAvailableIDs();
2048 for (i
=0; i
<n
; ++i
) {
2049 UnicodeString id
= Transliterator::getAvailableID(i
);
2050 for (j
=0; j
<3; ++j
) {
2051 if (id
.caseCompare(FULL_IDS
[j
],0)==0) {
2052 errln((UnicodeString
)"FAIL: unregister(" + id
+ ") failed");
// Only IDS[0] was registered without an explicit source, so it is the only
// one looked for among the targets of "Any".
2056 n
= Transliterator::countAvailableTargets("Any");
2057 for (i
=0; i
<n
; ++i
) {
2059 Transliterator::getAvailableTarget(i
, "Any", t
);
2060 if (t
.caseCompare(IDS
[0],0)==0) {
2061 errln((UnicodeString
)"FAIL: unregister(Any-" + t
+ ") failed");
2064 n
= Transliterator::countAvailableSources();
2065 for (i
=0; i
<n
; ++i
) {
2067 Transliterator::getAvailableSource(i
, s
);
2068 for (j
=0; j
<3; ++j
) {
2069 if (SOURCES
[j
] == NULL
) continue;
2070 if (s
.caseCompare(SOURCES
[j
],0)==0) {
2071 errln((UnicodeString
)"FAIL: unregister(" + s
+ "-*) failed");
2078 * Test inverse of Greek-Latin; Title()
// Creates "Greek-Latin; Title()" in the REVERSE direction and checks that the
// ID of the resulting transliterator is "(Title);Latin-Greek".
2080 void TransliteratorTest::TestCompoundInverse(void) {
2081 UParseError parseError
;
2082 UErrorCode status
= U_ZERO_ERROR
;
2083 Transliterator
*t
= Transliterator::createInstance
2084 ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
);
2086 dataerrln("FAIL: createInstance - %s", u_errorName(status
));
2089 UnicodeString
exp("(Title);Latin-Greek");
2090 if (t
->getID() == exp
) {
2091 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2094 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2095 t
->getID() + "\", expected \"" + exp
+ "\"");
2101 * Test NFD chaining with RBT
// Builds a transliterator from rules that start with ::NFD and continue with
// two ordinary rules, then checks that "aa" is converted to "Q" (the longer
// rule wins over the single-letter rule).
2103 void TransliteratorTest::TestNFDChainRBT() {
2105 UErrorCode ec
= U_ZERO_ERROR
;
2106 Transliterator
* t
= Transliterator::createFromRules(
2107 "TEST", "::NFD; aa > Q; a > q;",
2108 UTRANS_FORWARD
, pe
, ec
);
// Reported through dataerrln, with the error name, rather than errln.
2109 if (t
== NULL
|| U_FAILURE(ec
)) {
2110 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
));
2113 expect(*t
, "aa", "Q");
2116 // TEMPORARY TESTS -- BEING DEBUGGED
2117 //=- UnicodeString s, s2;
2118 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2119 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2121 //=- expect(*t, s, s2);
2124 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2125 //=- expect(*t, s2, s);
2128 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2129 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2130 //=- expect(*t, s, s);
2133 // const char* source[] = {
2135 // "\\u015Br\\u012Bmad",
2136 // "bhagavadg\\u012Bt\\u0101",
2139 // "vi\\u1E63\\u0101da",
2141 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2142 // "uv\\u0101cr\\u0325",
2144 // "rmk\\u1E63\\u0113t",
2145 // //"dharmak\\u1E63\\u0113tr\\u0113",
2147 // "kuruk\\u1E63\\u0113tr\\u0113",
2148 // "samav\\u0113t\\u0101",
2149 // "yuyutsava-\\u1E25",
2150 // "m\\u0101mak\\u0101-\\u1E25",
2151 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2153 // "san\\u0304java",
2158 // const char* expected[] = {
2160 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2161 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2162 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2163 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2164 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2165 // "\\u092f\\u094b\\u0917",
2166 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2167 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2170 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2172 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2174 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2175 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2176 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2177 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2178 // "\\u0938\\u0902\\u091c\\u0935",
2182 // UErrorCode status = U_ZERO_ERROR;
2183 // UParseError parseError;
2184 // UnicodeString message;
2185 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2186 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2187 // if(U_FAILURE(status)){
2188 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2189 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2190 // delete latinToDevToLatin;
2191 // delete devToLatinToDev;
2194 // UnicodeString gotResult;
2195 // for(int i= 0; source[i] != 0; i++){
2196 // gotResult = source[i];
2197 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2198 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2200 // delete latinToDevToLatin;
2201 // delete devToLatinToDev;
2205 * Inverse of "Null" should be "Null". (J21)
// Creates the "Null" transliterator, asks it for its inverse, and checks that
// the inverse also has the ID "Null".
2207 void TransliteratorTest::TestNullInverse() {
2209 UErrorCode ec
= U_ZERO_ERROR
;
2210 Transliterator
*t
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
);
2211 if (t
== 0 || U_FAILURE(ec
)) {
2212 errln("FAIL: createInstance");
2215 Transliterator
*u
= t
->createInverse(ec
);
2216 if (u
== 0 || U_FAILURE(ec
)) {
2217 errln("FAIL: createInverse");
2221 if (u
->getID() != "Null") {
2222 errln("FAIL: Inverse of Null should be Null");
2229 * Check ID of inverse of alias. (J22)
// Creates "Latin-Hangul", takes its inverse with createInverse(), and expects
// the inverse to report the ID "Hangul-Latin".
2231 void TransliteratorTest::TestAliasInverseID() {
2232 UnicodeString
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2234 UErrorCode ec
= U_ZERO_ERROR
;
2235 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
// A creation failure is reported through dataerrln, with the error name.
2236 if (t
== 0 || U_FAILURE(ec
)) {
2237 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2240 Transliterator
*u
= t
->createInverse(ec
);
2241 if (u
== 0 || U_FAILURE(ec
)) {
2242 errln("FAIL: createInverse");
2246 UnicodeString exp
= "Hangul-Latin";
2247 UnicodeString got
= u
->getID();
2249 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2250 ", expected " + exp
);
2257 * Test IDs of inverses of compound transliterators. (J20)
// Creates the compound "Latin-Jamo;NFC(NFD)", takes its inverse, and expects
// the inverse ID "NFD(NFC);Jamo-Latin": elements in reverse order, with the
// two halves of the Foo(Bar) element swapped.
2259 void TransliteratorTest::TestCompoundInverseID() {
2260 UnicodeString ID
= "Latin-Jamo;NFC(NFD)";
2262 UErrorCode ec
= U_ZERO_ERROR
;
2263 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2264 if (t
== 0 || U_FAILURE(ec
)) {
2265 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2268 Transliterator
*u
= t
->createInverse(ec
);
2269 if (u
== 0 || U_FAILURE(ec
)) {
2270 errln("FAIL: createInverse");
2274 UnicodeString exp
= "NFD(NFC);Jamo-Latin";
2275 UnicodeString got
= u
->getID();
2277 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2278 ", expected " + exp
);
2285 * Test undefined variable.
// Negative test: the rule below uses $initial, which the rule text never
// defines. A failing error code from createFromRules() is the expected
// outcome and is only logged; the other path reports a test failure.
2288 void TransliteratorTest::TestUndefinedVariable() {
2289 UnicodeString rule
= "$initial } a <> \\u1161;";
2291 UErrorCode ec
= U_ZERO_ERROR
;
2292 Transliterator
*t
= Transliterator::createFromRules("<ID>", rule
, UTRANS_FORWARD
, pe
, ec
);
2294 if (U_FAILURE(ec
)) {
2295 logln((UnicodeString
)"OK: Got exception for " + rule
+ ", as expected: " +
2299 errln((UnicodeString
)"Fail: bogus rule " + rule
+ " compiled with error " +
2304 * Test empty context.
// Checks that the rule " { a } > b;", whose braces enclose the whole pattern
// so that nothing is left outside them, still replaces every 'a' by 'b'.
2306 void TransliteratorTest::TestEmptyContext() {
2307 expect(" { a } > b;", "xay a ", "xby b ");
2311 * Test compound filter ID syntax
// Table-driven test of compound IDs and rule sets that carry filters.
//
// DATA is read four entries at a time (see the column comments below) and the
// loop stops at the first null entry in column 1. Entries in column 1 that
// start with '#' are compiled with createFromRules(); all others go through
// createInstance(). A NULL direction marks a row that is expected to be
// rejected. For rows with a source string the transliterator is also run and
// compared with the expected result.
2313 void TransliteratorTest::TestCompoundFilterID(void) {
2314 static const char* DATA
[] = {
2315 // Col. 1 = ID or rule set (latter must start with #)
2317 // = columns > 1 are null if expect col. 1 to be illegal =
2319 // Col. 2 = direction, "F..." or "R..."
2320 // Col. 3 = source string
2321 // Col. 4 = exp result
2323 "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters
2324 "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter
2325 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2326 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2328 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2332 for (int32_t i
=0; DATA
[i
]; i
+=4) {
2333 UnicodeString id
= CharsToUnicodeString(DATA
[i
]);
// Only the first character of the direction column is examined; a NULL
// direction falls back to UTRANS_FORWARD.
2334 UTransDirection direction
= (DATA
[i
+1] != NULL
&& DATA
[i
+1][0] == 'R') ?
2335 UTRANS_REVERSE
: UTRANS_FORWARD
;
2336 UnicodeString source
;
2338 if (DATA
[i
+2] != NULL
) {
2339 source
= CharsToUnicodeString(DATA
[i
+2]);
2340 exp
= CharsToUnicodeString(DATA
[i
+3]);
// expOk: whether this row is expected to produce a transliterator at all.
2342 UBool expOk
= (DATA
[i
+1] != NULL
);
2343 Transliterator
* t
= NULL
;
2345 UErrorCode ec
= U_ZERO_ERROR
;
2346 if (id
.charAt(0) == 0x23/*#*/) {
2347 t
= Transliterator::createFromRules("ID", id
, direction
, pe
, ec
);
2349 t
= Transliterator::createInstance(id
, direction
, pe
, ec
);
2351 UBool ok
= (t
!= NULL
&& U_SUCCESS(ec
));
2352 UnicodeString transID
;
2354 transID
= t
->getID();
2357 transID
= UnicodeString("NULL", "");
2360 logln((UnicodeString
)"Ok: " + id
+ " => " + transID
+ ", " +
2362 if (source
.length() != 0) {
2363 expect(*t
, source
, exp
);
2367 dataerrln((UnicodeString
)"FAIL: " + id
+ " => " + transID
+ ", " +
2374 * Test new property set syntax
// Two checks of set syntax in rules:
//  - property sets written as p{Lu} and p{ANY} after a backslash can be used
//    as rule patterns;
//  - per the expected output, ".+" stops at the line feed and the carriage
//    return, so each line is bracketed on its own.
2376 void TransliteratorTest::TestPropertySet() {
2377 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379 "[ a stitch ]\n[ in time ]\r[ saves 9]");
// TestNewEngine: exercises a built-in transliterator ("Latin-Hiragana"), a
// hand-assembled three-element CompoundTransliterator with a lowercase-letter
// filter, and two rule sets passed straight to expect().
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips some numbers (e.g. 2386, 2391-2392, 2405, 2408-2412), so the
// declaration of `pe`, the early-return bodies and parts of the array setup are
// not visible here -- confirm against the upstream file before editing code.
2383 * Test various failure points of the new 2.0 engine.
2385 void TransliteratorTest::TestNewEngine() {
// One error code is threaded through every factory/constructor call below.
2387 UErrorCode ec
= U_ZERO_ERROR
;
2388 Transliterator
*t
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
);
// Reported via dataerrln() rather than errln().
2389 if (t
== 0 || U_FAILURE(ec
)) {
2390 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec
));
2393 // Katakana should be untouched
2394 expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"),
2395 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2400 // This test will only work if Transliterator.ROLLBACK is
2401 // true. Otherwise, this test will fail, revealing a
2402 // limitation of global filters in incremental mode.
// NOTE(review): the left-hand sides of the next two createFromRules() calls are
// not visible in this listing; presumably the results are stored for array[0]
// and array[2] -- confirm.
2404 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
);
2406 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
);
2407 if (U_FAILURE(ec
)) {
// Elements handed to the CompoundTransliterator constructor below.
2413 Transliterator
* array
[3];
2415 array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
);
2417 if (U_FAILURE(ec
)) {
2418 errln("FAIL: createInstance NFD");
// Compound of the three elements, filtered by the set [:Ll:].
2425 t
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
));
2426 if (U_FAILURE(ec
)) {
2427 errln("FAIL: UnicodeSet constructor");
// Lowercase 'a' becomes 'b'; the uppercase 'A' in the input is left alone.
2435 expect(*t
, "aAaA", "bAbA");
// The compound must expose its three elements in order, identified by ID.
2437 assertTrue("countElements", t
->countElements() == 3);
2438 assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A");
2439 assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD");
2440 assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b");
2441 assertSuccess("getElement", ec
);
// NOTE(review): the source/expected arguments of this expect() call are on lines
// missing from the listing.
2449 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
// Rule set built from adjacent string literals; several of its pieces (e.g. the
// definition of $ddot) are on lines missing from the listing.
2453 UnicodeString gr
= CharsToUnicodeString(
2455 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456 "$rough = \\u0314 ;"
2457 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2461 expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2465 * Test quantified segment behavior. We want:
2466 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2470 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2472 // The tricky case; the quantifier is around the segment
2473 expect("([abc])+ > x $1 x;", "cba", "xax");
2475 // Tricky case in reverse direction
2476 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2478 // Check post-context segment
2479 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2481 // Test toRule/toPattern for non-quantified segment.
2482 // Careful with spacing here.
2483 UnicodeString
r("([a-c]){q} > x $1 x;");
2485 UErrorCode ec
= U_ZERO_ERROR
;
2486 Transliterator
* t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2487 if (U_FAILURE(ec
)) {
2488 errln("FAIL: createFromRules");
2493 t
->toRules(rr
, TRUE
);
2495 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2497 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2501 // Test toRule/toPattern for quantified segment.
2502 // Careful with spacing here.
2503 r
= "([a-c])+{q} > x $1 x;";
2504 t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2505 if (U_FAILURE(ec
)) {
2506 errln("FAIL: createFromRules");
2510 t
->toRules(rr
, TRUE
);
2512 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2514 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2519 //======================================================================
2521 //======================================================================
// TestDevanagariLatinRT: round-trips MAX_LEN Latin strings through
// "Latin-Devanagari" and the matching Devanagari strings back through
// "Devanagari-Latin", pairing source[i] with expected[i].
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips many numbers; most rows of source[] (upstream lines 2525-2583)
// and a few rows of expected[] are therefore not visible here -- do not treat
// the arrays below as complete.
2522 void TransliteratorTest::TestDevanagariLatinRT(){
// Both tables are declared with exactly MAX_LEN slots and are walked in lockstep.
2523 const int MAX_LEN
= 52;
// Latin side of each pair (escaped; unescaped with CharsToUnicodeString below).
2524 const char* const source
[MAX_LEN
] = {
2539 //"r\\u0323ya", // \u095c is not valid in Devanagari
2565 "\\u1E6Dh\\u1E6Dha",
2572 // Not roundtrippable --
2573 // \\u0939\\u094d\\u094d\\u092E - hma
2574 // \\u0939\\u094d\\u092E - hma
2575 // CharsToUnicodeString("hma"),
2580 "san\\u0304j\\u012Bb s\\u0113nagupta",
2581 "\\u0101nand vaddir\\u0101ju",
// Devanagari side of each pair; the trailing comment on a row gives its Latin form.
2585 const char* const expected
[MAX_LEN
] = {
2586 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2587 "\\u0915\\u094D\\u0930", /* kra */
2588 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2589 "\\u0916\\u094D\\u0930", /* khra */
2590 "\\u0917\\u094D\\u0930", /* gra */
2591 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2592 "\\u091A\\u094D\\u0930", /* cra */
2593 "\\u091B\\u094D\\u0930", /* chra */
2594 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2595 "\\u091D\\u094D\\u0930", /* jhra */
2596 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2597 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2598 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2599 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2600 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2601 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2602 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2603 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2604 "\\u0924\\u094D\\u0924", /* tta */
2605 "\\u0925\\u094D\\u0930", /* thra */
2606 "\\u0926\\u094D\\u0926", /* dda */
2607 "\\u0927\\u094D\\u0930", /* dhra */
2608 "\\u0928\\u094D\\u0928", /* nna */
2609 "\\u092A\\u094D\\u0930", /* pra */
2610 "\\u092B\\u094D\\u0930", /* phra */
2611 "\\u092C\\u094D\\u0930", /* bra */
2612 "\\u092D\\u094D\\u0930", /* bhra */
2613 "\\u092E\\u094D\\u0930", /* mra */
2614 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2615 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2616 "\\u092F\\u094D\\u0930", /* yra */
2617 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2619 "\\u0935\\u094D\\u0930", /* vra */
2620 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2621 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2622 "\\u0938\\u094D\\u0930", /* sra */
2623 "\\u0939\\u094d\\u092E", /* hma */
2624 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2625 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2626 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2627 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2628 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2629 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2630 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2631 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2632 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2634 "\\u0939\\u094D\\u092F", /* hya */
2635 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2636 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2637 "\\u090d", /* e\\u0306 */
2638 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
// A single status/parseError pair is shared by both createInstance() calls.
2643 UErrorCode status
= U_ZERO_ERROR
;
2644 UParseError parseError
;
// `message` is declared but not used in any line visible here.
2645 UnicodeString message
;
2646 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2647 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
// Construction failure is reported through dataerrln(), with the parse context.
2648 if(U_FAILURE(status
)){
2649 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2650 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
// `gotResult` is assigned in the loop but not read in any line visible here.
2653 UnicodeString gotResult
;
2654 for(int i
= 0; i
<MAX_LEN
; i
++){
2655 gotResult
= source
[i
];
// Forward: Latin -> Devanagari must produce expected[i].
2656 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
// Backward: Devanagari -> Latin must reproduce source[i].
2657 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2663 void TransliteratorTest::TestTeluguLatinRT(){
2664 const int MAX_LEN
=10;
2665 const char* const source
[MAX_LEN
] = {
2666 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2667 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2668 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2669 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2670 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2671 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2672 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2673 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2674 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2675 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2678 const char* const expected
[MAX_LEN
] = {
2679 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2691 UErrorCode status
= U_ZERO_ERROR
;
2692 UParseError parseError
;
2693 UnicodeString message
;
2694 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
);
2695 Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2696 if(U_FAILURE(status
)){
2697 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2698 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2701 UnicodeString gotResult
;
2702 for(int i
= 0; i
<MAX_LEN
; i
++){
2703 gotResult
= source
[i
];
2704 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2705 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// TestSanskritLatinRT: round-trips MAX_LEN Latin strings through
// "Latin-Devanagari" and the matching Devanagari strings back through
// "Devanagari-Latin", pairing source[i] with expected[i].
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips some numbers (e.g. 2717-2718, 2720, 2726, 2729-2731), so several
// rows of source[] and the closing braces are not visible here -- do not treat
// source[] below as complete.
2711 void TransliteratorTest::TestSanskritLatinRT(){
// Both tables are declared with exactly MAX_LEN slots and are walked in lockstep.
2712 const int MAX_LEN
=16;
// Latin side of each pair (escaped; unescaped with CharsToUnicodeString below).
2713 const char* const source
[MAX_LEN
] = {
2714 "rmk\\u1E63\\u0113t",
2715 "\\u015Br\\u012Bmad",
2716 "bhagavadg\\u012Bt\\u0101",
2719 "vi\\u1E63\\u0101da",
2721 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722 "uv\\u0101cr\\u0325",
2723 "dharmak\\u1E63\\u0113tr\\u0113",
2724 "kuruk\\u1E63\\u0113tr\\u0113",
2725 "samav\\u0113t\\u0101",
2727 "m\\u0101mak\\u0101\\u1E25",
2728 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
// Devanagari side of each pair.
2732 const char* const expected
[MAX_LEN
] = {
2733 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739 "\\u092f\\u094b\\u0917",
2740 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749 "\\u0938\\u0902\\u091c\\u0935",
// A single status/parseError pair is shared by both createInstance() calls.
2751 UErrorCode status
= U_ZERO_ERROR
;
2752 UParseError parseError
;
// `message` is declared but not used in any line visible here.
2753 UnicodeString message
;
2754 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2755 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
// Construction failure is reported through dataerrln(), with the parse context.
2756 if(U_FAILURE(status
)){
2757 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2758 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
// `gotResult` is assigned in the loop but not read in any line visible here.
2761 UnicodeString gotResult
;
2762 for(int i
= 0; i
<MAX_LEN
; i
++){
2763 gotResult
= source
[i
];
// Forward: Latin -> Devanagari must produce expected[i].
2764 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
// Backward: Devanagari -> Latin must reproduce source[i].
2765 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// TestCompoundLatinRT: checks that compound IDs which go to another script and
// straight back ("X-Y;Y-X") leave their input unchanged.
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips some numbers (e.g. 2777-2778, 2780, 2786, 2789-2791), so several
// rows of source[] and the closing braces are not visible here -- do not treat
// source[] below as complete.
2772 void TransliteratorTest::TestCompoundLatinRT(){
// Latin strings (escaped; unescaped with CharsToUnicodeString below).
2773 const char* const source
[] = {
2774 "rmk\\u1E63\\u0113t",
2775 "\\u015Br\\u012Bmad",
2776 "bhagavadg\\u012Bt\\u0101",
2779 "vi\\u1E63\\u0101da",
2781 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782 "uv\\u0101cr\\u0325",
2783 "dharmak\\u1E63\\u0113tr\\u0113",
2784 "kuruk\\u1E63\\u0113tr\\u0113",
2785 "samav\\u0113t\\u0101",
2787 "m\\u0101mak\\u0101\\u1E25",
2788 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
// The element count is taken from source[]; expected[] is then declared with
// that same size.
2792 const int MAX_LEN
= UPRV_LENGTHOF(source
);
// Devanagari strings, index-aligned with source[].
2793 const char* const expected
[MAX_LEN
] = {
2794 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800 "\\u092f\\u094b\\u0917",
2801 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810 "\\u0938\\u0902\\u091c\\u0935"
// Since expected[] was declared with MAX_LEN elements, UPRV_LENGTHOF(expected)
// equals MAX_LEN by construction, so this check cannot fire as written.
2812 if(MAX_LEN
!= UPRV_LENGTHOF(expected
)) {
2813 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
// A single status/parseError pair is shared by all four createInstance() calls.
2817 UErrorCode status
= U_ZERO_ERROR
;
2818 UParseError parseError
;
// `message` is declared but not used in any line visible here.
2819 UnicodeString message
;
2820 Transliterator
* devToLatinToDev
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2821 Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
// devToTelToDev is created and deleted, but no expect() visible here uses it.
2822 Transliterator
* devToTelToDev
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2823 Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
// Construction failure is reported through dataerrln(), with the parse context.
2825 if(U_FAILURE(status
)){
2826 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2827 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
// `gotResult` is assigned in the loop but not read in any line visible here.
2830 UnicodeString gotResult
;
2831 for(int i
= 0; i
<MAX_LEN
; i
++){
2832 gotResult
= source
[i
];
// Each compound must map its input onto itself (input == expected output).
2833 expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
]));
2834 expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2835 expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
// Release all four transliterators.
2838 delete(latinToDevToLatin
);
2839 delete(devToLatinToDev
);
2840 delete(devToTelToDev
);
2841 delete(latinToTelToLatin
);
2845 * Test Gurmukhi-Devanagari Tippi and Bindi
2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2849 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2850 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2851 UErrorCode status
= U_ZERO_ERROR
;
2852 UnicodeSet
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV
).unescape(), status
);
2853 UnicodeSet
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV
).unescape(), status
);
2854 UParseError parseError
;
2856 UnicodeSetIterator
vIter(vowel
);
2857 UnicodeSetIterator
nvIter(non_vowel
);
2858 Transliterator
* trans
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
);
2859 if(U_FAILURE(status
)) {
2860 dataerrln("Error creating transliterator %s", u_errorName(status
));
2864 UnicodeString
src (" \\u0902", -1, US_INV
);
2865 UnicodeString
expected(" \\u0A02", -1, US_INV
);
2866 src
= src
.unescape();
2867 expected
= expected
.unescape();
2869 while(vIter
.next()){
2870 src
.setCharAt(0,(UChar
) vIter
.getCodepoint());
2871 expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100));
2872 expect(*trans
,src
,expected
);
2875 expected
.setCharAt(1,0x0A70);
2876 while(nvIter
.next()){
2877 //src.setCharAt(0,(char) nvIter.codepoint);
2878 src
.setCharAt(0,(UChar
)nvIter
.getCodepoint());
2879 expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100));
2880 expect(*trans
,src
,expected
);
2885 * Test instantiation from a locale.
2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2889 UErrorCode ec
= U_ZERO_ERROR
;
2890 Transliterator
*t
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
);
2891 if (U_FAILURE(ec
)) {
2892 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec
));
2896 expect(*t
, CharsToUnicodeString("\\u0430"), "a");
2899 t
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
);
2900 if (U_FAILURE(ec
)) {
2901 errln("FAIL: createInstance(en-el)");
2905 expect(*t
, "a", CharsToUnicodeString("\\u03B1"));
2910 * Test title case handling of accent (should ignore accents)
2912 void TransliteratorTest::TestTitleAccents(void) {
2914 UErrorCode ec
= U_ZERO_ERROR
;
2915 Transliterator
*t
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
);
2916 if (U_FAILURE(ec
)) {
2917 errln("FAIL: createInstance(Title)");
2921 expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2926 * Basic test of a locale resource based rule.
2928 void TransliteratorTest::TestLocaleResource() {
2929 const char* DATA
[] = {
2931 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2932 "Latin-el", "b", "\\u03bc\\u03c0",
2933 "Latin-Greek", "b", "\\u03B2",
2934 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2935 "el-Latin", "\\u03B2", "v",
2936 "Greek-Latin", "\\u03B2", "b",
2938 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
2939 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
2941 UErrorCode ec
= U_ZERO_ERROR
;
2942 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
);
2943 if (U_FAILURE(ec
)) {
2944 dataerrln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ") - " + u_errorName(ec
));
2948 expect(*t
, CharsToUnicodeString(DATA
[i
+1]),
2949 CharsToUnicodeString(DATA
[i
+2]));
// TestParseError: builds transliterators from two deliberately bad rule sets and
// checks what the parser reports -- first that the error context mentions the
// offending rule, then that a masked rule yields U_RULE_MASK_ERROR with the
// expected pre/post context.
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips some numbers (e.g. 2959-2961, 2963, 2965, 2979-2982), so the
// contents of both rule literals, the declaration of `pe`, and the else/closing
// lines are not visible here -- confirm against the upstream file.
2955 * Make sure parse errors reference the right line.
2957 void TransliteratorTest::TestParseError() {
// First rule set (literal text not visible in this listing).
2958 static const char* rule
=
2962 UErrorCode ec
= U_ZERO_ERROR
;
2964 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
// A failure is the expected outcome here: join pre- and post-context with '|'
// and look for the text of the bad rule in the result.
2966 if (U_FAILURE(ec
)) {
2967 UnicodeString
err(pe
.preContext
);
2968 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2969 if (err
.indexOf("d << b") >= 0) {
2970 logln("Ok: " + err
);
2972 errln("FAIL: " + err
);
// Reached when the rules were accepted without any error.
2976 errln("FAIL: no syntax error");
// Second rule set (literal text not visible in this listing).
2978 static const char* maskingRule
=
// The result is deleted immediately; only the error code and `pe` matter.
2983 delete Transliterator::createFromRules("ID", maskingRule
, UTRANS_FORWARD
, pe
, ec
);
2984 if (ec
!= U_RULE_MASK_ERROR
) {
2985 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec
));
// preContext must be exactly "a > x;" and postContext exactly "ab > y;".
2987 else if (UnicodeString("a > x;") != UnicodeString(pe
.preContext
)) {
2988 errln("FAIL: did not get expected precontext");
2990 else if (UnicodeString("ab > y;") != UnicodeString(pe
.postContext
)) {
2991 errln("FAIL: did not get expected postcontext");
2996 * Make sure sets on output are disallowed.
2998 void TransliteratorTest::TestOutputSet() {
2999 UnicodeString rule
= "$set = [a-cm-n]; b > $set;";
3000 UErrorCode ec
= U_ZERO_ERROR
;
3002 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3004 if (U_FAILURE(ec
)) {
3005 UnicodeString
err(pe
.preContext
);
3006 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3007 logln("Ok: " + err
);
3010 errln("FAIL: No syntax error");
3014 * Test the use variable range pragma, making sure that use of
3015 * variable range characters is detected and flagged as an error.
3017 void TransliteratorTest::TestVariableRange() {
3018 UnicodeString rule
= "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019 UErrorCode ec
= U_ZERO_ERROR
;
3021 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3023 if (U_FAILURE(ec
)) {
3024 UnicodeString
err(pe
.preContext
);
3025 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3026 logln("Ok: " + err
);
3029 errln("FAIL: No syntax error");
3033 * Test invalid post context error handling
3035 void TransliteratorTest::TestInvalidPostContext() {
3036 UnicodeString rule
= "a}b{c>d;";
3037 UErrorCode ec
= U_ZERO_ERROR
;
3039 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3041 if (U_FAILURE(ec
)) {
3042 UnicodeString
err(pe
.preContext
);
3043 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3044 if (err
.indexOf("a}b{c") >= 0) {
3045 logln("Ok: " + err
);
3047 errln("FAIL: " + err
);
3051 errln("FAIL: No syntax error");
// TestIDForms: for each row of DATA, creates a transliterator from the ID in
// column 1, creates its inverse, and compares both IDs against columns 2 and 3.
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips some numbers (e.g. 3059, 3074-3077, 3086-3088, 3090, 3093, 3095,
// 3097-3100), so some table rows, the declarations of `pe` and `t`, and several
// branch/cleanup lines are not visible here -- confirm against the upstream file.
3055 * Test ID form variants
3057 void TransliteratorTest::TestIDForms() {
// Rows are triples read below as (ID, expID, expInvID).
3058 const char* DATA
[] = {
3060 "nfd", NULL
, "NFC", // make sure case is ignored
3061 "Any-NFKD", NULL
, "Any-NFKC",
3062 "Null", NULL
, "Null",
3063 "-nfkc", "nfkc", "NFKD",
3064 "-nfkc/", "nfkc", "NFKD",
3065 "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN",
3066 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
// The rows below have no inverse ID, which marks them as expected-invalid
// (see expValid further down).
3068 "Source-", NULL
, NULL
,
3069 "Source/Variant-", NULL
, NULL
,
3070 "Source-/Variant", NULL
, NULL
,
3071 "/Variant", NULL
, NULL
,
3072 "/Variant-", NULL
, NULL
,
3073 "-/Variant", NULL
, NULL
,
3078 const int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
3080 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3081 const char* ID
= DATA
[i
];
3082 const char* expID
= DATA
[i
+1];
3083 const char* expInvID
= DATA
[i
+2];
// A row is expected to be a valid ID exactly when it lists an inverse ID.
3084 UBool expValid
= (expInvID
!= NULL
);
// NOTE(review): the body of this branch is not visible; presumably a NULL expID
// falls back to ID itself -- confirm.
3085 if (expID
== NULL
) {
3089 UErrorCode ec
= U_ZERO_ERROR
;
3091 Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
// On failure one of two messages is emitted: an "Ok" log line or a data error.
// NOTE(review): the condition choosing between them is not visible; presumably
// it tests expValid -- confirm.
3092 if (U_FAILURE(ec
)) {
3094 logln((UnicodeString
)"Ok: getInstance(" + ID
+") => " + u_errorName(ec
));
3096 dataerrln((UnicodeString
)"FAIL: Couldn't create " + ID
+ " - " + u_errorName(ec
));
// Build the inverse of the transliterator that was just created.
3101 Transliterator
*u
= t
->createInverse(ec
);
3102 if (U_FAILURE(ec
)) {
3103 errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
);
// Both the forward ID and the inverse ID must match the table.
3108 if (t
->getID() == expID
&&
3109 u
->getID() == expInvID
) {
3110 logln((UnicodeString
)"Ok: " + ID
+ ".getInverse() => " + expInvID
);
3112 errln((UnicodeString
)"FAIL: getInstance(" + ID
+ ") => " +
3113 t
->getID() + " x getInverse() => " + u
->getID() +
3114 ", expected " + expInvID
);
3121 static const UChar SPACE
[] = {32,0};
3122 static const UChar NEWLINE
[] = {10,0};
3123 static const UChar RETURN
[] = {13,0};
3124 static const UChar EMPTY
[] = {0};
3126 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
,
3127 const UnicodeString
& testRulesForward
) {
3128 UnicodeString rules2
; t2
.toRules(rules2
, TRUE
);
3129 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130 rules2
.findAndReplace(SPACE
, EMPTY
);
3131 rules2
.findAndReplace(NEWLINE
, EMPTY
);
3132 rules2
.findAndReplace(RETURN
, EMPTY
);
3134 UnicodeString
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
);
3136 if (rules2
!= testRules
) {
3138 logln((UnicodeString
)"GENERATED RULES: " + rules2
);
3139 logln((UnicodeString
)"SHOULD BE: " + testRulesForward
);
// TestToRulesMark: builds a forward and a reverse transliterator from one
// bidirectional rule set, checks that they map a-acute <-> alpha-acute, and then
// checks their toRules() output against two reference strings via checkRules().
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips some numbers (e.g. 3149-3150, 3152-3154, 3159-3164, 3168-3172,
// 3176-3177), so most fragments of the three rule literals and the declaration
// of `pe` are not visible here -- do not treat the literals below as complete.
3144 * Mark's toRules test.
3146 void TransliteratorTest::TestToRulesMark() {
// Bidirectional rule set, assembled from adjacent string literals.
3147 const char* testRules
=
3148 "::[[:Latin:][:Mark:]];"
3151 "a <> \\u03B1;" // alpha
3155 "::([[:Greek:][:Mark:]]);"
// Reference toRules() text for the forward transliterator (t2).
3157 const char* testRulesForward
=
3158 "::[[:Latin:][:Mark:]];"
// Reference toRules() text for the reverse transliterator (t3).
3166 const char* testRulesBackward
=
3167 "::[[:Greek:][:Mark:]];"
3174 UnicodeString source
= CharsToUnicodeString("\\u00E1"); // a-acute
3175 UnicodeString target
= CharsToUnicodeString("\\u03AC"); // alpha-acute
// The same rule text is compiled twice: once forward, once in reverse.
3178 UErrorCode ec
= U_ZERO_ERROR
;
3179 Transliterator
*t2
= Transliterator::createFromRules("source-target", UnicodeString(testRules
, -1, US_INV
), UTRANS_FORWARD
, pe
, ec
);
3180 Transliterator
*t3
= Transliterator::createFromRules("target-source", UnicodeString(testRules
, -1, US_INV
), UTRANS_REVERSE
, pe
, ec
);
// Construction failure is reported through dataerrln().
3182 if (U_FAILURE(ec
)) {
3185 dataerrln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
));
// Behavioral check in both directions.
3189 expect(*t2
, source
, target
);
3190 expect(*t3
, target
, source
);
// Round-trip check of the generated rule text.
3192 checkRules("Failed toRules FORWARD", *t2
, UnicodeString(testRulesForward
, -1, US_INV
));
3193 checkRules("Failed toRules BACKWARD", *t3
, UnicodeString(testRulesBackward
, -1, US_INV
));
// TestEscape: instantiates "Hex-Any" and three "Any-Hex" variants (C, Java,
// Perl) and checks their output on short strings.
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips many numbers (e.g. 3203-3207, 3211-3212, 3214-3218), so the
// declarations of `pe`, `ec` and `t`, the opening of every expect() call, and the
// else/cleanup lines are not visible here -- confirm against the upstream file.
3200 * Test Escape and Unescape transliterators.
3202 void TransliteratorTest::TestEscape() {
// Unescaping direction: hex escape sequences back to characters.
3208 t
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
);
3209 if (U_FAILURE(ec
)) {
3210 errln((UnicodeString
)"FAIL: createInstance");
// Argument of a call whose first line is not visible here.
3213 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
// C variant: \uXXXX, with \UXXXXXXXX for the supplementary code point.
3219 t
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
);
3220 if (U_FAILURE(ec
)) {
3221 errln((UnicodeString
)"FAIL: createInstance");
3224 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3225 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
// Java variant: the supplementary code point is written as a surrogate pair.
3230 t
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
);
3231 if (U_FAILURE(ec
)) {
3232 errln((UnicodeString
)"FAIL: createInstance");
3235 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3236 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
// Perl variant: \x{...} with no zero padding.
3241 t
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
);
3242 if (U_FAILURE(ec
)) {
3243 errln((UnicodeString
)"FAIL: createInstance");
3246 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3247 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3253 void TransliteratorTest::TestAnchorMasking(){
3254 UnicodeString
rule ("^a > Q; a > q;");
3255 UErrorCode status
= U_ZERO_ERROR
;
3256 UParseError parseError
;
3258 Transliterator
* t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
);
3259 if(U_FAILURE(status
)){
3260 errln(UnicodeString("FAIL: ") + "ID" +
3261 ".createFromRules() => bad rules" +
3262 /*", parse error " + parseError.code +*/
3263 ", line " + parseError
.line
+
3264 ", offset " + parseError
.offset
+
3265 ", context " + prettify(parseError
.preContext
, TRUE
) +
3266 ", rules: " + prettify(rule
, TRUE
));
// TestDisplayName: for each row of DATA, checks the en_US display name of the ID
// and then the display name of the transliterator created from that ID in the
// reverse direction.
// NOTE(review): this listing keeps the upstream line number at the start of each
// line and skips some numbers (e.g. 3277-3278, 3283-3284, 3286-3287, 3289-3290,
// 3299, 3304, 3308), so the #else/#endif lines, some table rows, the declarations
// of `name` and `pe`, and the else/cleanup lines are not visible here -- confirm
// against the upstream file.
3272 * Make sure display names of variants look reasonable.
3274 void TransliteratorTest::TestDisplayName() {
// The test logs a skip message when UCONFIG_NO_FORMATTING is set.
3275 #if UCONFIG_NO_FORMATTING
3276 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3279 static const char* DATA
[] = {
3280 // ID, forward name, reverse name
3281 // Update the text as necessary -- the important thing is
3282 // not the text itself, but how various cases are handled.
3285 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3288 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3291 "NFC", "Any to NFC", "Any to NFD",
3294 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
// All display names are requested for the en_US locale.
3296 Locale
US("en", "US");
// Rows are triples, hence the stride of 3.
3298 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// Forward check: display name of the ID itself against column 2.
3300 Transliterator::getDisplayName(DATA
[i
], US
, name
);
3301 if (name
!= DATA
[i
+1]) {
3302 dataerrln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " +
3303 name
+ ", expected " + DATA
[i
+1]);
3305 logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
);
// Reverse check: create the ID with UTRANS_REVERSE and compare the display
// name of the resulting transliterator's ID against column 3.
3307 UErrorCode ec
= U_ZERO_ERROR
;
3309 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
);
3310 if (U_FAILURE(ec
)) {
3312 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec
));
3315 name
= Transliterator::getDisplayName(t
->getID(), US
, name
);
3316 if (name
!= DATA
[i
+2]) {
3317 dataerrln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " +
3318 name
+ ", expected " + DATA
[i
+2]);
3320 logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
);
3327 void TransliteratorTest::TestSpecialCases(void) {
3328 const UnicodeString registerRules
[] = {
3329 "Any-Dev1", "x > X; y > Y;",
3330 "Any-Dev2", "XY > Z",
3332 CharsToUnicodeString
3333 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3337 const UnicodeString testCases
[] = {
3339 // should add more test cases
3340 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3346 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3349 // check for devanagari bug
3350 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3352 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3353 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3354 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE
+ DESERET_dee
,
3356 //TODO: enable this test once Titlecase works right
3358 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3361 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3362 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE
+ DESERET_DEE
,
3363 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3364 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee
+ DESERET_dee
,
3366 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3367 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3370 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3371 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3372 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3373 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3374 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3376 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3377 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3379 // Upper: TAT\\u02B9\\u00C2NA
3380 // Lower: tat\\u02B9\\u00E2na
3381 // Title: Tat\\u02B9\\u00E2na
3382 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3385 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3387 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3394 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3395 UErrorCode status
= U_ZERO_ERROR
;
3397 Transliterator
*t
= Transliterator::createFromRules(registerRules
[0+i
],
3398 registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
);
3399 if (U_FAILURE(status
)) {
3400 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status
));
3402 Transliterator::registerInstance(t
);
3405 for (i
= 0; testCases
[i
].length()!=0; i
+=3) {
3406 UErrorCode ec
= U_ZERO_ERROR
;
3408 const UnicodeString
& name
= testCases
[i
];
3409 Transliterator
*t
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
);
3410 if (U_FAILURE(ec
)) {
3411 dataerrln((UnicodeString
)"FAIL: Couldn't create " + name
+ " - " + u_errorName(ec
));
3415 const UnicodeString
& id
= t
->getID();
3416 const UnicodeString
& source
= testCases
[i
+1];
3417 UnicodeString target
;
3419 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3421 if (testCases
[i
+2].length() > 0) {
3422 target
= testCases
[i
+2];
3423 } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) {
3424 Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
);
3425 } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) {
3426 Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
);
3427 } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) {
3428 Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
);
3429 } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) {
3430 Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
);
3431 } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) {
3433 target
.toLower(Locale::getUS());
3434 } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) {
3436 target
.toUpper(Locale::getUS());
3438 if (U_FAILURE(ec
)) {
3439 errln((UnicodeString
)"FAIL: Internal error normalizing " + source
);
3443 expect(*t
, source
, target
);
3446 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3447 Transliterator::unregister(registerRules
[i
]);
// Formats the code point ch into buffer with sprintf as a backslash escape:
// either backslash-u followed by 4 lowercase hex digits, or backslash-U
// followed by 8 lowercase hex digits. The longer form writes 11 bytes
// including the terminating NUL and sprintf does no bounds checking, so the
// caller must supply a buffer at least that large.
// NOTE(review): the condition that selects between the two forms and the
// return statement are not visible here -- presumably the short form is used
// for BMP code points and buffer is returned; confirm.
3451 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) {
3453 sprintf(buffer
, "\\u%04x", (int)ch
);
3455 sprintf(buffer
, "\\U%08x", (int)ch
);
// Exercises the low-level case-mapping functions on the Deseret test
// characters DESERET_dee and DESERET_DEE: u_totitle() on a single code point,
// then u_strToUpper() and u_strToLower() on the two-character string dee+DEE.
// Mismatches are reported through err()/errln().
3460 void TransliteratorTest::TestSurrogateCasing (void) {
3461 // check that casing handles surrogates
3462 // titlecase is currently defective
// Read the code point at index 0 of DESERET_dee into dee.
3466 U16_GET(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
);
// Titlecasing dee is expected to give DESERET_DEE; on mismatch both code
// points are printed in escaped form.
3467 UnicodeString
DEE(u_totitle(dee
));
3468 if (DEE
!= DESERET_DEE
) {
3469 err("Fails titlecase of surrogates");
3470 err(Char32ToEscapedChars(dee
, buffer
));
3472 errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
));
3475 UnicodeString deeDEETest
=DESERET_dee
+ DESERET_DEE
;
3476 UnicodeString deedeeTest
= DESERET_dee
+ DESERET_dee
;
3477 UnicodeString DEEDEETest
= DESERET_DEE
+ DESERET_DEE
;
3478 UErrorCode status
= U_ZERO_ERROR
;
// Uppercase dee+DEE into buffer2 (capacity passed as 20, NULL locale
// argument); the expected result is DEE+DEE.
3480 u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3481 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) {
3482 errln("Fails: Can't uppercase surrogates.");
// Reset the status and repeat with lowercasing; the expected result is dee+dee.
3485 status
= U_ZERO_ERROR
;
3486 u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3487 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) {
3488 errln("Fails: Can't lowercase surrogates.");
// File-local helper: applies t to result in place via the one-argument
// Transliterator::transliterate().
// NOTE(review): src is not referenced in the visible lines; presumably result
// is first assigned from src so that result ends up holding the
// transliteration of src -- confirm.
3492 static void _trans(Transliterator
& t
, const UnicodeString
& src
,
3493 UnicodeString
& result
) {
3495 t
.transliterate(result
);
// File-local helper: creates a forward transliterator for id with
// Transliterator::createInstance() and, if that succeeds, applies it to src
// through the Transliterator& overload of _trans, leaving the output in result.
// NOTE(review): ec is declared as a by-value UErrorCode, so a failure set by
// createInstance() is only seen inside this function and never reaches the
// caller's variable. If callers are meant to detect failure this would need to
// be a reference; confirm the intent before changing it.
3498 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
,
3499 UnicodeString
& result
, UErrorCode ec
) {
3501 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3502 if (U_SUCCESS(ec
)) {
3503 _trans(*t
, src
, result
);
// File-local helper: walks pairs two entries at a time, stopping at the first
// empty string found at an even index, and compares source with each
// even-indexed entry using default case folding.
// NOTE(review): the return statements are not visible here; presumably the
// entry following a match (pairs[i+1]) is returned, and the local `empty`
// string when nothing matches -- confirm.
3508 static UnicodeString
_findMatch(const UnicodeString
& source
,
3509 const UnicodeString
* pairs
) {
3510 UnicodeString empty
;
3511 for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) {
3512 if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) {
3519 // Check to see that incremental gets at least part way through a reasonable string.
// For every available source/target/variant combination whose source has a
// sample string in the local `tests` table, this creates the transliterator,
// runs CheckIncrementalAux() on the sample, and then tries the same on the
// inverse transliterator using the transliterated text.
3521 void TransliteratorTest::TestIncrementalProgress(void) {
3522 UErrorCode ec
= U_ZERO_ERROR
;
3523 UnicodeString latinTest
= "The Quick Brown Fox.";
3524 UnicodeString devaTest
;
// Build sample text for the other sources by transliterating the Latin sample.
// NOTE(review): the _trans overload used here takes its UErrorCode by value,
// so these calls cannot modify ec and the U_FAILURE(ec) check below cannot
// observe a failure inside them.
3525 _trans("Latin-Devanagari", latinTest
, devaTest
, ec
);
3526 UnicodeString kataTest
;
3527 _trans("Latin-Katakana", latinTest
, kataTest
, ec
);
3528 if (U_FAILURE(ec
)) {
3529 errln("FAIL: Internal error");
// Source name / sample text entries, searched with _findMatch().
3532 const UnicodeString tests
[] = {
3535 "Halfwidth", latinTest
,
3536 "Devanagari", devaTest
,
3537 "Katakana", kataTest
,
// NOTE(review): this function-scope `test` is shadowed by the `test` declared
// inside the source loop below and is not otherwise used in the visible lines.
3541 UnicodeString
test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3542 int32_t i
= 0, j
=0, k
=0;
3543 int32_t sources
= Transliterator::countAvailableSources();
3544 for (i
= 0; i
< sources
; i
++) {
3545 UnicodeString source
;
3546 Transliterator::getAvailableSource(i
, source
);
// Sources without a sample string are logged as skipped.
3547 UnicodeString test
= _findMatch(source
, tests
);
3548 if (test
.length() == 0) {
3549 logln((UnicodeString
)"Skipping " + source
+ "-X");
3552 int32_t targets
= Transliterator::countAvailableTargets(source
);
3553 for (j
= 0; j
< targets
; j
++) {
3554 UnicodeString target
;
3555 Transliterator::getAvailableTarget(j
, source
, target
);
3556 int32_t variants
= Transliterator::countAvailableVariants(source
, target
);
3557 for (k
=0; k
< variants
; k
++) {
3558 UnicodeString variant
;
3560 UErrorCode status
= U_ZERO_ERROR
;
3562 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
// The "/" is always appended, so an ID with an empty variant ends in "/";
// the exact-match comparisons further down rely on that form.
3563 UnicodeString id
= source
+ "-" + target
+ "/" + variant
;
3565 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
);
3566 if (U_FAILURE(status
)) {
3567 dataerrln((UnicodeString
)"FAIL: Could not create " + id
);
3571 status
= U_ZERO_ERROR
;
3572 CheckIncrementalAux(t
, test
);
// Transliterate the sample into rev so it can be fed to the inverse.
3575 _trans(*t
, test
, rev
);
3576 Transliterator
*inv
= t
->createInverse(status
);
3577 if (U_FAILURE(status
)) {
3578 // The following are forward-only, it is OK that creating an inverse will not work:
3579 // 1. Devanagari-Arabic
3581 // 2a. Any-*/BGN_1981
3584 // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3585 if ( id
.compare((UnicodeString
)"Devanagari-Arabic/") != 0
3586 && !(id
.startsWith((UnicodeString
)"Any-") &&
3587 (id
.endsWith((UnicodeString
)"/BGN") || id
.endsWith((UnicodeString
)"/BGN_1981") || id
.endsWith((UnicodeString
)"/UNGEGN") || id
.endsWith((UnicodeString
)"/MNS"))
3589 #if UCONFIG_NO_BREAK_ITERATION
3590 && id
.compare((UnicodeString
)"Latin-Thai/") != 0
3594 errln((UnicodeString
)"FAIL: Could not create inverse of " + id
);
3600 CheckIncrementalAux(inv
, rev
);
// Runs one incremental transliterate() call of t over a copy of input, with
// the position's context and limit covering the whole string, and reports:
//  - an error if the call itself fails,
//  - "No Progress" if pos.start is still 0 while pos.limit is non-zero
//    (except for the ID "Hex-Any/Unicode"),
//  - "Incomplete" if pos.start != pos.limit after finishTransliteration().
// NOTE(review): the declaration of pos and the initialisation of pos.start are
// not visible here; the "PASS Progress" logln is presumably the else-branch of
// the no-progress check -- confirm.
3608 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,
3609 const UnicodeString
& input
) {
3610 UErrorCode ec
= U_ZERO_ERROR
;
3612 UnicodeString test
= input
;
3614 pos
.contextStart
= 0;
3615 pos
.contextLimit
= input
.length();
3617 pos
.limit
= input
.length();
3619 t
->transliterate(test
, pos
, ec
);
3620 if (U_FAILURE(ec
)) {
3621 errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
));
3624 UBool gotError
= FALSE
;
3625 (void)gotError
; // Suppress set but not used warning.
3627 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3629 if (pos
.start
== 0 && pos
.limit
!= 0 && t
->getID() != "Hex-Any/Unicode") {
3630 errln((UnicodeString
)"No Progress, " +
3631 t
->getID() + ": " + formatInput(test
, input
, pos
));
3634 logln((UnicodeString
)"PASS Progress, " +
3635 t
->getID() + ": " + formatInput(test
, input
, pos
));
// Flush any pending text; afterwards start is expected to equal limit.
3637 t
->finishTransliteration(test
, pos
);
3638 if (pos
.start
!= pos
.limit
) {
3639 errln((UnicodeString
)"Incomplete, " +
3640 t
->getID() + ": " + formatInput(test
, input
, pos
));
// Tests function-call syntax (&Lower(...), &Hex(...), &Any-Lower(...)) in a
// rule: builds the transliterator "Test" from `rule`, round-trips it through
// toRules(r, TRUE) and reports whether r matches the original rule text, then
// checks the transliteration of "The Quick Brown Fox".
// NOTE(review): the comparison of r with rule is not visible here; it is
// inferred from the OK/FAIL messages below.
3645 void TransliteratorTest::TestFunction() {
3646 // Careful with spacing and ';' here: Phrase this exactly
3647 // as toRules() is going to return it. If toRules() changes
3648 // with regard to spacing or ';', then adjust this string.
3649 UnicodeString rule
=
3650 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3653 UErrorCode ec
= U_ZERO_ERROR
;
3654 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3656 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec
));
3661 t
->toRules(r
, TRUE
);
3663 logln((UnicodeString
)"OK: toRules() => " + r
);
3665 errln((UnicodeString
)"FAIL: toRules() => " + r
+
3666 ", expected " + rule
);
// Each uppercase letter is kept and followed by "(<lowercase>=<hex escape of
// lowercase>)".
3669 expect(*t
, "The Quick Brown Fox",
3670 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
// Verifies that rule sets which use the back-reference $1 in a rule with no
// capturing group (". > $1;") are rejected: createFromRules() is expected to
// return NULL and leave a failure code in ec.
// NOTE(review): both createFromRules() calls share ec without a reset in
// between, so if the first call fails the second one is entered with a
// failure already set and may return early without parsing rule2 -- confirm
// this is intended.
3675 void TransliteratorTest::TestInvalidBackRef(void) {
3676 UnicodeString rule
= ". > $1;";
3677 UnicodeString rule2
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3679 UErrorCode ec
= U_ZERO_ERROR
;
3680 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3681 Transliterator
*t2
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
);
3684 errln("FAIL: createFromRules should have returned NULL");
3689 errln("FAIL: createFromRules should have returned NULL");
// A success code here means the invalid rule was accepted.
3693 if (U_SUCCESS(ec
)) {
3694 errln("FAIL: Ok: . > $1; => no error");
3696 logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
));
// Tests rules whose UnicodeSets contain multi-character strings such as
// [{fg}] and [a {ab} {abc}]. Two rule sets are built under the ID "Test" and
// each is checked with expect() against a fixed input/output pair.
// NOTE(review): only fragments of the two rule strings are visible here.
3700 void TransliteratorTest::TestMulticharStringSet() {
3707 " e } [{fg}] > r;" ;
3710 UErrorCode ec
= U_ZERO_ERROR
;
3711 Transliterator
* t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3712 if (t
== NULL
|| U_FAILURE(ec
)) {
3714 errln("FAIL: createFromRules failed");
3718 expect(*t
, "a aa ab bc d gd de gde gdefg ddefg",
3719 "y x yz z d gd de gdq gdqfg ddrfg");
3722 // Overlapped string test. Make sure that when multiple
3723 // strings can match that the longest one is matched.
3725 " [a {ab} {abc}] > x;"
3728 " q [t {st} {rst}] { e > p;" ;
3730 t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3731 if (t
== NULL
|| U_FAILURE(ec
)) {
3733 errln("FAIL: createFromRules failed");
3737 expect(*t
, "a ab abc qte qste qrste",
3738 "x x x qtp qstp qrstp");
3742 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3743 // BEGIN TestUserFunction support factory
// Four-slot tables shared by _TUFFactory, _TUFReg and _TUFUnreg:
// _TUFF[n] is the prototype transliterator cloned by the factory for slot n,
// _TUFID[n] is a heap-allocated copy of the ID registered for slot n.
3745 Transliterator
* _TUFF
[4];
3746 UnicodeString
* _TUFID
[4];
// Factory callback passed to Transliterator::registerFactory(): ignores the
// requested ID and returns a clone of the prototype stored in
// _TUFF[context.integer]. No bounds or NULL check is made on the slot.
3748 static Transliterator
* U_EXPORT2
_TUFFactory(const UnicodeString
& /*ID*/,
3749 Transliterator::Token context
) {
3750 return _TUFF
[context
.integer
]->clone();
// Registers _TUFFactory under ID with the integer token n, and stores a
// heap-allocated copy of ID in _TUFID[n] for later unregistration.
// NOTE(review): t is not referenced in the visible lines; presumably it is
// stored in _TUFF[n] as the prototype that _TUFFactory clones -- confirm.
3753 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) {
3755 _TUFID
[n
] = new UnicodeString(ID
);
3756 Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
));
// Undoes _TUFReg for slot n: if _TUFF[n] is set, unregisters the ID saved in
// _TUFID[n].
// NOTE(review): release of _TUFF[n] and _TUFID[n] is not visible here;
// presumably both are deleted afterwards -- confirm.
3759 static void _TUFUnreg(int32_t n
) {
3760 if (_TUFF
[n
] != NULL
) {
3761 Transliterator::unregister(*_TUFID
[n
]);
3767 // END TestUserFunction support factory
3768 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3771 * Test that user-registered transliterators can be used under function
// Registers four rule-based transliterators through the _TUF* factory helpers
// (slots 0..3: Any-gif, Any-RemoveCurly, Any-hex2, Any-gif2) and checks that
// they can be invoked by name with the &Function(...) syntax inside other
// rules, including nested calls such as &Gif(&Hex2($1)).
// NOTE(review): the bodies of the two "for (i=0; i<4; ++i)" loops are not
// visible here; presumably the first clears the slot tables and the last
// unregisters every slot -- confirm.
3774 void TransliteratorTest::TestUserFunction() {
3778 UErrorCode ec
= U_ZERO_ERROR
;
3780 // Setup our factory
3782 for (i
=0; i
<4; ++i
) {
3786 // There's no need to register inverses if we don't use them
// Slot 0: turns a backslash-u hex escape into an <img> tag for that code point.
3787 t
= Transliterator::createFromRules("gif",
3788 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3789 UTRANS_FORWARD
, pe
, ec
);
3790 if (t
== NULL
|| U_FAILURE(ec
)) {
3791 dataerrln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
));
3794 _TUFReg("Any-gif", t
, 0);
// Slot 1: removes curly braces and the backslash-N prefix.
3796 t
= Transliterator::createFromRules("RemoveCurly",
3797 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3798 UTRANS_FORWARD
, pe
, ec
);
3799 if (t
== NULL
|| U_FAILURE(ec
)) {
3800 errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
));
3803 expect(*t
, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3804 _TUFReg("Any-RemoveCurly", t
, 1);
// Slot 2: "hex2"; after registration it is re-created through its registered
// ID and expected to turn "abc" into hex escapes.
3806 logln("Trying &hex");
3807 t
= Transliterator::createFromRules("hex2",
3809 UTRANS_FORWARD
, pe
, ec
);
3810 if (t
== NULL
|| U_FAILURE(ec
)) {
3811 errln("FAIL: createFromRules");
3814 logln("Registering");
3815 _TUFReg("Any-hex2", t
, 2);
3816 t
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
);
3817 if (t
== NULL
|| U_FAILURE(ec
)) {
3818 errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
));
3821 expect(*t
, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
// Slot 3: "gif2" nests the two user-registered functions.
3824 logln("Trying &gif");
3825 t
= Transliterator::createFromRules("gif2",
3826 "(.) > &Gif(&Hex2($1));",
3827 UTRANS_FORWARD
, pe
, ec
);
3828 if (t
== NULL
|| U_FAILURE(ec
)) {
3829 errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
));
3832 logln("Registering");
3833 _TUFReg("Any-gif2", t
, 3);
3834 t
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
);
3835 if (t
== NULL
|| U_FAILURE(ec
)) {
3836 errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
));
3839 expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3840 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3843 // Test that filters are allowed after &
3844 t
= Transliterator::createFromRules("test",
3845 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3846 UTRANS_FORWARD
, pe
, ec
);
3847 if (t
== NULL
|| U_FAILURE(ec
)) {
3848 errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
));
3852 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3856 for (i
=0; i
<4; ++i
) {
3862 * Test the Any-X transliterators.
// Creates "Any-Latin" and the compound "Any-Latin;Latin-ASCII" and pairs each
// with a mixed-script sample and its expected Latin/ASCII form.
// NOTE(review): the expect() calls that consume the CharsToUnicodeString
// pairs below are not visible here; the pairs are presumably their
// input/expected arguments -- confirm.
3864 void TransliteratorTest::TestAnyX(void) {
3865 UParseError parseError
;
3866 UErrorCode status
= U_ZERO_ERROR
;
3867 Transliterator
* anyLatin
=
3868 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3870 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status
));
// Greek, Hiragana and Cyrillic samples with their Latin transliterations.
3876 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3877 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3881 status
= U_ZERO_ERROR
;
3882 Transliterator
* anyASCII
=
3883 Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD
, parseError
, status
);
3884 if (U_FAILURE(status
) || anyASCII
==0) {
3885 dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status
));
// Arabic-Indic and Extended Arabic-Indic digits with their ASCII digits.
3891 CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
3892 CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
3898 * Test Any-X transliterators with sample letters from all scripts.
// Builds a sample string from alphabetic characters of every script code
// below USCRIPT_CODE_LIMIT (at most five per script), runs "Any-Latin" over
// it, and logs the input and the result. In the visible lines the result is
// only logged, not compared with an expected value.
3900 void TransliteratorTest::TestAny(void) {
3901 UErrorCode status
= U_ZERO_ERROR
;
3902 // Note: there is a lot of implicit construction of UnicodeStrings from (char *) in
3903 // function call parameters going on in this test.
3904 UnicodeSet
alphabetic("[:alphabetic:]", status
);
3905 if (U_FAILURE(status
)) {
3906 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3909 alphabetic
.freeze();
3911 UnicodeString testString
;
3912 for (int32_t i
= 0; i
< USCRIPT_CODE_LIMIT
; i
++) {
3913 const char *scriptName
= uscript_getShortName((UScriptCode
)i
);
3914 if (scriptName
== NULL
) {
3915 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__
, __LINE__
, i
);
// Restrict the script's character set to alphabetic characters.
3920 sample
.applyPropertyAlias("script", scriptName
, status
);
3921 if (U_FAILURE(status
)) {
3922 errln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3925 sample
.retainAll(alphabetic
);
// Append up to five characters of this script to the sample string.
// NOTE(review): handling of sets with fewer than five characters is not
// visible here -- confirm.
3926 for (int32_t count
=0; count
<5; count
++) {
3927 UChar32 c
= sample
.charAt(count
);
3931 testString
.append(c
);
3935 UParseError parseError
;
3936 Transliterator
* anyLatin
=
3937 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3938 if (U_FAILURE(status
)) {
3939 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3943 logln(UnicodeString("Sample set for Any-Latin: ") + testString
);
3944 anyLatin
->transliterate(testString
);
3945 logln(UnicodeString("Sample result for Any-Latin: ") + testString
);
3951 * Test the source and target set API. These are only implemented
3952 * for RBT and CompoundTransliterator at this time.
// Builds a transliterator from the rule text r and checks that
// getSourceSet() and getTargetSet() return exactly the expected sets
// [arx{lu}] and [bq]. On success the sets are logged; on mismatch the actual
// and expected patterns are reported.
// NOTE(review): the definition of r is not visible here.
3954 void TransliteratorTest::TestSourceTargetSet() {
3955 UErrorCode ec
= U_ZERO_ERROR
;
3963 UnicodeSet
expSrc("[arx{lu}]", ec
);
3966 UnicodeSet
expTrg("[bq]", ec
);
3969 Transliterator
* t
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
);
// ec accumulates failures from both set constructors and createFromRules().
3971 if (U_FAILURE(ec
)) {
3973 errln("FAIL: Couldn't set up test");
3977 UnicodeSet src
; t
->getSourceSet(src
);
3978 UnicodeSet trg
; t
->getTargetSet(trg
);
3980 if (src
== expSrc
&& trg
== expTrg
) {
3982 logln((UnicodeString
)"Ok: " +
3983 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3984 ", target = " + trg
.toPattern(b
, TRUE
));
// Scratch strings that receive the four patterns for the failure message.
3986 UnicodeString a
, b
, c
, d
;
3987 errln((UnicodeString
)"FAIL: " +
3988 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3989 ", expected " + expSrc
.toPattern(b
, TRUE
) +
3990 "; target = " + trg
.toPattern(c
, TRUE
) +
3991 ", expected " + expTrg
.toPattern(d
, TRUE
));
3998 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
// Checks that U+200E is ignored as pattern white space in two places:
// a transliterator rule "a > <U+200E> b;" must behave as "a > b;", and a
// UnicodeSet parsed from "[a <U+200E>]" must not contain U+200E.
4000 void TransliteratorTest::TestPatternWhiteSpace() {
4002 const char* r
= "a > \\u200E b;";
4004 UErrorCode ec
= U_ZERO_ERROR
;
4006 Transliterator
* t
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
);
4008 if (U_FAILURE(ec
)) {
4009 errln("FAIL: Couldn't set up test");
4011 expect(*t
, "a", "b");
4017 UnicodeSet
set(CharsToUnicodeString("[a \\u200E]"), ec
);
4019 if (U_FAILURE(ec
)) {
4020 errln("FAIL: Couldn't set up test");
// If the set contains U+200E the mark was taken literally, not skipped.
4022 if (set
.contains(0x200E)) {
4023 errln("FAIL: U+200E not being ignored by UnicodeSet");
4027 //======================================================================
4028 // this method is in TestUScript.java
4029 //======================================================================
// Walks every code point from 0 to 0x10FFFF, looks up its script, and builds
// two filtered IDs from the script's long and short names:
// "[:<name>:];NFD" and "[:<abbr>:];NFD". A transliterator is created for an
// ID only when it differs from the ID built for the previous code point
// (oldId / oldAbbrId), so each run of same-script code points is tested once.
// The name buffers are fixed 256-byte arrays filled with unbounded
// uprv_strcpy/uprv_strcat calls.
4030 void TransliteratorTest::TestAllCodepoints(){
4031 UScriptCode code
= USCRIPT_INVALID_CODE
;
4032 char id
[256]={'\0'};
4033 char abbr
[256]={'\0'};
4034 char newId
[256]={'\0'};
4035 char newAbbrId
[256]={'\0'};
4036 char oldId
[256]={'\0'};
4037 char oldAbbrId
[256]={'\0'};
4039 UErrorCode status
=U_ZERO_ERROR
;
4042 for(uint32_t i
= 0; i
<=0x10ffff; i
++){
4043 code
= uscript_getScript(i
,&status
);
4044 if(code
== USCRIPT_INVALID_CODE
){
4045 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i
);
4047 const char* myId
= uscript_getName(code
);
4049 dataerrln("Valid script code returned NULL name. Check your data!");
4052 uprv_strcpy(id
,myId
);
4053 uprv_strcpy(abbr
,uscript_getShortName(code
));
// Long-name ID.
4055 uprv_strcpy(newId
,"[:");
4056 uprv_strcat(newId
,id
);
4057 uprv_strcat(newId
,":];NFD");
// Short-name ID.
4059 uprv_strcpy(newAbbrId
,"[:");
4060 uprv_strcat(newAbbrId
,abbr
);
4061 uprv_strcat(newAbbrId
,":];NFD");
4063 if(uprv_strcmp(newId
,oldId
)!=0){
4064 Transliterator
* t
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
);
4065 if(t
==NULL
|| U_FAILURE(status
)){
4066 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
4070 if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){
4071 Transliterator
* t
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
);
4072 if(t
==NULL
|| U_FAILURE(status
)){
// NOTE(review): this message prints the long name `id` although the ID that
// failed was built from `abbr` -- confirm whether that is intended.
4073 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
// Remember the IDs just handled so the next code point can be compared.
4077 uprv_strcpy(oldId
,newId
);
4078 uprv_strcpy(oldAbbrId
, newAbbrId
);
// TEST_TRANSLIT_ID(id, cls): creates a forward transliterator for the ID `id`
// and reports an error if its getDynamicClassID() differs from
// cls::getStaticClassID(). `id` is passed to a %s format, so it must be a C
// string. Creation failure is reported with dataerrln.
// NOTE(review): several continuation lines of this macro are not visible here.
4084 #define TEST_TRANSLIT_ID(id, cls) { \
4085 UErrorCode ec = U_ZERO_ERROR; \
4086 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
4087 if (U_FAILURE(ec)) { \
4088 dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
4090 if (t->getDynamicClassID() != cls::getStaticClassID()) { \
4091 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4093 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
// TEST_TRANSLIT_RULE(rule, cls): like TEST_TRANSLIT_ID, but builds the
// transliterator from rule text with createFromRules() under the ID "_".
// `rule` must be a string literal, because the failure message is formed by
// adjacent-literal concatenation ("FAIL: Couldn't create " rule).
// NOTE(review): several continuation lines of this macro, including the
// declaration of pe, are not visible here.
4098 #define TEST_TRANSLIT_RULE(rule, cls) { \
4099 UErrorCode ec = U_ZERO_ERROR; \
4101 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
4102 if (U_FAILURE(ec)) { \
4103 errln("FAIL: Couldn't create " rule); \
4105 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
4106 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4108 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
// Class-ID check: for each ID (or rule) below, the created transliterator's
// dynamic class ID must equal the static class ID of the named implementation
// class. Uses the TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE macros.
4113 void TransliteratorTest::TestBoilerplate() {
4114 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
);
4115 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
);
4116 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
);
4117 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
);
4118 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
);
4119 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
);
4120 TEST_TRANSLIT_ID("Null", NullTransliterator
);
4121 TEST_TRANSLIT_ID("Remove", RemoveTransliterator
);
4122 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
);
4123 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
);
4124 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
);
4125 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
);
4126 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
);
// Tests rules written with the alternate operator characters U+2192, U+2190,
// U+2194 and U+2206; the sample data at the end pairs them with the ASCII
// operators ">", "<", "<>" and "&".
// NOTE(review): most arguments of the two expect() calls are not visible
// here, so the exact inputs and expected outputs cannot be described.
4129 void TransliteratorTest::TestAlternateSyntax() {
4134 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4137 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4138 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4139 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
// Rule sets used by TestBeginEnd and TestBeginEndToRules. Entries are
// referenced by index from BEGIN_END_TEST_CASES (and index 17 directly for
// the reverse-direction check), so positions must stay stable: the empty
// strings are placeholders for disabled cases, kept only to preserve the
// numbering.
// NOTE(review): only fragments of the individual rule strings are visible
// here.
4142 static const char* BEGIN_END_RULES
[] = {
4156 "", // test case commented out below, this is here to keep from messing up the indexes
4165 "", // test case commented out below, this is here to keep from messing up the indexes
4174 "", // test case commented out below, this is here to keep from messing up the indexes
4193 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4194 "$delim = [\\-$ws];"
4195 "$ws $delim* > ' ';"
4196 "'-' $delim* > '-';",
4200 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4201 "$delim = [\\-$ws];"
4202 "$ws $delim* > ' ';"
4203 "'-' $delim* > '-';",
4206 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4207 "$delim = [\\-$ws];"
4208 "$ws $delim* > ' ';"
4209 "'-' $delim* > '-';"
4213 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4214 "$delim = [\\-$ws];"
4216 "$ws $delim* > ' ';"
4217 "'-' $delim* > '-';",
4222 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4223 "$delim = [\\-$ws];"
4225 "$ws $delim* > ' ';"
4226 "'-' $delim* > '-';",
4228 "", // test case commented out below, this is here to keep from messing up the indexes
4232 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4233 "$delim = [\\-$ws];"
4235 "$ws $delim* > ' ';"
4236 "'-' $delim* > '-';"
4239 "", // test case commented out below, this is here to keep from messing up the indexes
4243 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4244 "$delim = [\\-$ws];"
4247 "$ws $delim* > ' ';"
4248 "'-' $delim* > '-';"
4251 "$ab { ' ' } $ab > '-';"
4258 "", // test case commented out below, this is here to keep from messing up the indexes
4261 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4262 "$delim = [\\-$ws];"
4265 "$ws $delim* > ' ';"
4266 "'-' $delim* > '-';"
4268 "$ab { ' ' } $ab > '-';"
4284 "", // test case commented out below, this is here to keep from messing up the indexes
4305 "", // test case commented out below, this is here to keep from messing up the indexes
4317 (This entire test is commented out below and will need some heavy revision when we re-add
4318 the ::BEGIN/::END stuff)
4319 static const char* BOGUS_BEGIN_END_RULES[] = {
4338 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
// Flat table of test cases consumed three entries at a time:
// { rule set, input text, expected output }. Rule sets are taken from
// BEGIN_END_RULES by index; the commented-out rows correspond to the
// placeholder entries of that table. BEGIN_END_TEST_CASES_length is the
// number of array elements (not the number of triples).
4341 static const char* BEGIN_END_TEST_CASES
[] = {
4342 // rules input expected output
4343 BEGIN_END_RULES
[0], "abc ababc aba", "xy zbc z",
4344 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4345 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4346 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4347 BEGIN_END_RULES
[4], "abc ababc aba", "xy abxy z",
4348 BEGIN_END_RULES
[5], "abccabaacababcbc", "PXAARXQBR",
4350 BEGIN_END_RULES
[6], "e e - e---e- e", "e e e-e-e",
4351 BEGIN_END_RULES
[7], "e e - e---e- e", "e e e-e-e",
4352 BEGIN_END_RULES
[8], "e e - e---e- e", "e e e-e-e",
4353 BEGIN_END_RULES
[9], "e e - e---e- e", "e e e-e-e",
4354 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4355 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4356 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4357 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4358 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4359 BEGIN_END_RULES
[13], "e e - e---e- e", "e e e-e-e",
4360 BEGIN_END_RULES
[13], "a a a a", "a%a%a%a",
4361 BEGIN_END_RULES
[13], "a a-b c b a", "a%a-b cb-a",
4363 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4364 BEGIN_END_RULES
[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4365 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4366 BEGIN_END_RULES
[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4368 static const int32_t BEGIN_END_TEST_CASES_length
= UPRV_LENGTHOF(BEGIN_END_TEST_CASES
);
// Runs every { rules, input, expected } triple of BEGIN_END_TEST_CASES through
// the four-argument expect(), labelling each as "Test case #<n>", then builds
// BEGIN_END_RULES[17] in the reverse direction and checks its output on a
// fixed sample. The trailing lines belong to a disabled check of ill-formed
// rule sets.
4370 void TransliteratorTest::TestBeginEnd() {
4371 // run through the list of test cases above
4373 for (i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4374 expect((UnicodeString
)"Test case #" + (i
/ 3),
4375 UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4376 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4377 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4380 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4381 UParseError parseError
;
4382 UErrorCode status
= U_ZERO_ERROR
;
4383 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4384 UTRANS_REVERSE
, parseError
, status
);
4385 if (reversed
== 0 || U_FAILURE(status
)) {
4386 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4388 expect(*reversed
, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4392 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4393 // that all of them cause errors
4395 (commented out until we have the real ::BEGIN/::END stuff in place
4396 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4397 UParseError parseError;
4398 UErrorCode status = U_ZERO_ERROR;
4399 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4400 UTRANS_FORWARD, parseError, status);
4401 if (!U_FAILURE(status)) {
4403 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
// Round-trip variant of TestBeginEnd: each rule set is compiled, serialised
// with toRules(), compiled again from the generated text, and the second
// transliterator is the one checked against the expected output. The same is
// then done for the reverse-direction instance of BEGIN_END_RULES[17].
// NOTE(review): the expect() call heads are not visible here; the argument
// lines below are presumably their input/expected arguments -- confirm.
4409 void TransliteratorTest::TestBeginEndToRules() {
4410 // run through the same list of test cases we used above, but this time, instead of just
4411 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4412 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4413 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4414 // to (i.e., does the same thing as) the original rule set
4415 for (int32_t i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4416 UParseError parseError
;
4417 UErrorCode status
= U_ZERO_ERROR
;
4418 Transliterator
* t
= Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4419 UTRANS_FORWARD
, parseError
, status
);
4420 if (U_FAILURE(status
)) {
4421 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError
, status
);
// Serialise with escaping enabled and rebuild from the generated rules.
4423 UnicodeString rules
;
4424 t
->toRules(rules
, TRUE
);
4425 Transliterator
* t2
= Transliterator::createFromRules((UnicodeString
)"Test case #" + (i
/ 3), rules
,
4426 UTRANS_FORWARD
, parseError
, status
);
4427 if (U_FAILURE(status
)) {
4428 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4429 parseError
, status
);
4433 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4434 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4441 // do the same thing for the reversible test case
4442 UParseError parseError
;
4443 UErrorCode status
= U_ZERO_ERROR
;
4444 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4445 UTRANS_REVERSE
, parseError
, status
);
4446 if (U_FAILURE(status
)) {
4447 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
// The rules generated from the reversed instance are compiled in the FORWARD
// direction; here toRules() is called with escaping disabled (FALSE).
4449 UnicodeString rules
;
4450 reversed
->toRules(rules
, FALSE
);
4451 Transliterator
* reversed2
= Transliterator::createFromRules("Reversed", rules
, UTRANS_FORWARD
,
4452 parseError
, status
);
4453 if (U_FAILURE(status
)) {
4454 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4455 parseError
, status
);
4459 UnicodeString("xy XY XYZ yz YZ"),
4460 UnicodeString("xy abc xaba yz aba"));
// Exercises Transliterator::registerAlias() / unregister() in two rounds.
//  1. "Any-CapVowels" is registered as an alias of the compound ID
//     "Lower;[aeiou]Upper". Instances created from the long ID and from
//     "CapVowels" must report their own IDs and produce identical toRules()
//     output; after unregistering, creation by the short ID must fail.
//  2. "Latin-dlgkjdflkjdl" is registered as an alias of "Latin-Greek" and the
//     same toRules() comparison is made.
// Every exit path unregisters only the alias this test registered (shortID in
// round 1, fakeID in round 2). realID is a pre-existing ID that this test did
// not register, so it must never be unregistered here.
4467 void TransliteratorTest::TestRegisterAlias() {
4468 UnicodeString
longID("Lower;[aeiou]Upper");
4469 UnicodeString
shortID("Any-CapVowels");
4470 UnicodeString
reallyShortID("CapVowels");
4472 Transliterator::registerAlias(shortID
, longID
);
4474 UErrorCode err
= U_ZERO_ERROR
;
4475 Transliterator
* t1
= Transliterator::createInstance(longID
, UTRANS_FORWARD
, err
);
4476 if (U_FAILURE(err
)) {
4477 errln("Failed to instantiate transliterator with long ID");
4478 Transliterator::unregister(shortID
);
4481 Transliterator
* t2
= Transliterator::createInstance(reallyShortID
, UTRANS_FORWARD
, err
);
4482 if (U_FAILURE(err
)) {
4483 errln("Failed to instantiate transliterator with short ID");
4485 Transliterator::unregister(shortID
);
4489 if (t1
->getID() != longID
)
4490 errln("Transliterator instantiated with long ID doesn't have long ID");
4491 if (t2
->getID() != reallyShortID
)
4492 errln("Transliterator instantiated with short ID doesn't have short ID");
4494 UnicodeString rules1
;
4495 UnicodeString rules2
;
4497 t1
->toRules(rules1
, TRUE
);
4498 t2
->toRules(rules2
, TRUE
);
4499 if (rules1
!= rules2
)
4500 errln("Alias transliterators aren't the same");
// Once the alias is removed, the short ID must no longer resolve.
4504 Transliterator::unregister(shortID
);
4506 t1
= Transliterator::createInstance(shortID
, UTRANS_FORWARD
, err
);
4507 if (U_SUCCESS(err
)) {
4508 errln("Instantiation with short ID succeeded after short ID was unregistered");
4512 // try the same thing again, but this time with something other than
4513 // an instance of CompoundTransliterator
4514 UnicodeString
realID("Latin-Greek");
4515 UnicodeString
fakeID("Latin-dlgkjdflkjdl");
4516 Transliterator::registerAlias(fakeID
, realID
);
4519 t1
= Transliterator::createInstance(realID
, UTRANS_FORWARD
, err
);
4520 if (U_FAILURE(err
)) {
4521 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err
));
// Clean up the alias registered above (previously this removed realID, which
// unregistered the real transliterator and left the alias behind).
4522 Transliterator::unregister(fakeID
);
4525 t2
= Transliterator::createInstance(fakeID
, UTRANS_FORWARD
, err
);
4526 if (U_FAILURE(err
)) {
4527 errln("Failed to instantiate transliterator with fake ID");
// Same cleanup rule as above: remove the alias, not the real ID.
4529 Transliterator::unregister(fakeID
);
4533 t1
->toRules(rules1
, TRUE
);
4534 t2
->toRules(rules2
, TRUE
);
4535 if (rules1
!= rules2
)
4536 errln("Alias transliterators aren't the same");
4540 Transliterator::unregister(fakeID
);
// Calls utrans_stripRules() on a small rule buffer and checks two things:
// the returned length against u_strlen(expectedRule), and the first `len`
// code units of the output against expectedRule. Each mismatch is reported
// through errln().
4543 void TransliteratorTest::TestRuleStripping() {
4546 \uE001>\u0C01; # SIGN
// Input buffer: '#', ' ', CR, LF, then U+E001 '>' U+0C01 ';' followed by
// " # SIGN" and a terminating 0.
4548 static const UChar rule
[] = {
4549 0x0023,0x0020,0x000D,0x000A,
4550 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
// Expected output: just U+E001 '>' U+0C01 ';' and a terminating 0.
4552 static const UChar expectedRule
[] = {
4553 0xE001,0x003E,0x0C01,0x003B,0
// Output buffer is sized with the same UPRV_LENGTHOF(rule) that is passed
// to utrans_stripRules() as the source length.
4555 UChar result
[UPRV_LENGTHOF(rule
)];
4556 UErrorCode status
= U_ZERO_ERROR
;
// NOTE(review): status is not examined in the lines shown here; only the
// returned length and the buffer contents are checked -- TODO confirm.
4557 int32_t len
= utrans_stripRules(rule
, UPRV_LENGTHOF(rule
), result
, &status
);
// Length check: the returned length must equal the length of expectedRule.
4558 if (len
!= u_strlen(expectedRule
)) {
4559 errln("utrans_stripRules return len = %d", len
);
// Content check: compares the first `len` code units of both buffers.
4561 if (u_strncmp(expectedRule
, result
, len
) != 0) {
4562 errln("utrans_stripRules did not return expected string");
4567 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
// Test for the "Halfwidth-Fullwidth" / "Fullwidth-Halfwidth" transliterator
// pair. Both instances are created in the forward direction; if either
// pointer is null the failure is reported through dataerrln() together with
// the error name for `status`.
4569 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4570 UParseError parseError
;
4571 UErrorCode status
= U_ZERO_ERROR
;
// The same parseError/status pair is handed to both createInstance() calls.
4572 Transliterator
* hf
= Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD
, parseError
, status
);
4573 Transliterator
* fh
= Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD
, parseError
, status
);
4574 if (hf
== 0 || fh
== 0) {
4575 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
// Table layout: consecutive groups of three strings. The loop below advances
// by 3 and reads DATA[i+1] into h and DATA[i+2] into f, so the first string
// of each group is the direction tag described here.
4581 // Array of 3n items
4583 // "hf"|"fh"|"both",
4586 const char* DATA
[] = {
4588 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4589 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4591 int32_t DATA_length
= UPRV_LENGTHOF(DATA
);
// One iteration per group of three table entries.
4593 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// The table strings hold backslash-u escapes; CharsToUnicodeString() is
// used to turn them into UnicodeString values.
4594 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
4595 UnicodeString f
= CharsToUnicodeString(DATA
[i
+2]);
// Case labels are the code points of 'h', 'f' and 'b' -- presumably matched
// against the first character of the direction tag DATA[i]; TODO confirm.
4597 case 0x68: //'h': // Halfwidth-Fullwidth only
4600 case 0x66: //'f': // Fullwidth-Halfwidth only
4603 case 0x62: //'b': // both directions
4615 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4616 * TODO: confirm that the expected results are correct.
4617 * For now, test just confirms that C++ and Java give identical results.
// Transliterates a Thai paragraph with the "Any-Latin" transliterator and
// hands the result, together with the expected Latin text, to expect().
// The body is guarded by !UCONFIG_NO_BREAK_ITERATION.
4619 void TransliteratorTest::TestThai(void) {
4620 #if !UCONFIG_NO_BREAK_ITERATION
4621 UParseError parseError
;
4622 UErrorCode status
= U_ZERO_ERROR
;
4623 Transliterator
* tr
= Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
// Two separate failure reports follow: one through dataerrln() and one,
// guarded by U_FAILURE(status), through errln().
4625 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
4628 if (U_FAILURE(status
)) {
4629 errln("FAIL: createInstance failed with %s", u_errorName(status
));
// Thai input, written with backslash-u escapes; it is unescaped below via
// UnicodeString::unescape() before use.
4632 const char *thaiText
=
4633 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4634 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4635 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4636 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4637 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4638 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4639 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4640 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4641 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4642 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4643 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4644 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4645 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4646 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4647 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4648 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4649 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4650 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4651 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4652 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4653 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4654 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4655 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4656 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4657 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4658 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4659 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4660 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4661 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4662 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
// Expected Latin text, also written with backslash-u escapes.
4664 const char *latinText
=
4665 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4666 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4667 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4668 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4669 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4670 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4671 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4672 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4673 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4674 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4675 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4676 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4677 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4678 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4679 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4680 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4681 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4682 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
// Unescape the Thai text, then transliterate it in place with tr.
4685 UnicodeString
xlitText(thaiText
);
4686 xlitText
= xlitText
.unescape();
4687 tr
->transliterate(xlitText
);
// Unescape the expected Latin text.
4689 UnicodeString
expectedText(latinText
);
4690 expectedText
= expectedText
.unescape();
// NOTE(review): xlitText has already been transliterated by tr above, and
// expect(t, source, expectedResult) transliterates its `source` argument
// again, so the comparison is made on text that passed through tr twice.
// Confirm this is intended rather than passing the unescaped Thai text.
4691 expect(*tr
, xlitText
, expectedText
);
4698 //======================================================================
4700 //======================================================================
// Convenience check keyed by transliterator ID: creates a forward
// transliterator for `id` and runs expect() on it.
//   id             - ID passed to Transliterator::createInstance()
//   source         - input text handed to expect()
//   expectedResult - text expect() compares the output against
// A creation failure is reported through errln() with the id and error name.
4701 void TransliteratorTest::expectT(const UnicodeString
& id
,
4702 const UnicodeString
& source
,
4703 const UnicodeString
& expectedResult
) {
4704 UErrorCode ec
= U_ZERO_ERROR
;
// `pe` is passed as the parse-error argument; its declaration is not among
// the lines shown here (presumably a local UParseError -- TODO confirm).
4706 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
4707 if (U_FAILURE(ec
)) {
4708 errln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(ec
));
// Delegates to the expect() overload taking a Transliterator reference.
4712 expect(*t
, source
, expectedResult
);
// Builds a diagnostic string from a UParseError and an error code.
//   message    - caller-supplied text (how it is combined with the pieces
//                below is on a line not shown here)
//   parseError - supplies line, offset, preContext and postContext
//   status     - converted to its name with u_errorName()
// The pieces are concatenated in the order: line, offset, pre-context,
// post-context, error name. Both context strings go through prettify().
// NOTE(review): the reporting call that receives this concatenation is not
// visible in these lines.
4716 void TransliteratorTest::reportParseError(const UnicodeString
& message
,
4717 const UParseError
& parseError
,
4718 const UErrorCode
& status
) {
4720 /*", parse error " + parseError.code +*/
4721 ", line " + parseError
.line
+
4722 ", offset " + parseError
.offset
+
4723 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
4724 ", post-context " + prettify(parseError
.postContext
,TRUE
) +
4725 ", Error: " + u_errorName(status
));
// Rule-based convenience overload without an explicit ID: forwards all of
// its arguments to the five-argument expect() overload, supplying the
// literal "<ID>" as the transliterator ID.
4728 void TransliteratorTest::expect(const UnicodeString
& rules
,
4729 const UnicodeString
& source
,
4730 const UnicodeString
& expectedResult
,
4731 UTransPosition
*pos
) {
4732 expect("<ID>", rules
, source
, expectedResult
, pos
);
// Rule-based overload: builds a forward transliterator from `rules` under
// the given `id` with Transliterator::createFromRules() and runs
// expect(*t, source, expectedResult, pos) on it.
//   id             - ID given to the rule-based transliterator
//   rules          - rule text to compile
//   source         - input text
//   expectedResult - expected output text
//   pos            - position pointer forwarded unchanged to expect()
4735 void TransliteratorTest::expect(const UnicodeString
& id
,
4736 const UnicodeString
& rules
,
4737 const UnicodeString
& source
,
4738 const UnicodeString
& expectedResult
,
4739 UTransPosition
*pos
) {
4740 UErrorCode status
= U_ZERO_ERROR
;
4741 UParseError parseError
;
4742 Transliterator
* t
= Transliterator::createFromRules(id
, rules
, UTRANS_FORWARD
, parseError
, status
);
// On failure the rule text and the parse-error details are reported
// through reportParseError().
4743 if (U_FAILURE(status
)) {
4744 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules
, parseError
, status
);
// Run the actual checks against the transliterator built above.
4746 expect(*t
, source
, expectedResult
, pos
);
// Round-trip overload: checks `t` on source -> expectedResult, then checks
// `reverseTransliterator` on expectedResult -> source (the two texts swap
// roles in the second call).
4751 void TransliteratorTest::expect(const Transliterator
& t
,
4752 const UnicodeString
& source
,
4753 const UnicodeString
& expectedResult
,
4754 const Transliterator
& reverseTransliterator
) {
// Forward direction.
4755 expect(t
, source
, expectedResult
);
// Reverse direction: arguments swapped.
4756 expect(reverseTransliterator
, expectedResult
, source
);
// Core check for a transliterator. The visible lines run `t` over `source`
// in three ways and report each through expectAux() under a tag built from
// t.getID():
//   ":String"      - whole-string transliterate() on a copy of source
//   ":Replaceable" - transliterate() / finishTransliteration() on rsource
//   ":Keyboard"    - incremental transliteration, one character at a time
//                    or via the supplied position
// NOTE(review): the conditionals that choose between the pos-based and the
// pos-less calls are not visible in these lines; the per-call comments
// below only describe what each visible call does.
4759 void TransliteratorTest::expect(const Transliterator
& t
,
4760 const UnicodeString
& source
,
4761 const UnicodeString
& expectedResult
,
4762 UTransPosition
*pos
) {
// 1) Whole-string form: transliterate a copy so `source` stays intact.
4764 UnicodeString
result(source
);
4765 t
.transliterate(result
);
4766 expectAux(t
.getID() + ":String", source
, result
, expectedResult
);
// Position record used by the incremental calls further down; starts
// with all four fields set to 0.
4768 UTransPosition index
={0, 0, 0, 0};
// 2) Replaceable form: works on a second copy of source.
4773 UnicodeString
rsource(source
);
4775 t
.transliterate(rsource
);
4777 // Do it all at once -- below we do it incrementally
4778 t
.finishTransliteration(rsource
, *pos
);
4780 expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
);
4782 // Test keyboard (incremental) transliteration -- this result
4783 // must be the same after we finalize (see below).
// `log` accumulates a textual trace of each step via formatInput(); its
// declaration is not among the lines shown here.
4788 formatInput(log
, rsource
, index
);
4790 UErrorCode status
= U_ZERO_ERROR
;
// Incremental call driven by `index` alone, traced before and after.
4791 t
.transliterate(rsource
, index
, status
);
4792 formatInput(log
, rsource
, index
);
// Character-by-character form: feed source one code unit at a time.
4794 for (int32_t i
=0; i
<source
.length(); ++i
) {
4798 log
.append(source
.charAt(i
)).append(" -> ");
// A fresh status is used for every inserted character.
4799 UErrorCode status
= U_ZERO_ERROR
;
4800 t
.transliterate(rsource
, index
, source
.charAt(i
), status
);
4801 formatInput(log
, rsource
, index
);
4805 // As a final step in keyboard transliteration, we must call
4806 // transliterate to finish off any pending partial matches that
4807 // were waiting for more input.
4808 t
.finishTransliteration(rsource
, index
);
4809 log
.append(" => ").append(rsource
);
// 3) Report the keyboard result: the trace is the summary and the pass
// flag is the comparison of rsource with expectedResult.
4811 expectAux(t
.getID() + ":Keyboard", log
,
4812 rsource
== expectedResult
,
4818 * @param appendTo result is appended to this param.
4819 * @param input the string being transliterated
4820 * @param pos the index struct
// Appends a rendering of `input` annotated with the four indices of `pos`
// to `appendTo`. When the indices are ordered
//   0 <= contextStart <= start <= limit <= contextLimit <= input.length()
// the text is split into five pieces at those indices and joined with
// '{', PIPE, PIPE and '}' markers. The "INVALID UTransPosition" text
// further down is presumably the fallback when that ordering check fails
// (the branch line itself is not visible here).
// NOTE(review): the return statement is not visible in these lines;
// presumably appendTo is returned -- TODO confirm.
4822 UnicodeString
& TransliteratorTest::formatInput(UnicodeString
&appendTo
,
4823 const UnicodeString
& input
,
4824 const UTransPosition
& pos
) {
4825 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4826 // the {} indicate the context start and limit, and the ||
4827 // indicate the start and limit.
4828 if (0 <= pos
.contextStart
&&
4829 pos
.contextStart
<= pos
.start
&&
4830 pos
.start
<= pos
.limit
&&
4831 pos
.limit
<= pos
.contextLimit
&&
4832 pos
.contextLimit
<= input
.length()) {
// a..e are the five consecutive slices of input delimited by the four
// indices: [0,contextStart) [contextStart,start) [start,limit)
// [limit,contextLimit) [contextLimit,length).
4834 UnicodeString a
, b
, c
, d
, e
;
4835 input
.extractBetween(0, pos
.contextStart
, a
);
4836 input
.extractBetween(pos
.contextStart
, pos
.start
, b
);
4837 input
.extractBetween(pos
.start
, pos
.limit
, c
);
4838 input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
);
4839 input
.extractBetween(pos
.contextLimit
, input
.length(), e
);
// Join the slices as a{b|c|d}e; 123 and 125 are the code points of '{'
// and '}', and PIPE is a constant defined elsewhere in this file.
4840 appendTo
.append(a
).append((UChar
)123/*{*/).append(b
).
4841 append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
).
4842 append((UChar
)125/*}*/).append(e
);
// Diagnostic form listing the four raw index values.
4844 appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" +
4845 pos
.contextStart
+ ", s=" + pos
.start
+ ", l=" +
4846 pos
.limit
+ ", cl=" + pos
.contextLimit
+ "} on " +
// String-comparison front end for expectAux(): forwards to the overload
// taking a summary and a pass flag, with
//   summary = source + " -> " + result
//   pass    = (result == expectedResult)
// `tag` is passed through unchanged.
4852 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4853 const UnicodeString
& source
,
4854 const UnicodeString
& result
,
4855 const UnicodeString
& expectedResult
) {
4856 expectAux(tag
, source
+ " -> " + result
,
4857 result
== expectedResult
,
// Reporting back end shared by the expect() helpers.
//   tag            - label printed in parentheses at the start of the message
//   summary        - description of the step, passed through prettify()
//   pass           - outcome flag supplied by the caller
//   expectedResult - expected text, appended to the failure message
// Two outputs are visible: a logln() line "(tag) summary" and a dataerrln()
// line starting with "FAIL: (tag) " and ending with ", expected ...".
// Presumably logln() is used when `pass` is true and dataerrln() otherwise;
// the branch on `pass` is not visible in these lines -- TODO confirm.
4861 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4862 const UnicodeString
& summary
, UBool pass
,
4863 const UnicodeString
& expectedResult
) {
4865 logln(UnicodeString("(")+tag
+") " + prettify(summary
));
4867 dataerrln(UnicodeString("FAIL: (")+tag
+") "
4869 + ", expected " + prettify(expectedResult
));
4873 #endif /* #if !UCONFIG_NO_TRANSLITERATION */