2 **********************************************************************
3 * Copyright (C) 1999-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "unicode/utf16.h"
45 /***********************************************************************
47 HOW TO USE THIS TEST FILE
49 How I developed on two platforms
50 without losing (too much of) my mind
53 1. Add new tests by copying/pasting/changing existing tests. On Java,
54 any public void method named Test...() taking no parameters becomes
55 a test. On C++, you need to modify the header and add a line to
56 the runIndexedTest() dispatch method.
58 2. Make liberal use of the expect() method; it is your friend.
60 3. The tests in this file exactly match those in a sister file on the
61 other side. The two files are:
63 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
64 icu4c: source/test/intltest/transtst.cpp
66 ==> THIS IS THE IMPORTANT PART <==
68 When you add a test in this file, add it in TransliteratorTest.java
69 too. Give it the same name and put it in the same relative place.
70 This makes maintenance a lot simpler for any poor soul who ends up
71 trying to synchronize the tests between icu4j and icu4c.
73 4. If you MUST enter a test that is NOT paralleled in the sister file,
74 then add it in the special non-mirrored section. These are
83 Make sure you document the reason the test is here and not there.
88 ***********************************************************************/
90 // Define character constants thusly to be EBCDIC-friendly
92 LEFT_BRACE
=((UChar
)0x007B), /*{*/
93 PIPE
=((UChar
)0x007C), /*|*/
94 ZERO
=((UChar
)0x0030), /*0*/
95 UPPER_A
=((UChar
)0x0041) /*A*/
98 TransliteratorTest::TransliteratorTest()
99 : DESERET_DEE((UChar32
)0x10414),
100 DESERET_dee((UChar32
)0x1043C)
// Destructor. The body is intentionally empty: nothing is released here.
104 TransliteratorTest::~TransliteratorTest() {}
107 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
,
108 const char* &name
, char* /*par*/) {
110 TESTCASE(0,TestInstantiation
);
111 TESTCASE(1,TestSimpleRules
);
112 TESTCASE(2,TestRuleBasedInverse
);
113 TESTCASE(3,TestKeyboard
);
114 TESTCASE(4,TestKeyboard2
);
115 TESTCASE(5,TestKeyboard3
);
116 TESTCASE(6,TestArabic
);
117 TESTCASE(7,TestCompoundKana
);
118 TESTCASE(8,TestCompoundHex
);
119 TESTCASE(9,TestFiltering
);
120 TESTCASE(10,TestInlineSet
);
121 TESTCASE(11,TestPatternQuoting
);
122 TESTCASE(12,TestJ277
);
123 TESTCASE(13,TestJ243
);
124 TESTCASE(14,TestJ329
);
125 TESTCASE(15,TestSegments
);
126 TESTCASE(16,TestCursorOffset
);
127 TESTCASE(17,TestArbitraryVariableValues
);
128 TESTCASE(18,TestPositionHandling
);
129 TESTCASE(19,TestHiraganaKatakana
);
130 TESTCASE(20,TestCopyJ476
);
131 TESTCASE(21,TestAnchors
);
132 TESTCASE(22,TestInterIndic
);
133 TESTCASE(23,TestFilterIDs
);
134 TESTCASE(24,TestCaseMap
);
135 TESTCASE(25,TestNameMap
);
136 TESTCASE(26,TestLiberalizedID
);
137 TESTCASE(27,TestCreateInstance
);
138 TESTCASE(28,TestNormalizationTransliterator
);
139 TESTCASE(29,TestCompoundRBT
);
140 TESTCASE(30,TestCompoundFilter
);
141 TESTCASE(31,TestRemove
);
142 TESTCASE(32,TestToRules
);
143 TESTCASE(33,TestContext
);
144 TESTCASE(34,TestSupplemental
);
145 TESTCASE(35,TestQuantifier
);
146 TESTCASE(36,TestSTV
);
147 TESTCASE(37,TestCompoundInverse
);
148 TESTCASE(38,TestNFDChainRBT
);
149 TESTCASE(39,TestNullInverse
);
150 TESTCASE(40,TestAliasInverseID
);
151 TESTCASE(41,TestCompoundInverseID
);
152 TESTCASE(42,TestUndefinedVariable
);
153 TESTCASE(43,TestEmptyContext
);
154 TESTCASE(44,TestCompoundFilterID
);
155 TESTCASE(45,TestPropertySet
);
156 TESTCASE(46,TestNewEngine
);
157 TESTCASE(47,TestQuantifiedSegment
);
158 TESTCASE(48,TestDevanagariLatinRT
);
159 TESTCASE(49,TestTeluguLatinRT
);
160 TESTCASE(50,TestCompoundLatinRT
);
161 TESTCASE(51,TestSanskritLatinRT
);
162 TESTCASE(52,TestLocaleInstantiation
);
163 TESTCASE(53,TestTitleAccents
);
164 TESTCASE(54,TestLocaleResource
);
165 TESTCASE(55,TestParseError
);
166 TESTCASE(56,TestOutputSet
);
167 TESTCASE(57,TestVariableRange
);
168 TESTCASE(58,TestInvalidPostContext
);
169 TESTCASE(59,TestIDForms
);
170 TESTCASE(60,TestToRulesMark
);
171 TESTCASE(61,TestEscape
);
172 TESTCASE(62,TestAnchorMasking
);
173 TESTCASE(63,TestDisplayName
);
174 TESTCASE(64,TestSpecialCases
);
175 #if !UCONFIG_NO_FILE_IO
176 TESTCASE(65,TestIncrementalProgress
);
178 TESTCASE(66,TestSurrogateCasing
);
179 TESTCASE(67,TestFunction
);
180 TESTCASE(68,TestInvalidBackRef
);
181 TESTCASE(69,TestMulticharStringSet
);
182 TESTCASE(70,TestUserFunction
);
183 TESTCASE(71,TestAnyX
);
184 TESTCASE(72,TestSourceTargetSet
);
185 TESTCASE(73,TestGurmukhiDevanagari
);
186 TESTCASE(74,TestPatternWhiteSpace
);
187 TESTCASE(75,TestAllCodepoints
);
188 TESTCASE(76,TestBoilerplate
);
189 TESTCASE(77,TestAlternateSyntax
);
190 TESTCASE(78,TestBeginEnd
);
191 TESTCASE(79,TestBeginEndToRules
);
192 TESTCASE(80,TestRegisterAlias
);
193 TESTCASE(81,TestRuleStripping
);
194 TESTCASE(82,TestHalfwidthFullwidth
);
195 TESTCASE(83,TestThai
);
196 TESTCASE(84,TestAny
);
197 default: name
= ""; break;
202 * Make sure every system transliterator can be instantiated.
204 * ALSO test that the result of toRules() for each rule is a valid
205 * rule. Do this here so we don't have to have another test that
206 * instantiates everything as well.
208 void TransliteratorTest::TestInstantiation() {
209 UErrorCode ec
= U_ZERO_ERROR
;
210 StringEnumeration
* avail
= Transliterator::getAvailableIDs(ec
);
211 assertSuccess("getAvailableIDs()", ec
);
212 assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
);
213 int32_t n
= Transliterator::countAvailableIDs();
214 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
215 avail
->count(ec
) == n
);
216 assertSuccess("count()", ec
);
218 for (int32_t i
=0; i
<n
; ++i
) {
219 const UnicodeString
& id
= *avail
->snext(ec
);
220 if (!assertSuccess("snext()", ec
) ||
221 !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) {
224 UnicodeString id2
= Transliterator::getAvailableID(i
);
225 if (id
.length() < 1) {
226 errln(UnicodeString("FAIL: getAvailableID(") +
227 i
+ ") returned empty string");
231 errln(UnicodeString("FAIL: getAvailableID(") +
232 i
+ ") != getAvailableIDs().snext()");
235 UParseError parseError
;
236 UErrorCode status
= U_ZERO_ERROR
;
237 Transliterator
* t
= Transliterator::createInstance(id
,
238 UTRANS_FORWARD
, parseError
,status
);
240 Transliterator::getDisplayName(id
, name
);
242 #if UCONFIG_NO_BREAK_ITERATION
243 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
244 if (id
.compare((UnicodeString
)"Thai-Latin") != 0)
246 dataerrln(UnicodeString("FAIL: Couldn't create ") + id
+
247 /*", parse error " + parseError.code +*/
248 ", line " + parseError
.line
+
249 ", offset " + parseError
.offset
+
250 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
251 ", post-context " +prettify(parseError
.postContext
,TRUE
) +
252 ", Error: " + u_errorName(status
));
253 // When createInstance fails, it deletes the failing
254 // entry from the available ID list. We detect this
255 // here by looking for a change in countAvailableIDs.
256 int32_t nn
= Transliterator::countAvailableIDs();
259 --i
; // Compensate for deleted entry
262 logln(UnicodeString("OK: ") + name
+ " (" + id
+ ")");
266 t
->toRules(rules
, TRUE
);
267 Transliterator
*u
= Transliterator::createFromRules("x",
268 rules
, UTRANS_FORWARD
, parseError
,status
);
270 errln(UnicodeString("FAIL: ") + id
+
271 ".createFromRules() => bad rules" +
272 /*", parse error " + parseError.code +*/
273 ", line " + parseError
.line
+
274 ", offset " + parseError
.offset
+
275 ", context " + prettify(parseError
.preContext
, TRUE
) +
276 ", rules: " + prettify(rules
, TRUE
));
283 assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
);
284 assertSuccess("snext()", ec
);
287 // Now test the failure path
288 UParseError parseError
;
289 UErrorCode status
= U_ZERO_ERROR
;
290 UnicodeString
id("<Not a valid Transliterator ID>");
291 Transliterator
* t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
293 errln("FAIL: " + id
+ " returned a transliterator");
296 logln("OK: Bogus ID handled properly");
300 void TransliteratorTest::TestSimpleRules(void) {
301 /* Example: rules 1. ab>x|y
304 * []|eabcd start - no match, copy e to transliterated buffer
305 * [e]|abcd match rule 1 - copy output & adjust cursor
306 * [ex|y]cd match rule 2 - copy output & adjust cursor
307 * [exz]|d no match, copy d to transliterated buffer
310 expect(UnicodeString("ab>x|y;", "") +
314 /* Another set of rules:
326 expect(UnicodeString("ab>x|yzacw;") +
334 UErrorCode status
= U_ZERO_ERROR
;
335 UParseError parseError
;
336 Transliterator
*t
= Transliterator::createFromRules(
338 UnicodeString("$dummy=").append((UChar
)0xE100) +
340 "$vowel=[aeiouAEIOU];"
342 "$vowel } $lu > '!';"
347 UTRANS_FORWARD
, parseError
,
349 if (U_FAILURE(status
)) {
350 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status
));
353 expect(*t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
358 * Test inline set syntax and set variable syntax.
360 void TransliteratorTest::TestInlineSet(void) {
361 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
362 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
364 expect(UnicodeString(
367 "$alphanumeric = [$digit $alpha];" // ***
368 "$special = [^$alphanumeric];" // ***
369 "$alphanumeric > '-';"
370 "$special > '*';", ""),
372 "thx-1138", "---*----");
376 * Create some inverses and confirm that they work. We have to be
377 * careful how we do this, since the inverses will not be true
378 * inverses -- we can't throw any random string at the composition
379 * of the transliterators and expect the identity function. F x
380 * F' != I. However, if we are careful about the input, we will
381 * get the expected results.
383 void TransliteratorTest::TestRuleBasedInverse(void) {
384 UnicodeString RULES
=
385 UnicodeString("abc>zyx;") +
403 const char* DATA
[] = {
404 // Careful here -- random strings will not work. If we keep
405 // the left side to the domain and the right side to the range
406 // we will be okay though (left, abc; right xyz).
408 "abcacab", "zyxxxyy",
412 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
414 UErrorCode status
= U_ZERO_ERROR
;
415 UParseError parseError
;
416 Transliterator
*fwd
= Transliterator::createFromRules("<ID>", RULES
,
417 UTRANS_FORWARD
, parseError
, status
);
418 Transliterator
*rev
= Transliterator::createFromRules("<ID>", RULES
,
419 UTRANS_REVERSE
, parseError
, status
);
420 if (U_FAILURE(status
)) {
421 errln("FAIL: RBT constructor failed");
424 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
425 expect(*fwd
, DATA
[i
], DATA
[i
+1]);
426 expect(*rev
, DATA
[i
+1], DATA
[i
]);
433 * Basic test of keyboard.
435 void TransliteratorTest::TestKeyboard(void) {
436 UParseError parseError
;
437 UErrorCode status
= U_ZERO_ERROR
;
438 Transliterator
*t
= Transliterator::createFromRules("<ID>",
439 UnicodeString("psch>Y;")
443 UTRANS_FORWARD
, parseError
,
445 if (U_FAILURE(status
)) {
446 errln("FAIL: RBT constructor failed");
449 const char* DATA
[] = {
457 0, "AycAY", // null means finishKeyboardTransliteration
460 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
465 * Basic test of keyboard with cursor.
467 void TransliteratorTest::TestKeyboard2(void) {
468 UParseError parseError
;
469 UErrorCode status
= U_ZERO_ERROR
;
470 Transliterator
*t
= Transliterator::createFromRules("<ID>",
471 UnicodeString("ych>Y;")
475 UTRANS_FORWARD
, parseError
,
477 if (U_FAILURE(status
)) {
478 errln("FAIL: RBT constructor failed");
481 const char* DATA
[] = {
485 "s", "Aps", // modified for rollback - "Ay",
486 "c", "Apsc", // modified for rollback - "Ayc",
489 "s", "AycAps", // modified for rollback - "AycAy",
490 "c", "AycApsc", // modified for rollback - "AycAyc",
492 0, "AycAY", // null means finishKeyboardTransliteration
495 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
500 * Test keyboard transliteration with back-replacement.
502 void TransliteratorTest::TestKeyboard3(void) {
503 // We want th>z but t>y. Furthermore, during keyboard
504 // transliteration we want t>y then yh>z if t, then h are
506 UnicodeString
RULES("t>|y;"
509 const char* DATA
[] = {
510 // Column 1: characters to add to buffer (as if typed)
511 // Column 2: expected appearance of buffer after
512 // keyboard transliteration.
515 "t", "abt", // modified for rollback - "aby",
517 "t", "abyct", // modified for rollback - "abycy",
519 0, "abycz", // null means finishKeyboardTransliteration
522 UParseError parseError
;
523 UErrorCode status
= U_ZERO_ERROR
;
524 Transliterator
*t
= Transliterator::createFromRules("<ID>", RULES
, UTRANS_FORWARD
, parseError
, status
);
525 if (U_FAILURE(status
)) {
526 errln("FAIL: RBT constructor failed");
529 keyboardAux(*t
, DATA
, (int32_t)(sizeof(DATA
)/sizeof(DATA
[0])));
533 void TransliteratorTest::keyboardAux(const Transliterator
& t
,
534 const char* DATA
[], int32_t DATA_length
) {
535 UErrorCode status
= U_ZERO_ERROR
;
536 UTransPosition index
={0, 0, 0, 0};
538 for (int32_t i
=0; i
<DATA_length
; i
+=2) {
544 t
.transliterate(s
, index
, DATA
[i
], status
);
547 t
.finishTransliteration(s
, index
);
549 // Show the start index '{' and the cursor '|'
550 UnicodeString a
, b
, c
;
551 s
.extractBetween(0, index
.contextStart
, a
);
552 s
.extractBetween(index
.contextStart
, index
.start
, b
);
553 s
.extractBetween(index
.start
, s
.length(), c
);
555 append((UChar
)LEFT_BRACE
).
559 if (s
== DATA
[i
+1] && U_SUCCESS(status
)) {
562 errln(UnicodeString("FAIL: ") + log
+ ", expected " + DATA
[i
+1]);
567 void TransliteratorTest::TestArabic(void) {
568 // Test disabled for 2.0 until new Arabic transliterator can be written.
570 // const char* DATA[] = {
571 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
572 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
573 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
574 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
575 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
576 // "\u062c\u0645\u064a\u0644\u0629",
580 // UChar ar_raw[] = {
581 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
582 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
583 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
584 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
585 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
586 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
588 // UnicodeString ar(ar_raw);
589 // UErrorCode status=U_ZERO_ERROR;
590 // UParseError parseError;
591 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
593 // errln("FAIL: createInstance failed");
596 // expect(*t, "Arabic", ar);
601 * Compose the Kana transliterator forward and reverse and try
602 * some strings that should come out unchanged.
604 void TransliteratorTest::TestCompoundKana(void) {
605 UParseError parseError
;
606 UErrorCode status
= U_ZERO_ERROR
;
607 Transliterator
* t
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
);
609 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status
));
611 expect(*t
, "aaaaa", "aaaaa");
617 * Compose the hex transliterators forward and reverse.
619 void TransliteratorTest::TestCompoundHex(void) {
620 UParseError parseError
;
621 UErrorCode status
= U_ZERO_ERROR
;
622 Transliterator
* a
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
623 Transliterator
* b
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
);
624 Transliterator
* transab
[] = { a
, b
};
625 Transliterator
* transba
[] = { b
, a
};
626 if (a
== 0 || b
== 0) {
627 errln("FAIL: construction failed");
632 // Do some basic tests of a
633 expect(*a
, "01", UnicodeString("\\u0030\\u0031", ""));
634 // Do some basic tests of b
635 expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01");
637 Transliterator
* ab
= new CompoundTransliterator(transab
, 2);
638 UnicodeString
s("abcde", "");
641 UnicodeString
str(s
);
642 a
->transliterate(str
);
643 Transliterator
* ba
= new CompoundTransliterator(transba
, 2);
644 expect(*ba
, str
, str
);
652 int gTestFilterClassID
= 0;
654 * Used by TestFiltering().
656 class TestFilter
: public UnicodeFilter
{
657 virtual UnicodeFunctor
* clone() const {
658 return new TestFilter(*this);
660 virtual UBool
contains(UChar32 c
) const {
661 return c
!= (UChar
)0x0063 /*c*/;
664 virtual UnicodeString
& toPattern(UnicodeString
& result
,
665 UBool
/*escapeUnprintable*/) const {
668 virtual UBool
matchesIndexValue(uint8_t /*v*/) const {
671 virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {}
673 UClassID
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; }
677 * Do some basic tests of filtering.
679 void TransliteratorTest::TestFiltering(void) {
680 UParseError parseError
;
681 UErrorCode status
= U_ZERO_ERROR
;
682 Transliterator
* hex
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
);
684 errln("FAIL: createInstance(Any-Hex) failed");
687 hex
->adoptFilter(new TestFilter());
688 UnicodeString
s("abcde");
689 hex
->transliterate(s
);
690 UnicodeString
exp("\\u0061\\u0062c\\u0064\\u0065", "");
692 logln(UnicodeString("Ok: \"") + exp
+ "\"");
694 logln(UnicodeString("FAIL: \"") + s
+ "\", wanted \"" + exp
+ "\"");
697 // ICU4C ONLY. Transliterator.orphanFilter() is not found in ICU4J.
698 UnicodeFilter
*f
= hex
->orphanFilter();
700 errln("FAIL: orphanFilter() should get a UnicodeFilter");
710 void TransliteratorTest::TestAnchors(void) {
711 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
714 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
717 expect(UnicodeString("^ab > 01 ;"
725 expect(UnicodeString("$s = [z$] ;"
732 "abzababbabxzabxabx",
737 * Test pattern quoting and escape mechanisms.
739 void TransliteratorTest::TestPatternQuoting(void) {
741 // Each item is <rules>, <input>, <expected output>
742 const UnicodeString DATA
[] = {
743 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
744 UnicodeString(UChar(0x4E01)),
748 for (int32_t i
=0; i
<3; i
+=3) {
749 logln(UnicodeString("Pattern: ") + prettify(DATA
[i
]));
750 UParseError parseError
;
751 UErrorCode status
= U_ZERO_ERROR
;
752 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
753 if (U_FAILURE(status
)) {
754 errln("RBT constructor failed");
756 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
763 * Regression test for bugs found in Greek transliteration.
765 void TransliteratorTest::TestJ277(void) {
766 UErrorCode status
= U_ZERO_ERROR
;
767 UParseError parseError
;
768 Transliterator
*gl
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
);
770 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status
));
775 UChar upsilon
= 0x3C5;
777 // UChar PHI = 0x3A6;
779 // UChar omega = 0x3C9;
780 // UChar omicron = 0x3BF;
781 // UChar epsilon = 0x3B5;
783 // sigma upsilon nu -> syn
785 syn
.append(sigma
).append(upsilon
).append(nu
);
786 expect(*gl
, syn
, "syn");
788 // sigma alpha upsilon nu -> saun
790 sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
);
791 expect(*gl
, sayn
, "saun");
793 // Again, using a smaller rule set
798 "$ypsilon = \\u03C5;"
799 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
802 "u <> $vowel { $ypsilon;"
806 Transliterator
*mini
= Transliterator::createFromRules("mini", rules
, UTRANS_REVERSE
, parseError
, status
);
807 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
808 expect(*mini
, syn
, "syn");
809 expect(*mini
, sayn
, "saun");
813 #if !UCONFIG_NO_FORMATTING
814 // Transliterate the Greek locale data
816 DateFormatSymbols
syms(el
, status
);
817 if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; }
819 const UnicodeString
* data
= syms
.getMonths(count
);
820 for (i
=0; i
<count
; ++i
) {
821 if (data
[i
].length() == 0) {
824 UnicodeString
out(data
[i
]);
825 gl
->transliterate(out
);
827 if (data
[i
].length() >= 2 && out
.length() >= 2 &&
828 u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) {
829 if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) {
834 logln(prettify(data
[i
] + " -> " + out
));
836 errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
));
845 * Prefix, suffix support in hex transliterators
847 void TransliteratorTest::TestJ243(void) {
848 UErrorCode ec
= U_ZERO_ERROR
;
850 // Test default Hex-Any, which should handle
851 // \u, \U, u+, and U+
852 Transliterator
*hex
=
853 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
);
854 if (assertSuccess("getInstance", ec
)) {
855 expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
859 // // Try a custom Hex-Unicode
860 // // \uXXXX and &#xXXXX;
861 // ec = U_ZERO_ERROR;
862 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
863 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
864 // "abcd5fx0123");
865 // // Try custom Any-Hex (default is tested elsewhere)
866 // ec = U_ZERO_ERROR;
867 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
868 // expect(hex3, "012", "012");
872 * Parsers need better syntax error messages.
874 void TransliteratorTest::TestJ329(void) {
876 struct { UBool containsErrors
; const char* rule
; } DATA
[] = {
877 { FALSE
, "a > b; c > d" },
878 { TRUE
, "a > b; no operator; c > d" },
880 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
882 for (int32_t i
=0; i
<DATA_length
; ++i
) {
883 UErrorCode status
= U_ZERO_ERROR
;
884 UParseError parseError
;
885 Transliterator
*rbt
= Transliterator::createFromRules("<ID>",
890 UBool gotError
= U_FAILURE(status
);
891 UnicodeString
desc(DATA
[i
].rule
);
892 desc
.append(gotError
? " -> error" : " -> no error");
894 desc
= desc
+ ", ParseError code=" + u_errorName(status
) +
895 " line=" + parseError
.line
+
896 " offset=" + parseError
.offset
+
897 " context=" + parseError
.preContext
;
899 if (gotError
== DATA
[i
].containsErrors
) {
900 logln(UnicodeString("Ok: ") + desc
);
902 errln(UnicodeString("FAIL: ") + desc
);
909 * Test segments and segment references.
911 void TransliteratorTest::TestSegments(void) {
913 // Each item is <rules>, <input>, <expected output>
914 UnicodeString DATA
[] = {
915 "([a-z]) '.' ([0-9]) > $2 '-' $1",
920 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
924 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
926 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
927 logln("Pattern: " + prettify(DATA
[i
]));
928 UParseError parseError
;
929 UErrorCode status
= U_ZERO_ERROR
;
930 Transliterator
*t
= Transliterator::createFromRules("ID", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
931 if (U_FAILURE(status
)) {
932 errln("FAIL: RBT constructor");
934 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
941 * Test cursor positioning outside of the key
943 void TransliteratorTest::TestCursorOffset(void) {
945 // Each item is <rules>, <input>, <expected output>
946 UnicodeString DATA
[] = {
947 "pre {alpha} post > | @ ALPHA ;"
949 "pre {beta} post > BETA @@ | ;"
952 "prealphapost prebetapost",
954 "prbetaxyz preBETApost",
956 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
958 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
959 logln("Pattern: " + prettify(DATA
[i
]));
960 UParseError parseError
;
961 UErrorCode status
= U_ZERO_ERROR
;
962 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
963 if (U_FAILURE(status
)) {
964 errln("FAIL: RBT constructor");
966 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
973 * Test zero length and > 1 char length variable values. Test
974 * use of variable refs in UnicodeSets.
976 void TransliteratorTest::TestArbitraryVariableValues(void) {
978 // Each item is <rules>, <input>, <expected output>
979 UnicodeString DATA
[] = {
997 int32_t DATA_length
= (int32_t)(sizeof(DATA
)/sizeof(*DATA
));
999 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1000 logln("Pattern: " + prettify(DATA
[i
]));
1001 UParseError parseError
;
1002 UErrorCode status
= U_ZERO_ERROR
;
1003 Transliterator
*t
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1004 if (U_FAILURE(status
)) {
1005 errln("FAIL: RBT constructor");
1007 expect(*t
, DATA
[i
+1], DATA
[i
+2]);
1014 * Confirm that the contextStart, contextLimit, start, and limit
1015 * behave correctly. J474.
1017 void TransliteratorTest::TestPositionHandling(void) {
1018 // Array of 3n items
1019 // Each item is <rules>, <input>, <expected output>
1020 const char* DATA
[] = {
1021 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1022 "xtat txtb", // pos 0,9,0,9
1025 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1026 "xtat txtb", // pos 2,9,3,8
1029 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1030 "xtat txtb", // pos 3,8,3,8
1034 // Array of 4n positions -- these go with the DATA array
1035 // They are: contextStart, contextLimit, start, limit
1042 int32_t n
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0])) / 3;
1043 for (int32_t i
=0; i
<n
; i
++) {
1044 UErrorCode status
= U_ZERO_ERROR
;
1045 UParseError parseError
;
1046 Transliterator
*t
= Transliterator::createFromRules("<ID>",
1047 DATA
[3*i
], UTRANS_FORWARD
, parseError
, status
);
1048 if (U_FAILURE(status
)) {
1050 errln("FAIL: RBT constructor");
1054 pos
.contextStart
= POS
[4*i
];
1055 pos
.contextLimit
= POS
[4*i
+1];
1056 pos
.start
= POS
[4*i
+2];
1057 pos
.limit
= POS
[4*i
+3];
1058 UnicodeString
rsource(DATA
[3*i
+1]);
1059 t
->transliterate(rsource
, pos
, status
);
1060 if (U_FAILURE(status
)) {
1062 errln("FAIL: transliterate");
1065 t
->finishTransliteration(rsource
, pos
);
1066 expectAux(DATA
[3*i
],
1075 * Test the Hiragana-Katakana transliterator.
1077 void TransliteratorTest::TestHiraganaKatakana(void) {
1078 UParseError parseError
;
1079 UErrorCode status
= U_ZERO_ERROR
;
1080 Transliterator
* hk
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
);
1081 Transliterator
* kh
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
);
1082 if (hk
== 0 || kh
== 0) {
1083 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1089 // Array of 3n items
1090 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1091 const char* DATA
[] = {
1093 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1094 "\\u30A2\\u30F8\\u30F2\\u30B0",
1097 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1098 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1100 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1102 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1103 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
1104 UnicodeString k
= CharsToUnicodeString(DATA
[i
+2]);
1106 case 0x68: //'h': // Hiragana-Katakana
1109 case 0x6B: //'k': // Katakana-Hiragana
1112 case 0x62: //'b': // both
1123 * Test cloning / copy constructor of RBT.
1125 void TransliteratorTest::TestCopyJ476(void) {
1126 // The real test here is what happens when the destructors are
1127 // called. So we let one object get destructed, and check to
1128 // see that its copy still works.
1129 Transliterator
*t2
= 0;
1131 UParseError parseError
;
1132 UErrorCode status
= U_ZERO_ERROR
;
1133 Transliterator
*t1
= Transliterator::createFromRules("t1",
1134 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD
, parseError
, status
);
1135 if (U_FAILURE(status
)) {
1136 errln("FAIL: RBT constructor");
1139 t2
= t1
->clone(); // Call copy constructor under the covers.
1140 expect(*t1
, "abcfoofoo", "ABcbar");
1143 expect(*t2
, "abcfoofoo", "ABcbar");
1148 * Test inter-Indic transliterators. These are composed.
1149 * ICU4C Jitterbug 483.
1151 void TransliteratorTest::TestInterIndic(void) {
1152 UnicodeString
ID("Devanagari-Gujarati", "");
1153 UErrorCode status
= U_ZERO_ERROR
;
1154 UParseError parseError
;
1155 Transliterator
* dg
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1157 dataerrln("FAIL: createInstance(" + ID
+ ") returned NULL - " + u_errorName(status
));
1160 UnicodeString id
= dg
->getID();
1162 errln("FAIL: createInstance(" + ID
+ ")->getID() => " + id
);
1164 UnicodeString dev
= CharsToUnicodeString("\\u0901\\u090B\\u0925");
1165 UnicodeString guj
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1166 expect(*dg
, dev
, guj
);
1171 * Test filter syntax in IDs. (J918)
1173 void TransliteratorTest::TestFilterIDs(void) {
1174 // Array of 4n strings:
1175 // <id>, <inverse id>, <input>, <expected output>
1176 const char* DATA
[] = {
1177 "[aeiou]Any-Hex", // ID
1178 "[aeiou]Hex-Any", // expected inverse ID
1180 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1182 "[aeiou]Any-Hex;[^5]Hex-Any",
1183 "[^5]Any-Hex;[aeiou]Hex-Any",
1192 enum { DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]) };
1194 for (int i
=0; i
<DATA_length
; i
+=4) {
1195 UnicodeString
ID(DATA
[i
], "");
1196 UnicodeString
uID(DATA
[i
+1], "");
1197 UnicodeString
data2(DATA
[i
+2], "");
1198 UnicodeString
data3(DATA
[i
+3], "");
1199 UParseError parseError
;
1200 UErrorCode status
= U_ZERO_ERROR
;
1201 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
);
1203 errln("FAIL: createInstance(" + ID
+ ") returned NULL");
1206 expect(*t
, data2
, data3
);
1209 if (ID
!= t
->getID()) {
1210 errln("FAIL: createInstance(" + ID
+ ").getID() => " +
1214 // Check the inverse
1215 Transliterator
*u
= t
->createInverse(status
);
1217 errln("FAIL: " + ID
+ ".createInverse() returned NULL");
1218 } else if (u
->getID() != uID
) {
1219 errln("FAIL: " + ID
+ ".createInverse().getID() => " +
1220 u
->getID() + ", expected " + uID
);
1229 * Test the case mapping transliterators.
1231 void TransliteratorTest::TestCaseMap(void) {
1232 UParseError parseError
;
1233 UErrorCode status
= U_ZERO_ERROR
;
1234 Transliterator
* toUpper
=
1235 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1236 Transliterator
* toLower
=
1237 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1238 Transliterator
* toTitle
=
1239 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
);
1240 if (toUpper
==0 || toLower
==0 || toTitle
==0) {
1241 errln("FAIL: createInstance returned NULL");
1248 expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.",
1249 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1250 expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1251 "the quick brown foX jumped over the lazY dogs.");
1252 expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.",
1253 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1261 * Test the name mapping transliterators.
1263 void TransliteratorTest::TestNameMap(void) {
1264 UParseError parseError
;
1265 UErrorCode status
= U_ZERO_ERROR
;
1266 Transliterator
* uni2name
=
1267 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
);
1268 Transliterator
* name2uni
=
1269 Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
);
1270 if (uni2name
==0 || name2uni
==0) {
1271 errln("FAIL: createInstance returned NULL");
1277 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1278 expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1279 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1280 expect(*name2uni
, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1281 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1288 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
);
1290 errln("FAIL: createInstance returned NULL");
1295 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1296 UnicodeString s
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1302 * Test liberalized ID syntax. 1006c
1304 void TransliteratorTest::TestLiberalizedID(void) {
1305 // Some test cases have an expected getID() value of NULL. This
1306 // means I have disabled the test case for now. This stuff is
1307 // still under development, and I haven't decided whether to make
1308 // getID() return canonical case yet. It will all get rewritten
1309 // with the move to Source-Target/Variant IDs anyway. [aliu]
1310 const char* DATA
[] = {
1311 "latin-greek", NULL
/*"Latin-Greek"*/, "case insensitivity",
1312 " Null ", "Null", "whitespace",
1313 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1314 " null ; latin-greek ", NULL
/*"Null;Latin-Greek"*/, "compound whitespace",
1316 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
1317 UParseError parseError
;
1318 UErrorCode status
= U_ZERO_ERROR
;
1319 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
1320 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
);
1322 dataerrln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1323 " cannot create ID \"" + DATA
[i
] + "\" - " + u_errorName(status
));
1327 exp
= UnicodeString(DATA
[i
+1], "");
1329 // Don't worry about getID() if the expected char*
1330 // is NULL -- see above.
1331 if (exp
.length() == 0 || exp
== t
->getID()) {
1332 logln(UnicodeString("Ok: ") + DATA
[i
+2] +
1333 " create ID \"" + DATA
[i
] + "\" => \"" +
1336 errln(UnicodeString("FAIL: ") + DATA
[i
+2] +
1337 " create ID \"" + DATA
[i
] + "\" => \"" +
1338 t
->getID() + "\", exp \"" + exp
+ "\"");
1345 /* test for Jitterbug 912 */
1346 void TransliteratorTest::TestCreateInstance(){
1347 const char* FORWARD
= "F";
1348 const char* REVERSE
= "R";
1349 const char* DATA
[] = {
1351 // Column 2: direction
1352 // Column 3: expected ID, or "" if expect failure
1353 "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912
1355 // JB#2689: bad compound causes crash
1356 "InvalidSource-InvalidTarget", FORWARD
, "",
1357 "InvalidSource-InvalidTarget", REVERSE
, "",
1358 "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "",
1359 "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "",
1360 "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "",
1361 "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "",
1366 for (int32_t i
=0; DATA
[i
]; i
+=3) {
1368 UErrorCode ec
= U_ZERO_ERROR
;
1369 UnicodeString
id(DATA
[i
]);
1370 UTransDirection dir
= (DATA
[i
+1]==FORWARD
)?
1371 UTRANS_FORWARD
:UTRANS_REVERSE
;
1372 UnicodeString
expID(DATA
[i
+2]);
1374 Transliterator::createInstance(id
,dir
,err
,ec
);
1375 UnicodeString newID
;
1379 UBool ok
= (newID
== expID
);
1381 newID
= u_errorName(ec
);
1384 logln((UnicodeString
)"Ok: createInstance(" +
1385 id
+ "," + DATA
[i
+1] + ") => " + newID
);
1387 dataerrln((UnicodeString
)"FAIL: createInstance(" +
1388 id
+ "," + DATA
[i
+1] + ") => " + newID
+
1389 ", expected " + expID
);
1396 * Test the normalization transliterator.
1398 void TransliteratorTest::TestNormalizationTransliterator() {
1399 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1400 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1401 const char* CANON
[] = {
1402 // Input Decomposed Composed
1403 "cat", "cat", "cat" ,
1404 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1406 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1407 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1409 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1410 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1411 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1413 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1414 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1416 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1417 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1418 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1420 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1421 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1423 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1424 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1426 "Henry IV", "Henry IV", "Henry IV" ,
1427 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1429 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1430 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1431 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1432 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1433 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1435 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1439 const char* COMPAT
[] = {
1440 // Input Decomposed Composed
1441 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1443 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1444 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1446 "Henry IV", "Henry IV", "Henry IV" ,
1447 "Henry \\u2163", "Henry IV", "Henry IV" ,
1449 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1450 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1452 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1457 UParseError parseError
;
1458 UErrorCode status
= U_ZERO_ERROR
;
1459 Transliterator
* NFD
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
);
1460 Transliterator
* NFC
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
);
1462 dataerrln("FAIL: createInstance failed: %s", u_errorName(status
));
1467 for (i
=0; CANON
[i
]; i
+=3) {
1468 UnicodeString in
= CharsToUnicodeString(CANON
[i
]);
1469 UnicodeString expd
= CharsToUnicodeString(CANON
[i
+1]);
1470 UnicodeString expc
= CharsToUnicodeString(CANON
[i
+2]);
1471 expect(*NFD
, in
, expd
);
1472 expect(*NFC
, in
, expc
);
1477 Transliterator
* NFKD
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
);
1478 Transliterator
* NFKC
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
);
1479 if (!NFKD
|| !NFKC
) {
1480 dataerrln("FAIL: createInstance failed");
1485 for (i
=0; COMPAT
[i
]; i
+=3) {
1486 UnicodeString in
= CharsToUnicodeString(COMPAT
[i
]);
1487 UnicodeString expkd
= CharsToUnicodeString(COMPAT
[i
+1]);
1488 UnicodeString expkc
= CharsToUnicodeString(COMPAT
[i
+2]);
1489 expect(*NFKD
, in
, expkd
);
1490 expect(*NFKC
, in
, expkc
);
1496 status
= U_ZERO_ERROR
;
1497 Transliterator
*t
= Transliterator::createInstance("NFD; [x]Remove",
1501 errln("FAIL: createInstance failed");
1503 expect(*t
, CharsToUnicodeString("\\u010dx"),
1504 CharsToUnicodeString("c\\u030C"));
1509 * Test compound RBT rules.
1511 void TransliteratorTest::TestCompoundRBT(void) {
1512 // Careful with spacing and ';' here: Phrase this exactly
1513 // as toRules() is going to return it. If toRules() changes
1514 // with regard to spacing or ';', then adjust this string.
1515 UnicodeString
rule("::Hex-Any;\n"
1519 "::[^t]Any-Upper;", "");
1520 UParseError parseError
;
1521 UErrorCode status
= U_ZERO_ERROR
;
1522 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
);
1524 errln("FAIL: createFromRules failed");
1527 expect(*t
, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1528 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1530 t
->toRules(r
, TRUE
);
1532 logln((UnicodeString
)"OK: toRules() => " + r
);
1534 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1535 ", expected " + rule
);
1540 t
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
);
1542 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1545 UnicodeString
exp("::Greek-Latin;\n::Latin-Cyrillic;");
1546 t
->toRules(r
, TRUE
);
1548 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1549 ", expected " + exp
);
1551 logln((UnicodeString
)"OK: toRules() => " + r
);
1555 // Round trip the result of toRules
1556 t
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
);
1558 errln("FAIL: createFromRules #2 failed");
1561 logln((UnicodeString
)"OK: createFromRules(" + r
+ ") succeeded");
1564 // Test toRules again
1565 t
->toRules(r
, TRUE
);
1567 errln((UnicodeString
)"FAIL: toRules() => " + r
+
1568 ", expected " + exp
);
1570 logln((UnicodeString
)"OK: toRules() => " + r
);
1575 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1576 // to what the regenerated ID will look like.
1577 UnicodeString
id("Upper(Lower);(NFKC)", "");
1578 t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
);
1580 errln("FAIL: createInstance #2 failed");
1583 if (t
->getID() == id
) {
1584 logln((UnicodeString
)"OK: created " + id
);
1586 errln((UnicodeString
)"FAIL: createInstance(" + id
+
1587 ").getID() => " + t
->getID());
1590 Transliterator
*u
= t
->createInverse(status
);
1592 errln("FAIL: createInverse failed");
1596 exp
= "NFKC();Lower(Upper)";
1597 if (u
->getID() == exp
) {
1598 logln((UnicodeString
)"OK: createInverse(" + id
+ ") => " +
1601 errln((UnicodeString
)"FAIL: createInverse(" + id
+ ") => " +
1609 * Compound filter semantics were originally not implemented
1610 * correctly. Originally, each component filter f(i) is replaced by
1611 * f'(i) = f(i) && g, where g is the filter for the compound
1616 * Suppose I have a transliterator X. Internally X is
1617 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1619 * The compound should convert all greek characters (through latin) to
1620 * cyrillic, then lowercase the result. The filter should say "don't
1621 * touch 'A' in the original". But because an intermediate result
1622 * happens to go through "A", the Greek Alpha gets hung up.
1624 void TransliteratorTest::TestCompoundFilter(void) {
1625 UParseError parseError
;
1626 UErrorCode status
= U_ZERO_ERROR
;
1627 Transliterator
*t
= Transliterator::createInstance
1628 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
);
1630 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
1633 t
->adoptFilter(new UnicodeSet("[^A]", status
));
1634 if (U_FAILURE(status
)) {
1635 errln("FAIL: UnicodeSet ct failed");
1640 // Only the 'A' at index 1 should remain unchanged
1642 CharsToUnicodeString("BA\\u039A\\u0391"),
1643 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1647 void TransliteratorTest::TestRemove(void) {
1648 UParseError parseError
;
1649 UErrorCode status
= U_ZERO_ERROR
;
1650 Transliterator
*t
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
);
1652 errln("FAIL: createInstance failed");
1656 expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts");
1658 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1659 // duplicating the filter
1660 Transliterator
* t2
= t
->clone();
1661 expect(*t2
, "Able bodied baker's cats", "Ale odied ker's ts");
1667 void TransliteratorTest::TestToRules(void) {
1668 const char* RBT
= "rbt";
1669 const char* SET
= "set";
1670 static const char* DATA
[] = {
1672 "$a=\\u4E61; [$a] > A;",
1676 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1677 "[[:Zs:][:Zl:]]{a} > A;",
1704 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1705 "[^[:Zs:]]{a} > A;",
1708 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1709 "[[a-z]-[:Zs:]]{a} > A;",
1712 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1713 "[[:Zs:]&[a-z]]{a} > A;",
1716 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1717 "[x[:Zs:]]{a} > A;",
1720 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1721 "$macron = \\u0304 ;"
1722 "$evowel = [aeiouyAEIOUY] ;"
1723 "$iotasub = \\u0345 ;"
1724 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1725 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1728 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1729 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1731 static const int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
1733 for (int32_t d
=0; d
< DATA_length
; d
+=3) {
1734 if (DATA
[d
] == RBT
) {
1735 // Transliterator test
1736 UParseError parseError
;
1737 UErrorCode status
= U_ZERO_ERROR
;
1738 Transliterator
*t
= Transliterator::createFromRules("ID",
1739 UnicodeString(DATA
[d
+1], -1, US_INV
), UTRANS_FORWARD
, parseError
, status
);
1741 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status
));
1744 UnicodeString rules
, escapedRules
;
1745 t
->toRules(rules
, FALSE
);
1746 t
->toRules(escapedRules
, TRUE
);
1747 UnicodeString expRules
= CharsToUnicodeString(DATA
[d
+2]);
1748 UnicodeString
expEscapedRules(DATA
[d
+2], -1, US_INV
);
1749 if (rules
== expRules
) {
1750 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1753 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1754 " => " + rules
+ ", exp " + expRules
);
1756 if (escapedRules
== expEscapedRules
) {
1757 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1758 " => " + escapedRules
);
1760 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) +
1761 " => " + escapedRules
+ ", exp " + expEscapedRules
);
1767 UErrorCode status
= U_ZERO_ERROR
;
1768 UnicodeString
pat(DATA
[d
+1], -1, US_INV
);
1769 UnicodeString
expToPat(DATA
[d
+2], -1, US_INV
);
1770 UnicodeSet
set(pat
, status
);
1771 if (U_FAILURE(status
)) {
1772 errln("FAIL: UnicodeSet ct failed");
1775 // Adjust spacing etc. as necessary.
1776 UnicodeString toPat
;
1777 set
.toPattern(toPat
);
1778 if (expToPat
== toPat
) {
1779 logln((UnicodeString
)"Ok: " + pat
+
1782 errln((UnicodeString
)"FAIL: " + pat
+
1783 " => " + prettify(toPat
, TRUE
) +
1784 ", exp " + prettify(pat
, TRUE
));
1790 void TransliteratorTest::TestContext() {
1791 UTransPosition pos
= {0, 2, 0, 1}; // cs cl s l
1792 expect("de > x; {d}e > y;",
1797 expect("ab{c} > z;",
1802 void TransliteratorTest::TestSupplemental() {
1804 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1806 CharsToUnicodeString("ab\\U0001030Fx"),
1807 CharsToUnicodeString("\\U00010300bix"));
1809 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1810 "$b=[A-Z\\U00010400-\\U0001044D];"
1811 "($a)($b) > $2 $1;"),
1812 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1813 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1815 // k|ax\\U00010300xm
1817 // k|a\\U00010400\\U00010300xm
1818 // ky|\\U00010400\\U00010300xm
1819 // ky\\U00010400|\\U00010300xm
1821 // ky\\U00010400|\\U00010300\\U00010400m
1822 // ky\\U00010400y|\\U00010400m
1823 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1824 "$a {x} > | @ \\U00010400;"
1825 "{$a} [^\\u0000-\\uFFFF] > y;"),
1826 CharsToUnicodeString("kax\\U00010300xm"),
1827 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1830 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1831 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1833 expectT("Any-Hex/Unicode",
1834 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1835 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1837 expectT("Any-Hex/C",
1838 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1839 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1841 expectT("Any-Hex/Perl",
1842 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1843 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1845 expectT("Any-Hex/Java",
1846 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1847 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1849 expectT("Any-Hex/XML",
1850 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1851 "𐌰􏼀󠁡 ");
1853 expectT("Any-Hex/XML10",
1854 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1855 "𐌰􏼀󠁡 ");
1857 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1858 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1859 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1862 void TransliteratorTest::TestQuantifier() {
1864 // Make sure @ in a quantified anteContext works
1865 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1869 // Make sure @ in a quantified postContext works
1870 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1874 // Make sure @ in a quantified postContext with seg ref works
1875 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1879 // Make sure @ past ante context doesn't enter ante context
1880 UTransPosition pos
= {0, 5, 3, 5};
1881 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1886 // Make sure @ past post context doesn't pass limit
1887 UTransPosition pos2
= {0, 4, 0, 2};
1888 expect("{b} a+ > c @@ |; x > y; a > A;",
1893 // Make sure @ past post context doesn't enter post context
1894 expect("{b} a+ > c @@ |; x > y; a > A;",
1898 expect("(ab)? c > d;",
1902 // NOTE: The (ab)+ when referenced just yields a single "ab",
1903 // not the full sequence of them. This accords with perl behavior.
1904 expect("(ab)+ {x} > '(' $1 ')';",
1906 "x ab(ab) abab(ab)y");
1909 "ac abc abbc abbbc",
1912 expect("[abc]+ > x;",
1913 "qac abrc abbcs abtbbc",
1916 expect("q{(ab)+} > x;",
1917 "qa qab qaba qababc qaba",
1918 "qa qx qxa qxc qxa");
1920 expect("q(ab)* > x;",
1921 "qa qab qaba qababc",
1924 // NOTE: The (ab)* when referenced just yields a single "ab",
1925 // not the full sequence of them. This accords with perl behavior.
1926 expect("q(ab)* > '(' $1 ')';",
1927 "qa qab qaba qababc",
1928 "()a (ab) (ab)a (ab)c");
1930 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1932 expect("'ab'+ > x;",
1936 // $foo+ and $foo* -- the quantifier should apply to the entire
1937 // variable reference
1938 expect("$var = ab; $var+ > x;",
1943 class TestTrans
: public Transliterator
{
1945 TestTrans(const UnicodeString
& id
) : Transliterator(id
, 0) {
1947 virtual Transliterator
* clone(void) const {
1948 return new TestTrans(getID());
1950 virtual void handleTransliterate(Replaceable
& /*text*/, UTransPosition
& offsets
,
1951 UBool
/*isIncremental*/) const
1953 offsets
.start
= offsets
.limit
;
1955 virtual UClassID
getDynamicClassID() const;
1956 static UClassID U_EXPORT2
getStaticClassID();
1958 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans
)
1961 * Test Source-Target/Variant.
1963 void TransliteratorTest::TestSTV(void) {
1964 int32_t ns
= Transliterator::countAvailableSources();
1965 if (ns
< 0 || ns
> 255) {
1966 errln((UnicodeString
)"FAIL: Bad source count: " + ns
);
1970 for (i
=0; i
<ns
; ++i
) {
1971 UnicodeString source
;
1972 Transliterator::getAvailableSource(i
, source
);
1973 logln((UnicodeString
)"" + i
+ ": " + source
);
1974 if (source
.length() == 0) {
1975 errln("FAIL: empty source");
1978 int32_t nt
= Transliterator::countAvailableTargets(source
);
1979 if (nt
< 0 || nt
> 255) {
1980 errln((UnicodeString
)"FAIL: Bad target count: " + nt
);
1983 for (int32_t j
=0; j
<nt
; ++j
) {
1984 UnicodeString target
;
1985 Transliterator::getAvailableTarget(j
, source
, target
);
1986 logln((UnicodeString
)" " + j
+ ": " + target
);
1987 if (target
.length() == 0) {
1988 errln("FAIL: empty target");
1991 int32_t nv
= Transliterator::countAvailableVariants(source
, target
);
1992 if (nv
< 0 || nv
> 255) {
1993 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
);
1996 for (int32_t k
=0; k
<nv
; ++k
) {
1997 UnicodeString variant
;
1998 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
1999 if (variant
.length() == 0) {
2000 logln((UnicodeString
)" " + k
+ ": <empty>");
2002 logln((UnicodeString
)" " + k
+ ": " + variant
);
2008 // Test registration
2009 const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2010 const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011 const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" };
2012 for (i
=0; i
<3; ++i
) {
2013 Transliterator
*t
= new TestTrans(IDS
[i
]);
2015 errln("FAIL: out of memory");
2018 if (t
->getID() != IDS
[i
]) {
2019 errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]);
2023 Transliterator::registerInstance(t
);
2024 UErrorCode status
= U_ZERO_ERROR
;
2025 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2027 errln((UnicodeString
)"FAIL: Registration/creation failed for ID " +
2030 logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " +
2034 Transliterator::unregister(IDS
[i
]);
2035 t
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
);
2037 errln((UnicodeString
)"FAIL: Unregistration failed for ID " +
2043 // Make sure getAvailable API reflects removal
2044 int32_t n
= Transliterator::countAvailableIDs();
2045 for (i
=0; i
<n
; ++i
) {
2046 UnicodeString id
= Transliterator::getAvailableID(i
);
2047 for (j
=0; j
<3; ++j
) {
2048 if (id
.caseCompare(FULL_IDS
[j
],0)==0) {
2049 errln((UnicodeString
)"FAIL: unregister(" + id
+ ") failed");
2053 n
= Transliterator::countAvailableTargets("Any");
2054 for (i
=0; i
<n
; ++i
) {
2056 Transliterator::getAvailableTarget(i
, "Any", t
);
2057 if (t
.caseCompare(IDS
[0],0)==0) {
2058 errln((UnicodeString
)"FAIL: unregister(Any-" + t
+ ") failed");
2061 n
= Transliterator::countAvailableSources();
2062 for (i
=0; i
<n
; ++i
) {
2064 Transliterator::getAvailableSource(i
, s
);
2065 for (j
=0; j
<3; ++j
) {
2066 if (SOURCES
[j
] == NULL
) continue;
2067 if (s
.caseCompare(SOURCES
[j
],0)==0) {
2068 errln((UnicodeString
)"FAIL: unregister(" + s
+ "-*) failed");
2075 * Test inverse of Greek-Latin; Title()
2077 void TransliteratorTest::TestCompoundInverse(void) {
2078 UParseError parseError
;
2079 UErrorCode status
= U_ZERO_ERROR
;
2080 Transliterator
*t
= Transliterator::createInstance
2081 ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
);
2083 dataerrln("FAIL: createInstance - %s", u_errorName(status
));
2086 UnicodeString
exp("(Title);Latin-Greek");
2087 if (t
->getID() == exp
) {
2088 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2091 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2092 t
->getID() + "\", expected \"" + exp
+ "\"");
2098 * Test NFD chaining with RBT
2100 void TransliteratorTest::TestNFDChainRBT() {
2102 UErrorCode ec
= U_ZERO_ERROR
;
2103 Transliterator
* t
= Transliterator::createFromRules(
2104 "TEST", "::NFD; aa > Q; a > q;",
2105 UTRANS_FORWARD
, pe
, ec
);
2106 if (t
== NULL
|| U_FAILURE(ec
)) {
2107 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
));
2110 expect(*t
, "aa", "Q");
2113 // TEMPORARY TESTS -- BEING DEBUGGED
2114 //=- UnicodeString s, s2;
2115 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2116 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2117 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2118 //=- expect(*t, s, s2);
2121 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2122 //=- expect(*t, s2, s);
2125 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2126 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2127 //=- expect(*t, s, s);
2130 // const char* source[] = {
2132 // "\\u015Br\\u012Bmad",
2133 // "bhagavadg\\u012Bt\\u0101",
2136 // "vi\\u1E63\\u0101da",
2138 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2139 // "uv\\u0101cr\\u0325",
2141 // "rmk\\u1E63\\u0113t",
2142 // //"dharmak\\u1E63\\u0113tr\\u0113",
2144 // "kuruk\\u1E63\\u0113tr\\u0113",
2145 // "samav\\u0113t\\u0101",
2146 // "yuyutsava-\\u1E25",
2147 // "m\\u0101mak\\u0101-\\u1E25",
2148 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2150 // "san\\u0304java",
2155 // const char* expected[] = {
2157 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2158 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2159 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2160 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2161 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2162 // "\\u092f\\u094b\\u0917",
2163 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2164 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2167 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2169 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2170 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2171 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2172 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2173 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2174 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2175 // "\\u0938\\u0902\\u091c\\u0935",
2179 // UErrorCode status = U_ZERO_ERROR;
2180 // UParseError parseError;
2181 // UnicodeString message;
2182 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2183 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2184 // if(U_FAILURE(status)){
2185 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2186 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2187 // delete latinToDevToLatin;
2188 // delete devToLatinToDev;
2191 // UnicodeString gotResult;
2192 // for(int i= 0; source[i] != 0; i++){
2193 // gotResult = source[i];
2194 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2195 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2197 // delete latinToDevToLatin;
2198 // delete devToLatinToDev;
2202 * Inverse of "Null" should be "Null". (J21)
2204 void TransliteratorTest::TestNullInverse() {
2206 UErrorCode ec
= U_ZERO_ERROR
;
2207 Transliterator
*t
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
);
2208 if (t
== 0 || U_FAILURE(ec
)) {
2209 errln("FAIL: createInstance");
2212 Transliterator
*u
= t
->createInverse(ec
);
2213 if (u
== 0 || U_FAILURE(ec
)) {
2214 errln("FAIL: createInverse");
2218 if (u
->getID() != "Null") {
2219 errln("FAIL: Inverse of Null should be Null");
2226 * Check ID of inverse of alias. (J22)
2228 void TransliteratorTest::TestAliasInverseID() {
2229 UnicodeString
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2231 UErrorCode ec
= U_ZERO_ERROR
;
2232 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2233 if (t
== 0 || U_FAILURE(ec
)) {
2234 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2237 Transliterator
*u
= t
->createInverse(ec
);
2238 if (u
== 0 || U_FAILURE(ec
)) {
2239 errln("FAIL: createInverse");
2243 UnicodeString exp
= "Hangul-Latin";
2244 UnicodeString got
= u
->getID();
2246 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2247 ", expected " + exp
);
2254 * Test IDs of inverses of compound transliterators. (J20)
2256 void TransliteratorTest::TestCompoundInverseID() {
2257 UnicodeString ID
= "Latin-Jamo;NFC(NFD)";
2259 UErrorCode ec
= U_ZERO_ERROR
;
2260 Transliterator
*t
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
2261 if (t
== 0 || U_FAILURE(ec
)) {
2262 dataerrln("FAIL: createInstance - %s", u_errorName(ec
));
2265 Transliterator
*u
= t
->createInverse(ec
);
2266 if (u
== 0 || U_FAILURE(ec
)) {
2267 errln("FAIL: createInverse");
2271 UnicodeString exp
= "NFD(NFC);Jamo-Latin";
2272 UnicodeString got
= u
->getID();
2274 errln((UnicodeString
)"FAIL: Inverse of " + ID
+ " is " + got
+
2275 ", expected " + exp
);
2282 * Test undefined variable.
2285 void TransliteratorTest::TestUndefinedVariable() {
2286 UnicodeString rule
= "$initial } a <> \\u1161;";
2288 UErrorCode ec
= U_ZERO_ERROR
;
2289 Transliterator
*t
= Transliterator::createFromRules("<ID>", rule
, UTRANS_FORWARD
, pe
, ec
);
2291 if (U_FAILURE(ec
)) {
2292 logln((UnicodeString
)"OK: Got exception for " + rule
+ ", as expected: " +
2296 errln((UnicodeString
)"Fail: bogus rule " + rule
+ " compiled with error " +
2301 * Test empty context.
2303 void TransliteratorTest::TestEmptyContext() {
2304 expect(" { a } > b;", "xay a ", "xby b ");
2308 * Test compound filter ID syntax
2310 void TransliteratorTest::TestCompoundFilterID(void) {
2311 static const char* DATA
[] = {
2312 // Col. 1 = ID or rule set (latter must start with #)
2314 // = columns > 1 are null if expect col. 1 to be illegal =
2316 // Col. 2 = direction, "F..." or "R..."
2317 // Col. 3 = source string
2318 // Col. 4 = exp result
2320 "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters
2321 "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter
2322 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2323 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2324 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2325 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2329 for (int32_t i
=0; DATA
[i
]; i
+=4) {
2330 UnicodeString id
= CharsToUnicodeString(DATA
[i
]);
2331 UTransDirection direction
= (DATA
[i
+1] != NULL
&& DATA
[i
+1][0] == 'R') ?
2332 UTRANS_REVERSE
: UTRANS_FORWARD
;
2333 UnicodeString source
;
2335 if (DATA
[i
+2] != NULL
) {
2336 source
= CharsToUnicodeString(DATA
[i
+2]);
2337 exp
= CharsToUnicodeString(DATA
[i
+3]);
2339 UBool expOk
= (DATA
[i
+1] != NULL
);
2340 Transliterator
* t
= NULL
;
2342 UErrorCode ec
= U_ZERO_ERROR
;
2343 if (id
.charAt(0) == 0x23/*#*/) {
2344 t
= Transliterator::createFromRules("ID", id
, direction
, pe
, ec
);
2346 t
= Transliterator::createInstance(id
, direction
, pe
, ec
);
2348 UBool ok
= (t
!= NULL
&& U_SUCCESS(ec
));
2349 UnicodeString transID
;
2351 transID
= t
->getID();
2354 transID
= UnicodeString("NULL", "");
2357 logln((UnicodeString
)"Ok: " + id
+ " => " + transID
+ ", " +
2359 if (source
.length() != 0) {
2360 expect(*t
, source
, exp
);
2364 dataerrln((UnicodeString
)"FAIL: " + id
+ " => " + transID
+ ", " +
2371 * Test new property set syntax
2373 void TransliteratorTest::TestPropertySet() {
2374 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2375 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2376 "[ a stitch ]\n[ in time ]\r[ saves 9]");
// Exercises three things: the Latin-Hiragana system transliterator, a
// hand-built three-element CompoundTransliterator with a [:Ll:] filter, and
// two context-sensitive rule sets passed straight to expect().
// NOTE(review): this copy carries the listing's own line numbers and they
// skip (e.g. 2388-2389, 2405-2409, 2447-2449); short statements such as the
// `pe` declaration, closing braces and short string rows are not visible
// here -- confirm against the upstream file before building.
2380 * Test various failure points of the new 2.0 engine.
2382 void TransliteratorTest::TestNewEngine() {
2384 UErrorCode ec
= U_ZERO_ERROR
;
2385 Transliterator
*t
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
);
2386 if (t
== 0 || U_FAILURE(ec
)) {
2387 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec
));
2390 // Katakana should be untouched
2391 expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"),
2392 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2397 // This test will only work if Transliterator.ROLLBACK is
2398 // true. Otherwise, this test will fail, revealing a
2399 // limitation of global filters in incremental mode.
// Two single-rule transliterators, "a_to_A" and "A_to_b"; the variables that
// receive them are presumably declared on lines missing from this copy.
2401 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
);
2403 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
);
2404 if (U_FAILURE(ec
)) {
2410 Transliterator
* array
[3];
2412 array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
);
2414 if (U_FAILURE(ec
)) {
2415 errln("FAIL: createInstance NFD");
// Compound over the three entries of array[], restricted by a [:Ll:] filter.
2422 t
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
));
2423 if (U_FAILURE(ec
)) {
2424 errln("FAIL: UnicodeSet constructor");
2432 expect(*t
, "aAaA", "bAbA");
// The compound must report its elements as a_to_A, NFD, A_to_b, in that order.
2434 assertTrue("countElements", t
->countElements() == 3);
2435 assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A");
2436 assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD");
2437 assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b");
2438 assertSuccess("getElement", ec
);
2446 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450 UnicodeString gr
= CharsToUnicodeString(
2452 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2453 "$rough = \\u0314 ;"
2454 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458 expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462 * Test quantified segment behavior. We want:
2463 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2465 void TransliteratorTest::TestQuantifiedSegment(void) {
2467 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2469 // The tricky case; the quantifier is around the segment
2470 expect("([abc])+ > x $1 x;", "cba", "xax");
2472 // Tricky case in reverse direction
2473 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2475 // Check post-context segment
2476 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2478 // Test toRule/toPattern for non-quantified segment.
2479 // Careful with spacing here.
2480 UnicodeString
r("([a-c]){q} > x $1 x;");
2482 UErrorCode ec
= U_ZERO_ERROR
;
2483 Transliterator
* t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2484 if (U_FAILURE(ec
)) {
2485 errln("FAIL: createFromRules");
2490 t
->toRules(rr
, TRUE
);
2492 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2494 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2498 // Test toRule/toPattern for quantified segment.
2499 // Careful with spacing here.
2500 r
= "([a-c])+{q} > x $1 x;";
2501 t
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
);
2502 if (U_FAILURE(ec
)) {
2503 errln("FAIL: createFromRules");
2507 t
->toRules(rr
, TRUE
);
2509 errln((UnicodeString
)"FAIL: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2511 logln((UnicodeString
)"Ok: \"" + r
+ "\" x toRules() => \"" + rr
+ "\"");
2516 //======================================================================
2518 //======================================================================
// Round-trip test: each source[i] (Latin) is expected to map to expected[i]
// (Devanagari) through "Latin-Devanagari", and back through
// "Devanagari-Latin".
// NOTE(review): MAX_LEN is 52 but only a handful of source[] rows survive in
// this copy (the listing's own line numbers skip 2522-2535, 2537-2561, ...);
// the short rows and closing braces must be restored from the upstream file.
2519 void TransliteratorTest::TestDevanagariLatinRT(){
2520 const int MAX_LEN
= 52;
2521 const char* const source
[MAX_LEN
] = {
2536 //"r\\u0323ya", // \u095c is not valid in Devanagari
2562 "\\u1E6Dh\\u1E6Dha",
2569 // Not roundtrippable --
2570 // \\u0939\\u094d\\u094d\\u092E - hma
2571 // \\u0939\\u094d\\u092E - hma
2572 // CharsToUnicodeString("hma"),
2577 "san\\u0304j\\u012Bb s\\u0113nagupta",
2578 "\\u0101nand vaddir\\u0101ju",
// Devanagari counterparts; the trailing comment on each row spells the
// corresponding Latin form.
2582 const char* const expected
[MAX_LEN
] = {
2583 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2584 "\\u0915\\u094D\\u0930", /* kra */
2585 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2586 "\\u0916\\u094D\\u0930", /* khra */
2587 "\\u0917\\u094D\\u0930", /* gra */
2588 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2589 "\\u091A\\u094D\\u0930", /* cra */
2590 "\\u091B\\u094D\\u0930", /* chra */
2591 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2592 "\\u091D\\u094D\\u0930", /* jhra */
2593 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2594 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2595 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2596 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2597 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2598 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2599 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2600 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2601 "\\u0924\\u094D\\u0924", /* tta */
2602 "\\u0925\\u094D\\u0930", /* thra */
2603 "\\u0926\\u094D\\u0926", /* dda */
2604 "\\u0927\\u094D\\u0930", /* dhra */
2605 "\\u0928\\u094D\\u0928", /* nna */
2606 "\\u092A\\u094D\\u0930", /* pra */
2607 "\\u092B\\u094D\\u0930", /* phra */
2608 "\\u092C\\u094D\\u0930", /* bra */
2609 "\\u092D\\u094D\\u0930", /* bhra */
2610 "\\u092E\\u094D\\u0930", /* mra */
2611 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2612 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2613 "\\u092F\\u094D\\u0930", /* yra */
2614 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2616 "\\u0935\\u094D\\u0930", /* vra */
2617 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2618 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2619 "\\u0938\\u094D\\u0930", /* sra */
2620 "\\u0939\\u094d\\u092E", /* hma */
2621 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2622 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2623 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2624 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2625 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2626 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2627 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2628 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2629 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2631 "\\u0939\\u094D\\u092F", /* hya */
2632 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2633 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2634 "\\u090d", /* e\\u0306 */
2635 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2636 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640 UErrorCode status
= U_ZERO_ERROR
;
2641 UParseError parseError
;
2642 UnicodeString message
;
// Both directions are requested by explicit ID, each as UTRANS_FORWARD; one
// status check covers both createInstance() calls.
2643 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2644 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2645 if(U_FAILURE(status
)){
2646 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2647 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2650 UnicodeString gotResult
;
// gotResult is assigned on each pass but not read in the lines visible here.
2651 for(int i
= 0; i
<MAX_LEN
; i
++){
2652 gotResult
= source
[i
];
2653 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2654 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
2660 void TransliteratorTest::TestTeluguLatinRT(){
2661 const int MAX_LEN
=10;
2662 const char* const source
[MAX_LEN
] = {
2663 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2664 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2665 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2666 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2667 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2668 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2669 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2670 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2671 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2672 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2675 const char* const expected
[MAX_LEN
] = {
2676 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2677 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2678 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2679 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2681 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2682 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2683 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2684 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2685 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688 UErrorCode status
= U_ZERO_ERROR
;
2689 UParseError parseError
;
2690 UnicodeString message
;
2691 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
);
2692 Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2693 if(U_FAILURE(status
)){
2694 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2695 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2698 UnicodeString gotResult
;
2699 for(int i
= 0; i
<MAX_LEN
; i
++){
2700 gotResult
= source
[i
];
2701 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2702 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Round-trip test over Sanskrit words: source[i] (Latin) is expected to map
// to expected[i] (Devanagari) through "Latin-Devanagari", and back through
// "Devanagari-Latin".
// NOTE(review): MAX_LEN is 16 and expected[] shows 16 live rows, but only 10
// live source[] rows are visible; the listing's own line numbers skip
// 2714-2715, 2717, 2723 and 2726-2727.  Restore the missing rows and closing
// braces from the upstream file.
2708 void TransliteratorTest::TestSanskritLatinRT(){
2709 const int MAX_LEN
=16;
2710 const char* const source
[MAX_LEN
] = {
2711 "rmk\\u1E63\\u0113t",
2712 "\\u015Br\\u012Bmad",
2713 "bhagavadg\\u012Bt\\u0101",
2716 "vi\\u1E63\\u0101da",
2718 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2719 "uv\\u0101cr\\u0325",
2720 "dharmak\\u1E63\\u0113tr\\u0113",
2721 "kuruk\\u1E63\\u0113tr\\u0113",
2722 "samav\\u0113t\\u0101",
2724 "m\\u0101mak\\u0101\\u1E25",
2725 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729 const char* const expected
[MAX_LEN
] = {
2730 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2731 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2732 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2733 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2734 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2735 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2736 "\\u092f\\u094b\\u0917",
2737 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2738 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2739 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2740 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2742 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2743 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2744 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2745 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2746 "\\u0938\\u0902\\u091c\\u0935",
2748 UErrorCode status
= U_ZERO_ERROR
;
2749 UParseError parseError
;
2750 UnicodeString message
;
// Both directions are requested by explicit ID, each as UTRANS_FORWARD; one
// status check covers both createInstance() calls.
2751 Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2752 Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2753 if(U_FAILURE(status
)){
2754 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2755 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2758 UnicodeString gotResult
;
// gotResult is assigned on each pass but not read in the lines visible here.
2759 for(int i
= 0; i
<MAX_LEN
; i
++){
2760 gotResult
= source
[i
];
2761 expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
]));
2762 expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
]));
// Identity test for compound IDs of the form "A-B;B-A": running a string
// through a transliterator and then its opposite must return the input.
// MAX_LEN is derived from source[] with sizeof, and a run-time check reports
// a length mismatch between source[] and expected[].
// NOTE(review): six source[] rows and the closing braces are missing from
// this copy (the listing's own line numbers skip 2774-2775, 2777, 2783,
// 2786-2787); restore them from the upstream file.
2769 void TransliteratorTest::TestCompoundLatinRT(){
2770 const char* const source
[] = {
2771 "rmk\\u1E63\\u0113t",
2772 "\\u015Br\\u012Bmad",
2773 "bhagavadg\\u012Bt\\u0101",
2776 "vi\\u1E63\\u0101da",
2778 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2779 "uv\\u0101cr\\u0325",
2780 "dharmak\\u1E63\\u0113tr\\u0113",
2781 "kuruk\\u1E63\\u0113tr\\u0113",
2782 "samav\\u0113t\\u0101",
2784 "m\\u0101mak\\u0101\\u1E25",
2785 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789 const int MAX_LEN
= sizeof(source
)/sizeof(source
[0]);
2790 const char* const expected
[MAX_LEN
] = {
2791 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2792 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2793 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2794 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2795 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2796 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2797 "\\u092f\\u094b\\u0917",
2798 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2799 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2800 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2801 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2803 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2804 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2805 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2806 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2807 "\\u0938\\u0902\\u091c\\u0935"
2809 if(MAX_LEN
!= sizeof(expected
)/sizeof(expected
[0])) {
2810 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814 UErrorCode status
= U_ZERO_ERROR
;
2815 UParseError parseError
;
2816 UnicodeString message
;
// Four compound transliterators share one status value, checked once below.
2817 Transliterator
* devToLatinToDev
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2818 Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
);
2819 Transliterator
* devToTelToDev
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
);
2820 Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
);
2822 if(U_FAILURE(status
)){
2823 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status
));
2824 dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) );
2827 UnicodeString gotResult
;
// Input and expected output are the same string in every expect() call.
// devToTelToDev is created and deleted but not exercised in the visible lines.
2828 for(int i
= 0; i
<MAX_LEN
; i
++){
2829 gotResult
= source
[i
];
2830 expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
]));
2831 expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2832 expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
]));
2835 delete(latinToDevToLatin
);
2836 delete(devToLatinToDev
);
2837 delete(devToTelToDev
);
2838 delete(latinToTelToLatin
);
2842 * Test Gurmukhi-Devanagari Tippi and Bindi
2844 void TransliteratorTest::TestGurmukhiDevanagari(){
2846 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2847 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2848 UErrorCode status
= U_ZERO_ERROR
;
2849 UnicodeSet
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV
).unescape(), status
);
2850 UnicodeSet
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV
).unescape(), status
);
2851 UParseError parseError
;
2853 UnicodeSetIterator
vIter(vowel
);
2854 UnicodeSetIterator
nvIter(non_vowel
);
2855 Transliterator
* trans
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
);
2856 if(U_FAILURE(status
)) {
2857 dataerrln("Error creating transliterator %s", u_errorName(status
));
2861 UnicodeString
src (" \\u0902", -1, US_INV
);
2862 UnicodeString
expected(" \\u0A02", -1, US_INV
);
2863 src
= src
.unescape();
2864 expected
= expected
.unescape();
2866 while(vIter
.next()){
2867 src
.setCharAt(0,(UChar
) vIter
.getCodepoint());
2868 expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100));
2869 expect(*trans
,src
,expected
);
2872 expected
.setCharAt(1,0x0A70);
2873 while(nvIter
.next()){
2874 //src.setCharAt(0,(char) nvIter.codepoint);
2875 src
.setCharAt(0,(UChar
)nvIter
.getCodepoint());
2876 expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100));
2877 expect(*trans
,src
,expected
);
2882 * Test instantiation from a locale.
2884 void TransliteratorTest::TestLocaleInstantiation(void) {
2886 UErrorCode ec
= U_ZERO_ERROR
;
2887 Transliterator
*t
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
);
2888 if (U_FAILURE(ec
)) {
2889 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec
));
2893 expect(*t
, CharsToUnicodeString("\\u0430"), "a");
2896 t
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
);
2897 if (U_FAILURE(ec
)) {
2898 errln("FAIL: createInstance(en-el)");
2902 expect(*t
, "a", CharsToUnicodeString("\\u03B1"));
2907 * Test title case handling of accent (should ignore accents)
2909 void TransliteratorTest::TestTitleAccents(void) {
2911 UErrorCode ec
= U_ZERO_ERROR
;
2912 Transliterator
*t
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
);
2913 if (U_FAILURE(ec
)) {
2914 errln("FAIL: createInstance(Title)");
2918 expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2923 * Basic test of a locale resource based rule.
2925 void TransliteratorTest::TestLocaleResource() {
2926 const char* DATA
[] = {
2928 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2929 "Latin-el", "b", "\\u03bc\\u03c0",
2930 "Latin-Greek", "b", "\\u03B2",
2931 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2932 "el-Latin", "\\u03B2", "v",
2933 "Greek-Latin", "\\u03B2", "b",
2935 const int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
2936 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
2938 UErrorCode ec
= U_ZERO_ERROR
;
2939 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
);
2940 if (U_FAILURE(ec
)) {
2941 dataerrln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ") - " + u_errorName(ec
));
2945 expect(*t
, CharsToUnicodeString(DATA
[i
+1]),
2946 CharsToUnicodeString(DATA
[i
+2]));
// Two checks on UParseError reporting.  First, a rule set that must fail to
// parse: the pre-context and post-context, joined with '|', are expected to
// contain "d << b".  Second, a rule set that must fail with
// U_RULE_MASK_ERROR, with preContext "a > x;" and postContext "ab > y;".
// NOTE(review): both rule literals, the `pe` declaration, the else branches
// and the closing braces fall on lines missing from this copy (the listing's
// own line numbers skip 2956-2958, 2960, 2976-2979, ...); restore them from
// the upstream file.
2952 * Make sure parse errors reference the right line.
2954 void TransliteratorTest::TestParseError() {
2955 static const char* rule
=
2959 UErrorCode ec
= U_ZERO_ERROR
;
2961 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
2963 if (U_FAILURE(ec
)) {
2964 UnicodeString
err(pe
.preContext
);
2965 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
2966 if (err
.indexOf("d << b") >= 0) {
2967 logln("Ok: " + err
);
2969 errln("FAIL: " + err
);
2973 errln("FAIL: no syntax error");
2975 static const char* maskingRule
=
// The returned transliterator is deleted at once; only ec and pe are examined.
2980 delete Transliterator::createFromRules("ID", maskingRule
, UTRANS_FORWARD
, pe
, ec
);
2981 if (ec
!= U_RULE_MASK_ERROR
) {
2982 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec
));
2984 else if (UnicodeString("a > x;") != UnicodeString(pe
.preContext
)) {
2985 errln("FAIL: did not get expected precontext");
2987 else if (UnicodeString("ab > y;") != UnicodeString(pe
.postContext
)) {
2988 errln("FAIL: did not get expected postcontext");
2993 * Make sure sets on output are disallowed.
2995 void TransliteratorTest::TestOutputSet() {
2996 UnicodeString rule
= "$set = [a-cm-n]; b > $set;";
2997 UErrorCode ec
= U_ZERO_ERROR
;
2999 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3001 if (U_FAILURE(ec
)) {
3002 UnicodeString
err(pe
.preContext
);
3003 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3004 logln("Ok: " + err
);
3007 errln("FAIL: No syntax error");
3011 * Test the use variable range pragma, making sure that use of
3012 * variable range characters is detected and flagged as an error.
3014 void TransliteratorTest::TestVariableRange() {
3015 UnicodeString rule
= "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3016 UErrorCode ec
= U_ZERO_ERROR
;
3018 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3020 if (U_FAILURE(ec
)) {
3021 UnicodeString
err(pe
.preContext
);
3022 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3023 logln("Ok: " + err
);
3026 errln("FAIL: No syntax error");
3030 * Test invalid post context error handling
3032 void TransliteratorTest::TestInvalidPostContext() {
3033 UnicodeString rule
= "a}b{c>d;";
3034 UErrorCode ec
= U_ZERO_ERROR
;
3036 Transliterator
*t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
);
3038 if (U_FAILURE(ec
)) {
3039 UnicodeString
err(pe
.preContext
);
3040 err
.append((UChar
)124/*|*/).append(pe
.postContext
);
3041 if (err
.indexOf("a}b{c") >= 0) {
3042 logln("Ok: " + err
);
3044 errln("FAIL: " + err
);
3048 errln("FAIL: No syntax error");
// Table-driven check of ID normalization.  Each row holds three columns: the
// ID to instantiate, the ID the instance should report, and the ID its
// inverse should report.  A NULL third column marks an ID that is expected
// to be rejected (expValid is false).
// NOTE(review): the handling of a NULL second column, the `pe` declaration,
// the `t` declaration, some table rows and all closing braces fall on lines
// missing from this copy (the listing's own line numbers skip 3056,
// 3071-3074, 3083-3085, ...); restore them from the upstream file.
3052 * Test ID form variants
3054 void TransliteratorTest::TestIDForms() {
3055 const char* DATA
[] = {
3057 "nfd", NULL
, "NFC", // make sure case is ignored
3058 "Any-NFKD", NULL
, "Any-NFKC",
3059 "Null", NULL
, "Null",
3060 "-nfkc", "nfkc", "NFKD",
3061 "-nfkc/", "nfkc", "NFKD",
3062 "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN",
3063 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3064 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3065 "Source-", NULL
, NULL
,
3066 "Source/Variant-", NULL
, NULL
,
3067 "Source-/Variant", NULL
, NULL
,
3068 "/Variant", NULL
, NULL
,
3069 "/Variant-", NULL
, NULL
,
3070 "-/Variant", NULL
, NULL
,
3075 const int32_t DATA_length
= sizeof(DATA
)/sizeof(DATA
[0]);
3077 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
3078 const char* ID
= DATA
[i
];
3079 const char* expID
= DATA
[i
+1];
3080 const char* expInvID
= DATA
[i
+2];
3081 UBool expValid
= (expInvID
!= NULL
);
3082 if (expID
== NULL
) {
3086 UErrorCode ec
= U_ZERO_ERROR
;
3088 Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
);
// A creation failure is logged as "Ok" on one path and reported through
// dataerrln on the other; the condition choosing between them is not visible.
3089 if (U_FAILURE(ec
)) {
3091 logln((UnicodeString
)"Ok: getInstance(" + ID
+") => " + u_errorName(ec
));
3093 dataerrln((UnicodeString
)"FAIL: Couldn't create " + ID
+ " - " + u_errorName(ec
));
3098 Transliterator
*u
= t
->createInverse(ec
);
3099 if (U_FAILURE(ec
)) {
3100 errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
);
// Both the instance's ID and its inverse's ID must match the table.
3105 if (t
->getID() == expID
&&
3106 u
->getID() == expInvID
) {
3107 logln((UnicodeString
)"Ok: " + ID
+ ".getInverse() => " + expInvID
);
3109 errln((UnicodeString
)"FAIL: getInstance(" + ID
+ ") => " +
3110 t
->getID() + " x getInverse() => " + u
->getID() +
3111 ", expected " + expInvID
);
3118 static const UChar SPACE
[] = {32,0};
3119 static const UChar NEWLINE
[] = {10,0};
3120 static const UChar RETURN
[] = {13,0};
3121 static const UChar EMPTY
[] = {0};
3123 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
,
3124 const UnicodeString
& testRulesForward
) {
3125 UnicodeString rules2
; t2
.toRules(rules2
, TRUE
);
3126 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3127 rules2
.findAndReplace(SPACE
, EMPTY
);
3128 rules2
.findAndReplace(NEWLINE
, EMPTY
);
3129 rules2
.findAndReplace(RETURN
, EMPTY
);
3131 UnicodeString
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
);
3133 if (rules2
!= testRules
) {
3135 logln((UnicodeString
)"GENERATED RULES: " + rules2
);
3136 logln((UnicodeString
)"SHOULD BE: " + testRulesForward
);
// Builds a forward ("source-target") and a reverse ("target-source")
// transliterator from the same rule text, checks that a-acute and
// alpha-acute map to each other, then compares each one's toRules() output
// with testRulesForward / testRulesBackward through checkRules().
// NOTE(review): most lines of the three rule literals, the `pe` declaration
// and the closing braces are missing from this copy (the listing's own line
// numbers skip 3146-3147, 3149-3151, 3156-3162, 3165-3170, ...); restore
// them from the upstream file.
3141 * Mark's toRules test.
3143 void TransliteratorTest::TestToRulesMark() {
3144 const char* testRules
=
3145 "::[[:Latin:][:Mark:]];"
3148 "a <> \\u03B1;" // alpha
3152 "::([[:Greek:][:Mark:]]);"
3154 const char* testRulesForward
=
3155 "::[[:Latin:][:Mark:]];"
3163 const char* testRulesBackward
=
3164 "::[[:Greek:][:Mark:]];"
3171 UnicodeString source
= CharsToUnicodeString("\\u00E1"); // a-acute
3172 UnicodeString target
= CharsToUnicodeString("\\u03AC"); // alpha-acute
3175 UErrorCode ec
= U_ZERO_ERROR
;
// Same rule text for both instances; only the direction differs.
3176 Transliterator
*t2
= Transliterator::createFromRules("source-target", UnicodeString(testRules
, -1, US_INV
), UTRANS_FORWARD
, pe
, ec
);
3177 Transliterator
*t3
= Transliterator::createFromRules("target-source", UnicodeString(testRules
, -1, US_INV
), UTRANS_REVERSE
, pe
, ec
);
3179 if (U_FAILURE(ec
)) {
3182 dataerrln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
));
3186 expect(*t2
, source
, target
);
3187 expect(*t3
, target
, source
);
3189 checkRules("Failed toRules FORWARD", *t2
, UnicodeString(testRulesForward
, -1, US_INV
));
3190 checkRules("Failed toRules BACKWARD", *t3
, UnicodeString(testRulesBackward
, -1, US_INV
));
// Instantiates four system transliterators in turn -- "Hex-Any",
// "Any-Hex/C", "Any-Hex/Java" and "Any-Hex/Perl" -- reusing the same t, pe
// and ec variables.  The three Any-Hex variants are fed the same input
// ("A", U+10BEEF, U+FEED) and each row pairs it with that variant's escaped
// form.
// NOTE(review): the declarations of t, pe and ec, the expect() call heads,
// some argument strings and the closing braces fall on lines missing from
// this copy (the listing's own line numbers skip 3200-3204, 3208-3209,
// 3211-3215, ...); restore them from the upstream file.
3197 * Test Escape and Unescape transliterators.
3199 void TransliteratorTest::TestEscape() {
3205 t
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
);
3206 if (U_FAILURE(ec
)) {
3207 errln((UnicodeString
)"FAIL: createInstance");
3210 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
3216 t
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
);
3217 if (U_FAILURE(ec
)) {
3218 errln((UnicodeString
)"FAIL: createInstance");
3221 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3222 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3227 t
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
);
3228 if (U_FAILURE(ec
)) {
3229 errln((UnicodeString
)"FAIL: createInstance");
3232 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3233 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3238 t
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
);
3239 if (U_FAILURE(ec
)) {
3240 errln((UnicodeString
)"FAIL: createInstance");
3243 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3244 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3250 void TransliteratorTest::TestAnchorMasking(){
3251 UnicodeString
rule ("^a > Q; a > q;");
3252 UErrorCode status
= U_ZERO_ERROR
;
3253 UParseError parseError
;
3255 Transliterator
* t
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
);
3256 if(U_FAILURE(status
)){
3257 errln(UnicodeString("FAIL: ") + "ID" +
3258 ".createFromRules() => bad rules" +
3259 /*", parse error " + parseError.code +*/
3260 ", line " + parseError
.line
+
3261 ", offset " + parseError
.offset
+
3262 ", context " + prettify(parseError
.preContext
, TRUE
) +
3263 ", rules: " + prettify(rule
, TRUE
));
// Table-driven check of Transliterator::getDisplayName() for Locale("en",
// "US").  Each row holds an ID, the display name expected for that ID, and
// the display name expected for the transliterator created from the same ID
// with UTRANS_REVERSE.  When UCONFIG_NO_FORMATTING is set the test only logs
// that it is skipped.
// NOTE(review): the matching #else/#endif, the `name` and `pe`
// declarations, some table rows, the else branches and the closing braces
// fall on lines missing from this copy (the listing's own line numbers skip
// 3274-3275, 3280-3281, 3283-3284, 3296, ...); restore them from the
// upstream file.
3269 * Make sure display names of variants look reasonable.
3271 void TransliteratorTest::TestDisplayName() {
3272 #if UCONFIG_NO_FORMATTING
3273 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3276 static const char* DATA
[] = {
3277 // ID, forward name, reverse name
3278 // Update the text as necessary -- the important thing is
3279 // not the text itself, but how various cases are handled.
3282 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3285 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3288 "NFC", "Any to NFC", "Any to NFD",
3291 int32_t DATA_length
= sizeof(DATA
) / sizeof(DATA
[0]);
3293 Locale
US("en", "US");
3295 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
// Forward name: looked up straight from the ID string in column 1.
3297 Transliterator::getDisplayName(DATA
[i
], US
, name
);
3298 if (name
!= DATA
[i
+1]) {
3299 dataerrln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " +
3300 name
+ ", expected " + DATA
[i
+1]);
3302 logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
);
3304 UErrorCode ec
= U_ZERO_ERROR
;
// Reverse name: instantiate the reverse direction and look up by its own ID.
3306 Transliterator
*t
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
);
3307 if (U_FAILURE(ec
)) {
3309 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec
));
3312 name
= Transliterator::getDisplayName(t
->getID(), US
, name
);
3313 if (name
!= DATA
[i
+2]) {
3314 dataerrln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " +
3315 name
+ ", expected " + DATA
[i
+2]);
3317 logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
);
3324 void TransliteratorTest::TestSpecialCases(void) {
3325 const UnicodeString registerRules
[] = {
3326 "Any-Dev1", "x > X; y > Y;",
3327 "Any-Dev2", "XY > Z",
3329 CharsToUnicodeString
3330 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3334 const UnicodeString testCases
[] = {
3336 // should add more test cases
3337 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3338 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3344 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346 // check for devanagari bug
3347 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3349 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3350 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3351 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE
+ DESERET_dee
,
3353 //TODO: enable this test once Titlecase works right
3355 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3358 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3359 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE
+ DESERET_DEE
,
3360 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
,
3361 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee
+ DESERET_dee
,
3363 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3364 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee
+ DESERET_DEE
, "",
3367 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3369 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3370 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3371 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3372 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3373 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3374 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3376 // Upper: TAT\\u02B9\\u00C2NA
3377 // Lower: tat\\u02B9\\u00E2na
3378 // Title: Tat\\u02B9\\u00E2na
3379 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3380 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3381 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3391 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3392 UErrorCode status
= U_ZERO_ERROR
;
3394 Transliterator
*t
= Transliterator::createFromRules(registerRules
[0+i
],
3395 registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
);
3396 if (U_FAILURE(status
)) {
3397 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status
));
3399 Transliterator::registerInstance(t
);
3402 for (i
= 0; testCases
[i
].length()!=0; i
+=3) {
3403 UErrorCode ec
= U_ZERO_ERROR
;
3405 const UnicodeString
& name
= testCases
[i
];
3406 Transliterator
*t
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
);
3407 if (U_FAILURE(ec
)) {
3408 dataerrln((UnicodeString
)"FAIL: Couldn't create " + name
+ " - " + u_errorName(ec
));
3412 const UnicodeString
& id
= t
->getID();
3413 const UnicodeString
& source
= testCases
[i
+1];
3414 UnicodeString target
;
3416 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3418 if (testCases
[i
+2].length() > 0) {
3419 target
= testCases
[i
+2];
3420 } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) {
3421 Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
);
3422 } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) {
3423 Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
);
3424 } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) {
3425 Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
);
3426 } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) {
3427 Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
);
3428 } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) {
3430 target
.toLower(Locale::getUS());
3431 } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) {
3433 target
.toUpper(Locale::getUS());
3435 if (U_FAILURE(ec
)) {
3436 errln((UnicodeString
)"FAIL: Internal error normalizing " + source
);
3440 expect(*t
, source
, target
);
3443 for (i
= 0; registerRules
[i
].length()!=0; i
+=2) {
3444 Transliterator::unregister(registerRules
[i
]);
3448 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) {
3450 sprintf(buffer
, "\\u%04x", (int)ch
);
3452 sprintf(buffer
, "\\U%08x", (int)ch
);
3457 void TransliteratorTest::TestSurrogateCasing (void) {
3458 // check that casing handles surrogates
3459 // titlecase is currently defective
3463 U16_GET(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
);
3464 UnicodeString
DEE(u_totitle(dee
));
3465 if (DEE
!= DESERET_DEE
) {
3466 err("Fails titlecase of surrogates");
3467 err(Char32ToEscapedChars(dee
, buffer
));
3469 errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
));
3472 UnicodeString deeDEETest
=DESERET_dee
+ DESERET_DEE
;
3473 UnicodeString deedeeTest
= DESERET_dee
+ DESERET_dee
;
3474 UnicodeString DEEDEETest
= DESERET_DEE
+ DESERET_DEE
;
3475 UErrorCode status
= U_ZERO_ERROR
;
3477 u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3478 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) {
3479 errln("Fails: Can't uppercase surrogates.");
3482 status
= U_ZERO_ERROR
;
3483 u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
);
3484 if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) {
3485 errln("Fails: Can't lowercase surrogates.");
3489 static void _trans(Transliterator
& t
, const UnicodeString
& src
,
3490 UnicodeString
& result
) {
3492 t
.transliterate(result
);
3495 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
,
3496 UnicodeString
& result
, UErrorCode ec
) {
3498 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
3499 if (U_SUCCESS(ec
)) {
3500 _trans(*t
, src
, result
);
3505 static UnicodeString
_findMatch(const UnicodeString
& source
,
3506 const UnicodeString
* pairs
) {
3507 UnicodeString empty
;
3508 for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) {
3509 if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) {
3516 // Check to see that incremental gets at least part way through a reasonable string.
3518 void TransliteratorTest::TestIncrementalProgress(void) {
3519 UErrorCode ec
= U_ZERO_ERROR
;
3520 UnicodeString latinTest
= "The Quick Brown Fox.";
3521 UnicodeString devaTest
;
3522 _trans("Latin-Devanagari", latinTest
, devaTest
, ec
);
3523 UnicodeString kataTest
;
3524 _trans("Latin-Katakana", latinTest
, kataTest
, ec
);
3525 if (U_FAILURE(ec
)) {
3526 errln("FAIL: Internal error");
3529 const UnicodeString tests
[] = {
3532 "Halfwidth", latinTest
,
3533 "Devanagari", devaTest
,
3534 "Katakana", kataTest
,
3538 UnicodeString
test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3539 int32_t i
= 0, j
=0, k
=0;
3540 int32_t sources
= Transliterator::countAvailableSources();
3541 for (i
= 0; i
< sources
; i
++) {
3542 UnicodeString source
;
3543 Transliterator::getAvailableSource(i
, source
);
3544 UnicodeString test
= _findMatch(source
, tests
);
3545 if (test
.length() == 0) {
3546 logln((UnicodeString
)"Skipping " + source
+ "-X");
3549 int32_t targets
= Transliterator::countAvailableTargets(source
);
3550 for (j
= 0; j
< targets
; j
++) {
3551 UnicodeString target
;
3552 Transliterator::getAvailableTarget(j
, source
, target
);
3553 int32_t variants
= Transliterator::countAvailableVariants(source
, target
);
3554 for (k
=0; k
< variants
; k
++) {
3555 UnicodeString variant
;
3557 UErrorCode status
= U_ZERO_ERROR
;
3559 Transliterator::getAvailableVariant(k
, source
, target
, variant
);
3560 UnicodeString id
= source
+ "-" + target
+ "/" + variant
;
3562 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
);
3563 if (U_FAILURE(status
)) {
3564 dataerrln((UnicodeString
)"FAIL: Could not create " + id
);
3568 status
= U_ZERO_ERROR
;
3569 CheckIncrementalAux(t
, test
);
3572 _trans(*t
, test
, rev
);
3573 Transliterator
*inv
= t
->createInverse(status
);
3574 if (U_FAILURE(status
)) {
3575 #if UCONFIG_NO_BREAK_ITERATION
3576 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3577 if (id
.compare((UnicodeString
)"Latin-Thai/") != 0)
3579 errln((UnicodeString
)"FAIL: Could not create inverse of " + id
);
3585 CheckIncrementalAux(inv
, rev
);
3593 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,
3594 const UnicodeString
& input
) {
3595 UErrorCode ec
= U_ZERO_ERROR
;
3597 UnicodeString test
= input
;
3599 pos
.contextStart
= 0;
3600 pos
.contextLimit
= input
.length();
3602 pos
.limit
= input
.length();
3604 t
->transliterate(test
, pos
, ec
);
3605 if (U_FAILURE(ec
)) {
3606 errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
));
3609 UBool gotError
= FALSE
;
3610 (void)gotError
; // Suppress set but not used warning.
3612 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3614 if (pos
.start
== 0 && pos
.limit
!= 0 && t
->getID() != "Hex-Any/Unicode") {
3615 errln((UnicodeString
)"No Progress, " +
3616 t
->getID() + ": " + formatInput(test
, input
, pos
));
3619 logln((UnicodeString
)"PASS Progress, " +
3620 t
->getID() + ": " + formatInput(test
, input
, pos
));
3622 t
->finishTransliteration(test
, pos
);
3623 if (pos
.start
!= pos
.limit
) {
3624 errln((UnicodeString
)"Incomplete, " +
3625 t
->getID() + ": " + formatInput(test
, input
, pos
));
3630 void TransliteratorTest::TestFunction() {
3631 // Careful with spacing and ';' here: Phrase this exactly
3632 // as toRules() is going to return it. If toRules() changes
3633 // with regard to spacing or ';', then adjust this string.
3634 UnicodeString rule
=
3635 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3638 UErrorCode ec
= U_ZERO_ERROR
;
3639 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3641 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec
));
3646 t
->toRules(r
, TRUE
);
3648 logln((UnicodeString
)"OK: toRules() => " + r
);
3650 errln((UnicodeString
)"FAIL: toRules() => " + r
+
3651 ", expected " + rule
);
3654 expect(*t
, "The Quick Brown Fox",
3655 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3660 void TransliteratorTest::TestInvalidBackRef(void) {
3661 UnicodeString rule
= ". > $1;";
3662 UnicodeString rule2
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3664 UErrorCode ec
= U_ZERO_ERROR
;
3665 Transliterator
*t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3666 Transliterator
*t2
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
);
3669 errln("FAIL: createFromRules should have returned NULL");
3674 errln("FAIL: createFromRules should have returned NULL");
3678 if (U_SUCCESS(ec
)) {
3679 errln("FAIL: Ok: . > $1; => no error");
3681 logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
));
3685 void TransliteratorTest::TestMulticharStringSet() {
3692 " e } [{fg}] > r;" ;
3695 UErrorCode ec
= U_ZERO_ERROR
;
3696 Transliterator
* t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3697 if (t
== NULL
|| U_FAILURE(ec
)) {
3699 errln("FAIL: createFromRules failed");
3703 expect(*t
, "a aa ab bc d gd de gde gdefg ddefg",
3704 "y x yz z d gd de gdq gdqfg ddrfg");
3707 // Overlapped string test. Make sure that when multiple
3708 // strings can match that the longest one is matched.
3710 " [a {ab} {abc}] > x;"
3713 " q [t {st} {rst}] { e > p;" ;
3715 t
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
);
3716 if (t
== NULL
|| U_FAILURE(ec
)) {
3718 errln("FAIL: createFromRules failed");
3722 expect(*t
, "a ab abc qte qste qrste",
3723 "x x x qtp qstp qrstp");
3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3728 // BEGIN TestUserFunction support factory
3730 Transliterator
* _TUFF
[4];
3731 UnicodeString
* _TUFID
[4];
3733 static Transliterator
* U_EXPORT2
_TUFFactory(const UnicodeString
& /*ID*/,
3734 Transliterator::Token context
) {
3735 return _TUFF
[context
.integer
]->clone();
3738 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) {
3740 _TUFID
[n
] = new UnicodeString(ID
);
3741 Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
));
3744 static void _TUFUnreg(int32_t n
) {
3745 if (_TUFF
[n
] != NULL
) {
3746 Transliterator::unregister(*_TUFID
[n
]);
3752 // END TestUserFunction support factory
3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3756 * Test that user-registered transliterators can be used under function
3759 void TransliteratorTest::TestUserFunction() {
3763 UErrorCode ec
= U_ZERO_ERROR
;
3765 // Setup our factory
3767 for (i
=0; i
<4; ++i
) {
3771 // There's no need to register inverses if we don't use them
3772 t
= Transliterator::createFromRules("gif",
3773 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3774 UTRANS_FORWARD
, pe
, ec
);
3775 if (t
== NULL
|| U_FAILURE(ec
)) {
3776 dataerrln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
));
3779 _TUFReg("Any-gif", t
, 0);
3781 t
= Transliterator::createFromRules("RemoveCurly",
3782 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3783 UTRANS_FORWARD
, pe
, ec
);
3784 if (t
== NULL
|| U_FAILURE(ec
)) {
3785 errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
));
3788 expect(*t
, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3789 _TUFReg("Any-RemoveCurly", t
, 1);
3791 logln("Trying &hex");
3792 t
= Transliterator::createFromRules("hex2",
3794 UTRANS_FORWARD
, pe
, ec
);
3795 if (t
== NULL
|| U_FAILURE(ec
)) {
3796 errln("FAIL: createFromRules");
3799 logln("Registering");
3800 _TUFReg("Any-hex2", t
, 2);
3801 t
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
);
3802 if (t
== NULL
|| U_FAILURE(ec
)) {
3803 errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
));
3806 expect(*t
, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3809 logln("Trying &gif");
3810 t
= Transliterator::createFromRules("gif2",
3811 "(.) > &Gif(&Hex2($1));",
3812 UTRANS_FORWARD
, pe
, ec
);
3813 if (t
== NULL
|| U_FAILURE(ec
)) {
3814 errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
));
3817 logln("Registering");
3818 _TUFReg("Any-gif2", t
, 3);
3819 t
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
);
3820 if (t
== NULL
|| U_FAILURE(ec
)) {
3821 errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
));
3824 expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3825 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3828 // Test that filters are allowed after &
3829 t
= Transliterator::createFromRules("test",
3830 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3831 UTRANS_FORWARD
, pe
, ec
);
3832 if (t
== NULL
|| U_FAILURE(ec
)) {
3833 errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
));
3837 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3841 for (i
=0; i
<4; ++i
) {
3847 * Test the Any-X transliterators.
3849 void TransliteratorTest::TestAnyX(void) {
3850 UParseError parseError
;
3851 UErrorCode status
= U_ZERO_ERROR
;
3852 Transliterator
* anyLatin
=
3853 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3855 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status
));
3861 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3862 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3868 * Test Any-X transliterators with sample letters from all scripts.
3870 void TransliteratorTest::TestAny(void) {
3871 UErrorCode status
= U_ZERO_ERROR
;
3872 // Note: there is a lot of implicit construction of UnicodeStrings from (char *) in
3873 // function call parameters going on in this test.
3874 UnicodeSet
alphabetic("[:alphabetic:]", status
);
3875 if (U_FAILURE(status
)) {
3876 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3879 alphabetic
.freeze();
3881 UnicodeString testString
;
3882 for (int32_t i
= 0; i
< USCRIPT_CODE_LIMIT
; i
++) {
3883 const char *scriptName
= uscript_getShortName((UScriptCode
)i
);
3884 if (scriptName
== NULL
) {
3885 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__
, __LINE__
, i
);
3890 sample
.applyPropertyAlias("script", scriptName
, status
);
3891 if (U_FAILURE(status
)) {
3892 errln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3895 sample
.retainAll(alphabetic
);
3896 for (int32_t count
=0; count
<5; count
++) {
3897 UChar32 c
= sample
.charAt(count
);
3901 testString
.append(c
);
3905 UParseError parseError
;
3906 Transliterator
* anyLatin
=
3907 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
3908 if (U_FAILURE(status
)) {
3909 dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
));
3913 logln(UnicodeString("Sample set for Any-Latin: ") + testString
);
3914 anyLatin
->transliterate(testString
);
3915 logln(UnicodeString("Sample result for Any-Latin: ") + testString
);
3921 * Test the source and target set API. These are only implemented
3922 * for RBT and CompoundTransliterator at this time.
3924 void TransliteratorTest::TestSourceTargetSet() {
3925 UErrorCode ec
= U_ZERO_ERROR
;
3933 UnicodeSet
expSrc("[arx{lu}]", ec
);
3936 UnicodeSet
expTrg("[bq]", ec
);
3939 Transliterator
* t
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
);
3941 if (U_FAILURE(ec
)) {
3943 errln("FAIL: Couldn't set up test");
3947 UnicodeSet src
; t
->getSourceSet(src
);
3948 UnicodeSet trg
; t
->getTargetSet(trg
);
3950 if (src
== expSrc
&& trg
== expTrg
) {
3952 logln((UnicodeString
)"Ok: " +
3953 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3954 ", target = " + trg
.toPattern(b
, TRUE
));
3956 UnicodeString a
, b
, c
, d
;
3957 errln((UnicodeString
)"FAIL: " +
3958 r
+ " => source = " + src
.toPattern(a
, TRUE
) +
3959 ", expected " + expSrc
.toPattern(b
, TRUE
) +
3960 "; target = " + trg
.toPattern(c
, TRUE
) +
3961 ", expected " + expTrg
.toPattern(d
, TRUE
));
3968 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3970 void TransliteratorTest::TestPatternWhiteSpace() {
3972 const char* r
= "a > \\u200E b;";
3974 UErrorCode ec
= U_ZERO_ERROR
;
3976 Transliterator
* t
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
);
3978 if (U_FAILURE(ec
)) {
3979 errln("FAIL: Couldn't set up test");
3981 expect(*t
, "a", "b");
3987 UnicodeSet
set(CharsToUnicodeString("[a \\u200E]"), ec
);
3989 if (U_FAILURE(ec
)) {
3990 errln("FAIL: Couldn't set up test");
3992 if (set
.contains(0x200E)) {
3993 errln("FAIL: U+200E not being ignored by UnicodeSet");
3997 //======================================================================
3998 // this method is in TestUScript.java
3999 //======================================================================
4000 void TransliteratorTest::TestAllCodepoints(){
4001 UScriptCode code
= USCRIPT_INVALID_CODE
;
4002 char id
[256]={'\0'};
4003 char abbr
[256]={'\0'};
4004 char newId
[256]={'\0'};
4005 char newAbbrId
[256]={'\0'};
4006 char oldId
[256]={'\0'};
4007 char oldAbbrId
[256]={'\0'};
4009 UErrorCode status
=U_ZERO_ERROR
;
4012 for(uint32_t i
= 0; i
<=0x10ffff; i
++){
4013 code
= uscript_getScript(i
,&status
);
4014 if(code
== USCRIPT_INVALID_CODE
){
4015 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i
);
4017 const char* myId
= uscript_getName(code
);
4019 dataerrln("Valid script code returned NULL name. Check your data!");
4022 uprv_strcpy(id
,myId
);
4023 uprv_strcpy(abbr
,uscript_getShortName(code
));
4025 uprv_strcpy(newId
,"[:");
4026 uprv_strcat(newId
,id
);
4027 uprv_strcat(newId
,":];NFD");
4029 uprv_strcpy(newAbbrId
,"[:");
4030 uprv_strcat(newAbbrId
,abbr
);
4031 uprv_strcat(newAbbrId
,":];NFD");
4033 if(uprv_strcmp(newId
,oldId
)!=0){
4034 Transliterator
* t
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
);
4035 if(t
==NULL
|| U_FAILURE(status
)){
4036 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
4040 if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){
4041 Transliterator
* t
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
);
4042 if(t
==NULL
|| U_FAILURE(status
)){
4043 dataerrln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(status
));
4047 uprv_strcpy(oldId
,newId
);
4048 uprv_strcpy(oldAbbrId
, newAbbrId
);
4054 #define TEST_TRANSLIT_ID(id, cls) { \
4055 UErrorCode ec = U_ZERO_ERROR; \
4056 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
4057 if (U_FAILURE(ec)) { \
4058 dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
4060 if (t->getDynamicClassID() != cls::getStaticClassID()) { \
4061 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4063 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
4068 #define TEST_TRANSLIT_RULE(rule, cls) { \
4069 UErrorCode ec = U_ZERO_ERROR; \
4071 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
4072 if (U_FAILURE(ec)) { \
4073 errln("FAIL: Couldn't create " rule); \
4075 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
4076 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
4078 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
4083 void TransliteratorTest::TestBoilerplate() {
4084 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
);
4085 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
);
4086 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
);
4087 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
);
4088 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
);
4089 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
);
4090 TEST_TRANSLIT_ID("Null", NullTransliterator
);
4091 TEST_TRANSLIT_ID("Remove", RemoveTransliterator
);
4092 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
);
4093 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
);
4094 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
);
4095 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
);
4096 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
);
4099 void TransliteratorTest::TestAlternateSyntax() {
4104 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4107 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4108 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4109 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4112 static const char* BEGIN_END_RULES
[] = {
4126 "", // test case commented out below, this is here to keep from messing up the indexes
4135 "", // test case commented out below, this is here to keep from messing up the indexes
4144 "", // test case commented out below, this is here to keep from messing up the indexes
4163 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4164 "$delim = [\\-$ws];"
4165 "$ws $delim* > ' ';"
4166 "'-' $delim* > '-';",
4170 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4171 "$delim = [\\-$ws];"
4172 "$ws $delim* > ' ';"
4173 "'-' $delim* > '-';",
4176 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4177 "$delim = [\\-$ws];"
4178 "$ws $delim* > ' ';"
4179 "'-' $delim* > '-';"
4183 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4184 "$delim = [\\-$ws];"
4186 "$ws $delim* > ' ';"
4187 "'-' $delim* > '-';",
4192 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4193 "$delim = [\\-$ws];"
4195 "$ws $delim* > ' ';"
4196 "'-' $delim* > '-';",
4198 "", // test case commented out below, this is here to keep from messing up the indexes
4202 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4203 "$delim = [\\-$ws];"
4205 "$ws $delim* > ' ';"
4206 "'-' $delim* > '-';"
4209 "", // test case commented out below, this is here to keep from messing up the indexes
4213 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4214 "$delim = [\\-$ws];"
4217 "$ws $delim* > ' ';"
4218 "'-' $delim* > '-';"
4221 "$ab { ' ' } $ab > '-';"
4228 "", // test case commented out below, this is here to keep from messing up the indexes
4231 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4232 "$delim = [\\-$ws];"
4235 "$ws $delim* > ' ';"
4236 "'-' $delim* > '-';"
4238 "$ab { ' ' } $ab > '-';"
4254 "", // test case commented out below, this is here to keep from messing up the indexes
4275 "", // test case commented out below, this is here to keep from messing up the indexes
4287 (This entire test is commented out below and will need some heavy revision when we re-add
4288 the ::BEGIN/::END stuff)
4289 static const char* BOGUS_BEGIN_END_RULES[] = {
4308 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4311 static const char* BEGIN_END_TEST_CASES
[] = {
4312 // rules input expected output
4313 BEGIN_END_RULES
[0], "abc ababc aba", "xy zbc z",
4314 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4315 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4316 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4317 BEGIN_END_RULES
[4], "abc ababc aba", "xy abxy z",
4318 BEGIN_END_RULES
[5], "abccabaacababcbc", "PXAARXQBR",
4320 BEGIN_END_RULES
[6], "e e - e---e- e", "e e e-e-e",
4321 BEGIN_END_RULES
[7], "e e - e---e- e", "e e e-e-e",
4322 BEGIN_END_RULES
[8], "e e - e---e- e", "e e e-e-e",
4323 BEGIN_END_RULES
[9], "e e - e---e- e", "e e e-e-e",
4324 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4325 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4326 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4327 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4328 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4329 BEGIN_END_RULES
[13], "e e - e---e- e", "e e e-e-e",
4330 BEGIN_END_RULES
[13], "a a a a", "a%a%a%a",
4331 BEGIN_END_RULES
[13], "a a-b c b a", "a%a-b cb-a",
4333 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4334 BEGIN_END_RULES
[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4335 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4336 BEGIN_END_RULES
[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4338 static const int32_t BEGIN_END_TEST_CASES_length
= (int32_t)(sizeof(BEGIN_END_TEST_CASES
) / sizeof(BEGIN_END_TEST_CASES
[0]));
4340 void TransliteratorTest::TestBeginEnd() {
4341 // run through the list of test cases above
4343 for (i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4344 expect((UnicodeString
)"Test case #" + (i
/ 3),
4345 UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4346 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4347 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4350 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4351 UParseError parseError
;
4352 UErrorCode status
= U_ZERO_ERROR
;
4353 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4354 UTRANS_REVERSE
, parseError
, status
);
4355 if (reversed
== 0 || U_FAILURE(status
)) {
4356 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4358 expect(*reversed
, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4362 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4363 // that all of them cause errors
4365 (commented out until we have the real ::BEGIN/::END stuff in place
4366 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4367 UParseError parseError;
4368 UErrorCode status = U_ZERO_ERROR;
4369 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4370 UTRANS_FORWARD, parseError, status);
4371 if (!U_FAILURE(status)) {
4373 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4379 void TransliteratorTest::TestBeginEndToRules() {
4380 // run through the same list of test cases we used above, but this time, instead of just
4381 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4382 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4383 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4384 // to (i.e., does the same thing as) the original rule set
4385 for (int32_t i
= 0; i
< BEGIN_END_TEST_CASES_length
; i
+= 3) {
4386 UParseError parseError
;
4387 UErrorCode status
= U_ZERO_ERROR
;
4388 Transliterator
* t
= Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
),
4389 UTRANS_FORWARD
, parseError
, status
);
4390 if (U_FAILURE(status
)) {
4391 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError
, status
);
4393 UnicodeString rules
;
4394 t
->toRules(rules
, TRUE
);
4395 Transliterator
* t2
= Transliterator::createFromRules((UnicodeString
)"Test case #" + (i
/ 3), rules
,
4396 UTRANS_FORWARD
, parseError
, status
);
4397 if (U_FAILURE(status
)) {
4398 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4399 parseError
, status
);
4403 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 1], -1, US_INV
),
4404 UnicodeString(BEGIN_END_TEST_CASES
[i
+ 2], -1, US_INV
));
4411 // do the same thing for the reversible test case
4412 UParseError parseError
;
4413 UErrorCode status
= U_ZERO_ERROR
;
4414 Transliterator
* reversed
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]),
4415 UTRANS_REVERSE
, parseError
, status
);
4416 if (U_FAILURE(status
)) {
4417 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
);
4419 UnicodeString rules
;
4420 reversed
->toRules(rules
, FALSE
);
4421 Transliterator
* reversed2
= Transliterator::createFromRules("Reversed", rules
, UTRANS_FORWARD
,
4422 parseError
, status
);
4423 if (U_FAILURE(status
)) {
4424 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4425 parseError
, status
);
4429 UnicodeString("xy XY XYZ yz YZ"),
4430 UnicodeString("xy abc xaba yz aba"));
4437 void TransliteratorTest::TestRegisterAlias() {
4438 UnicodeString
longID("Lower;[aeiou]Upper");
4439 UnicodeString
shortID("Any-CapVowels");
4440 UnicodeString
reallyShortID("CapVowels");
4442 Transliterator::registerAlias(shortID
, longID
);
4444 UErrorCode err
= U_ZERO_ERROR
;
4445 Transliterator
* t1
= Transliterator::createInstance(longID
, UTRANS_FORWARD
, err
);
4446 if (U_FAILURE(err
)) {
4447 errln("Failed to instantiate transliterator with long ID");
4448 Transliterator::unregister(shortID
);
4451 Transliterator
* t2
= Transliterator::createInstance(reallyShortID
, UTRANS_FORWARD
, err
);
4452 if (U_FAILURE(err
)) {
4453 errln("Failed to instantiate transliterator with short ID");
4455 Transliterator::unregister(shortID
);
4459 if (t1
->getID() != longID
)
4460 errln("Transliterator instantiated with long ID doesn't have long ID");
4461 if (t2
->getID() != reallyShortID
)
4462 errln("Transliterator instantiated with short ID doesn't have short ID");
4464 UnicodeString rules1
;
4465 UnicodeString rules2
;
4467 t1
->toRules(rules1
, TRUE
);
4468 t2
->toRules(rules2
, TRUE
);
4469 if (rules1
!= rules2
)
4470 errln("Alias transliterators aren't the same");
4474 Transliterator::unregister(shortID
);
4476 t1
= Transliterator::createInstance(shortID
, UTRANS_FORWARD
, err
);
4477 if (U_SUCCESS(err
)) {
4478 errln("Instantiation with short ID succeeded after short ID was unregistered");
4482 // try the same thing again, but this time with something other than
4483 // an instance of CompoundTransliterator
4484 UnicodeString
realID("Latin-Greek");
4485 UnicodeString
fakeID("Latin-dlgkjdflkjdl");
4486 Transliterator::registerAlias(fakeID
, realID
);
4489 t1
= Transliterator::createInstance(realID
, UTRANS_FORWARD
, err
);
4490 if (U_FAILURE(err
)) {
4491 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err
));
4492 Transliterator::unregister(realID
);
4495 t2
= Transliterator::createInstance(fakeID
, UTRANS_FORWARD
, err
);
4496 if (U_FAILURE(err
)) {
4497 errln("Failed to instantiate transliterator with fake ID");
4499 Transliterator::unregister(realID
);
4503 t1
->toRules(rules1
, TRUE
);
4504 t2
->toRules(rules2
, TRUE
);
4505 if (rules1
!= rules2
)
4506 errln("Alias transliterators aren't the same");
4510 Transliterator::unregister(fakeID
);
/**
 * Exercises utrans_stripRules() on one hand-encoded rule string.
 *
 * The test passes the `rule` buffer to utrans_stripRules() and reports an
 * error when the returned length differs from u_strlen(expectedRule), or when
 * the first `len` code units of the output differ from expectedRule.
 *
 * NOTE(review): this listing omits some of the original lines (the embedded
 * line numbers skip), so closing braces and comment delimiters are not shown.
 */
4513 void TransliteratorTest::TestRuleStripping() {
4516 \uE001>\u0C01; # SIGN
// Input as UTF-16 code units: '#', ' ', CR, LF, then U+E001 '>' U+0C01 ';',
// followed by " # SIGN" and a terminating 0.
4518 static const UChar rule
[] = {
4519 0x0023,0x0020,0x000D,0x000A,
4520 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
// Expected output: U+E001 '>' U+0C01 ';' and a terminating 0.
4522 static const UChar expectedRule
[] = {
4523 0xE001,0x003E,0x0C01,0x003B,0
// The output buffer has as many code units as `rule`, terminator included.
4525 UChar result
[sizeof(rule
)/sizeof(rule
[0])];
4526 UErrorCode status
= U_ZERO_ERROR
;
// The length argument is the full element count of `rule`, so it includes the
// trailing 0. NOTE(review): `status` is not examined in the lines shown here.
4527 int32_t len
= utrans_stripRules(rule
, (int32_t)(sizeof(rule
)/sizeof(rule
[0])), result
, &status
);
4528 if (len
!= u_strlen(expectedRule
)) {
4529 errln("utrans_stripRules return len = %d", len
);
// Only the first `len` code units are compared.
4531 if (u_strncmp(expectedRule
, result
, len
) != 0) {
4532 errln("utrans_stripRules did not return expected string");
4537 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4539 void TransliteratorTest::TestHalfwidthFullwidth(void) {
// Creates the "Halfwidth-Fullwidth" (hf) and "Fullwidth-Halfwidth" (fh)
// forward transliterators and walks the DATA table below.
// NOTE(review): this listing omits some of the original lines (the embedded
// line numbers skip); the switch header, the case bodies and any cleanup are
// not shown here.
4540 UParseError parseError
;
4541 UErrorCode status
= U_ZERO_ERROR
;
4542 Transliterator
* hf
= Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD
, parseError
, status
);
4543 Transliterator
* fh
= Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD
, parseError
, status
);
// A null pointer from either createInstance() call is reported as a data
// error together with the name of the status code.
4544 if (hf
== 0 || fh
== 0) {
4545 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
4551 // Array of 2n items
// NOTE(review): the loop below advances by 3 and reads DATA[i+1] and
// DATA[i+2], so entries are consumed in triples rather than pairs.
4553 // "hf"|"fh"|"both",
4556 const char* DATA
[] = {
4558 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4559 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4561 int32_t DATA_length
= (int32_t)(sizeof(DATA
) / sizeof(DATA
[0]));
// For each group, `h` is built from the second entry and `f` from the third;
// both go through CharsToUnicodeString().
4563 for (int32_t i
=0; i
<DATA_length
; i
+=3) {
4564 UnicodeString h
= CharsToUnicodeString(DATA
[i
+1]);
4565 UnicodeString f
= CharsToUnicodeString(DATA
[i
+2]);
// The case labels are the character codes of 'h', 'f' and 'b'.
4567 case 0x68: //'h': // Halfwidth-Fullwidth only
4570 case 0x66: //'f': // Fullwidth-Halfwidth only
4573 case 0x62: //'b': // both directions
4585 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4586 * TODO: confirm that the expected results are correct.
4587 * For now, test just confirms that C++ and Java give identical results.
4589 void TransliteratorTest::TestThai(void) {
// The body shown here is compiled only when break iteration is configured in.
// NOTE(review): this listing omits some of the original lines (the embedded
// line numbers skip); the condition guarding the first dataerrln() call and
// any cleanup are not shown here.
4590 #if !UCONFIG_NO_BREAK_ITERATION
4591 UParseError parseError
;
4592 UErrorCode status
= U_ZERO_ERROR
;
4593 Transliterator
* tr
= Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
);
4595 dataerrln("FAIL: createInstance failed - %s", u_errorName(status
));
4598 if (U_FAILURE(status
)) {
4599 errln("FAIL: createInstance failed with %s", u_errorName(status
));
// Thai input, written with doubled backslashes so the \u sequences reach
// UnicodeString::unescape() below as text.
4602 const char *thaiText
=
4603 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4604 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4605 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4606 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4607 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4608 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4609 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4610 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4611 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4612 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4613 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4614 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4615 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4616 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4617 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4618 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4619 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4620 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4621 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4622 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4623 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4624 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4625 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4626 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4627 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4628 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4629 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4630 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4631 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4632 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
// Expected Latin output, escaped the same way as the input above.
4634 const char *latinText
=
4635 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4636 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4637 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4638 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4639 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4640 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4641 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4642 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4643 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4644 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4645 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4646 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4647 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4648 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4649 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4650 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4651 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4652 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
// Decode the escapes, then transliterate the Thai text in place.
4655 UnicodeString
xlitText(thaiText
);
4656 xlitText
= xlitText
.unescape();
4657 tr
->transliterate(xlitText
);
4659 UnicodeString
expectedText(latinText
);
4660 expectedText
= expectedText
.unescape();
// NOTE(review): xlitText has already been transliterated by `tr` above when
// it is handed to expect() as the source text.
4661 expect(*tr
, xlitText
, expectedText
);
4668 //======================================================================
4670 //======================================================================
/**
 * Creates a forward transliterator from an ID and checks it with expect().
 *
 * @param id              ID passed to Transliterator::createInstance()
 * @param source          text handed to expect() as the input
 * @param expectedResult  text handed to expect() as the expected output
 *
 * NOTE(review): this listing omits some of the original lines (the embedded
 * line numbers skip). The declaration of `pe` (presumably a UParseError),
 * whatever follows the failure report, and any cleanup of `t` are not shown.
 */
4671 void TransliteratorTest::expectT(const UnicodeString
& id
,
4672 const UnicodeString
& source
,
4673 const UnicodeString
& expectedResult
) {
4674 UErrorCode ec
= U_ZERO_ERROR
;
4676 Transliterator
*t
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
);
// On failure, report the ID together with the name of the error code.
4677 if (U_FAILURE(ec
)) {
4678 errln((UnicodeString
)"FAIL: Could not create " + id
+ " - " + u_errorName(ec
));
4682 expect(*t
, source
, expectedResult
);
/**
 * Assembles a diagnostic string from a UParseError and a status code.
 *
 * The visible operands are, in order: the line, the offset, the prettified
 * pre-context, the prettified post-context and the name of the error code.
 *
 * @param message     caller-supplied text; its use is on a line not shown
 *                    here (presumably the start of the reported string)
 * @param parseError  source of the line, offset, preContext and postContext
 * @param status      error code whose name ends the string
 *
 * NOTE(review): this listing omits the line that begins the statement (the
 * embedded line numbers skip), so the function receiving the assembled string
 * is not visible.
 */
4686 void TransliteratorTest::reportParseError(const UnicodeString
& message
,
4687 const UParseError
& parseError
,
4688 const UErrorCode
& status
) {
// The parse error code segment below is commented out in the original.
4690 /*", parse error " + parseError.code +*/
4691 ", line " + parseError
.line
+
4692 ", offset " + parseError
.offset
+
4693 ", pre-context " + prettify(parseError
.preContext
, TRUE
) +
4694 ", post-context " + prettify(parseError
.postContext
,TRUE
) +
4695 ", Error: " + u_errorName(status
));
/**
 * Convenience overload: forwards to the five-argument expect() with the
 * fixed placeholder ID "<ID>".
 *
 * @param rules           rule text, forwarded unchanged
 * @param source          input text, forwarded unchanged
 * @param expectedResult  expected output, forwarded unchanged
 * @param pos             position pointer, forwarded unchanged
 */
4698 void TransliteratorTest::expect(const UnicodeString
& rules
,
4699 const UnicodeString
& source
,
4700 const UnicodeString
& expectedResult
,
4701 UTransPosition
*pos
) {
4702 expect("<ID>", rules
, source
, expectedResult
, pos
);
/**
 * Builds a forward transliterator from rule text and checks it with expect().
 *
 * @param id              ID given to Transliterator::createFromRules()
 * @param rules           rule text to compile
 * @param source          input text handed to expect()
 * @param expectedResult  expected output handed to expect()
 * @param pos             position pointer handed to expect()
 *
 * NOTE(review): this listing omits some of the original lines (the embedded
 * line numbers skip). Whether the final expect() call sits in an else branch,
 * and any cleanup of `t`, are not shown.
 */
4705 void TransliteratorTest::expect(const UnicodeString
& id
,
4706 const UnicodeString
& rules
,
4707 const UnicodeString
& source
,
4708 const UnicodeString
& expectedResult
,
4709 UTransPosition
*pos
) {
4710 UErrorCode status
= U_ZERO_ERROR
;
4711 UParseError parseError
;
4712 Transliterator
* t
= Transliterator::createFromRules(id
, rules
, UTRANS_FORWARD
, parseError
, status
);
// A failed rule compilation is reported through reportParseError(), with the
// rule text appended to the message.
4713 if (U_FAILURE(status
)) {
4714 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules
, parseError
, status
);
4716 expect(*t
, source
, expectedResult
, pos
);
/**
 * Checks a transliterator and its reverse as a pair.
 *
 * First checks that `t` maps `source` to `expectedResult`, then checks that
 * `reverseTransliterator` maps `expectedResult` back to `source`.
 *
 * @param t                      transliterator for the first check
 * @param source                 input of the first check, expected output of the second
 * @param expectedResult         expected output of the first check, input of the second
 * @param reverseTransliterator  transliterator for the second check
 */
4721 void TransliteratorTest::expect(const Transliterator
& t
,
4722 const UnicodeString
& source
,
4723 const UnicodeString
& expectedResult
,
4724 const Transliterator
& reverseTransliterator
) {
4725 expect(t
, source
, expectedResult
);
// Note the swapped arguments for the reverse check.
4726 expect(reverseTransliterator
, expectedResult
, source
);
/**
 * Checks one transliterator against one source/expected pair, reporting each
 * result through expectAux() under three tags built from the transliterator
 * ID: ":String", ":Replaceable" and ":Keyboard".
 *
 * @param t               transliterator under test
 * @param source          input text
 * @param expectedResult  expected output
 * @param pos             position pointer; dereferenced in the
 *                        finishTransliteration() call before ":Replaceable"
 *
 * NOTE(review): this listing omits some of the original lines (the embedded
 * line numbers skip). The conditions that select between the calls shown, the
 * declaration of `log`, and the final argument of the last expectAux() call
 * are not visible here.
 */
4729 void TransliteratorTest::expect(const Transliterator
& t
,
4730 const UnicodeString
& source
,
4731 const UnicodeString
& expectedResult
,
4732 UTransPosition
*pos
) {
// Pass 1: transliterate a copy of the source as a whole string.
4734 UnicodeString
result(source
);
4735 t
.transliterate(result
);
4736 expectAux(t
.getID() + ":String", source
, result
, expectedResult
);
// Position struct used by the incremental calls further down; all four
// fields start at 0.
4738 UTransPosition index
={0, 0, 0, 0};
// Pass 2: work on a second copy of the source, `rsource`.
4743 UnicodeString
rsource(source
);
4745 t
.transliterate(rsource
);
4747 // Do it all at once -- below we do it incrementally
4748 t
.finishTransliteration(rsource
, *pos
);
4750 expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
);
4752 // Test keyboard (incremental) transliteration -- this result
4753 // must be the same after we finalize (see below).
// Pass 3: each step appends the current buffer and position to `log`
// through formatInput().
4758 formatInput(log
, rsource
, index
);
4760 UErrorCode status
= U_ZERO_ERROR
;
4761 t
.transliterate(rsource
, index
, status
);
4762 formatInput(log
, rsource
, index
);
// Feed the source one code unit at a time, logging each unit and the
// resulting state.
4764 for (int32_t i
=0; i
<source
.length(); ++i
) {
4768 log
.append(source
.charAt(i
)).append(" -> ");
4769 UErrorCode status
= U_ZERO_ERROR
;
4770 t
.transliterate(rsource
, index
, source
.charAt(i
), status
);
4771 formatInput(log
, rsource
, index
);
4775 // As a final step in keyboard transliteration, we must call
4776 // transliterate to finish off any pending partial matches that
4777 // were waiting for more input.
4778 t
.finishTransliteration(rsource
, index
);
4779 log
.append(" => ").append(rsource
);
// The accumulated log is the summary; the pass flag is whether the final
// buffer equals the expected text.
4781 expectAux(t
.getID() + ":Keyboard", log
,
4782 rsource
== expectedResult
,
4788 * @param appendTo result is appended to this param.
4789 * @param input the string being transliterated
4790 * @param pos the index struct
4792 UnicodeString
& TransliteratorTest::formatInput(UnicodeString
&appendTo
,
4793 const UnicodeString
& input
,
4794 const UTransPosition
& pos
) {
// NOTE(review): this listing omits some of the original lines (the embedded
// line numbers skip); the else keyword, the end of the fallback message and
// the return statement are not shown here.
4795 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4796 // the {} indicate the context start and limit, and the ||
4797 // indicate the start and limit.
// The position is formatted only when its four offsets are ordered
// 0 <= contextStart <= start <= limit <= contextLimit <= input.length().
4798 if (0 <= pos
.contextStart
&&
4799 pos
.contextStart
<= pos
.start
&&
4800 pos
.start
<= pos
.limit
&&
4801 pos
.limit
<= pos
.contextLimit
&&
4802 pos
.contextLimit
<= input
.length()) {
// Cut the input into the five segments delimited by those four offsets.
4804 UnicodeString a
, b
, c
, d
, e
;
4805 input
.extractBetween(0, pos
.contextStart
, a
);
4806 input
.extractBetween(pos
.contextStart
, pos
.start
, b
);
4807 input
.extractBetween(pos
.start
, pos
.limit
, c
);
4808 input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
);
4809 input
.extractBetween(pos
.contextLimit
, input
.length(), e
);
// 123 and 125 are the code points of '{' and '}'; PIPE is defined outside
// this function.
4810 appendTo
.append(a
).append((UChar
)123/*{*/).append(b
).
4811 append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
).
4812 append((UChar
)125/*}*/).append(e
);
// Fallback text listing the four offsets of the rejected position.
4814 appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" +
4815 pos
.contextStart
+ ", s=" + pos
.start
+ ", l=" +
4816 pos
.limit
+ ", cl=" + pos
.contextLimit
+ "} on " +
/**
 * Convenience overload: turns a source/result pair into a summary string and
 * a pass flag, then forwards to another expectAux() call.
 *
 * The summary is `source + " -> " + result`, and the pass flag is whether
 * `result` equals `expectedResult`.
 *
 * @param tag             label forwarded unchanged
 * @param source          input text, used in the summary
 * @param result          actual output, used in the summary and the comparison
 * @param expectedResult  expected output, used in the comparison
 *
 * NOTE(review): this listing omits the final line of the forwarded call (the
 * embedded line numbers skip), so its last argument is not visible.
 */
4822 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4823 const UnicodeString
& source
,
4824 const UnicodeString
& result
,
4825 const UnicodeString
& expectedResult
) {
4826 expectAux(tag
, source
+ " -> " + result
,
4827 result
== expectedResult
,
/**
 * Reports one check through the test logging functions.
 *
 * The visible lines contain a logln() call that prints "(tag) " followed by
 * the prettified summary, and a dataerrln() call that prints "FAIL: (tag) "
 * and ends with ", expected " plus the prettified expected text.
 *
 * @param tag             label placed in parentheses at the start of the message
 * @param summary         description of the check
 * @param pass            outcome flag; its use is on lines not shown here
 *                        (presumably it selects logln() versus dataerrln())
 * @param expectedResult  expected text, used in the failure message
 *
 * NOTE(review): this listing omits some of the original lines (the embedded
 * line numbers skip), including the branch on `pass` and the middle of the
 * failure message.
 */
4831 void TransliteratorTest::expectAux(const UnicodeString
& tag
,
4832 const UnicodeString
& summary
, UBool pass
,
4833 const UnicodeString
& expectedResult
) {
4835 logln(UnicodeString("(")+tag
+") " + prettify(summary
));
4837 dataerrln(UnicodeString("FAIL: (")+tag
+") "
4839 + ", expected " + prettify(expectedResult
));
4843 #endif /* #if !UCONFIG_NO_TRANSLITERATION */