]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/transtst.cpp
ICU-400.39.tar.gz
[apple/icu.git] / icuSources / test / intltest / transtst.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_TRANSLITERATION
14
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "cpdtrans.h"
27 #include "nultrans.h"
28 #include "rbt.h"
29 #include "rbt_pars.h"
30 #include "anytrans.h"
31 #include "esctrn.h"
32 #include "name2uni.h"
33 #include "nortrans.h"
34 #include "remtrans.h"
35 #include "titletrn.h"
36 #include "tolowtrn.h"
37 #include "toupptrn.h"
38 #include "unesctrn.h"
39 #include "uni2name.h"
40 #include "cstring.h"
41 #include "cmemory.h"
42 #include <stdio.h>
43
44 /***********************************************************************
45
46 HOW TO USE THIS TEST FILE
47 -or-
48 How I developed on two platforms
49 without losing (too much of) my mind
50
51
52 1. Add new tests by copying/pasting/changing existing tests. On Java,
53 any public void method named Test...() taking no parameters becomes
54 a test. On C++, you need to modify the header and add a line to
55 the runIndexedTest() dispatch method.
56
57 2. Make liberal use of the expect() method; it is your friend.
58
59 3. The tests in this file exactly match those in a sister file on the
60 other side. The two files are:
61
62 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
63 icu4c: source/test/intltest/transtst.cpp
64
65 ==> THIS IS THE IMPORTANT PART <==
66
67 When you add a test in this file, add it in TransliteratorTest.java
68 too. Give it the same name and put it in the same relative place.
69 This makes maintenance a lot simpler for any poor soul who ends up
70 trying to synchronize the tests between icu4j and icu4c.
71
72 4. If you MUST enter a test that is NOT paralleled in the sister file,
73 then add it in the special non-mirrored section. These are
74 labeled
75
76 "icu4j ONLY"
77
78 or
79
80 "icu4c ONLY"
81
82 Make sure you document the reason the test is here and not there.
83
84
85 Thank you.
86 The Management
87 ***********************************************************************/
88
// Named UTF-16 code units, written as numeric code points rather than as
// character literals (see the EBCDIC note below). LEFT_BRACE and PIPE are
// appended to log text by keyboardAux(); uses of ZERO and UPPER_A are not
// visible in this part of the file.
89 // Define character constants thusly to be EBCDIC-friendly
90 enum {
91 LEFT_BRACE=((UChar)0x007B), /*{*/
92 PIPE =((UChar)0x007C), /*|*/
93 ZERO =((UChar)0x0030), /*0*/
94 UPPER_A =((UChar)0x0041) /*A*/
95 };
96
// Constructor: initializes the DESERET_DEE and DESERET_dee members with the
// code points 0x10414 and 0x1043C (both above 0xFFFF, hence the UChar32 casts).
97 TransliteratorTest::TransliteratorTest()
98 : DESERET_DEE((UChar32)0x10414),
99 DESERET_dee((UChar32)0x1043C)
100 {
101 }
102
// Destructor: the body is empty; nothing is released here.
103 TransliteratorTest::~TransliteratorTest() {}
104
// Dispatch method that maps a test index to one of the Test...() methods of
// this class. Every test needs its own TESTCASE line here (see the "HOW TO
// USE THIS TEST FILE" note at the top of the file).
//
// index - number of the test being requested
// exec  - NOTE(review): presumably tells TESTCASE whether to actually run the
//         test or only report its name; the macro is defined elsewhere, confirm
// name  - out-parameter; set to "" when index matches no test
// The final char* parameter is unused (its name is commented out).
105 void
106 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
107 const char* &name, char* /*par*/) {
108 switch (index) {
109 TESTCASE(0,TestInstantiation);
110 TESTCASE(1,TestSimpleRules);
111 TESTCASE(2,TestRuleBasedInverse);
112 TESTCASE(3,TestKeyboard);
113 TESTCASE(4,TestKeyboard2);
114 TESTCASE(5,TestKeyboard3);
115 TESTCASE(6,TestArabic);
116 TESTCASE(7,TestCompoundKana);
117 TESTCASE(8,TestCompoundHex);
118 TESTCASE(9,TestFiltering);
119 TESTCASE(10,TestInlineSet);
120 TESTCASE(11,TestPatternQuoting);
121 TESTCASE(12,TestJ277);
122 TESTCASE(13,TestJ243);
123 TESTCASE(14,TestJ329);
124 TESTCASE(15,TestSegments);
125 TESTCASE(16,TestCursorOffset);
126 TESTCASE(17,TestArbitraryVariableValues);
127 TESTCASE(18,TestPositionHandling);
128 TESTCASE(19,TestHiraganaKatakana);
129 TESTCASE(20,TestCopyJ476);
130 TESTCASE(21,TestAnchors);
131 TESTCASE(22,TestInterIndic);
132 TESTCASE(23,TestFilterIDs);
133 TESTCASE(24,TestCaseMap);
134 TESTCASE(25,TestNameMap);
135 TESTCASE(26,TestLiberalizedID);
136 TESTCASE(27,TestCreateInstance);
137 TESTCASE(28,TestNormalizationTransliterator);
138 TESTCASE(29,TestCompoundRBT);
139 TESTCASE(30,TestCompoundFilter);
140 TESTCASE(31,TestRemove);
141 TESTCASE(32,TestToRules);
142 TESTCASE(33,TestContext);
143 TESTCASE(34,TestSupplemental);
144 TESTCASE(35,TestQuantifier);
145 TESTCASE(36,TestSTV);
146 TESTCASE(37,TestCompoundInverse);
147 TESTCASE(38,TestNFDChainRBT);
148 TESTCASE(39,TestNullInverse);
149 TESTCASE(40,TestAliasInverseID);
150 TESTCASE(41,TestCompoundInverseID);
151 TESTCASE(42,TestUndefinedVariable);
152 TESTCASE(43,TestEmptyContext);
153 TESTCASE(44,TestCompoundFilterID);
154 TESTCASE(45,TestPropertySet);
155 TESTCASE(46,TestNewEngine);
156 TESTCASE(47,TestQuantifiedSegment);
157 TESTCASE(48,TestDevanagariLatinRT);
158 TESTCASE(49,TestTeluguLatinRT);
159 TESTCASE(50,TestCompoundLatinRT);
160 TESTCASE(51,TestSanskritLatinRT);
161 TESTCASE(52,TestLocaleInstantiation);
162 TESTCASE(53,TestTitleAccents);
163 TESTCASE(54,TestLocaleResource);
164 TESTCASE(55,TestParseError);
165 TESTCASE(56,TestOutputSet);
166 TESTCASE(57,TestVariableRange);
167 TESTCASE(58,TestInvalidPostContext);
168 TESTCASE(59,TestIDForms);
169 TESTCASE(60,TestToRulesMark);
170 TESTCASE(61,TestEscape);
171 TESTCASE(62,TestAnchorMasking);
172 TESTCASE(63,TestDisplayName);
173 TESTCASE(64,TestSpecialCases);
174 TESTCASE(65,TestIncrementalProgress);
175 TESTCASE(66,TestSurrogateCasing);
176 TESTCASE(67,TestFunction);
177 TESTCASE(68,TestInvalidBackRef);
178 TESTCASE(69,TestMulticharStringSet);
179 TESTCASE(70,TestUserFunction);
180 TESTCASE(71,TestAnyX);
181 TESTCASE(72,TestSourceTargetSet);
182 TESTCASE(73,TestGurmukhiDevanagari);
183 TESTCASE(74,TestRuleWhitespace);
184 TESTCASE(75,TestAllCodepoints);
185 TESTCASE(76,TestBoilerplate);
186 TESTCASE(77,TestAlternateSyntax);
187 TESTCASE(78,TestBeginEnd);
188 TESTCASE(79,TestBeginEndToRules);
189 TESTCASE(80,TestRegisterAlias);
190 TESTCASE(81,TestRuleStripping);
191 TESTCASE(82,TestHalfwidthFullwidth);
192 TESTCASE(83,TestThai);
193 default: name = ""; break;
194 }
195 }
196
197 static const UVersionInfo ICU_39 = {3,9,4,0};
198 /**
199 * Make sure every system transliterator can be instantiated.
200 *
201 * ALSO test that the result of toRules() for each rule is a valid
202 * rule. Do this here so we don't have to have another test that
203 * instantiates everything as well.
204 */
205 void TransliteratorTest::TestInstantiation() {
206 UErrorCode ec = U_ZERO_ERROR;
207 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
208 assertSuccess("getAvailableIDs()", ec);
209 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
210 int32_t n = Transliterator::countAvailableIDs();
211 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
212 avail->count(ec) == n);
213 assertSuccess("count()", ec);
214 UnicodeString name;
215 for (int32_t i=0; i<n; ++i) {
216 const UnicodeString& id = *avail->snext(ec);
217 if (!assertSuccess("snext()", ec) ||
218 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
219 break;
220 }
221 UnicodeString id2 = Transliterator::getAvailableID(i);
222 if (id.length() < 1) {
223 errln(UnicodeString("FAIL: getAvailableID(") +
224 i + ") returned empty string");
225 continue;
226 }
227 if (id != id2) {
228 errln(UnicodeString("FAIL: getAvailableID(") +
229 i + ") != getAvailableIDs().snext()");
230 continue;
231 }
232 UParseError parseError;
233 UErrorCode status = U_ZERO_ERROR;
234 Transliterator* t = Transliterator::createInstance(id,
235 UTRANS_FORWARD, parseError,status);
236 name.truncate(0);
237 Transliterator::getDisplayName(id, name);
238 if (t == 0) {
239 errln(UnicodeString("FAIL: Couldn't create ") + id +
240 /*", parse error " + parseError.code +*/
241 ", line " + parseError.line +
242 ", offset " + parseError.offset +
243 ", pre-context " + prettify(parseError.preContext, TRUE) +
244 ", post-context " +prettify(parseError.postContext,TRUE) +
245 ", Error: " + u_errorName(status));
246 // When createInstance fails, it deletes the failing
247 // entry from the available ID list. We detect this
248 // here by looking for a change in countAvailableIDs.
249 int32_t nn = Transliterator::countAvailableIDs();
250 if (nn == (n - 1)) {
251 n = nn;
252 --i; // Compensate for deleted entry
253 }
254 } else {
255 logln(UnicodeString("OK: ") + name + " (" + id + ")");
256
257 // Now test toRules
258 UnicodeString rules;
259 t->toRules(rules, TRUE);
260 Transliterator *u = Transliterator::createFromRules("x",
261 rules, UTRANS_FORWARD, parseError,status);
262 if (u == 0) {
263 errln(UnicodeString("FAIL: ") + id +
264 ".createFromRules() => bad rules" +
265 /*", parse error " + parseError.code +*/
266 ", line " + parseError.line +
267 ", offset " + parseError.offset +
268 ", context " + prettify(parseError.preContext, TRUE) +
269 ", rules: " + prettify(rules, TRUE));
270 } else {
271 delete u;
272 }
273 delete t;
274 }
275 }
276 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
277 assertSuccess("snext()", ec);
278 delete avail;
279
280 // Now test the failure path
281 UParseError parseError;
282 UErrorCode status = U_ZERO_ERROR;
283 UnicodeString id("<Not a valid Transliterator ID>");
284 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
285 if (t != 0) {
286 errln("FAIL: " + id + " returned a transliterator");
287 delete t;
288 } else {
289 logln("OK: Bogus ID handled properly");
290 }
291 }
292
293 void TransliteratorTest::TestSimpleRules(void) {
294 /* Example: rules 1. ab>x|y
295 * 2. yc>z
296 *
297 * []|eabcd start - no match, copy e to tranlated buffer
298 * [e]|abcd match rule 1 - copy output & adjust cursor
299 * [ex|y]cd match rule 2 - copy output & adjust cursor
300 * [exz]|d no match, copy d to transliterated buffer
301 * [exzd]| done
302 */
303 expect(UnicodeString("ab>x|y;", "") +
304 "yc>z",
305 "eabcd", "exzd");
306
307 /* Another set of rules:
308 * 1. ab>x|yzacw
309 * 2. za>q
310 * 3. qc>r
311 * 4. cw>n
312 *
313 * []|ab Rule 1
314 * [x|yzacw] No match
315 * [xy|zacw] Rule 2
316 * [xyq|cw] Rule 4
317 * [xyqn]| Done
318 */
319 expect(UnicodeString("ab>x|yzacw;") +
320 "za>q;" +
321 "qc>r;" +
322 "cw>n",
323 "ab", "xyqn");
324
325 /* Test categories
326 */
327 UErrorCode status = U_ZERO_ERROR;
328 UParseError parseError;
329 Transliterator *t = Transliterator::createFromRules(
330 "<ID>",
331 UnicodeString("$dummy=").append((UChar)0xE100) +
332 UnicodeString(";"
333 "$vowel=[aeiouAEIOU];"
334 "$lu=[:Lu:];"
335 "$vowel } $lu > '!';"
336 "$vowel > '&';"
337 "'!' { $lu > '^';"
338 "$lu > '*';"
339 "a > ERROR", ""),
340 UTRANS_FORWARD, parseError,
341 status);
342 if (U_FAILURE(status)) {
343 errln("FAIL: RBT constructor failed");
344 return;
345 }
346 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
347 delete t;
348 }
349
350 /**
351 * Test inline set syntax and set variable syntax.
352 */
353 void TransliteratorTest::TestInlineSet(void) {
354 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
355 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
356
357 expect(UnicodeString(
358 "$digit = [0-9];"
359 "$alpha = [a-zA-Z];"
360 "$alphanumeric = [$digit $alpha];" // ***
361 "$special = [^$alphanumeric];" // ***
362 "$alphanumeric > '-';"
363 "$special > '*';", ""),
364
365 "thx-1138", "---*----");
366 }
367
368 /**
369 * Create some inverses and confirm that they work. We have to be
370 * careful how we do this, since the inverses will not be true
371 * inverses -- we can't throw any random string at the composition
372 * of the transliterators and expect the identity function. F x
373 * F' != I. However, if we are careful about the input, we will
374 * get the expected results.
375 */
376 void TransliteratorTest::TestRuleBasedInverse(void) {
377 UnicodeString RULES =
378 UnicodeString("abc>zyx;") +
379 "ab>yz;" +
380 "bc>zx;" +
381 "ca>xy;" +
382 "a>x;" +
383 "b>y;" +
384 "c>z;" +
385
386 "abc<zyx;" +
387 "ab<yz;" +
388 "bc<zx;" +
389 "ca<xy;" +
390 "a<x;" +
391 "b<y;" +
392 "c<z;" +
393
394 "";
395
396 const char* DATA[] = {
397 // Careful here -- random strings will not work. If we keep
398 // the left side to the domain and the right side to the range
399 // we will be okay though (left, abc; right xyz).
400 "a", "x",
401 "abcacab", "zyxxxyy",
402 "caccb", "xyzzy",
403 };
404
405 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
406
407 UErrorCode status = U_ZERO_ERROR;
408 UParseError parseError;
409 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
410 UTRANS_FORWARD, parseError, status);
411 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
412 UTRANS_REVERSE, parseError, status);
413 if (U_FAILURE(status)) {
414 errln("FAIL: RBT constructor failed");
415 return;
416 }
417 for (int32_t i=0; i<DATA_length; i+=2) {
418 expect(*fwd, DATA[i], DATA[i+1]);
419 expect(*rev, DATA[i+1], DATA[i]);
420 }
421 delete fwd;
422 delete rev;
423 }
424
425 /**
426 * Basic test of keyboard.
427 */
428 void TransliteratorTest::TestKeyboard(void) {
429 UParseError parseError;
430 UErrorCode status = U_ZERO_ERROR;
431 Transliterator *t = Transliterator::createFromRules("<ID>",
432 UnicodeString("psch>Y;")
433 +"ps>y;"
434 +"ch>x;"
435 +"a>A;",
436 UTRANS_FORWARD, parseError,
437 status);
438 if (U_FAILURE(status)) {
439 errln("FAIL: RBT constructor failed");
440 return;
441 }
442 const char* DATA[] = {
443 // insertion, buffer
444 "a", "A",
445 "p", "Ap",
446 "s", "Aps",
447 "c", "Apsc",
448 "a", "AycA",
449 "psch", "AycAY",
450 0, "AycAY", // null means finishKeyboardTransliteration
451 };
452
453 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
454 delete t;
455 }
456
457 /**
458 * Basic test of keyboard with cursor.
459 */
460 void TransliteratorTest::TestKeyboard2(void) {
461 UParseError parseError;
462 UErrorCode status = U_ZERO_ERROR;
463 Transliterator *t = Transliterator::createFromRules("<ID>",
464 UnicodeString("ych>Y;")
465 +"ps>|y;"
466 +"ch>x;"
467 +"a>A;",
468 UTRANS_FORWARD, parseError,
469 status);
470 if (U_FAILURE(status)) {
471 errln("FAIL: RBT constructor failed");
472 return;
473 }
474 const char* DATA[] = {
475 // insertion, buffer
476 "a", "A",
477 "p", "Ap",
478 "s", "Aps", // modified for rollback - "Ay",
479 "c", "Apsc", // modified for rollback - "Ayc",
480 "a", "AycA",
481 "p", "AycAp",
482 "s", "AycAps", // modified for rollback - "AycAy",
483 "c", "AycApsc", // modified for rollback - "AycAyc",
484 "h", "AycAY",
485 0, "AycAY", // null means finishKeyboardTransliteration
486 };
487
488 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
489 delete t;
490 }
491
492 /**
493 * Test keyboard transliteration with back-replacement.
494 */
495 void TransliteratorTest::TestKeyboard3(void) {
496 // We want th>z but t>y. Furthermore, during keyboard
497 // transliteration we want t>y then yh>z if t, then h are
498 // typed.
499 UnicodeString RULES("t>|y;"
500 "yh>z;");
501
502 const char* DATA[] = {
503 // Column 1: characters to add to buffer (as if typed)
504 // Column 2: expected appearance of buffer after
505 // keyboard xliteration.
506 "a", "a",
507 "b", "ab",
508 "t", "abt", // modified for rollback - "aby",
509 "c", "abyc",
510 "t", "abyct", // modified for rollback - "abycy",
511 "h", "abycz",
512 0, "abycz", // null means finishKeyboardTransliteration
513 };
514
515 UParseError parseError;
516 UErrorCode status = U_ZERO_ERROR;
517 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
518 if (U_FAILURE(status)) {
519 errln("FAIL: RBT constructor failed");
520 return;
521 }
522 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
523 delete t;
524 }
525
526 void TransliteratorTest::keyboardAux(const Transliterator& t,
527 const char* DATA[], int32_t DATA_length) {
528 UErrorCode status = U_ZERO_ERROR;
529 UTransPosition index={0, 0, 0, 0};
530 UnicodeString s;
531 for (int32_t i=0; i<DATA_length; i+=2) {
532 UnicodeString log;
533 if (DATA[i] != 0) {
534 log = s + " + "
535 + DATA[i]
536 + " -> ";
537 t.transliterate(s, index, DATA[i], status);
538 } else {
539 log = s + " => ";
540 t.finishTransliteration(s, index);
541 }
542 // Show the start index '{' and the cursor '|'
543 UnicodeString a, b, c;
544 s.extractBetween(0, index.contextStart, a);
545 s.extractBetween(index.contextStart, index.start, b);
546 s.extractBetween(index.start, s.length(), c);
547 log.append(a).
548 append((UChar)LEFT_BRACE).
549 append(b).
550 append((UChar)PIPE).
551 append(c);
552 if (s == DATA[i+1] && U_SUCCESS(status)) {
553 logln(log);
554 } else {
555 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
556 }
557 }
558 }
559
// Placeholder for the Latin-Arabic test. The whole body is commented out, so
// this test currently does nothing; the disabled code is kept for reference.
560 void TransliteratorTest::TestArabic(void) {
561 // Test disabled for 2.0 until new Arabic transliterator can be written.
562 // /*
563 // const char* DATA[] = {
564 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
565 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
566 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
567 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
568 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
569 // "\u062c\u0645\u064a\u0644\u0629",
570 // };
571 // */
572 //
573 // UChar ar_raw[] = {
574 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
575 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
576 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
577 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
578 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
579 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
580 // };
581 // UnicodeString ar(ar_raw);
582 // UErrorCode status=U_ZERO_ERROR;
583 // UParseError parseError;
584 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
585 // if (t == 0) {
586 // errln("FAIL: createInstance failed");
587 // return;
588 // }
589 // expect(*t, "Arabic", ar);
590 // delete t;
591 }
592
593 /**
594 * Compose the Kana transliterator forward and reverse and try
595 * some strings that should come out unchanged.
596 */
597 void TransliteratorTest::TestCompoundKana(void) {
598 UParseError parseError;
599 UErrorCode status = U_ZERO_ERROR;
600 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
601 if (t == 0) {
602 errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
603 } else {
604 expect(*t, "aaaaa", "aaaaa");
605 delete t;
606 }
607 }
608
609 /**
610 * Compose the hex transliterators forward and reverse.
611 */
612 void TransliteratorTest::TestCompoundHex(void) {
613 UParseError parseError;
614 UErrorCode status = U_ZERO_ERROR;
615 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
616 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
617 Transliterator* transab[] = { a, b };
618 Transliterator* transba[] = { b, a };
619 if (a == 0 || b == 0) {
620 errln("FAIL: construction failed");
621 delete a;
622 delete b;
623 return;
624 }
625 // Do some basic tests of a
626 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
627 // Do some basic tests of b
628 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
629
630 Transliterator* ab = new CompoundTransliterator(transab, 2);
631 UnicodeString s("abcde", "");
632 expect(*ab, s, s);
633
634 UnicodeString str(s);
635 a->transliterate(str);
636 Transliterator* ba = new CompoundTransliterator(transba, 2);
637 expect(*ba, str, str);
638
639 delete ab;
640 delete ba;
641 delete a;
642 delete b;
643 }
644
// The address of this variable is what TestFilter::getDynamicClassID() returns.
645 int gTestFilterClassID = 0;
646 /**
647 * Used by TestFiltering().
648 */
649 class TestFilter : public UnicodeFilter {
// Returns a heap-allocated copy of this filter.
650 virtual UnicodeFunctor* clone() const {
651 return new TestFilter(*this);
652 }
// Accepts every code point except 0x0063 ('c').
653 virtual UBool contains(UChar32 c) const {
654 return c != (UChar)0x0063 /*c*/;
655 }
656 // Stubs
// Returns result untouched; no pattern is produced.
657 virtual UnicodeString& toPattern(UnicodeString& result,
658 UBool /*escapeUnprintable*/) const {
659 return result;
660 }
// Always answers FALSE.
661 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
662 return FALSE;
663 }
// Adds nothing to the supplied set.
664 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
665 public:
// Class ID is the address of the file-level gTestFilterClassID variable.
666 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
667 };
668
669 /**
670 * Do some basic tests of filtering.
671 */
672 void TransliteratorTest::TestFiltering(void) {
673 UParseError parseError;
674 UErrorCode status = U_ZERO_ERROR;
675 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
676 if (hex == 0) {
677 errln("FAIL: createInstance(Any-Hex) failed");
678 return;
679 }
680 hex->adoptFilter(new TestFilter());
681 UnicodeString s("abcde");
682 hex->transliterate(s);
683 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
684 if (s == exp) {
685 logln(UnicodeString("Ok: \"") + exp + "\"");
686 } else {
687 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
688 }
689
690 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
691 UnicodeFilter *f = hex->orphanFilter();
692 if (f == NULL){
693 errln("FAIL: orphanFilter() should get a UnicodeFilter");
694 } else {
695 delete f;
696 }
697 delete hex;
698 }
699
700 /**
701 * Test anchors
702 */
703 void TransliteratorTest::TestAnchors(void) {
704 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
705 "aaa",
706 "012");
707 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
708 "aaa",
709 "012");
710 expect(UnicodeString("^ab > 01 ;"
711 " ab > |8 ;"
712 " b > k ;"
713 " 8x$ > 45 ;"
714 " 8x > 77 ;", ""),
715
716 "ababbabxabx",
717 "018k7745");
718 expect(UnicodeString("$s = [z$] ;"
719 "$s{ab > 01 ;"
720 " ab > |8 ;"
721 " b > k ;"
722 " 8x}$s > 45 ;"
723 " 8x > 77 ;", ""),
724
725 "abzababbabxzabxabx",
726 "01z018k45z01x45");
727 }
728
729 /**
730 * Test pattern quoting and escape mechanisms.
731 */
732 void TransliteratorTest::TestPatternQuoting(void) {
733 // Array of 3n items
734 // Each item is <rules>, <input>, <expected output>
735 const UnicodeString DATA[] = {
736 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
737 UnicodeString(UChar(0x4E01)),
738 "[male adult]"
739 };
740
741 for (int32_t i=0; i<3; i+=3) {
742 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
743 UParseError parseError;
744 UErrorCode status = U_ZERO_ERROR;
745 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
746 if (U_FAILURE(status)) {
747 errln("RBT constructor failed");
748 } else {
749 expect(*t, DATA[i+1], DATA[i+2]);
750 }
751 delete t;
752 }
753 }
754
755 /**
756 * Regression test for bugs found in Greek transliteration.
757 */
758 void TransliteratorTest::TestJ277(void) {
759 UErrorCode status = U_ZERO_ERROR;
760 UParseError parseError;
761 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
762 if (gl == NULL) {
763 errln("FAIL: createInstance(Greek-Latin) returned NULL");
764 return;
765 }
766
767 UChar sigma = 0x3C3;
768 UChar upsilon = 0x3C5;
769 UChar nu = 0x3BD;
770 // UChar PHI = 0x3A6;
771 UChar alpha = 0x3B1;
772 // UChar omega = 0x3C9;
773 // UChar omicron = 0x3BF;
774 // UChar epsilon = 0x3B5;
775
776 // sigma upsilon nu -> syn
777 UnicodeString syn;
778 syn.append(sigma).append(upsilon).append(nu);
779 expect(*gl, syn, "syn");
780
781 // sigma alpha upsilon nu -> saun
782 UnicodeString sayn;
783 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
784 expect(*gl, sayn, "saun");
785
786 // Again, using a smaller rule set
787 UnicodeString rules(
788 "$alpha = \\u03B1;"
789 "$nu = \\u03BD;"
790 "$sigma = \\u03C3;"
791 "$ypsilon = \\u03C5;"
792 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
793 "s <> $sigma;"
794 "a <> $alpha;"
795 "u <> $vowel { $ypsilon;"
796 "y <> $ypsilon;"
797 "n <> $nu;",
798 "");
799 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
800 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
801 expect(*mini, syn, "syn");
802 expect(*mini, sayn, "saun");
803 delete mini;
804 mini = NULL;
805
806 #if !UCONFIG_NO_FORMATTING
807 // Transliterate the Greek locale data
808 Locale el("el");
809 DateFormatSymbols syms(el, status);
810 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
811 int32_t i, count;
812 const UnicodeString* data = syms.getMonths(count);
813 for (i=0; i<count; ++i) {
814 if (data[i].length() == 0) {
815 continue;
816 }
817 UnicodeString out(data[i]);
818 gl->transliterate(out);
819 UBool ok = TRUE;
820 if (data[i].length() >= 2 && out.length() >= 2 &&
821 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
822 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
823 ok = FALSE;
824 }
825 }
826 if (ok) {
827 logln(prettify(data[i] + " -> " + out));
828 } else {
829 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
830 }
831 }
832 #endif
833
834 delete gl;
835 }
836
837 /**
838 * Prefix, suffix support in hex transliterators
839 */
840 void TransliteratorTest::TestJ243(void) {
841 UErrorCode ec = U_ZERO_ERROR;
842
843 // Test default Hex-Any, which should handle
844 // \u, \U, u+, and U+
845 Transliterator *hex =
846 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
847 if (assertSuccess("getInstance", ec)) {
848 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
849 }
850 delete hex;
851
852 // // Try a custom Hex-Unicode
853 // // \uXXXX and &#xXXXX;
854 // ec = U_ZERO_ERROR;
855 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
856 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
857 // "abcd5fx012&#x00033;");
858 // // Try custom Any-Hex (default is tested elsewhere)
859 // ec = U_ZERO_ERROR;
860 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
861 // expect(hex3, "012", "&#x30;&#x31;&#x32;");
862 }
863
864 /**
865 * Parsers need better syntax error messages.
866 */
867 void TransliteratorTest::TestJ329(void) {
868
869 struct { UBool containsErrors; const char* rule; } DATA[] = {
870 { FALSE, "a > b; c > d" },
871 { TRUE, "a > b; no operator; c > d" },
872 };
873 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
874
875 for (int32_t i=0; i<DATA_length; ++i) {
876 UErrorCode status = U_ZERO_ERROR;
877 UParseError parseError;
878 Transliterator *rbt = Transliterator::createFromRules("<ID>",
879 DATA[i].rule,
880 UTRANS_FORWARD,
881 parseError,
882 status);
883 UBool gotError = U_FAILURE(status);
884 UnicodeString desc(DATA[i].rule);
885 desc.append(gotError ? " -> error" : " -> no error");
886 if (gotError) {
887 desc = desc + ", ParseError code=" + u_errorName(status) +
888 " line=" + parseError.line +
889 " offset=" + parseError.offset +
890 " context=" + parseError.preContext;
891 }
892 if (gotError == DATA[i].containsErrors) {
893 logln(UnicodeString("Ok: ") + desc);
894 } else {
895 errln(UnicodeString("FAIL: ") + desc);
896 }
897 delete rbt;
898 }
899 }
900
901 /**
902 * Test segments and segment references.
903 */
904 void TransliteratorTest::TestSegments(void) {
905 // Array of 3n items
906 // Each item is <rules>, <input>, <expected output>
907 UnicodeString DATA[] = {
908 "([a-z]) '.' ([0-9]) > $2 '-' $1",
909 "abc.123.xyz.456",
910 "ab1-c23.xy4-z56",
911
912 // nested
913 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
914 "a1 b2",
915 "a1.a.1 b2.b.2",
916 };
917 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
918
919 for (int32_t i=0; i<DATA_length; i+=3) {
920 logln("Pattern: " + prettify(DATA[i]));
921 UParseError parseError;
922 UErrorCode status = U_ZERO_ERROR;
923 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
924 if (U_FAILURE(status)) {
925 errln("FAIL: RBT constructor");
926 } else {
927 expect(*t, DATA[i+1], DATA[i+2]);
928 }
929 delete t;
930 }
931 }
932
933 /**
934 * Test cursor positioning outside of the key
935 */
936 void TransliteratorTest::TestCursorOffset(void) {
937 // Array of 3n items
938 // Each item is <rules>, <input>, <expected output>
939 UnicodeString DATA[] = {
940 "pre {alpha} post > | @ ALPHA ;"
941 "eALPHA > beta ;"
942 "pre {beta} post > BETA @@ | ;"
943 "post > xyz",
944
945 "prealphapost prebetapost",
946
947 "prbetaxyz preBETApost",
948 };
949 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
950
951 for (int32_t i=0; i<DATA_length; i+=3) {
952 logln("Pattern: " + prettify(DATA[i]));
953 UParseError parseError;
954 UErrorCode status = U_ZERO_ERROR;
955 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
956 if (U_FAILURE(status)) {
957 errln("FAIL: RBT constructor");
958 } else {
959 expect(*t, DATA[i+1], DATA[i+2]);
960 }
961 delete t;
962 }
963 }
964
965 /**
966 * Test zero length and > 1 char length variable values. Test
967 * use of variable refs in UnicodeSets.
968 */
969 void TransliteratorTest::TestArbitraryVariableValues(void) {
970 // Array of 3n items
971 // Each item is <rules>, <input>, <expected output>
972 UnicodeString DATA[] = {
973 "$abe = ab;"
974 "$pat = x[yY]z;"
975 "$ll = 'a-z';"
976 "$llZ = [$ll];"
977 "$llY = [$ll$pat];"
978 "$emp = ;"
979
980 "$abe > ABE;"
981 "$pat > END;"
982 "$llZ > 1;"
983 "$llY > 2;"
984 "7$emp 8 > 9;"
985 "",
986
987 "ab xYzxyz stY78",
988 "ABE ENDEND 1129",
989 };
990 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
991
992 for (int32_t i=0; i<DATA_length; i+=3) {
993 logln("Pattern: " + prettify(DATA[i]));
994 UParseError parseError;
995 UErrorCode status = U_ZERO_ERROR;
996 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
997 if (U_FAILURE(status)) {
998 errln("FAIL: RBT constructor");
999 } else {
1000 expect(*t, DATA[i+1], DATA[i+2]);
1001 }
1002 delete t;
1003 }
1004 }
1005
1006 /**
1007 * Confirm that the contextStart, contextLimit, start, and limit
1008 * behave correctly. J474.
1009 */
1010 void TransliteratorTest::TestPositionHandling(void) {
1011 // Array of 3n items
1012 // Each item is <rules>, <input>, <expected output>
1013 const char* DATA[] = {
1014 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1015 "xtat txtb", // pos 0,9,0,9
1016 "xTTaSS TTxUUb",
1017
1018 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1019 "xtat txtb", // pos 2,9,3,8
1020 "xtaSS TTxUUb",
1021
1022 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1023 "xtat txtb", // pos 3,8,3,8
1024 "xtaTT TTxTTb",
1025 };
1026
1027 // Array of 4n positions -- these go with the DATA array
1028 // They are: contextStart, contextLimit, start, limit
1029 int32_t POS[] = {
1030 0, 9, 0, 9,
1031 2, 9, 3, 8,
1032 3, 8, 3, 8,
1033 };
1034
1035 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1036 for (int32_t i=0; i<n; i++) {
1037 UErrorCode status = U_ZERO_ERROR;
1038 UParseError parseError;
1039 Transliterator *t = Transliterator::createFromRules("<ID>",
1040 DATA[3*i], UTRANS_FORWARD, parseError, status);
1041 if (U_FAILURE(status)) {
1042 delete t;
1043 errln("FAIL: RBT constructor");
1044 return;
1045 }
1046 UTransPosition pos;
1047 pos.contextStart= POS[4*i];
1048 pos.contextLimit = POS[4*i+1];
1049 pos.start = POS[4*i+2];
1050 pos.limit = POS[4*i+3];
1051 UnicodeString rsource(DATA[3*i+1]);
1052 t->transliterate(rsource, pos, status);
1053 if (U_FAILURE(status)) {
1054 delete t;
1055 errln("FAIL: transliterate");
1056 return;
1057 }
1058 t->finishTransliteration(rsource, pos);
1059 expectAux(DATA[3*i],
1060 DATA[3*i+1],
1061 rsource,
1062 DATA[3*i+2]);
1063 delete t;
1064 }
1065 }
1066
1067 /**
1068 * Test the Hiragana-Katakana transliterator.
1069 */
1070 void TransliteratorTest::TestHiraganaKatakana(void) {
1071 UParseError parseError;
1072 UErrorCode status = U_ZERO_ERROR;
1073 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1074 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1075 if (hk == 0 || kh == 0) {
1076 errln("FAIL: createInstance failed");
1077 delete hk;
1078 delete kh;
1079 return;
1080 }
1081
1082 // Array of 3n items
1083 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1084 const char* DATA[] = {
1085 "both",
1086 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1087 "\\u30A2\\u30F8\\u30F2\\u30B0",
1088
1089 "kh",
1090 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1091 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1092 };
1093 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1094
1095 for (int32_t i=0; i<DATA_length; i+=3) {
1096 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1097 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1098 switch (*DATA[i]) {
1099 case 0x68: //'h': // Hiragana-Katakana
1100 expect(*hk, h, k);
1101 break;
1102 case 0x6B: //'k': // Katakana-Hiragana
1103 expect(*kh, k, h);
1104 break;
1105 case 0x62: //'b': // both
1106 expect(*hk, h, k);
1107 expect(*kh, k, h);
1108 break;
1109 }
1110 }
1111 delete hk;
1112 delete kh;
1113 }
1114
1115 /**
1116 * Test cloning / copy constructor of RBT.
1117 */
1118 void TransliteratorTest::TestCopyJ476(void) {
1119 // The real test here is what happens when the destructors are
1120 // called. So we let one object get destructed, and check to
1121 // see that its copy still works.
1122 Transliterator *t2 = 0;
1123 {
1124 UParseError parseError;
1125 UErrorCode status = U_ZERO_ERROR;
1126 Transliterator *t1 = Transliterator::createFromRules("t1",
1127 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1128 if (U_FAILURE(status)) {
1129 errln("FAIL: RBT constructor");
1130 return;
1131 }
1132 t2 = t1->clone(); // Call copy constructor under the covers.
1133 expect(*t1, "abcfoofoo", "ABcbar");
1134 delete t1;
1135 }
1136 expect(*t2, "abcfoofoo", "ABcbar");
1137 delete t2;
1138 }
1139
1140 /**
1141 * Test inter-Indic transliterators. These are composed.
1142 * ICU4C Jitterbug 483.
1143 */
1144 void TransliteratorTest::TestInterIndic(void) {
1145 UnicodeString ID("Devanagari-Gujarati", "");
1146 UErrorCode status = U_ZERO_ERROR;
1147 UParseError parseError;
1148 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1149 if (dg == 0) {
1150 errln("FAIL: createInstance(" + ID + ") returned NULL");
1151 return;
1152 }
1153 UnicodeString id = dg->getID();
1154 if (id != ID) {
1155 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1156 }
1157 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1158 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1159 expect(*dg, dev, guj);
1160 delete dg;
1161 }
1162
1163 /**
1164 * Test filter syntax in IDs. (J918)
1165 */
1166 void TransliteratorTest::TestFilterIDs(void) {
1167 // Array of 3n strings:
1168 // <id>, <inverse id>, <input>, <expected output>
1169 const char* DATA[] = {
1170 "[aeiou]Any-Hex", // ID
1171 "[aeiou]Hex-Any", // expected inverse ID
1172 "quizzical", // src
1173 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1174
1175 "[aeiou]Any-Hex;[^5]Hex-Any",
1176 "[^5]Any-Hex;[aeiou]Hex-Any",
1177 "quizzical",
1178 "q\\u0075izzical",
1179
1180 "[abc]Null",
1181 "[abc]Null",
1182 "xyz",
1183 "xyz",
1184 };
1185 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1186
1187 for (int i=0; i<DATA_length; i+=4) {
1188 UnicodeString ID(DATA[i], "");
1189 UnicodeString uID(DATA[i+1], "");
1190 UnicodeString data2(DATA[i+2], "");
1191 UnicodeString data3(DATA[i+3], "");
1192 UParseError parseError;
1193 UErrorCode status = U_ZERO_ERROR;
1194 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1195 if (t == 0) {
1196 errln("FAIL: createInstance(" + ID + ") returned NULL");
1197 return;
1198 }
1199 expect(*t, data2, data3);
1200
1201 // Check the ID
1202 if (ID != t->getID()) {
1203 errln("FAIL: createInstance(" + ID + ").getID() => " +
1204 t->getID());
1205 }
1206
1207 // Check the inverse
1208 Transliterator *u = t->createInverse(status);
1209 if (u == 0) {
1210 errln("FAIL: " + ID + ".createInverse() returned NULL");
1211 } else if (u->getID() != uID) {
1212 errln("FAIL: " + ID + ".createInverse().getID() => " +
1213 u->getID() + ", expected " + uID);
1214 }
1215
1216 delete t;
1217 delete u;
1218 }
1219 }
1220
1221 /**
1222 * Test the case mapping transliterators.
1223 */
1224 void TransliteratorTest::TestCaseMap(void) {
1225 UParseError parseError;
1226 UErrorCode status = U_ZERO_ERROR;
1227 Transliterator* toUpper =
1228 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1229 Transliterator* toLower =
1230 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1231 Transliterator* toTitle =
1232 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1233 if (toUpper==0 || toLower==0 || toTitle==0) {
1234 errln("FAIL: createInstance returned NULL");
1235 delete toUpper;
1236 delete toLower;
1237 delete toTitle;
1238 return;
1239 }
1240
1241 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1242 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1243 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1244 "the quick brown foX jumped over the lazY dogs.");
1245 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1246 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1247
1248 delete toUpper;
1249 delete toLower;
1250 delete toTitle;
1251 }
1252
1253 /**
1254 * Test the name mapping transliterators.
1255 */
1256 void TransliteratorTest::TestNameMap(void) {
1257 UParseError parseError;
1258 UErrorCode status = U_ZERO_ERROR;
1259 Transliterator* uni2name =
1260 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1261 Transliterator* name2uni =
1262 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1263 if (uni2name==0 || name2uni==0) {
1264 errln("FAIL: createInstance returned NULL");
1265 delete uni2name;
1266 delete name2uni;
1267 return;
1268 }
1269
1270 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1271 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1272 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1273 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1274 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1275
1276 delete uni2name;
1277 delete name2uni;
1278
1279 // round trip
1280 Transliterator* t =
1281 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1282 if (t==0) {
1283 errln("FAIL: createInstance returned NULL");
1284 delete t;
1285 return;
1286 }
1287
1288 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1289 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1290 expect(*t, s, s);
1291 delete t;
1292 }
1293
1294 /**
1295 * Test liberalized ID syntax. 1006c
1296 */
1297 void TransliteratorTest::TestLiberalizedID(void) {
1298 // Some test cases have an expected getID() value of NULL. This
1299 // means I have disabled the test case for now. This stuff is
1300 // still under development, and I haven't decided whether to make
1301 // getID() return canonical case yet. It will all get rewritten
1302 // with the move to Source-Target/Variant IDs anyway. [aliu]
1303 const char* DATA[] = {
1304 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1305 " Null ", "Null", "whitespace",
1306 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1307 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1308 };
1309 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1310 UParseError parseError;
1311 UErrorCode status= U_ZERO_ERROR;
1312 for (int32_t i=0; i<DATA_length; i+=3) {
1313 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1314 if (t == 0) {
1315 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1316 " cannot create ID \"" + DATA[i] + "\"");
1317 } else {
1318 UnicodeString exp;
1319 if (DATA[i+1]) {
1320 exp = UnicodeString(DATA[i+1], "");
1321 }
1322 // Don't worry about getID() if the expected char*
1323 // is NULL -- see above.
1324 if (exp.length() == 0 || exp == t->getID()) {
1325 logln(UnicodeString("Ok: ") + DATA[i+2] +
1326 " create ID \"" + DATA[i] + "\" => \"" +
1327 exp + "\"");
1328 } else {
1329 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1330 " create ID \"" + DATA[i] + "\" => \"" +
1331 t->getID() + "\", exp \"" + exp + "\"");
1332 }
1333 delete t;
1334 }
1335 }
1336 }
1337
1338 /* test for Jitterbug 912 */
1339 void TransliteratorTest::TestCreateInstance(){
1340 const char* FORWARD = "F";
1341 const char* REVERSE = "R";
1342 const char* DATA[] = {
1343 // Column 1: id
1344 // Column 2: direction
1345 // Column 3: expected ID, or "" if expect failure
1346 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1347
1348 // JB#2689: bad compound causes crash
1349 "InvalidSource-InvalidTarget", FORWARD, "",
1350 "InvalidSource-InvalidTarget", REVERSE, "",
1351 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1352 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1353 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1354 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1355
1356 NULL
1357 };
1358
1359 for (int32_t i=0; DATA[i]; i+=3) {
1360 UParseError err;
1361 UErrorCode ec = U_ZERO_ERROR;
1362 UnicodeString id(DATA[i]);
1363 UTransDirection dir = (DATA[i+1]==FORWARD)?
1364 UTRANS_FORWARD:UTRANS_REVERSE;
1365 UnicodeString expID(DATA[i+2]);
1366 Transliterator* t =
1367 Transliterator::createInstance(id,dir,err,ec);
1368 UnicodeString newID;
1369 if (t) {
1370 newID = t->getID();
1371 }
1372 UBool ok = (newID == expID);
1373 if (!t) {
1374 newID = u_errorName(ec);
1375 }
1376 if (ok) {
1377 logln((UnicodeString)"Ok: createInstance(" +
1378 id + "," + DATA[i+1] + ") => " + newID);
1379 } else {
1380 errln((UnicodeString)"FAIL: createInstance(" +
1381 id + "," + DATA[i+1] + ") => " + newID +
1382 ", expected " + expID);
1383 }
1384 delete t;
1385 }
1386 }
1387
1388 /**
1389 * Test the normalization transliterator.
1390 */
1391 void TransliteratorTest::TestNormalizationTransliterator() {
1392 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1393 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1394 const char* CANON[] = {
1395 // Input Decomposed Composed
1396 "cat", "cat", "cat" ,
1397 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1398
1399 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1400 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1401
1402 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1403 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1404 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1405
1406 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1407 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1408
1409 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1410 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1411 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1412
1413 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1414 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1415
1416 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1417 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1418
1419 "Henry IV", "Henry IV", "Henry IV" ,
1420 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1421
1422 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1423 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1424 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1425 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1426 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1427
1428 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1429 0 // end
1430 };
1431
1432 const char* COMPAT[] = {
1433 // Input Decomposed Composed
1434 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1435
1436 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1437 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1438
1439 "Henry IV", "Henry IV", "Henry IV" ,
1440 "Henry \\u2163", "Henry IV", "Henry IV" ,
1441
1442 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1443 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1444
1445 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1446 0 // end
1447 };
1448
1449 int32_t i;
1450 UParseError parseError;
1451 UErrorCode status = U_ZERO_ERROR;
1452 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1453 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1454 if (!NFD || !NFC) {
1455 errln("FAIL: createInstance failed");
1456 delete NFD;
1457 delete NFC;
1458 return;
1459 }
1460 for (i=0; CANON[i]; i+=3) {
1461 UnicodeString in = CharsToUnicodeString(CANON[i]);
1462 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1463 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1464 expect(*NFD, in, expd);
1465 expect(*NFC, in, expc);
1466 }
1467 delete NFD;
1468 delete NFC;
1469
1470 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1471 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1472 if (!NFKD || !NFKC) {
1473 errln("FAIL: createInstance failed");
1474 delete NFKD;
1475 delete NFKC;
1476 return;
1477 }
1478 for (i=0; COMPAT[i]; i+=3) {
1479 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1480 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1481 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1482 expect(*NFKD, in, expkd);
1483 expect(*NFKC, in, expkc);
1484 }
1485 delete NFKD;
1486 delete NFKC;
1487
1488 UParseError pe;
1489 status = U_ZERO_ERROR;
1490 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1491 UTRANS_FORWARD,
1492 pe, status);
1493 if (t == 0) {
1494 errln("FAIL: createInstance failed");
1495 }
1496 expect(*t, CharsToUnicodeString("\\u010dx"),
1497 CharsToUnicodeString("c\\u030C"));
1498 delete t;
1499 }
1500
1501 /**
1502 * Test compound RBT rules.
1503 */
1504 void TransliteratorTest::TestCompoundRBT(void) {
1505 // Careful with spacing and ';' here: Phrase this exactly
1506 // as toRules() is going to return it. If toRules() changes
1507 // with regard to spacing or ';', then adjust this string.
1508 UnicodeString rule("::Hex-Any;\n"
1509 "::Any-Lower;\n"
1510 "a > '.A.';\n"
1511 "b > '.B.';\n"
1512 "::[^t]Any-Upper;", "");
1513 UParseError parseError;
1514 UErrorCode status = U_ZERO_ERROR;
1515 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1516 if (t == 0) {
1517 errln("FAIL: createFromRules failed");
1518 return;
1519 }
1520 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1521 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1522 UnicodeString r;
1523 t->toRules(r, TRUE);
1524 if (r == rule) {
1525 logln((UnicodeString)"OK: toRules() => " + r);
1526 } else {
1527 errln((UnicodeString)"FAIL: toRules() => " + r +
1528 ", expected " + rule);
1529 }
1530 delete t;
1531
1532 // Now test toRules
1533 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1534 if (t == 0) {
1535 errln("FAIL: createInstance failed");
1536 return;
1537 }
1538 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1539 t->toRules(r, TRUE);
1540 if (r != exp) {
1541 errln((UnicodeString)"FAIL: toRules() => " + r +
1542 ", expected " + exp);
1543 } else {
1544 logln((UnicodeString)"OK: toRules() => " + r);
1545 }
1546 delete t;
1547
1548 // Round trip the result of toRules
1549 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1550 if (t == 0) {
1551 errln("FAIL: createFromRules #2 failed");
1552 return;
1553 } else {
1554 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1555 }
1556
1557 // Test toRules again
1558 t->toRules(r, TRUE);
1559 if (r != exp) {
1560 errln((UnicodeString)"FAIL: toRules() => " + r +
1561 ", expected " + exp);
1562 } else {
1563 logln((UnicodeString)"OK: toRules() => " + r);
1564 }
1565
1566 delete t;
1567
1568 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1569 // to what the regenerated ID will look like.
1570 UnicodeString id("Upper(Lower);(NFKC)", "");
1571 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1572 if (t == 0) {
1573 errln("FAIL: createInstance #2 failed");
1574 return;
1575 }
1576 if (t->getID() == id) {
1577 logln((UnicodeString)"OK: created " + id);
1578 } else {
1579 errln((UnicodeString)"FAIL: createInstance(" + id +
1580 ").getID() => " + t->getID());
1581 }
1582
1583 Transliterator *u = t->createInverse(status);
1584 if (u == 0) {
1585 errln("FAIL: createInverse failed");
1586 delete t;
1587 return;
1588 }
1589 exp = "NFKC();Lower(Upper)";
1590 if (u->getID() == exp) {
1591 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1592 u->getID());
1593 } else {
1594 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1595 u->getID());
1596 }
1597 delete t;
1598 delete u;
1599 }
1600
1601 /**
1602 * Compound filter semantics were orginially not implemented
1603 * correctly. Originally, each component filter f(i) is replaced by
1604 * f'(i) = f(i) && g, where g is the filter for the compound
1605 * transliterator.
1606 *
1607 * From Mark:
1608 *
1609 * Suppose and I have a transliterator X. Internally X is
1610 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1611 *
1612 * The compound should convert all greek characters (through latin) to
1613 * cyrillic, then lowercase the result. The filter should say "don't
1614 * touch 'A' in the original". But because an intermediate result
1615 * happens to go through "A", the Greek Alpha gets hung up.
1616 */
1617 void TransliteratorTest::TestCompoundFilter(void) {
1618 UParseError parseError;
1619 UErrorCode status = U_ZERO_ERROR;
1620 Transliterator *t = Transliterator::createInstance
1621 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1622 if (t == 0) {
1623 errln("FAIL: createInstance failed");
1624 return;
1625 }
1626 t->adoptFilter(new UnicodeSet("[^A]", status));
1627 if (U_FAILURE(status)) {
1628 errln("FAIL: UnicodeSet ct failed");
1629 delete t;
1630 return;
1631 }
1632
1633 // Only the 'A' at index 1 should remain unchanged
1634 expect(*t,
1635 CharsToUnicodeString("BA\\u039A\\u0391"),
1636 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1637 delete t;
1638 }
1639
1640 void TransliteratorTest::TestRemove(void) {
1641 UParseError parseError;
1642 UErrorCode status = U_ZERO_ERROR;
1643 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1644 if (t == 0) {
1645 errln("FAIL: createInstance failed");
1646 return;
1647 }
1648
1649 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1650
1651 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1652 // duplicating the filter
1653 Transliterator* t2 = t->clone();
1654 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1655
1656 delete t;
1657 delete t2;
1658 }
1659
1660 void TransliteratorTest::TestToRules(void) {
1661 const char* RBT = "rbt";
1662 const char* SET = "set";
1663 static const char* DATA[] = {
1664 RBT,
1665 "$a=\\u4E61; [$a] > A;",
1666 "[\\u4E61] > A;",
1667
1668 RBT,
1669 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1670 "[[:Zs:][:Zl:]]{a} > A;",
1671
1672 SET,
1673 "[[:Zs:][:Zl:]]",
1674 "[[:Zs:][:Zl:]]",
1675
1676 SET,
1677 "[:Ps:]",
1678 "[:Ps:]",
1679
1680 SET,
1681 "[:L:]",
1682 "[:L:]",
1683
1684 SET,
1685 "[[:L:]-[A]]",
1686 "[[:L:]-[A]]",
1687
1688 SET,
1689 "[~[:Lu:][:Ll:]]",
1690 "[~[:Lu:][:Ll:]]",
1691
1692 SET,
1693 "[~[a-z]]",
1694 "[~[a-z]]",
1695
1696 RBT,
1697 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1698 "[^[:Zs:]]{a} > A;",
1699
1700 RBT,
1701 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1702 "[[a-z]-[:Zs:]]{a} > A;",
1703
1704 RBT,
1705 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1706 "[[:Zs:]&[a-z]]{a} > A;",
1707
1708 RBT,
1709 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1710 "[x[:Zs:]]{a} > A;",
1711
1712 RBT,
1713 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1714 "$macron = \\u0304 ;"
1715 "$evowel = [aeiouyAEIOUY] ;"
1716 "$iotasub = \\u0345 ;"
1717 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1718 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1719
1720 RBT,
1721 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1722 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1723 };
1724 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1725
1726 for (int32_t d=0; d < DATA_length; d+=3) {
1727 if (DATA[d] == RBT) {
1728 // Transliterator test
1729 UParseError parseError;
1730 UErrorCode status = U_ZERO_ERROR;
1731 Transliterator *t = Transliterator::createFromRules("ID",
1732 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1733 if (t == 0) {
1734 errln("FAIL: createFromRules failed");
1735 return;
1736 }
1737 UnicodeString rules, escapedRules;
1738 t->toRules(rules, FALSE);
1739 t->toRules(escapedRules, TRUE);
1740 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1741 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1742 if (rules == expRules) {
1743 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1744 " => " + rules);
1745 } else {
1746 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1747 " => " + rules + ", exp " + expRules);
1748 }
1749 if (escapedRules == expEscapedRules) {
1750 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1751 " => " + escapedRules);
1752 } else {
1753 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754 " => " + escapedRules + ", exp " + expEscapedRules);
1755 }
1756 delete t;
1757
1758 } else {
1759 // UnicodeSet test
1760 UErrorCode status = U_ZERO_ERROR;
1761 UnicodeString pat(DATA[d+1], -1, US_INV);
1762 UnicodeString expToPat(DATA[d+2], -1, US_INV);
1763 UnicodeSet set(pat, status);
1764 if (U_FAILURE(status)) {
1765 errln("FAIL: UnicodeSet ct failed");
1766 return;
1767 }
1768 // Adjust spacing etc. as necessary.
1769 UnicodeString toPat;
1770 set.toPattern(toPat);
1771 if (expToPat == toPat) {
1772 logln((UnicodeString)"Ok: " + pat +
1773 " => " + toPat);
1774 } else {
1775 errln((UnicodeString)"FAIL: " + pat +
1776 " => " + prettify(toPat, TRUE) +
1777 ", exp " + prettify(pat, TRUE));
1778 }
1779 }
1780 }
1781 }
1782
1783 void TransliteratorTest::TestContext() {
1784 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1785 expect("de > x; {d}e > y;",
1786 "de",
1787 "ye",
1788 &pos);
1789
1790 expect("ab{c} > z;",
1791 "xadabdabcy",
1792 "xadabdabzy");
1793 }
1794
1795 void TransliteratorTest::TestSupplemental() {
1796
1797 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1798 "a > $a; $s > i;"),
1799 CharsToUnicodeString("ab\\U0001030Fx"),
1800 CharsToUnicodeString("\\U00010300bix"));
1801
1802 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1803 "$b=[A-Z\\U00010400-\\U0001044D];"
1804 "($a)($b) > $2 $1;"),
1805 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1806 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1807
1808 // k|ax\\U00010300xm
1809
1810 // k|a\\U00010400\\U00010300xm
1811 // ky|\\U00010400\\U00010300xm
1812 // ky\\U00010400|\\U00010300xm
1813
1814 // ky\\U00010400|\\U00010300\\U00010400m
1815 // ky\\U00010400y|\\U00010400m
1816 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1817 "$a {x} > | @ \\U00010400;"
1818 "{$a} [^\\u0000-\\uFFFF] > y;"),
1819 CharsToUnicodeString("kax\\U00010300xm"),
1820 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1821
1822 expectT("Any-Name",
1823 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1824 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1825
1826 expectT("Any-Hex/Unicode",
1827 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1828 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1829
1830 expectT("Any-Hex/C",
1831 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1832 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1833
1834 expectT("Any-Hex/Perl",
1835 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1837
1838 expectT("Any-Hex/Java",
1839 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1841
1842 expectT("Any-Hex/XML",
1843 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844 "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1845
1846 expectT("Any-Hex/XML10",
1847 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848 "&#66352;&#1113856;&#917601;&#160;");
1849
1850 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1851 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1853 }
1854
1855 void TransliteratorTest::TestQuantifier() {
1856
1857 // Make sure @ in a quantified anteContext works
1858 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1859 "AAAAAb",
1860 "aaa(aac)");
1861
1862 // Make sure @ in a quantified postContext works
1863 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1864 "baaaaa",
1865 "caa(aaa)");
1866
1867 // Make sure @ in a quantified postContext with seg ref works
1868 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1869 "baaaaa",
1870 "baa(aaa)");
1871
1872 // Make sure @ past ante context doesn't enter ante context
1873 UTransPosition pos = {0, 5, 3, 5};
1874 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1875 "xxxab",
1876 "xxx(ac)",
1877 &pos);
1878
1879 // Make sure @ past post context doesn't pass limit
1880 UTransPosition pos2 = {0, 4, 0, 2};
1881 expect("{b} a+ > c @@ |; x > y; a > A;",
1882 "baxx",
1883 "caxx",
1884 &pos2);
1885
1886 // Make sure @ past post context doesn't enter post context
1887 expect("{b} a+ > c @@ |; x > y; a > A;",
1888 "baxx",
1889 "cayy");
1890
1891 expect("(ab)? c > d;",
1892 "c abc ababc",
1893 "d d abd");
1894
1895 // NOTE: The (ab)+ when referenced just yields a single "ab",
1896 // not the full sequence of them. This accords with perl behavior.
1897 expect("(ab)+ {x} > '(' $1 ')';",
1898 "x abx ababxy",
1899 "x ab(ab) abab(ab)y");
1900
1901 expect("b+ > x;",
1902 "ac abc abbc abbbc",
1903 "ac axc axc axc");
1904
1905 expect("[abc]+ > x;",
1906 "qac abrc abbcs abtbbc",
1907 "qx xrx xs xtx");
1908
1909 expect("q{(ab)+} > x;",
1910 "qa qab qaba qababc qaba",
1911 "qa qx qxa qxc qxa");
1912
1913 expect("q(ab)* > x;",
1914 "qa qab qaba qababc",
1915 "xa x xa xc");
1916
1917 // NOTE: The (ab)+ when referenced just yields a single "ab",
1918 // not the full sequence of them. This accords with perl behavior.
1919 expect("q(ab)* > '(' $1 ')';",
1920 "qa qab qaba qababc",
1921 "()a (ab) (ab)a (ab)c");
1922
1923 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1924 // quoted string
1925 expect("'ab'+ > x;",
1926 "bb ab ababb",
1927 "bb x xb");
1928
1929 // $foo+ and $foo* -- the quantifier should apply to the entire
1930 // variable reference
1931 expect("$var = ab; $var+ > x;",
1932 "bb ab ababb",
1933 "bb x xb");
1934 }
1935
1936 class TestTrans : public Transliterator {
1937 public:
1938 TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1939 }
1940 virtual Transliterator* clone(void) const {
1941 return new TestTrans(getID());
1942 }
1943 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1944 UBool /*isIncremental*/) const
1945 {
1946 offsets.start = offsets.limit;
1947 }
1948 virtual UClassID getDynamicClassID() const;
1949 static UClassID U_EXPORT2 getStaticClassID();
1950 };
1951 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1952
1953 /**
1954 * Test Source-Target/Variant.
1955 */
1956 void TransliteratorTest::TestSTV(void) {
1957 int32_t ns = Transliterator::countAvailableSources();
1958 if (ns < 0 || ns > 255) {
1959 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1960 return;
1961 }
1962 int32_t i, j;
1963 for (i=0; i<ns; ++i) {
1964 UnicodeString source;
1965 Transliterator::getAvailableSource(i, source);
1966 logln((UnicodeString)"" + i + ": " + source);
1967 if (source.length() == 0) {
1968 errln("FAIL: empty source");
1969 continue;
1970 }
1971 int32_t nt = Transliterator::countAvailableTargets(source);
1972 if (nt < 0 || nt > 255) {
1973 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1974 continue;
1975 }
1976 for (int32_t j=0; j<nt; ++j) {
1977 UnicodeString target;
1978 Transliterator::getAvailableTarget(j, source, target);
1979 logln((UnicodeString)" " + j + ": " + target);
1980 if (target.length() == 0) {
1981 errln("FAIL: empty target");
1982 continue;
1983 }
1984 int32_t nv = Transliterator::countAvailableVariants(source, target);
1985 if (nv < 0 || nv > 255) {
1986 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1987 continue;
1988 }
1989 for (int32_t k=0; k<nv; ++k) {
1990 UnicodeString variant;
1991 Transliterator::getAvailableVariant(k, source, target, variant);
1992 if (variant.length() == 0) {
1993 logln((UnicodeString)" " + k + ": <empty>");
1994 } else {
1995 logln((UnicodeString)" " + k + ": " + variant);
1996 }
1997 }
1998 }
1999 }
2000
2001 // Test registration
2002 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2003 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2004 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2005 for (i=0; i<3; ++i) {
2006 Transliterator *t = new TestTrans(IDS[i]);
2007 if (t == 0) {
2008 errln("FAIL: out of memory");
2009 return;
2010 }
2011 if (t->getID() != IDS[i]) {
2012 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2013 delete t;
2014 return;
2015 }
2016 Transliterator::registerInstance(t);
2017 UErrorCode status = U_ZERO_ERROR;
2018 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2019 if (t == NULL) {
2020 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2021 IDS[i]);
2022 } else {
2023 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2024 IDS[i]);
2025 delete t;
2026 }
2027 Transliterator::unregister(IDS[i]);
2028 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2029 if (t != NULL) {
2030 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2031 IDS[i]);
2032 delete t;
2033 }
2034 }
2035
2036 // Make sure getAvailable API reflects removal
2037 int32_t n = Transliterator::countAvailableIDs();
2038 for (i=0; i<n; ++i) {
2039 UnicodeString id = Transliterator::getAvailableID(i);
2040 for (j=0; j<3; ++j) {
2041 if (id.caseCompare(FULL_IDS[j],0)==0) {
2042 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2043 }
2044 }
2045 }
2046 n = Transliterator::countAvailableTargets("Any");
2047 for (i=0; i<n; ++i) {
2048 UnicodeString t;
2049 Transliterator::getAvailableTarget(i, "Any", t);
2050 if (t.caseCompare(IDS[0],0)==0) {
2051 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2052 }
2053 }
2054 n = Transliterator::countAvailableSources();
2055 for (i=0; i<n; ++i) {
2056 UnicodeString s;
2057 Transliterator::getAvailableSource(i, s);
2058 for (j=0; j<3; ++j) {
2059 if (SOURCES[j] == NULL) continue;
2060 if (s.caseCompare(SOURCES[j],0)==0) {
2061 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2062 }
2063 }
2064 }
2065 }
2066
2067 /**
2068 * Test inverse of Greek-Latin; Title()
2069 */
2070 void TransliteratorTest::TestCompoundInverse(void) {
2071 UParseError parseError;
2072 UErrorCode status = U_ZERO_ERROR;
2073 Transliterator *t = Transliterator::createInstance
2074 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2075 if (t == 0) {
2076 errln("FAIL: createInstance");
2077 return;
2078 }
2079 UnicodeString exp("(Title);Latin-Greek");
2080 if (t->getID() == exp) {
2081 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2082 t->getID());
2083 } else {
2084 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2085 t->getID() + "\", expected \"" + exp + "\"");
2086 }
2087 delete t;
2088 }
2089
2090 /**
2091 * Test NFD chaining with RBT
2092 */
2093 void TransliteratorTest::TestNFDChainRBT() {
2094 UParseError pe;
2095 UErrorCode ec = U_ZERO_ERROR;
2096 Transliterator* t = Transliterator::createFromRules(
2097 "TEST", "::NFD; aa > Q; a > q;",
2098 UTRANS_FORWARD, pe, ec);
2099 if (t == NULL || U_FAILURE(ec)) {
2100 errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2101 return;
2102 }
2103 expect(*t, "aa", "Q");
2104 delete t;
2105
2106 // TEMPORARY TESTS -- BEING DEBUGGED
2107 //=- UnicodeString s, s2;
2108 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2109 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2110 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2111 //=- expect(*t, s, s2);
2112 //=- delete t;
2113 //=-
2114 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2115 //=- expect(*t, s2, s);
2116 //=- delete t;
2117 //=-
2118 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2119 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=- expect(*t, s, s);
2121 //=- delete t;
2122
2123 // const char* source[] = {
2124 // /*
2125 // "\\u015Br\\u012Bmad",
2126 // "bhagavadg\\u012Bt\\u0101",
2127 // "adhy\\u0101ya",
2128 // "arjuna",
2129 // "vi\\u1E63\\u0101da",
2130 // "y\\u014Dga",
2131 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2132 // "uv\\u0101cr\\u0325",
2133 // */
2134 // "rmk\\u1E63\\u0113t",
2135 // //"dharmak\\u1E63\\u0113tr\\u0113",
2136 // /*
2137 // "kuruk\\u1E63\\u0113tr\\u0113",
2138 // "samav\\u0113t\\u0101",
2139 // "yuyutsava-\\u1E25",
2140 // "m\\u0101mak\\u0101-\\u1E25",
2141 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2142 // "kimakurvata",
2143 // "san\\u0304java",
2144 // */
2145 //
2146 // 0
2147 // };
2148 // const char* expected[] = {
2149 // /*
2150 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2151 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2152 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2153 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2154 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2155 // "\\u092f\\u094b\\u0917",
2156 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2157 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2158 // */
2159 // "\\u0927",
2160 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2161 // /*
2162 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2163 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2164 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2165 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2166 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2167 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2168 // "\\u0938\\u0902\\u091c\\u0935",
2169 // */
2170 // 0
2171 // };
2172 // UErrorCode status = U_ZERO_ERROR;
2173 // UParseError parseError;
2174 // UnicodeString message;
2175 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2176 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2177 // if(U_FAILURE(status)){
2178 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2179 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2180 // delete latinToDevToLatin;
2181 // delete devToLatinToDev;
2182 // return;
2183 // }
2184 // UnicodeString gotResult;
2185 // for(int i= 0; source[i] != 0; i++){
2186 // gotResult = source[i];
2187 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2188 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2189 // }
2190 // delete latinToDevToLatin;
2191 // delete devToLatinToDev;
2192 }
2193
2194 /**
2195 * Inverse of "Null" should be "Null". (J21)
2196 */
2197 void TransliteratorTest::TestNullInverse() {
2198 UParseError pe;
2199 UErrorCode ec = U_ZERO_ERROR;
2200 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2201 if (t == 0 || U_FAILURE(ec)) {
2202 errln("FAIL: createInstance");
2203 return;
2204 }
2205 Transliterator *u = t->createInverse(ec);
2206 if (u == 0 || U_FAILURE(ec)) {
2207 errln("FAIL: createInverse");
2208 delete t;
2209 return;
2210 }
2211 if (u->getID() != "Null") {
2212 errln("FAIL: Inverse of Null should be Null");
2213 }
2214 delete t;
2215 delete u;
2216 }
2217
2218 /**
2219 * Check ID of inverse of alias. (J22)
2220 */
2221 void TransliteratorTest::TestAliasInverseID() {
2222 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2223 UParseError pe;
2224 UErrorCode ec = U_ZERO_ERROR;
2225 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2226 if (t == 0 || U_FAILURE(ec)) {
2227 errln("FAIL: createInstance");
2228 return;
2229 }
2230 Transliterator *u = t->createInverse(ec);
2231 if (u == 0 || U_FAILURE(ec)) {
2232 errln("FAIL: createInverse");
2233 delete t;
2234 return;
2235 }
2236 UnicodeString exp = "Hangul-Latin";
2237 UnicodeString got = u->getID();
2238 if (got != exp) {
2239 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2240 ", expected " + exp);
2241 }
2242 delete t;
2243 delete u;
2244 }
2245
2246 /**
2247 * Test IDs of inverses of compound transliterators. (J20)
2248 */
2249 void TransliteratorTest::TestCompoundInverseID() {
2250 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2251 UParseError pe;
2252 UErrorCode ec = U_ZERO_ERROR;
2253 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2254 if (t == 0 || U_FAILURE(ec)) {
2255 errln("FAIL: createInstance");
2256 return;
2257 }
2258 Transliterator *u = t->createInverse(ec);
2259 if (u == 0 || U_FAILURE(ec)) {
2260 errln("FAIL: createInverse");
2261 delete t;
2262 return;
2263 }
2264 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2265 UnicodeString got = u->getID();
2266 if (got != exp) {
2267 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2268 ", expected " + exp);
2269 }
2270 delete t;
2271 delete u;
2272 }
2273
2274 /**
2275 * Test undefined variable.
2276
2277 */
2278 void TransliteratorTest::TestUndefinedVariable() {
2279 UnicodeString rule = "$initial } a <> \\u1161;";
2280 UParseError pe;
2281 UErrorCode ec = U_ZERO_ERROR;
2282 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2283 delete t;
2284 if (U_FAILURE(ec)) {
2285 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2286 u_errorName(ec));
2287 return;
2288 }
2289 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2290 u_errorName(ec));
2291 }
2292
2293 /**
2294 * Test empty context.
2295 */
2296 void TransliteratorTest::TestEmptyContext() {
2297 expect(" { a } > b;", "xay a ", "xby b ");
2298 }
2299
2300 /**
2301 * Test compound filter ID syntax
2302 */
2303 void TransliteratorTest::TestCompoundFilterID(void) {
2304 static const char* DATA[] = {
2305 // Col. 1 = ID or rule set (latter must start with #)
2306
2307 // = columns > 1 are null if expect col. 1 to be illegal =
2308
2309 // Col. 2 = direction, "F..." or "R..."
2310 // Col. 3 = source string
2311 // Col. 4 = exp result
2312
2313 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2314 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2315 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2316 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2317 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2318 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2319 NULL,
2320 };
2321
2322 for (int32_t i=0; DATA[i]; i+=4) {
2323 UnicodeString id = CharsToUnicodeString(DATA[i]);
2324 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2325 UTRANS_REVERSE : UTRANS_FORWARD;
2326 UnicodeString source;
2327 UnicodeString exp;
2328 if (DATA[i+2] != NULL) {
2329 source = CharsToUnicodeString(DATA[i+2]);
2330 exp = CharsToUnicodeString(DATA[i+3]);
2331 }
2332 UBool expOk = (DATA[i+1] != NULL);
2333 Transliterator* t = NULL;
2334 UParseError pe;
2335 UErrorCode ec = U_ZERO_ERROR;
2336 if (id.charAt(0) == 0x23/*#*/) {
2337 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2338 } else {
2339 t = Transliterator::createInstance(id, direction, pe, ec);
2340 }
2341 UBool ok = (t != NULL && U_SUCCESS(ec));
2342 UnicodeString transID;
2343 if (t!=0) {
2344 transID = t->getID();
2345 }
2346 else {
2347 transID = UnicodeString("NULL", "");
2348 }
2349 if (ok == expOk) {
2350 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2351 u_errorName(ec));
2352 if (source.length() != 0) {
2353 expect(*t, source, exp);
2354 }
2355 delete t;
2356 } else {
2357 errln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2358 u_errorName(ec));
2359 }
2360 }
2361 }
2362
2363 /**
2364 * Test new property set syntax
2365 */
2366 void TransliteratorTest::TestPropertySet() {
2367 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2368 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2369 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2370 }
2371
2372 /**
2373 * Test various failure points of the new 2.0 engine.
2374 */
2375 void TransliteratorTest::TestNewEngine() {
2376 UParseError pe;
2377 UErrorCode ec = U_ZERO_ERROR;
2378 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2379 if (t == 0 || U_FAILURE(ec)) {
2380 errln("FAIL: createInstance Latin-Hiragana");
2381 return;
2382 }
2383 // Katakana should be untouched
2384 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2385 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2386
2387 delete t;
2388
2389 #if 1
2390 // This test will only work if Transliterator.ROLLBACK is
2391 // true. Otherwise, this test will fail, revealing a
2392 // limitation of global filters in incremental mode.
2393 Transliterator *a =
2394 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2395 Transliterator *A =
2396 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2397 if (U_FAILURE(ec)) {
2398 delete a;
2399 delete A;
2400 return;
2401 }
2402
2403 Transliterator* array[3];
2404 array[0] = a;
2405 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2406 array[2] = A;
2407 if (U_FAILURE(ec)) {
2408 errln("FAIL: createInstance NFD");
2409 delete a;
2410 delete A;
2411 delete array[1];
2412 return;
2413 }
2414
2415 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2416 if (U_FAILURE(ec)) {
2417 errln("FAIL: UnicodeSet constructor");
2418 delete a;
2419 delete A;
2420 delete array[1];
2421 delete t;
2422 return;
2423 }
2424
2425 expect(*t, "aAaA", "bAbA");
2426
2427 assertTrue("countElements", t->countElements() == 3);
2428 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2429 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2430 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2431 assertSuccess("getElement", ec);
2432
2433 delete a;
2434 delete A;
2435 delete array[1];
2436 delete t;
2437 #endif
2438
2439 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2440 "a",
2441 "ax");
2442
2443 UnicodeString gr = CharsToUnicodeString(
2444 "$ddot = \\u0308 ;"
2445 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2446 "$rough = \\u0314 ;"
2447 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2448 "\\u03b1 <> a ;"
2449 "$rough <> h ;");
2450
2451 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2452 }
2453
2454 /**
2455 * Test quantified segment behavior. We want:
2456 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2457 */
2458 void TransliteratorTest::TestQuantifiedSegment(void) {
2459 // The normal case
2460 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2461
2462 // The tricky case; the quantifier is around the segment
2463 expect("([abc])+ > x $1 x;", "cba", "xax");
2464
2465 // Tricky case in reverse direction
2466 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2467
2468 // Check post-context segment
2469 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2470
2471 // Test toRule/toPattern for non-quantified segment.
2472 // Careful with spacing here.
2473 UnicodeString r("([a-c]){q} > x $1 x;");
2474 UParseError pe;
2475 UErrorCode ec = U_ZERO_ERROR;
2476 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2477 if (U_FAILURE(ec)) {
2478 errln("FAIL: createFromRules");
2479 delete t;
2480 return;
2481 }
2482 UnicodeString rr;
2483 t->toRules(rr, TRUE);
2484 if (r != rr) {
2485 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2486 } else {
2487 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2488 }
2489 delete t;
2490
2491 // Test toRule/toPattern for quantified segment.
2492 // Careful with spacing here.
2493 r = "([a-c])+{q} > x $1 x;";
2494 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2495 if (U_FAILURE(ec)) {
2496 errln("FAIL: createFromRules");
2497 delete t;
2498 return;
2499 }
2500 t->toRules(rr, TRUE);
2501 if (r != rr) {
2502 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2503 } else {
2504 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2505 }
2506 delete t;
2507 }
2508
2509 //======================================================================
2510 // Ram's tests
2511 //======================================================================
2512 void TransliteratorTest::TestDevanagariLatinRT(){
2513 const int MAX_LEN= 52;
2514 const char* const source[MAX_LEN] = {
2515 "bh\\u0101rata",
2516 "kra",
2517 "k\\u1E63a",
2518 "khra",
2519 "gra",
2520 "\\u1E45ra",
2521 "cra",
2522 "chra",
2523 "j\\u00F1a",
2524 "jhra",
2525 "\\u00F1ra",
2526 "\\u1E6Dya",
2527 "\\u1E6Dhra",
2528 "\\u1E0Dya",
2529 //"r\\u0323ya", // \u095c is not valid in Devanagari
2530 "\\u1E0Dhya",
2531 "\\u1E5Bhra",
2532 "\\u1E47ra",
2533 "tta",
2534 "thra",
2535 "dda",
2536 "dhra",
2537 "nna",
2538 "pra",
2539 "phra",
2540 "bra",
2541 "bhra",
2542 "mra",
2543 "\\u1E49ra",
2544 //"l\\u0331ra",
2545 "yra",
2546 "\\u1E8Fra",
2547 //"l-",
2548 "vra",
2549 "\\u015Bra",
2550 "\\u1E63ra",
2551 "sra",
2552 "hma",
2553 "\\u1E6D\\u1E6Da",
2554 "\\u1E6D\\u1E6Dha",
2555 "\\u1E6Dh\\u1E6Dha",
2556 "\\u1E0D\\u1E0Da",
2557 "\\u1E0D\\u1E0Dha",
2558 "\\u1E6Dya",
2559 "\\u1E6Dhya",
2560 "\\u1E0Dya",
2561 "\\u1E0Dhya",
2562 // Not roundtrippable --
2563 // \\u0939\\u094d\\u094d\\u092E - hma
2564 // \\u0939\\u094d\\u092E - hma
2565 // CharsToUnicodeString("hma"),
2566 "hya",
2567 "\\u015Br\\u0325",
2568 "\\u015Bca",
2569 "\\u0115",
2570 "san\\u0304j\\u012Bb s\\u0113nagupta",
2571 "\\u0101nand vaddir\\u0101ju",
2572 "\\u0101",
2573 "a"
2574 };
2575 const char* const expected[MAX_LEN] = {
2576 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2577 "\\u0915\\u094D\\u0930", /* kra */
2578 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2579 "\\u0916\\u094D\\u0930", /* khra */
2580 "\\u0917\\u094D\\u0930", /* gra */
2581 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2582 "\\u091A\\u094D\\u0930", /* cra */
2583 "\\u091B\\u094D\\u0930", /* chra */
2584 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2585 "\\u091D\\u094D\\u0930", /* jhra */
2586 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2587 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2588 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2589 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2590 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2591 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2592 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2593 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2594 "\\u0924\\u094D\\u0924", /* tta */
2595 "\\u0925\\u094D\\u0930", /* thra */
2596 "\\u0926\\u094D\\u0926", /* dda */
2597 "\\u0927\\u094D\\u0930", /* dhra */
2598 "\\u0928\\u094D\\u0928", /* nna */
2599 "\\u092A\\u094D\\u0930", /* pra */
2600 "\\u092B\\u094D\\u0930", /* phra */
2601 "\\u092C\\u094D\\u0930", /* bra */
2602 "\\u092D\\u094D\\u0930", /* bhra */
2603 "\\u092E\\u094D\\u0930", /* mra */
2604 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2605 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2606 "\\u092F\\u094D\\u0930", /* yra */
2607 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2608 //"l-",
2609 "\\u0935\\u094D\\u0930", /* vra */
2610 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2611 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2612 "\\u0938\\u094D\\u0930", /* sra */
2613 "\\u0939\\u094d\\u092E", /* hma */
2614 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2615 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2616 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2617 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2618 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2619 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2620 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2621 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2622 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2623 // "hma", /* hma */
2624 "\\u0939\\u094D\\u092F", /* hya */
2625 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2626 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2627 "\\u090d", /* e\\u0306 */
2628 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2629 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2630 "\\u0906",
2631 "\\u0905",
2632 };
2633 UErrorCode status = U_ZERO_ERROR;
2634 UParseError parseError;
2635 UnicodeString message;
2636 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2637 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2638 if(U_FAILURE(status)){
2639 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2640 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2641 return;
2642 }
2643 UnicodeString gotResult;
2644 for(int i= 0; i<MAX_LEN; i++){
2645 gotResult = source[i];
2646 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2647 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2648 }
2649 delete latinToDev;
2650 delete devToLatin;
2651 }
2652
2653 void TransliteratorTest::TestTeluguLatinRT(){
2654 const int MAX_LEN=10;
2655 const char* const source[MAX_LEN] = {
2656 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2657 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2658 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2659 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2660 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2661 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2662 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2663 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2664 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2665 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2666 };
2667
2668 const char* const expected[MAX_LEN] = {
2669 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2670 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2671 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2672 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2673 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2674 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2675 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2676 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2677 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2678 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2679 };
2680
2681 UErrorCode status = U_ZERO_ERROR;
2682 UParseError parseError;
2683 UnicodeString message;
2684 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2685 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2686 if(U_FAILURE(status)){
2687 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2688 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2689 return;
2690 }
2691 UnicodeString gotResult;
2692 for(int i= 0; i<MAX_LEN; i++){
2693 gotResult = source[i];
2694 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2695 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2696 }
2697 delete latinToDev;
2698 delete devToLatin;
2699 }
2700
2701 void TransliteratorTest::TestSanskritLatinRT(){
2702 const int MAX_LEN =16;
2703 const char* const source[MAX_LEN] = {
2704 "rmk\\u1E63\\u0113t",
2705 "\\u015Br\\u012Bmad",
2706 "bhagavadg\\u012Bt\\u0101",
2707 "adhy\\u0101ya",
2708 "arjuna",
2709 "vi\\u1E63\\u0101da",
2710 "y\\u014Dga",
2711 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2712 "uv\\u0101cr\\u0325",
2713 "dharmak\\u1E63\\u0113tr\\u0113",
2714 "kuruk\\u1E63\\u0113tr\\u0113",
2715 "samav\\u0113t\\u0101",
2716 "yuyutsava\\u1E25",
2717 "m\\u0101mak\\u0101\\u1E25",
2718 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2719 "kimakurvata",
2720 "san\\u0304java",
2721 };
2722 const char* const expected[MAX_LEN] = {
2723 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2724 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2725 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2726 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2727 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2728 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2729 "\\u092f\\u094b\\u0917",
2730 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2731 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2732 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2733 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2734 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2735 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2736 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2737 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2738 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2739 "\\u0938\\u0902\\u091c\\u0935",
2740 };
2741 UErrorCode status = U_ZERO_ERROR;
2742 UParseError parseError;
2743 UnicodeString message;
2744 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2745 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2746 if(U_FAILURE(status)){
2747 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2748 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2749 return;
2750 }
2751 UnicodeString gotResult;
2752 for(int i= 0; i<MAX_LEN; i++){
2753 gotResult = source[i];
2754 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2755 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2756 }
2757 delete latinToDev;
2758 delete devToLatin;
2759 }
2760
2761
2762 void TransliteratorTest::TestCompoundLatinRT(){
2763 const char* const source[] = {
2764 "rmk\\u1E63\\u0113t",
2765 "\\u015Br\\u012Bmad",
2766 "bhagavadg\\u012Bt\\u0101",
2767 "adhy\\u0101ya",
2768 "arjuna",
2769 "vi\\u1E63\\u0101da",
2770 "y\\u014Dga",
2771 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2772 "uv\\u0101cr\\u0325",
2773 "dharmak\\u1E63\\u0113tr\\u0113",
2774 "kuruk\\u1E63\\u0113tr\\u0113",
2775 "samav\\u0113t\\u0101",
2776 "yuyutsava\\u1E25",
2777 "m\\u0101mak\\u0101\\u1E25",
2778 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2779 "kimakurvata",
2780 "san\\u0304java"
2781 };
2782 const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2783 const char* const expected[MAX_LEN] = {
2784 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2785 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2786 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2787 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2788 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2789 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2790 "\\u092f\\u094b\\u0917",
2791 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2792 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2793 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2794 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2795 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2796 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2797 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2798 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2799 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2800 "\\u0938\\u0902\\u091c\\u0935"
2801 };
2802 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2803 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2804 return;
2805 }
2806
2807 UErrorCode status = U_ZERO_ERROR;
2808 UParseError parseError;
2809 UnicodeString message;
2810 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2811 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2812 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2813 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2814
2815 if(U_FAILURE(status)){
2816 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2817 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2818 return;
2819 }
2820 UnicodeString gotResult;
2821 for(int i= 0; i<MAX_LEN; i++){
2822 gotResult = source[i];
2823 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2824 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2825 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2826
2827 }
2828 delete(latinToDevToLatin);
2829 delete(devToLatinToDev);
2830 delete(devToTelToDev);
2831 delete(latinToTelToLatin);
2832 }
2833
2834 /**
2835 * Test Gurmukhi-Devanagari Tippi and Bindi
2836 */
2837 void TransliteratorTest::TestGurmukhiDevanagari(){
2838 // the rule says:
2839 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2840 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2841 UErrorCode status = U_ZERO_ERROR;
2842 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2843 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2844 UParseError parseError;
2845
2846 UnicodeSetIterator vIter(vowel);
2847 UnicodeSetIterator nvIter(non_vowel);
2848 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2849 if(U_FAILURE(status)) {
2850 errln("Error creating transliterator %s", u_errorName(status));
2851 delete trans;
2852 return;
2853 }
2854 UnicodeString src (" \\u0902", -1, US_INV);
2855 UnicodeString expected(" \\u0A02", -1, US_INV);
2856 src = src.unescape();
2857 expected= expected.unescape();
2858
2859 while(vIter.next()){
2860 src.setCharAt(0,(UChar) vIter.getCodepoint());
2861 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2862 expect(*trans,src,expected);
2863 }
2864
2865 expected.setCharAt(1,0x0A70);
2866 while(nvIter.next()){
2867 //src.setCharAt(0,(char) nvIter.codepoint);
2868 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2869 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2870 expect(*trans,src,expected);
2871 }
2872 delete trans;
2873 }
2874 /**
2875 * Test instantiation from a locale.
2876 */
2877 void TransliteratorTest::TestLocaleInstantiation(void) {
2878 UParseError pe;
2879 UErrorCode ec = U_ZERO_ERROR;
2880 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2881 if (U_FAILURE(ec)) {
2882 errln("FAIL: createInstance(ru_RU-Latin)");
2883 delete t;
2884 return;
2885 }
2886 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2887 delete t;
2888
2889 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2890 if (U_FAILURE(ec)) {
2891 errln("FAIL: createInstance(en-el)");
2892 delete t;
2893 return;
2894 }
2895 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2896 delete t;
2897 }
2898
2899 /**
2900 * Test title case handling of accent (should ignore accents)
2901 */
2902 void TransliteratorTest::TestTitleAccents(void) {
2903 UParseError pe;
2904 UErrorCode ec = U_ZERO_ERROR;
2905 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2906 if (U_FAILURE(ec)) {
2907 errln("FAIL: createInstance(Title)");
2908 delete t;
2909 return;
2910 }
2911 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2912 delete t;
2913 }
2914
2915 /**
2916 * Basic test of a locale resource based rule.
2917 */
2918 void TransliteratorTest::TestLocaleResource() {
2919 const char* DATA[] = {
2920 // id from to
2921 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2922 "Latin-el", "b", "\\u03bc\\u03c0",
2923 "Latin-Greek", "b", "\\u03B2",
2924 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2925 "el-Latin", "\\u03B2", "v",
2926 "Greek-Latin", "\\u03B2", "b",
2927 };
2928 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2929 for (int32_t i=0; i<DATA_length; i+=3) {
2930 UParseError pe;
2931 UErrorCode ec = U_ZERO_ERROR;
2932 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2933 if (U_FAILURE(ec)) {
2934 errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");
2935 delete t;
2936 continue;
2937 }
2938 expect(*t, CharsToUnicodeString(DATA[i+1]),
2939 CharsToUnicodeString(DATA[i+2]));
2940 delete t;
2941 }
2942 }
2943
2944 /**
2945 * Make sure parse errors reference the right line.
2946 */
2947 void TransliteratorTest::TestParseError() {
2948 static const char* rule =
2949 "a > b;\n"
2950 "# more stuff\n"
2951 "d << b;";
2952 UErrorCode ec = U_ZERO_ERROR;
2953 UParseError pe;
2954 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2955 delete t;
2956 if (U_FAILURE(ec)) {
2957 UnicodeString err(pe.preContext);
2958 err.append((UChar)124/*|*/).append(pe.postContext);
2959 if (err.indexOf("d << b") >= 0) {
2960 logln("Ok: " + err);
2961 } else {
2962 errln("FAIL: " + err);
2963 }
2964 }
2965 else {
2966 errln("FAIL: no syntax error");
2967 }
2968 static const char* maskingRule =
2969 "a>x;\n"
2970 "# more stuff\n"
2971 "ab>y;";
2972 ec = U_ZERO_ERROR;
2973 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2974 if (ec != U_RULE_MASK_ERROR) {
2975 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2976 }
2977 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2978 errln("FAIL: did not get expected precontext");
2979 }
2980 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2981 errln("FAIL: did not get expected postcontext");
2982 }
2983 }
2984
2985 /**
2986 * Make sure sets on output are disallowed.
2987 */
2988 void TransliteratorTest::TestOutputSet() {
2989 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2990 UErrorCode ec = U_ZERO_ERROR;
2991 UParseError pe;
2992 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2993 delete t;
2994 if (U_FAILURE(ec)) {
2995 UnicodeString err(pe.preContext);
2996 err.append((UChar)124/*|*/).append(pe.postContext);
2997 logln("Ok: " + err);
2998 return;
2999 }
3000 errln("FAIL: No syntax error");
3001 }
3002
3003 /**
3004 * Test the use variable range pragma, making sure that use of
3005 * variable range characters is detected and flagged as an error.
3006 */
3007 void TransliteratorTest::TestVariableRange() {
3008 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3009 UErrorCode ec = U_ZERO_ERROR;
3010 UParseError pe;
3011 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3012 delete t;
3013 if (U_FAILURE(ec)) {
3014 UnicodeString err(pe.preContext);
3015 err.append((UChar)124/*|*/).append(pe.postContext);
3016 logln("Ok: " + err);
3017 return;
3018 }
3019 errln("FAIL: No syntax error");
3020 }
3021
3022 /**
3023 * Test invalid post context error handling
3024 */
3025 void TransliteratorTest::TestInvalidPostContext() {
3026 UnicodeString rule = "a}b{c>d;";
3027 UErrorCode ec = U_ZERO_ERROR;
3028 UParseError pe;
3029 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3030 delete t;
3031 if (U_FAILURE(ec)) {
3032 UnicodeString err(pe.preContext);
3033 err.append((UChar)124/*|*/).append(pe.postContext);
3034 if (err.indexOf("a}b{c") >= 0) {
3035 logln("Ok: " + err);
3036 } else {
3037 errln("FAIL: " + err);
3038 }
3039 return;
3040 }
3041 errln("FAIL: No syntax error");
3042 }
3043
3044 /**
3045 * Test ID form variants
3046 */
3047 void TransliteratorTest::TestIDForms() {
3048 const char* DATA[] = {
3049 "NFC", NULL, "NFD",
3050 "nfd", NULL, "NFC", // make sure case is ignored
3051 "Any-NFKD", NULL, "Any-NFKC",
3052 "Null", NULL, "Null",
3053 "-nfkc", "nfkc", "NFKD",
3054 "-nfkc/", "nfkc", "NFKD",
3055 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3056 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3057 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3058 "Source-", NULL, NULL,
3059 "Source/Variant-", NULL, NULL,
3060 "Source-/Variant", NULL, NULL,
3061 "/Variant", NULL, NULL,
3062 "/Variant-", NULL, NULL,
3063 "-/Variant", NULL, NULL,
3064 "-/", NULL, NULL,
3065 "-", NULL, NULL,
3066 "/", NULL, NULL,
3067 };
3068 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3069
3070 for (int32_t i=0; i<DATA_length; i+=3) {
3071 const char* ID = DATA[i];
3072 const char* expID = DATA[i+1];
3073 const char* expInvID = DATA[i+2];
3074 UBool expValid = (expInvID != NULL);
3075 if (expID == NULL) {
3076 expID = ID;
3077 }
3078 UParseError pe;
3079 UErrorCode ec = U_ZERO_ERROR;
3080 Transliterator *t =
3081 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3082 if (U_FAILURE(ec)) {
3083 if (!expValid) {
3084 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3085 } else {
3086 errln((UnicodeString)"FAIL: Couldn't create " + ID);
3087 }
3088 delete t;
3089 continue;
3090 }
3091 Transliterator *u = t->createInverse(ec);
3092 if (U_FAILURE(ec)) {
3093 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3094 delete t;
3095 delete u;
3096 continue;
3097 }
3098 if (t->getID() == expID &&
3099 u->getID() == expInvID) {
3100 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3101 } else {
3102 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3103 t->getID() + " x getInverse() => " + u->getID() +
3104 ", expected " + expInvID);
3105 }
3106 delete t;
3107 delete u;
3108 }
3109 }
3110
3111 static const UChar SPACE[] = {32,0};
3112 static const UChar NEWLINE[] = {10,0};
3113 static const UChar RETURN[] = {13,0};
3114 static const UChar EMPTY[] = {0};
3115
3116 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3117 const UnicodeString& testRulesForward) {
3118 UnicodeString rules2; t2.toRules(rules2, TRUE);
3119 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3120 rules2.findAndReplace(SPACE, EMPTY);
3121 rules2.findAndReplace(NEWLINE, EMPTY);
3122 rules2.findAndReplace(RETURN, EMPTY);
3123
3124 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3125
3126 if (rules2 != testRules) {
3127 errln(label);
3128 logln((UnicodeString)"GENERATED RULES: " + rules2);
3129 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3130 }
3131 }
3132
3133 /**
3134 * Mark's toRules test.
3135 */
3136 void TransliteratorTest::TestToRulesMark() {
3137 const char* testRules =
3138 "::[[:Latin:][:Mark:]];"
3139 "::NFKD (NFC);"
3140 "::Lower (Lower);"
3141 "a <> \\u03B1;" // alpha
3142 "::NFKC (NFD);"
3143 "::Upper (Lower);"
3144 "::Lower ();"
3145 "::([[:Greek:][:Mark:]]);"
3146 ;
3147 const char* testRulesForward =
3148 "::[[:Latin:][:Mark:]];"
3149 "::NFKD(NFC);"
3150 "::Lower(Lower);"
3151 "a > \\u03B1;"
3152 "::NFKC(NFD);"
3153 "::Upper (Lower);"
3154 "::Lower ();"
3155 ;
3156 const char* testRulesBackward =
3157 "::[[:Greek:][:Mark:]];"
3158 "::Lower (Upper);"
3159 "::NFD(NFKC);"
3160 "\\u03B1 > a;"
3161 "::Lower(Lower);"
3162 "::NFC(NFKD);"
3163 ;
3164 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3165 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3166
3167 UParseError pe;
3168 UErrorCode ec = U_ZERO_ERROR;
3169 Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3170 Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3171
3172 if (U_FAILURE(ec)) {
3173 delete t2;
3174 delete t3;
3175 errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3176 return;
3177 }
3178
3179 expect(*t2, source, target);
3180 expect(*t3, target, source);
3181
3182 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3183 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3184
3185 delete t2;
3186 delete t3;
3187 }
3188
3189 /**
3190 * Test Escape and Unescape transliterators.
3191 */
3192 void TransliteratorTest::TestEscape() {
3193 UParseError pe;
3194 UErrorCode ec;
3195 Transliterator *t;
3196
3197 ec = U_ZERO_ERROR;
3198 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3199 if (U_FAILURE(ec)) {
3200 errln((UnicodeString)"FAIL: createInstance");
3201 } else {
3202 expect(*t,
3203 UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3204 "@12Q");
3205 }
3206 delete t;
3207
3208 ec = U_ZERO_ERROR;
3209 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3210 if (U_FAILURE(ec)) {
3211 errln((UnicodeString)"FAIL: createInstance");
3212 } else {
3213 expect(*t,
3214 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3215 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3216 }
3217 delete t;
3218
3219 ec = U_ZERO_ERROR;
3220 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3221 if (U_FAILURE(ec)) {
3222 errln((UnicodeString)"FAIL: createInstance");
3223 } else {
3224 expect(*t,
3225 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3226 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3227 }
3228 delete t;
3229
3230 ec = U_ZERO_ERROR;
3231 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3232 if (U_FAILURE(ec)) {
3233 errln((UnicodeString)"FAIL: createInstance");
3234 } else {
3235 expect(*t,
3236 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3237 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3238 }
3239 delete t;
3240 }
3241
3242
3243 void TransliteratorTest::TestAnchorMasking(){
3244 UnicodeString rule ("^a > Q; a > q;");
3245 UErrorCode status= U_ZERO_ERROR;
3246 UParseError parseError;
3247
3248 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3249 if(U_FAILURE(status)){
3250 errln(UnicodeString("FAIL: ") + "ID" +
3251 ".createFromRules() => bad rules" +
3252 /*", parse error " + parseError.code +*/
3253 ", line " + parseError.line +
3254 ", offset " + parseError.offset +
3255 ", context " + prettify(parseError.preContext, TRUE) +
3256 ", rules: " + prettify(rule, TRUE));
3257 }
3258 delete t;
3259 }
3260
3261 /**
3262 * Make sure display names of variants look reasonable.
3263 */
3264 void TransliteratorTest::TestDisplayName() {
3265 #if UCONFIG_NO_FORMATTING
3266 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3267 return;
3268 #else
3269 static const char* DATA[] = {
3270 // ID, forward name, reverse name
3271 // Update the text as necessary -- the important thing is
3272 // not the text itself, but how various cases are handled.
3273
3274 // Basic test
3275 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3276
3277 // Variants
3278 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3279
3280 // Target-only IDs
3281 "NFC", "Any to NFC", "Any to NFD",
3282 };
3283
3284 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3285
3286 Locale US("en", "US");
3287
3288 for (int32_t i=0; i<DATA_length; i+=3) {
3289 UnicodeString name;
3290 Transliterator::getDisplayName(DATA[i], US, name);
3291 if (name != DATA[i+1]) {
3292 errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3293 name + ", expected " + DATA[i+1]);
3294 } else {
3295 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3296 }
3297 UErrorCode ec = U_ZERO_ERROR;
3298 UParseError pe;
3299 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3300 if (U_FAILURE(ec)) {
3301 delete t;
3302 errln("FAIL: createInstance failed");
3303 continue;
3304 }
3305 name = Transliterator::getDisplayName(t->getID(), US, name);
3306 if (name != DATA[i+2]) {
3307 errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3308 name + ", expected " + DATA[i+2]);
3309 } else {
3310 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3311 }
3312 delete t;
3313 }
3314 #endif
3315 }
3316
3317 void TransliteratorTest::TestSpecialCases(void) {
3318 const UnicodeString registerRules[] = {
3319 "Any-Dev1", "x > X; y > Y;",
3320 "Any-Dev2", "XY > Z",
3321 "Greek-Latin/FAKE",
3322 CharsToUnicodeString
3323 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3324 "" // END MARKER
3325 };
3326
3327 const UnicodeString testCases[] = {
3328 // NORMALIZATION
3329 // should add more test cases
3330 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3331 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3332 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3333 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3334
3335 // mp -> b BUG
3336 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3337 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3338
3339 // check for devanagari bug
3340 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3341
3342 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3343 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3344 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3345
3346 //TODO: enable this test once Titlecase works right
3347 /*
3348 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3349 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3350 */
3351 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3352 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3353 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3355
3356 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3357 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3358
3359 // FORMS OF S
3360 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3361 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3362 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3363 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3364 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3365 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3366 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3367 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368 // Tatiana bug
3369 // Upper: TAT\\u02B9\\u00C2NA
3370 // Lower: tat\\u02B9\\u00E2na
3371 // Title: Tat\\u02B9\\u00E2na
3372 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3373 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3374 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3375 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3376 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3377 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3378
3379 "" // END MARKER
3380 };
3381
3382 UParseError pos;
3383 int32_t i;
3384 for (i = 0; registerRules[i].length()!=0; i+=2) {
3385 UErrorCode status = U_ZERO_ERROR;
3386
3387 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3388 registerRules[i+1], UTRANS_FORWARD, pos, status);
3389 if (U_FAILURE(status)) {
3390 errln("Fails: Unable to create the transliterator from rules.");
3391 } else {
3392 Transliterator::registerInstance(t);
3393 }
3394 }
3395 for (i = 0; testCases[i].length()!=0; i+=3) {
3396 UErrorCode ec = U_ZERO_ERROR;
3397 UParseError pe;
3398 const UnicodeString& name = testCases[i];
3399 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3400 if (U_FAILURE(ec)) {
3401 errln((UnicodeString)"FAIL: Couldn't create " + name);
3402 delete t;
3403 continue;
3404 }
3405 const UnicodeString& id = t->getID();
3406 const UnicodeString& source = testCases[i+1];
3407 UnicodeString target;
3408
3409 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3410
3411 if (testCases[i+2].length() > 0) {
3412 target = testCases[i+2];
3413 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3414 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3415 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3416 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3417 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3418 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3419 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3420 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3421 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3422 target = source;
3423 target.toLower(Locale::getUS());
3424 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3425 target = source;
3426 target.toUpper(Locale::getUS());
3427 }
3428 if (U_FAILURE(ec)) {
3429 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3430 continue;
3431 }
3432
3433 expect(*t, source, target);
3434 delete t;
3435 }
3436 for (i = 0; registerRules[i].length()!=0; i+=2) {
3437 Transliterator::unregister(registerRules[i]);
3438 }
3439 }
3440
3441 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3442 if (ch <= 0xFFFF) {
3443 sprintf(buffer, "\\u%04x", (int)ch);
3444 } else {
3445 sprintf(buffer, "\\U%08x", (int)ch);
3446 }
3447 return buffer;
3448 }
3449
3450 void TransliteratorTest::TestSurrogateCasing (void) {
3451 // check that casing handles surrogates
3452 // titlecase is currently defective
3453 char buffer[20];
3454 UChar buffer2[20];
3455 UChar32 dee;
3456 UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3457 UnicodeString DEE(u_totitle(dee));
3458 if (DEE != DESERET_DEE) {
3459 err("Fails titlecase of surrogates");
3460 err(Char32ToEscapedChars(dee, buffer));
3461 err(", ");
3462 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3463 }
3464
3465 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3466 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3467 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3468 UErrorCode status= U_ZERO_ERROR;
3469
3470 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3471 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3472 errln("Fails: Can't uppercase surrogates.");
3473 }
3474
3475 status= U_ZERO_ERROR;
3476 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3477 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3478 errln("Fails: Can't lowercase surrogates.");
3479 }
3480 }
3481
3482 static void _trans(Transliterator& t, const UnicodeString& src,
3483 UnicodeString& result) {
3484 result = src;
3485 t.transliterate(result);
3486 }
3487
3488 static void _trans(const UnicodeString& id, const UnicodeString& src,
3489 UnicodeString& result, UErrorCode ec) {
3490 UParseError pe;
3491 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3492 if (U_SUCCESS(ec)) {
3493 _trans(*t, src, result);
3494 }
3495 delete t;
3496 }
3497
3498 static UnicodeString _findMatch(const UnicodeString& source,
3499 const UnicodeString* pairs) {
3500 UnicodeString empty;
3501 for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3502 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3503 return pairs[i+1];
3504 }
3505 }
3506 return empty;
3507 }
3508
3509 // Check to see that incremental gets at least part way through a reasonable string.
3510
3511 void TransliteratorTest::TestIncrementalProgress(void) {
3512 UErrorCode ec = U_ZERO_ERROR;
3513 UnicodeString latinTest = "The Quick Brown Fox.";
3514 UnicodeString devaTest;
3515 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3516 UnicodeString kataTest;
3517 _trans("Latin-Katakana", latinTest, kataTest, ec);
3518 if (U_FAILURE(ec)) {
3519 errln("FAIL: Internal error");
3520 return;
3521 }
3522 const UnicodeString tests[] = {
3523 "Any", latinTest,
3524 "Latin", latinTest,
3525 "Halfwidth", latinTest,
3526 "Devanagari", devaTest,
3527 "Katakana", kataTest,
3528 "" // END MARKER
3529 };
3530
3531 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3532 int32_t i = 0, j=0, k=0;
3533 int32_t sources = Transliterator::countAvailableSources();
3534 for (i = 0; i < sources; i++) {
3535 UnicodeString source;
3536 Transliterator::getAvailableSource(i, source);
3537 UnicodeString test = _findMatch(source, tests);
3538 if (test.length() == 0) {
3539 logln((UnicodeString)"Skipping " + source + "-X");
3540 continue;
3541 }
3542 int32_t targets = Transliterator::countAvailableTargets(source);
3543 for (j = 0; j < targets; j++) {
3544 UnicodeString target;
3545 Transliterator::getAvailableTarget(j, source, target);
3546 int32_t variants = Transliterator::countAvailableVariants(source, target);
3547 for (k =0; k< variants; k++) {
3548 UnicodeString variant;
3549 UParseError err;
3550 UErrorCode status = U_ZERO_ERROR;
3551
3552 Transliterator::getAvailableVariant(k, source, target, variant);
3553 UnicodeString id = source + "-" + target + "/" + variant;
3554
3555 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3556 if (U_FAILURE(status)) {
3557 errln((UnicodeString)"FAIL: Could not create " + id);
3558 delete t;
3559 continue;
3560 }
3561 status = U_ZERO_ERROR;
3562 CheckIncrementalAux(t, test);
3563
3564 UnicodeString rev;
3565 _trans(*t, test, rev);
3566 Transliterator *inv = t->createInverse(status);
3567 if (U_FAILURE(status)) {
3568 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3569 delete t;
3570 delete inv;
3571 continue;
3572 }
3573 CheckIncrementalAux(inv, rev);
3574 delete t;
3575 delete inv;
3576 }
3577 }
3578 }
3579 }
3580
3581 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3582 const UnicodeString& input) {
3583 UErrorCode ec = U_ZERO_ERROR;
3584 UTransPosition pos;
3585 UnicodeString test = input;
3586
3587 pos.contextStart = 0;
3588 pos.contextLimit = input.length();
3589 pos.start = 0;
3590 pos.limit = input.length();
3591
3592 t->transliterate(test, pos, ec);
3593 if (U_FAILURE(ec)) {
3594 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3595 return;
3596 }
3597 UBool gotError = FALSE;
3598
3599 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3600
3601 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3602 errln((UnicodeString)"No Progress, " +
3603 t->getID() + ": " + formatInput(test, input, pos));
3604 gotError = TRUE;
3605 } else {
3606 logln((UnicodeString)"PASS Progress, " +
3607 t->getID() + ": " + formatInput(test, input, pos));
3608 }
3609 t->finishTransliteration(test, pos);
3610 if (pos.start != pos.limit) {
3611 errln((UnicodeString)"Incomplete, " +
3612 t->getID() + ": " + formatInput(test, input, pos));
3613 gotError = TRUE;
3614 }
3615 }
3616
3617 void TransliteratorTest::TestFunction() {
3618 // Careful with spacing and ';' here: Phrase this exactly
3619 // as toRules() is going to return it. If toRules() changes
3620 // with regard to spacing or ';', then adjust this string.
3621 UnicodeString rule =
3622 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3623
3624 UParseError pe;
3625 UErrorCode ec = U_ZERO_ERROR;
3626 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3627 if (t == NULL) {
3628 errln("FAIL: createFromRules failed");
3629 return;
3630 }
3631
3632 UnicodeString r;
3633 t->toRules(r, TRUE);
3634 if (r == rule) {
3635 logln((UnicodeString)"OK: toRules() => " + r);
3636 } else {
3637 errln((UnicodeString)"FAIL: toRules() => " + r +
3638 ", expected " + rule);
3639 }
3640
3641 expect(*t, "The Quick Brown Fox",
3642 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3643
3644 delete t;
3645 }
3646
3647 void TransliteratorTest::TestInvalidBackRef(void) {
3648 UnicodeString rule = ". > $1;";
3649 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3650 UParseError pe;
3651 UErrorCode ec = U_ZERO_ERROR;
3652 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3653 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3654
3655 if (t != NULL) {
3656 errln("FAIL: createFromRules should have returned NULL");
3657 delete t;
3658 }
3659
3660 if (t2 != NULL) {
3661 errln("FAIL: createFromRules should have returned NULL");
3662 delete t2;
3663 }
3664
3665 if (U_SUCCESS(ec)) {
3666 errln("FAIL: Ok: . > $1; => no error");
3667 } else {
3668 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3669 }
3670 }
3671
3672 void TransliteratorTest::TestMulticharStringSet() {
3673 // Basic testing
3674 const char* rule =
3675 " [{aa}] > x;"
3676 " a > y;"
3677 " [b{bc}] > z;"
3678 "[{gd}] { e > q;"
3679 " e } [{fg}] > r;" ;
3680
3681 UParseError pe;
3682 UErrorCode ec = U_ZERO_ERROR;
3683 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3684 if (t == NULL || U_FAILURE(ec)) {
3685 delete t;
3686 errln("FAIL: createFromRules failed");
3687 return;
3688 }
3689
3690 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3691 "y x yz z d gd de gdq gdqfg ddrfg");
3692 delete t;
3693
3694 // Overlapped string test. Make sure that when multiple
3695 // strings can match that the longest one is matched.
3696 rule =
3697 " [a {ab} {abc}] > x;"
3698 " b > y;"
3699 " c > z;"
3700 " q [t {st} {rst}] { e > p;" ;
3701
3702 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3703 if (t == NULL || U_FAILURE(ec)) {
3704 delete t;
3705 errln("FAIL: createFromRules failed");
3706 return;
3707 }
3708
3709 expect(*t, "a ab abc qte qste qrste",
3710 "x x x qtp qstp qrstp");
3711 delete t;
3712 }
3713
3714 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3715 // BEGIN TestUserFunction support factory
3716
3717 Transliterator* _TUFF[4];
3718 UnicodeString* _TUFID[4];
3719
3720 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3721 Transliterator::Token context) {
3722 return _TUFF[context.integer]->clone();
3723 }
3724
3725 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3726 _TUFF[n] = t;
3727 _TUFID[n] = new UnicodeString(ID);
3728 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3729 }
3730
3731 static void _TUFUnreg(int32_t n) {
3732 if (_TUFF[n] != NULL) {
3733 Transliterator::unregister(*_TUFID[n]);
3734 delete _TUFF[n];
3735 delete _TUFID[n];
3736 }
3737 }
3738
3739 // END TestUserFunction support factory
3740 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3741
3742 /**
3743 * Test that user-registered transliterators can be used under function
3744 * syntax.
3745 */
3746 void TransliteratorTest::TestUserFunction() {
3747
3748 Transliterator* t;
3749 UParseError pe;
3750 UErrorCode ec = U_ZERO_ERROR;
3751
3752 // Setup our factory
3753 int32_t i;
3754 for (i=0; i<4; ++i) {
3755 _TUFF[i] = NULL;
3756 }
3757
3758 // There's no need to register inverses if we don't use them
3759 t = Transliterator::createFromRules("gif",
3760 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3761 UTRANS_FORWARD, pe, ec);
3762 if (t == NULL || U_FAILURE(ec)) {
3763 errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3764 return;
3765 }
3766 _TUFReg("Any-gif", t, 0);
3767
3768 t = Transliterator::createFromRules("RemoveCurly",
3769 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3770 UTRANS_FORWARD, pe, ec);
3771 if (t == NULL || U_FAILURE(ec)) {
3772 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3773 goto FAIL;
3774 }
3775 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3776 _TUFReg("Any-RemoveCurly", t, 1);
3777
3778 logln("Trying &hex");
3779 t = Transliterator::createFromRules("hex2",
3780 "(.) > &hex($1);",
3781 UTRANS_FORWARD, pe, ec);
3782 if (t == NULL || U_FAILURE(ec)) {
3783 errln("FAIL: createFromRules");
3784 goto FAIL;
3785 }
3786 logln("Registering");
3787 _TUFReg("Any-hex2", t, 2);
3788 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3789 if (t == NULL || U_FAILURE(ec)) {
3790 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3791 goto FAIL;
3792 }
3793 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3794 delete t;
3795
3796 logln("Trying &gif");
3797 t = Transliterator::createFromRules("gif2",
3798 "(.) > &Gif(&Hex2($1));",
3799 UTRANS_FORWARD, pe, ec);
3800 if (t == NULL || U_FAILURE(ec)) {
3801 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3802 goto FAIL;
3803 }
3804 logln("Registering");
3805 _TUFReg("Any-gif2", t, 3);
3806 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3807 if (t == NULL || U_FAILURE(ec)) {
3808 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3809 goto FAIL;
3810 }
3811 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3812 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3813 delete t;
3814
3815 // Test that filters are allowed after &
3816 t = Transliterator::createFromRules("test",
3817 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3818 UTRANS_FORWARD, pe, ec);
3819 if (t == NULL || U_FAILURE(ec)) {
3820 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3821 goto FAIL;
3822 }
3823 expect(*t, "abc",
3824 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3825 delete t;
3826
3827 FAIL:
3828 for (i=0; i<4; ++i) {
3829 _TUFUnreg(i);
3830 }
3831 }
3832
3833 /**
3834 * Test the Any-X transliterators.
3835 */
3836 void TransliteratorTest::TestAnyX(void) {
3837 UParseError parseError;
3838 UErrorCode status = U_ZERO_ERROR;
3839 Transliterator* anyLatin =
3840 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3841 if (anyLatin==0) {
3842 errln("FAIL: createInstance returned NULL");
3843 delete anyLatin;
3844 return;
3845 }
3846
3847 expect(*anyLatin,
3848 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3849 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3850
3851 delete anyLatin;
3852 }
3853
3854 /**
3855 * Test the source and target set API. These are only implemented
3856 * for RBT and CompoundTransliterator at this time.
3857 */
3858 void TransliteratorTest::TestSourceTargetSet() {
3859 UErrorCode ec = U_ZERO_ERROR;
3860
3861 // Rules
3862 const char* r =
3863 "a > b; "
3864 "r [x{lu}] > q;";
3865
3866 // Expected source
3867 UnicodeSet expSrc("[arx{lu}]", ec);
3868
3869 // Expected target
3870 UnicodeSet expTrg("[bq]", ec);
3871
3872 UParseError pe;
3873 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3874
3875 if (U_FAILURE(ec)) {
3876 delete t;
3877 errln("FAIL: Couldn't set up test");
3878 return;
3879 }
3880
3881 UnicodeSet src; t->getSourceSet(src);
3882 UnicodeSet trg; t->getTargetSet(trg);
3883
3884 if (src == expSrc && trg == expTrg) {
3885 UnicodeString a, b;
3886 logln((UnicodeString)"Ok: " +
3887 r + " => source = " + src.toPattern(a, TRUE) +
3888 ", target = " + trg.toPattern(b, TRUE));
3889 } else {
3890 UnicodeString a, b, c, d;
3891 errln((UnicodeString)"FAIL: " +
3892 r + " => source = " + src.toPattern(a, TRUE) +
3893 ", expected " + expSrc.toPattern(b, TRUE) +
3894 "; target = " + trg.toPattern(c, TRUE) +
3895 ", expected " + expTrg.toPattern(d, TRUE));
3896 }
3897
3898 delete t;
3899 }
3900
3901 /**
3902 * Test handling of rule whitespace, for both RBT and UnicodeSet.
3903 */
3904 void TransliteratorTest::TestRuleWhitespace() {
3905 // Rules
3906 const char* r = "a > \\u200E b;";
3907
3908 UErrorCode ec = U_ZERO_ERROR;
3909 UParseError pe;
3910 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3911
3912 if (U_FAILURE(ec)) {
3913 errln("FAIL: Couldn't set up test");
3914 } else {
3915 expect(*t, "a", "b");
3916 }
3917 delete t;
3918
3919 // UnicodeSet
3920 ec = U_ZERO_ERROR;
3921 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3922
3923 if (U_FAILURE(ec)) {
3924 errln("FAIL: Couldn't set up test");
3925 } else {
3926 if (set.contains(0x200E)) {
3927 errln("FAIL: U+200E not being ignored by UnicodeSet");
3928 }
3929 }
3930 }
3931 //======================================================================
3932 // this method is in TestUScript.java
3933 //======================================================================
3934 void TransliteratorTest::TestAllCodepoints(){
3935 UScriptCode code= USCRIPT_INVALID_CODE;
3936 char id[256]={'\0'};
3937 char abbr[256]={'\0'};
3938 char newId[256]={'\0'};
3939 char newAbbrId[256]={'\0'};
3940 char oldId[256]={'\0'};
3941 char oldAbbrId[256]={'\0'};
3942
3943 UErrorCode status =U_ZERO_ERROR;
3944 UParseError pe;
3945
3946 for(uint32_t i = 0; i<=0x10ffff; i++){
3947 code = uscript_getScript(i,&status);
3948 if(code == USCRIPT_INVALID_CODE){
3949 errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
3950 }
3951 const char* myId = uscript_getName(code);
3952 if(!myId) {
3953 errln("Valid script code returned NULL name. Check your data!");
3954 return;
3955 }
3956 uprv_strcpy(id,myId);
3957 uprv_strcpy(abbr,uscript_getShortName(code));
3958
3959 uprv_strcpy(newId,"[:");
3960 uprv_strcat(newId,id);
3961 uprv_strcat(newId,":];NFD");
3962
3963 uprv_strcpy(newAbbrId,"[:");
3964 uprv_strcat(newAbbrId,abbr);
3965 uprv_strcat(newAbbrId,":];NFD");
3966
3967 if(uprv_strcmp(newId,oldId)!=0){
3968 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
3969 if(t==NULL || U_FAILURE(status)){
3970 errln((UnicodeString)"FAIL: Could not create " + id);
3971 }
3972 delete t;
3973 }
3974 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
3975 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
3976 if(t==NULL || U_FAILURE(status)){
3977 errln((UnicodeString)"FAIL: Could not create " + id);
3978 }
3979 delete t;
3980 }
3981 uprv_strcpy(oldId,newId);
3982 uprv_strcpy(oldAbbrId, newAbbrId);
3983
3984 }
3985
3986 }
3987
/*
 * TEST_TRANSLIT_ID(id, cls)
 *
 * Creates a forward transliterator for the ID `id` and reports an error
 * unless its getDynamicClassID() equals cls::getStaticClassID().
 * `id` must be a string literal: it is pasted onto the failure message by
 * adjacent-literal concatenation. The transliterator is deleted before
 * the macro's block ends.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
    UErrorCode ec = U_ZERO_ERROR; \
    Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
    if (U_FAILURE(ec)) { \
        errln("FAIL: Couldn't create " id); \
    } else { \
        if (t->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
    } \
    delete t; \
}
4001
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 *
 * Same check as TEST_TRANSLIT_ID, but the transliterator is built from the
 * rule text `rule` (ID "_") with createFromRules(). `rule` must be a
 * string literal because it is concatenated onto the failure message.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
    UErrorCode ec = U_ZERO_ERROR; \
    UParseError pe; \
    Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
    if (U_FAILURE(ec)) { \
        errln("FAIL: Couldn't create " rule); \
    } else { \
        if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
    } \
    delete t; \
}
4016
/**
 * Check the class-ID boilerplate of the transliterator classes: each ID
 * (or rule) listed here is instantiated and its dynamic class ID is
 * compared with the static class ID of the class named beside it.
 */
void TransliteratorTest::TestBoilerplate() {
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // The rule-based class is reached through createFromRules().
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4032
4033 void TransliteratorTest::TestAlternateSyntax() {
4034 // U+2206 == &
4035 // U+2190 == <
4036 // U+2192 == >
4037 // U+2194 == <>
4038 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4039 "abc",
4040 "xbz");
4041 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4042 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4043 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4044 }
4045
/*
 * Rule sets shared by TestBeginEnd() and TestBeginEndToRules().
 *
 * Entries are referenced by index from BEGIN_END_TEST_CASES, so the
 * position of every entry must stay stable. Rule sets that depend on the
 * ::BEGIN/::END syntax are kept as comments and replaced by an empty
 * string placeholder that holds their slot.
 */
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17] -- the one rule set that the tests also instantiate in the reverse direction
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
// Number of entries in BEGIN_END_RULES, placeholders included.
static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4220
4221 /*
4222 (This entire test is commented out below and will need some heavy revision when we re-add
4223 the ::BEGIN/::END stuff)
4224 static const char* BOGUS_BEGIN_END_RULES[] = {
4225 // [7]
4226 "::BEGIN;"
4227 "abc > xy;"
4228 "::BEGIN;"
4229 "aba > z;"
4230 "::END;"
4231 "::END;",
4232
4233 // [8]
4234 "abc > xy;"
4235 " aba > z;"
4236 "::END;",
4237
4238 // [9]
4239 "::BEGIN;"
4240 "::Upper;"
4241 "::END;"
4242 };
4243 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4244 */
4245
/*
 * Test cases for TestBeginEnd() and TestBeginEndToRules(), stored as a
 * flat list of triples: { rule set, input text, expected output }.
 * Both tests step through the array three entries at a time, so entries
 * must only be added or removed as complete triples.
 */
static const char* BEGIN_END_TEST_CASES[] = {
    // rules input expected output
    BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
//  BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
//  BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
//  BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
    BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
    BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",

    BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
//  BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
//  BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
    BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
    BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",

//  BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//  BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Total number of strings (three per test case) in BEGIN_END_TEST_CASES.
static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4274
4275 void TransliteratorTest::TestBeginEnd() {
4276 // run through the list of test cases above
4277 int32_t i = 0;
4278 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4279 expect((UnicodeString)"Test case #" + (i / 3),
4280 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4281 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4282 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4283 }
4284
4285 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4286 UParseError parseError;
4287 UErrorCode status = U_ZERO_ERROR;
4288 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4289 UTRANS_REVERSE, parseError, status);
4290 if (reversed == 0 || U_FAILURE(status)) {
4291 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4292 } else {
4293 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4294 }
4295 delete reversed;
4296
4297 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4298 // that all of them cause errors
4299 /*
4300 (commented out until we have the real ::BEGIN/::END stuff in place
4301 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4302 UParseError parseError;
4303 UErrorCode status = U_ZERO_ERROR;
4304 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4305 UTRANS_FORWARD, parseError, status);
4306 if (!U_FAILURE(status)) {
4307 delete t;
4308 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4309 }
4310 }
4311 */
4312 }
4313
4314 void TransliteratorTest::TestBeginEndToRules() {
4315 // run through the same list of test cases we used above, but this time, instead of just
4316 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4317 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4318 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4319 // to (i.e., does the same thing as) the original rule set
4320 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4321 UParseError parseError;
4322 UErrorCode status = U_ZERO_ERROR;
4323 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4324 UTRANS_FORWARD, parseError, status);
4325 if (U_FAILURE(status)) {
4326 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4327 } else {
4328 UnicodeString rules;
4329 t->toRules(rules, TRUE);
4330 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4331 UTRANS_FORWARD, parseError, status);
4332 if (U_FAILURE(status)) {
4333 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4334 parseError, status);
4335 delete t;
4336 } else {
4337 expect(*t2,
4338 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4339 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4340 delete t;
4341 delete t2;
4342 }
4343 }
4344 }
4345
4346 // do the same thing for the reversible test case
4347 UParseError parseError;
4348 UErrorCode status = U_ZERO_ERROR;
4349 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4350 UTRANS_REVERSE, parseError, status);
4351 if (U_FAILURE(status)) {
4352 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4353 } else {
4354 UnicodeString rules;
4355 reversed->toRules(rules, FALSE);
4356 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4357 parseError, status);
4358 if (U_FAILURE(status)) {
4359 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4360 parseError, status);
4361 delete reversed;
4362 } else {
4363 expect(*reversed2,
4364 UnicodeString("xy XY XYZ yz YZ"),
4365 UnicodeString("xy abc xaba yz aba"));
4366 delete reversed;
4367 delete reversed2;
4368 }
4369 }
4370 }
4371
4372 void TransliteratorTest::TestRegisterAlias() {
4373 UnicodeString longID("Lower;[aeiou]Upper");
4374 UnicodeString shortID("Any-CapVowels");
4375 UnicodeString reallyShortID("CapVowels");
4376
4377 Transliterator::registerAlias(shortID, longID);
4378
4379 UErrorCode err = U_ZERO_ERROR;
4380 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4381 if (U_FAILURE(err)) {
4382 errln("Failed to instantiate transliterator with long ID");
4383 Transliterator::unregister(shortID);
4384 return;
4385 }
4386 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4387 if (U_FAILURE(err)) {
4388 errln("Failed to instantiate transliterator with short ID");
4389 delete t1;
4390 Transliterator::unregister(shortID);
4391 return;
4392 }
4393
4394 if (t1->getID() != longID)
4395 errln("Transliterator instantiated with long ID doesn't have long ID");
4396 if (t2->getID() != reallyShortID)
4397 errln("Transliterator instantiated with short ID doesn't have short ID");
4398
4399 UnicodeString rules1;
4400 UnicodeString rules2;
4401
4402 t1->toRules(rules1, TRUE);
4403 t2->toRules(rules2, TRUE);
4404 if (rules1 != rules2)
4405 errln("Alias transliterators aren't the same");
4406
4407 delete t1;
4408 delete t2;
4409 Transliterator::unregister(shortID);
4410
4411 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4412 if (U_SUCCESS(err)) {
4413 errln("Instantiation with short ID succeeded after short ID was unregistered");
4414 delete t1;
4415 }
4416
4417 // try the same thing again, but this time with something other than
4418 // an instance of CompoundTransliterator
4419 UnicodeString realID("Latin-Greek");
4420 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4421 Transliterator::registerAlias(fakeID, realID);
4422
4423 err = U_ZERO_ERROR;
4424 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4425 if (U_FAILURE(err)) {
4426 errln("Failed to instantiate transliterator with real ID");
4427 Transliterator::unregister(realID);
4428 return;
4429 }
4430 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4431 if (U_FAILURE(err)) {
4432 errln("Failed to instantiate transliterator with fake ID");
4433 delete t1;
4434 Transliterator::unregister(realID);
4435 return;
4436 }
4437
4438 t1->toRules(rules1, TRUE);
4439 t2->toRules(rules2, TRUE);
4440 if (rules1 != rules2)
4441 errln("Alias transliterators aren't the same");
4442
4443 delete t1;
4444 delete t2;
4445 Transliterator::unregister(fakeID);
4446 }
4447
4448 void TransliteratorTest::TestRuleStripping() {
4449 /*
4450 #
4451 \uE001>\u0C01; # SIGN
4452 */
4453 static const UChar rule[] = {
4454 0x0023,0x0020,0x000D,0x000A,
4455 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4456 };
4457 static const UChar expectedRule[] = {
4458 0xE001,0x003E,0x0C01,0x003B,0
4459 };
4460 UChar result[sizeof(rule)/sizeof(rule[0])];
4461 UErrorCode status = U_ZERO_ERROR;
4462 int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4463 if (len != u_strlen(expectedRule)) {
4464 errln("utrans_stripRules return len = %d", len);
4465 }
4466 if (u_strncmp(expectedRule, result, len) != 0) {
4467 errln("utrans_stripRules did not return expected string");
4468 }
4469 }
4470
4471 /**
4472 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4473 */
4474 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4475 UParseError parseError;
4476 UErrorCode status = U_ZERO_ERROR;
4477 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4478 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4479 if (hf == 0 || fh == 0) {
4480 errln("FAIL: createInstance failed");
4481 delete hf;
4482 delete fh;
4483 return;
4484 }
4485
4486 // Array of 2n items
4487 // Each item is
4488 // "hf"|"fh"|"both",
4489 // <Halfwidth>,
4490 // <Fullwidth>
4491 const char* DATA[] = {
4492 "both",
4493 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4494 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4495 };
4496 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4497
4498 for (int32_t i=0; i<DATA_length; i+=3) {
4499 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4500 UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4501 switch (*DATA[i]) {
4502 case 0x68: //'h': // Halfwidth-Fullwidth only
4503 expect(*hf, h, f);
4504 break;
4505 case 0x66: //'f': // Fullwidth-Halfwidth only
4506 expect(*fh, f, h);
4507 break;
4508 case 0x62: //'b': // both directions
4509 expect(*hf, h, f);
4510 expect(*fh, f, h);
4511 break;
4512 }
4513 }
4514 delete hf;
4515 delete fh;
4516 }
4517
4518
4519 /**
4520 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4521 * TODO: confirm that the expected results are correct.
4522 * For now, test just confirms that C++ and Java give identical results.
4523 */
4524 void TransliteratorTest::TestThai(void) {
4525 UParseError parseError;
4526 UErrorCode status = U_ZERO_ERROR;
4527 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4528 if (tr == 0) {
4529 errln("FAIL: createInstance failed");
4530 return;
4531 }
4532 if (U_FAILURE(status)) {
4533 errln("FAIL: createInstance failed with %s", u_errorName(status));
4534 return;
4535 }
4536 const char *thaiText =
4537 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4538 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4539 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4540 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4541 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4542 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4543 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4544 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4545 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4546 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4547 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4548 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4549 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4550 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4551 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4552 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4553 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4554 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4555 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4556 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4557 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4558 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4559 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4560 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4561 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4562 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4563 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4564 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4565 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4566 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4567
4568 const char *latinText =
4569 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4570 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4571 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4572 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4573 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4574 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4575 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4576 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4577 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4578 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4579 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4580 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4581 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4582 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4583 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4584 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4585 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4586 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4587
4588
4589 UnicodeString xlitText(thaiText);
4590 xlitText = xlitText.unescape();
4591 tr->transliterate(xlitText);
4592
4593 UnicodeString expectedText(latinText);
4594 expectedText = expectedText.unescape();
4595 expect(*tr, xlitText, expectedText);
4596
4597 delete tr;
4598 }
4599
4600
4601 //======================================================================
4602 // Support methods
4603 //======================================================================
4604 void TransliteratorTest::expectT(const UnicodeString& id,
4605 const UnicodeString& source,
4606 const UnicodeString& expectedResult) {
4607 UErrorCode ec = U_ZERO_ERROR;
4608 UParseError pe;
4609 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4610 if (U_FAILURE(ec)) {
4611 errln((UnicodeString)"FAIL: Could not create " + id);
4612 delete t;
4613 return;
4614 }
4615 expect(*t, source, expectedResult);
4616 delete t;
4617 }
4618
4619 void TransliteratorTest::reportParseError(const UnicodeString& message,
4620 const UParseError& parseError,
4621 const UErrorCode& status) {
4622 errln(message +
4623 /*", parse error " + parseError.code +*/
4624 ", line " + parseError.line +
4625 ", offset " + parseError.offset +
4626 ", pre-context " + prettify(parseError.preContext, TRUE) +
4627 ", post-context " + prettify(parseError.postContext,TRUE) +
4628 ", Error: " + u_errorName(status));
4629 }
4630
/**
 * Convenience overload: same as the five-argument expect(), with the
 * placeholder ID "<ID>" supplied for the transliterator built from `rules`.
 */
void TransliteratorTest::expect(const UnicodeString& rules,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                UTransPosition *pos) {
    expect("<ID>", rules, source, expectedResult, pos);
}
4637
4638 void TransliteratorTest::expect(const UnicodeString& id,
4639 const UnicodeString& rules,
4640 const UnicodeString& source,
4641 const UnicodeString& expectedResult,
4642 UTransPosition *pos) {
4643 UErrorCode status = U_ZERO_ERROR;
4644 UParseError parseError;
4645 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4646 if (U_FAILURE(status)) {
4647 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4648 } else {
4649 expect(*t, source, expectedResult, pos);
4650 }
4651 delete t;
4652 }
4653
/**
 * Round-trip check: `t` must map `source` to `expectedResult`, and
 * `reverseTransliterator` must map `expectedResult` back to `source`.
 */
void TransliteratorTest::expect(const Transliterator& t,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                const Transliterator& reverseTransliterator) {
    expect(t, source, expectedResult);
    expect(reverseTransliterator, expectedResult, source);
}
4661
/**
 * Core checker: run `t` over `source` in several ways and compare each
 * result with `expectedResult` through expectAux().
 *
 * @param t              the transliterator under test
 * @param source         the input text
 * @param expectedResult the text every variant must produce
 * @param pos            if non-null, the position to transliterate within;
 *                       if null, the whole string is processed and the
 *                       keyboard test feeds `source` one code unit at a time
 */
void TransliteratorTest::expect(const Transliterator& t,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                UTransPosition *pos) {
    // Variant 1 (only without a position): whole-string transliteration.
    if (pos == 0) {
        UnicodeString result(source);
        t.transliterate(result);
        expectAux(t.getID() + ":String", source, result, expectedResult);
    }
    // Take a private copy of the caller's position now: the call to
    // finishTransliteration() below is handed *pos itself, while the
    // keyboard test further down works from this copy.
    UTransPosition index={0, 0, 0, 0};
    if (pos != 0) {
        index = *pos;
    }

    // Variant 2: tagged ":Replaceable".
    UnicodeString rsource(source);
    if (pos == 0) {
        t.transliterate(rsource);
    } else {
        // Do it all at once -- below we do it incrementally
        t.finishTransliteration(rsource, *pos);
    }
    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);

    // Test keyboard (incremental) transliteration -- this result
    // must be the same after we finalize (see below).
    UnicodeString log;
    rsource.remove();
    if (pos != 0) {
        // With a position: a single incremental call over the full text.
        rsource = source;
        formatInput(log, rsource, index);
        log.append(" -> ");
        UErrorCode status = U_ZERO_ERROR;
        t.transliterate(rsource, index, status);
        formatInput(log, rsource, index);
    } else {
        // Without a position: insert one code unit of source per call and
        // record the buffer state after each step in the log.
        for (int32_t i=0; i<source.length(); ++i) {
            if (i != 0) {
                log.append(" + ");
            }
            log.append(source.charAt(i)).append(" -> ");
            UErrorCode status = U_ZERO_ERROR;
            t.transliterate(rsource, index, source.charAt(i), status);
            formatInput(log, rsource, index);
        }
    }

    // As a final step in keyboard transliteration, we must call
    // transliterate to finish off any pending partial matches that
    // were waiting for more input.
    t.finishTransliteration(rsource, index);
    log.append(" => ").append(rsource);

    expectAux(t.getID() + ":Keyboard", log,
              rsource == expectedResult,
              expectedResult);
}
4718
4719
4720 /**
4721 * @param appendTo result is appended to this param.
4722 * @param input the string being transliterated
4723 * @param pos the index struct
4724 */
4725 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4726 const UnicodeString& input,
4727 const UTransPosition& pos) {
4728 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4729 // the {} indicate the context start and limit, and the ||
4730 // indicate the start and limit.
4731 if (0 <= pos.contextStart &&
4732 pos.contextStart <= pos.start &&
4733 pos.start <= pos.limit &&
4734 pos.limit <= pos.contextLimit &&
4735 pos.contextLimit <= input.length()) {
4736
4737 UnicodeString a, b, c, d, e;
4738 input.extractBetween(0, pos.contextStart, a);
4739 input.extractBetween(pos.contextStart, pos.start, b);
4740 input.extractBetween(pos.start, pos.limit, c);
4741 input.extractBetween(pos.limit, pos.contextLimit, d);
4742 input.extractBetween(pos.contextLimit, input.length(), e);
4743 appendTo.append(a).append((UChar)123/*{*/).append(b).
4744 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4745 append((UChar)125/*}*/).append(e);
4746 } else {
4747 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4748 pos.contextStart + ", s=" + pos.start + ", l=" +
4749 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4750 input);
4751 }
4752 return appendTo;
4753 }
4754
4755 void TransliteratorTest::expectAux(const UnicodeString& tag,
4756 const UnicodeString& source,
4757 const UnicodeString& result,
4758 const UnicodeString& expectedResult) {
4759 expectAux(tag, source + " -> " + result,
4760 result == expectedResult,
4761 expectedResult);
4762 }
4763
4764 void TransliteratorTest::expectAux(const UnicodeString& tag,
4765 const UnicodeString& summary, UBool pass,
4766 const UnicodeString& expectedResult) {
4767 if (pass) {
4768 logln(UnicodeString("(")+tag+") " + prettify(summary));
4769 } else {
4770 errln(UnicodeString("FAIL: (")+tag+") "
4771 + prettify(summary)
4772 + ", expected " + prettify(expectedResult));
4773 }
4774 }
4775
4776 #endif /* #if !UCONFIG_NO_TRANSLITERATION */