]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/transtst.cpp
ICU-511.34.tar.gz
[apple/icu.git] / icuSources / test / intltest / transtst.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_TRANSLITERATION
14
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "unicode/utf16.h"
27 #include "cpdtrans.h"
28 #include "nultrans.h"
29 #include "rbt.h"
30 #include "rbt_pars.h"
31 #include "anytrans.h"
32 #include "esctrn.h"
33 #include "name2uni.h"
34 #include "nortrans.h"
35 #include "remtrans.h"
36 #include "titletrn.h"
37 #include "tolowtrn.h"
38 #include "toupptrn.h"
39 #include "unesctrn.h"
40 #include "uni2name.h"
41 #include "cstring.h"
42 #include "cmemory.h"
43 #include <stdio.h>
44
45 /***********************************************************************
46
47 HOW TO USE THIS TEST FILE
48 -or-
49 How I developed on two platforms
50 without losing (too much of) my mind
51
52
53 1. Add new tests by copying/pasting/changing existing tests. On Java,
54 any public void method named Test...() taking no parameters becomes
55 a test. On C++, you need to modify the header and add a line to
56 the runIndexedTest() dispatch method.
57
58 2. Make liberal use of the expect() method; it is your friend.
59
60 3. The tests in this file exactly match those in a sister file on the
61 other side. The two files are:
62
63 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
64 icu4c: source/test/intltest/transtst.cpp
65
66 ==> THIS IS THE IMPORTANT PART <==
67
68 When you add a test in this file, add it in TransliteratorTest.java
69 too. Give it the same name and put it in the same relative place.
70 This makes maintenance a lot simpler for any poor soul who ends up
71 trying to synchronize the tests between icu4j and icu4c.
72
73 4. If you MUST enter a test that is NOT paralleled in the sister file,
74 then add it in the special non-mirrored section. These are
75 labeled
76
77 "icu4j ONLY"
78
79 or
80
81 "icu4c ONLY"
82
83 Make sure you document the reason the test is here and not there.
84
85
86 Thank you.
87 The Management
88 ***********************************************************************/
89
90 // Define character constants thusly to be EBCDIC-friendly
91 enum {
92 LEFT_BRACE=((UChar)0x007B), /*{*/
93 PIPE =((UChar)0x007C), /*|*/
94 ZERO =((UChar)0x0030), /*0*/
95 UPPER_A =((UChar)0x0041) /*A*/
96 };
97
98 TransliteratorTest::TransliteratorTest()
99 : DESERET_DEE((UChar32)0x10414),
100 DESERET_dee((UChar32)0x1043C)
101 {
102 }
103
104 TransliteratorTest::~TransliteratorTest() {}
105
106 void
107 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
108 const char* &name, char* /*par*/) {
109 switch (index) {
110 TESTCASE(0,TestInstantiation);
111 TESTCASE(1,TestSimpleRules);
112 TESTCASE(2,TestRuleBasedInverse);
113 TESTCASE(3,TestKeyboard);
114 TESTCASE(4,TestKeyboard2);
115 TESTCASE(5,TestKeyboard3);
116 TESTCASE(6,TestArabic);
117 TESTCASE(7,TestCompoundKana);
118 TESTCASE(8,TestCompoundHex);
119 TESTCASE(9,TestFiltering);
120 TESTCASE(10,TestInlineSet);
121 TESTCASE(11,TestPatternQuoting);
122 TESTCASE(12,TestJ277);
123 TESTCASE(13,TestJ243);
124 TESTCASE(14,TestJ329);
125 TESTCASE(15,TestSegments);
126 TESTCASE(16,TestCursorOffset);
127 TESTCASE(17,TestArbitraryVariableValues);
128 TESTCASE(18,TestPositionHandling);
129 TESTCASE(19,TestHiraganaKatakana);
130 TESTCASE(20,TestCopyJ476);
131 TESTCASE(21,TestAnchors);
132 TESTCASE(22,TestInterIndic);
133 TESTCASE(23,TestFilterIDs);
134 TESTCASE(24,TestCaseMap);
135 TESTCASE(25,TestNameMap);
136 TESTCASE(26,TestLiberalizedID);
137 TESTCASE(27,TestCreateInstance);
138 TESTCASE(28,TestNormalizationTransliterator);
139 TESTCASE(29,TestCompoundRBT);
140 TESTCASE(30,TestCompoundFilter);
141 TESTCASE(31,TestRemove);
142 TESTCASE(32,TestToRules);
143 TESTCASE(33,TestContext);
144 TESTCASE(34,TestSupplemental);
145 TESTCASE(35,TestQuantifier);
146 TESTCASE(36,TestSTV);
147 TESTCASE(37,TestCompoundInverse);
148 TESTCASE(38,TestNFDChainRBT);
149 TESTCASE(39,TestNullInverse);
150 TESTCASE(40,TestAliasInverseID);
151 TESTCASE(41,TestCompoundInverseID);
152 TESTCASE(42,TestUndefinedVariable);
153 TESTCASE(43,TestEmptyContext);
154 TESTCASE(44,TestCompoundFilterID);
155 TESTCASE(45,TestPropertySet);
156 TESTCASE(46,TestNewEngine);
157 TESTCASE(47,TestQuantifiedSegment);
158 TESTCASE(48,TestDevanagariLatinRT);
159 TESTCASE(49,TestTeluguLatinRT);
160 TESTCASE(50,TestCompoundLatinRT);
161 TESTCASE(51,TestSanskritLatinRT);
162 TESTCASE(52,TestLocaleInstantiation);
163 TESTCASE(53,TestTitleAccents);
164 TESTCASE(54,TestLocaleResource);
165 TESTCASE(55,TestParseError);
166 TESTCASE(56,TestOutputSet);
167 TESTCASE(57,TestVariableRange);
168 TESTCASE(58,TestInvalidPostContext);
169 TESTCASE(59,TestIDForms);
170 TESTCASE(60,TestToRulesMark);
171 TESTCASE(61,TestEscape);
172 TESTCASE(62,TestAnchorMasking);
173 TESTCASE(63,TestDisplayName);
174 TESTCASE(64,TestSpecialCases);
175 #if !UCONFIG_NO_FILE_IO
176 TESTCASE(65,TestIncrementalProgress);
177 #endif
178 TESTCASE(66,TestSurrogateCasing);
179 TESTCASE(67,TestFunction);
180 TESTCASE(68,TestInvalidBackRef);
181 TESTCASE(69,TestMulticharStringSet);
182 TESTCASE(70,TestUserFunction);
183 TESTCASE(71,TestAnyX);
184 TESTCASE(72,TestSourceTargetSet);
185 TESTCASE(73,TestGurmukhiDevanagari);
186 TESTCASE(74,TestPatternWhiteSpace);
187 TESTCASE(75,TestAllCodepoints);
188 TESTCASE(76,TestBoilerplate);
189 TESTCASE(77,TestAlternateSyntax);
190 TESTCASE(78,TestBeginEnd);
191 TESTCASE(79,TestBeginEndToRules);
192 TESTCASE(80,TestRegisterAlias);
193 TESTCASE(81,TestRuleStripping);
194 TESTCASE(82,TestHalfwidthFullwidth);
195 TESTCASE(83,TestThai);
196 TESTCASE(84,TestAny);
197 default: name = ""; break;
198 }
199 }
200
201 static const UVersionInfo ICU_39 = {3,9,4,0};
202 /**
203 * Make sure every system transliterator can be instantiated.
204 *
205 * ALSO test that the result of toRules() for each rule is a valid
206 * rule. Do this here so we don't have to have another test that
207 * instantiates everything as well.
208 */
209 void TransliteratorTest::TestInstantiation() {
210 UErrorCode ec = U_ZERO_ERROR;
211 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
212 assertSuccess("getAvailableIDs()", ec);
213 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
214 int32_t n = Transliterator::countAvailableIDs();
215 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
216 avail->count(ec) == n);
217 assertSuccess("count()", ec);
218 UnicodeString name;
219 for (int32_t i=0; i<n; ++i) {
220 const UnicodeString& id = *avail->snext(ec);
221 if (!assertSuccess("snext()", ec) ||
222 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
223 break;
224 }
225 UnicodeString id2 = Transliterator::getAvailableID(i);
226 if (id.length() < 1) {
227 errln(UnicodeString("FAIL: getAvailableID(") +
228 i + ") returned empty string");
229 continue;
230 }
231 if (id != id2) {
232 errln(UnicodeString("FAIL: getAvailableID(") +
233 i + ") != getAvailableIDs().snext()");
234 continue;
235 }
236 UParseError parseError;
237 UErrorCode status = U_ZERO_ERROR;
238 Transliterator* t = Transliterator::createInstance(id,
239 UTRANS_FORWARD, parseError,status);
240 name.truncate(0);
241 Transliterator::getDisplayName(id, name);
242 if (t == 0) {
243 #if UCONFIG_NO_BREAK_ITERATION
244 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
245 if (id.compare((UnicodeString)"Thai-Latin") != 0)
246 #endif
247 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
248 /*", parse error " + parseError.code +*/
249 ", line " + parseError.line +
250 ", offset " + parseError.offset +
251 ", pre-context " + prettify(parseError.preContext, TRUE) +
252 ", post-context " +prettify(parseError.postContext,TRUE) +
253 ", Error: " + u_errorName(status));
254 // When createInstance fails, it deletes the failing
255 // entry from the available ID list. We detect this
256 // here by looking for a change in countAvailableIDs.
257 int32_t nn = Transliterator::countAvailableIDs();
258 if (nn == (n - 1)) {
259 n = nn;
260 --i; // Compensate for deleted entry
261 }
262 } else {
263 logln(UnicodeString("OK: ") + name + " (" + id + ")");
264
265 // Now test toRules
266 UnicodeString rules;
267 t->toRules(rules, TRUE);
268 Transliterator *u = Transliterator::createFromRules("x",
269 rules, UTRANS_FORWARD, parseError,status);
270 if (u == 0) {
271 errln(UnicodeString("FAIL: ") + id +
272 ".createFromRules() => bad rules" +
273 /*", parse error " + parseError.code +*/
274 ", line " + parseError.line +
275 ", offset " + parseError.offset +
276 ", context " + prettify(parseError.preContext, TRUE) +
277 ", rules: " + prettify(rules, TRUE));
278 } else {
279 delete u;
280 }
281 delete t;
282 }
283 }
284 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
285 assertSuccess("snext()", ec);
286 delete avail;
287
288 // Now test the failure path
289 UParseError parseError;
290 UErrorCode status = U_ZERO_ERROR;
291 UnicodeString id("<Not a valid Transliterator ID>");
292 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
293 if (t != 0) {
294 errln("FAIL: " + id + " returned a transliterator");
295 delete t;
296 } else {
297 logln("OK: Bogus ID handled properly");
298 }
299 }
300
301 void TransliteratorTest::TestSimpleRules(void) {
302 /* Example: rules 1. ab>x|y
303 * 2. yc>z
304 *
305 * []|eabcd start - no match, copy e to tranlated buffer
306 * [e]|abcd match rule 1 - copy output & adjust cursor
307 * [ex|y]cd match rule 2 - copy output & adjust cursor
308 * [exz]|d no match, copy d to transliterated buffer
309 * [exzd]| done
310 */
311 expect(UnicodeString("ab>x|y;", "") +
312 "yc>z",
313 "eabcd", "exzd");
314
315 /* Another set of rules:
316 * 1. ab>x|yzacw
317 * 2. za>q
318 * 3. qc>r
319 * 4. cw>n
320 *
321 * []|ab Rule 1
322 * [x|yzacw] No match
323 * [xy|zacw] Rule 2
324 * [xyq|cw] Rule 4
325 * [xyqn]| Done
326 */
327 expect(UnicodeString("ab>x|yzacw;") +
328 "za>q;" +
329 "qc>r;" +
330 "cw>n",
331 "ab", "xyqn");
332
333 /* Test categories
334 */
335 UErrorCode status = U_ZERO_ERROR;
336 UParseError parseError;
337 Transliterator *t = Transliterator::createFromRules(
338 "<ID>",
339 UnicodeString("$dummy=").append((UChar)0xE100) +
340 UnicodeString(";"
341 "$vowel=[aeiouAEIOU];"
342 "$lu=[:Lu:];"
343 "$vowel } $lu > '!';"
344 "$vowel > '&';"
345 "'!' { $lu > '^';"
346 "$lu > '*';"
347 "a > ERROR", ""),
348 UTRANS_FORWARD, parseError,
349 status);
350 if (U_FAILURE(status)) {
351 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
352 return;
353 }
354 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
355 delete t;
356 }
357
358 /**
359 * Test inline set syntax and set variable syntax.
360 */
361 void TransliteratorTest::TestInlineSet(void) {
362 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
363 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
364
365 expect(UnicodeString(
366 "$digit = [0-9];"
367 "$alpha = [a-zA-Z];"
368 "$alphanumeric = [$digit $alpha];" // ***
369 "$special = [^$alphanumeric];" // ***
370 "$alphanumeric > '-';"
371 "$special > '*';", ""),
372
373 "thx-1138", "---*----");
374 }
375
376 /**
377 * Create some inverses and confirm that they work. We have to be
378 * careful how we do this, since the inverses will not be true
379 * inverses -- we can't throw any random string at the composition
380 * of the transliterators and expect the identity function. F x
381 * F' != I. However, if we are careful about the input, we will
382 * get the expected results.
383 */
384 void TransliteratorTest::TestRuleBasedInverse(void) {
385 UnicodeString RULES =
386 UnicodeString("abc>zyx;") +
387 "ab>yz;" +
388 "bc>zx;" +
389 "ca>xy;" +
390 "a>x;" +
391 "b>y;" +
392 "c>z;" +
393
394 "abc<zyx;" +
395 "ab<yz;" +
396 "bc<zx;" +
397 "ca<xy;" +
398 "a<x;" +
399 "b<y;" +
400 "c<z;" +
401
402 "";
403
404 const char* DATA[] = {
405 // Careful here -- random strings will not work. If we keep
406 // the left side to the domain and the right side to the range
407 // we will be okay though (left, abc; right xyz).
408 "a", "x",
409 "abcacab", "zyxxxyy",
410 "caccb", "xyzzy",
411 };
412
413 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
414
415 UErrorCode status = U_ZERO_ERROR;
416 UParseError parseError;
417 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
418 UTRANS_FORWARD, parseError, status);
419 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
420 UTRANS_REVERSE, parseError, status);
421 if (U_FAILURE(status)) {
422 errln("FAIL: RBT constructor failed");
423 return;
424 }
425 for (int32_t i=0; i<DATA_length; i+=2) {
426 expect(*fwd, DATA[i], DATA[i+1]);
427 expect(*rev, DATA[i+1], DATA[i]);
428 }
429 delete fwd;
430 delete rev;
431 }
432
433 /**
434 * Basic test of keyboard.
435 */
436 void TransliteratorTest::TestKeyboard(void) {
437 UParseError parseError;
438 UErrorCode status = U_ZERO_ERROR;
439 Transliterator *t = Transliterator::createFromRules("<ID>",
440 UnicodeString("psch>Y;")
441 +"ps>y;"
442 +"ch>x;"
443 +"a>A;",
444 UTRANS_FORWARD, parseError,
445 status);
446 if (U_FAILURE(status)) {
447 errln("FAIL: RBT constructor failed");
448 return;
449 }
450 const char* DATA[] = {
451 // insertion, buffer
452 "a", "A",
453 "p", "Ap",
454 "s", "Aps",
455 "c", "Apsc",
456 "a", "AycA",
457 "psch", "AycAY",
458 0, "AycAY", // null means finishKeyboardTransliteration
459 };
460
461 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
462 delete t;
463 }
464
465 /**
466 * Basic test of keyboard with cursor.
467 */
468 void TransliteratorTest::TestKeyboard2(void) {
469 UParseError parseError;
470 UErrorCode status = U_ZERO_ERROR;
471 Transliterator *t = Transliterator::createFromRules("<ID>",
472 UnicodeString("ych>Y;")
473 +"ps>|y;"
474 +"ch>x;"
475 +"a>A;",
476 UTRANS_FORWARD, parseError,
477 status);
478 if (U_FAILURE(status)) {
479 errln("FAIL: RBT constructor failed");
480 return;
481 }
482 const char* DATA[] = {
483 // insertion, buffer
484 "a", "A",
485 "p", "Ap",
486 "s", "Aps", // modified for rollback - "Ay",
487 "c", "Apsc", // modified for rollback - "Ayc",
488 "a", "AycA",
489 "p", "AycAp",
490 "s", "AycAps", // modified for rollback - "AycAy",
491 "c", "AycApsc", // modified for rollback - "AycAyc",
492 "h", "AycAY",
493 0, "AycAY", // null means finishKeyboardTransliteration
494 };
495
496 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
497 delete t;
498 }
499
500 /**
501 * Test keyboard transliteration with back-replacement.
502 */
503 void TransliteratorTest::TestKeyboard3(void) {
504 // We want th>z but t>y. Furthermore, during keyboard
505 // transliteration we want t>y then yh>z if t, then h are
506 // typed.
507 UnicodeString RULES("t>|y;"
508 "yh>z;");
509
510 const char* DATA[] = {
511 // Column 1: characters to add to buffer (as if typed)
512 // Column 2: expected appearance of buffer after
513 // keyboard xliteration.
514 "a", "a",
515 "b", "ab",
516 "t", "abt", // modified for rollback - "aby",
517 "c", "abyc",
518 "t", "abyct", // modified for rollback - "abycy",
519 "h", "abycz",
520 0, "abycz", // null means finishKeyboardTransliteration
521 };
522
523 UParseError parseError;
524 UErrorCode status = U_ZERO_ERROR;
525 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
526 if (U_FAILURE(status)) {
527 errln("FAIL: RBT constructor failed");
528 return;
529 }
530 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
531 delete t;
532 }
533
534 void TransliteratorTest::keyboardAux(const Transliterator& t,
535 const char* DATA[], int32_t DATA_length) {
536 UErrorCode status = U_ZERO_ERROR;
537 UTransPosition index={0, 0, 0, 0};
538 UnicodeString s;
539 for (int32_t i=0; i<DATA_length; i+=2) {
540 UnicodeString log;
541 if (DATA[i] != 0) {
542 log = s + " + "
543 + DATA[i]
544 + " -> ";
545 t.transliterate(s, index, DATA[i], status);
546 } else {
547 log = s + " => ";
548 t.finishTransliteration(s, index);
549 }
550 // Show the start index '{' and the cursor '|'
551 UnicodeString a, b, c;
552 s.extractBetween(0, index.contextStart, a);
553 s.extractBetween(index.contextStart, index.start, b);
554 s.extractBetween(index.start, s.length(), c);
555 log.append(a).
556 append((UChar)LEFT_BRACE).
557 append(b).
558 append((UChar)PIPE).
559 append(c);
560 if (s == DATA[i+1] && U_SUCCESS(status)) {
561 logln(log);
562 } else {
563 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
564 }
565 }
566 }
567
568 void TransliteratorTest::TestArabic(void) {
569 // Test disabled for 2.0 until new Arabic transliterator can be written.
570 // /*
571 // const char* DATA[] = {
572 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
573 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
574 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
575 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
576 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
577 // "\u062c\u0645\u064a\u0644\u0629",
578 // };
579 // */
580 //
581 // UChar ar_raw[] = {
582 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
583 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
584 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
585 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
586 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
588 // };
589 // UnicodeString ar(ar_raw);
590 // UErrorCode status=U_ZERO_ERROR;
591 // UParseError parseError;
592 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
593 // if (t == 0) {
594 // errln("FAIL: createInstance failed");
595 // return;
596 // }
597 // expect(*t, "Arabic", ar);
598 // delete t;
599 }
600
601 /**
602 * Compose the Kana transliterator forward and reverse and try
603 * some strings that should come out unchanged.
604 */
605 void TransliteratorTest::TestCompoundKana(void) {
606 UParseError parseError;
607 UErrorCode status = U_ZERO_ERROR;
608 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
609 if (t == 0) {
610 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
611 } else {
612 expect(*t, "aaaaa", "aaaaa");
613 delete t;
614 }
615 }
616
617 /**
618 * Compose the hex transliterators forward and reverse.
619 */
620 void TransliteratorTest::TestCompoundHex(void) {
621 UParseError parseError;
622 UErrorCode status = U_ZERO_ERROR;
623 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
624 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
625 Transliterator* transab[] = { a, b };
626 Transliterator* transba[] = { b, a };
627 if (a == 0 || b == 0) {
628 errln("FAIL: construction failed");
629 delete a;
630 delete b;
631 return;
632 }
633 // Do some basic tests of a
634 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
635 // Do some basic tests of b
636 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
637
638 Transliterator* ab = new CompoundTransliterator(transab, 2);
639 UnicodeString s("abcde", "");
640 expect(*ab, s, s);
641
642 UnicodeString str(s);
643 a->transliterate(str);
644 Transliterator* ba = new CompoundTransliterator(transba, 2);
645 expect(*ba, str, str);
646
647 delete ab;
648 delete ba;
649 delete a;
650 delete b;
651 }
652
653 int gTestFilterClassID = 0;
654 /**
655 * Used by TestFiltering().
656 */
657 class TestFilter : public UnicodeFilter {
658 virtual UnicodeFunctor* clone() const {
659 return new TestFilter(*this);
660 }
661 virtual UBool contains(UChar32 c) const {
662 return c != (UChar)0x0063 /*c*/;
663 }
664 // Stubs
665 virtual UnicodeString& toPattern(UnicodeString& result,
666 UBool /*escapeUnprintable*/) const {
667 return result;
668 }
669 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
670 return FALSE;
671 }
672 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
673 public:
674 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
675 };
676
677 /**
678 * Do some basic tests of filtering.
679 */
680 void TransliteratorTest::TestFiltering(void) {
681 UParseError parseError;
682 UErrorCode status = U_ZERO_ERROR;
683 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
684 if (hex == 0) {
685 errln("FAIL: createInstance(Any-Hex) failed");
686 return;
687 }
688 hex->adoptFilter(new TestFilter());
689 UnicodeString s("abcde");
690 hex->transliterate(s);
691 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
692 if (s == exp) {
693 logln(UnicodeString("Ok: \"") + exp + "\"");
694 } else {
695 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
696 }
697
698 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
699 UnicodeFilter *f = hex->orphanFilter();
700 if (f == NULL){
701 errln("FAIL: orphanFilter() should get a UnicodeFilter");
702 } else {
703 delete f;
704 }
705 delete hex;
706 }
707
708 /**
709 * Test anchors
710 */
711 void TransliteratorTest::TestAnchors(void) {
712 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
713 "aaa",
714 "012");
715 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
716 "aaa",
717 "012");
718 expect(UnicodeString("^ab > 01 ;"
719 " ab > |8 ;"
720 " b > k ;"
721 " 8x$ > 45 ;"
722 " 8x > 77 ;", ""),
723
724 "ababbabxabx",
725 "018k7745");
726 expect(UnicodeString("$s = [z$] ;"
727 "$s{ab > 01 ;"
728 " ab > |8 ;"
729 " b > k ;"
730 " 8x}$s > 45 ;"
731 " 8x > 77 ;", ""),
732
733 "abzababbabxzabxabx",
734 "01z018k45z01x45");
735 }
736
737 /**
738 * Test pattern quoting and escape mechanisms.
739 */
740 void TransliteratorTest::TestPatternQuoting(void) {
741 // Array of 3n items
742 // Each item is <rules>, <input>, <expected output>
743 const UnicodeString DATA[] = {
744 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
745 UnicodeString(UChar(0x4E01)),
746 "[male adult]"
747 };
748
749 for (int32_t i=0; i<3; i+=3) {
750 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
751 UParseError parseError;
752 UErrorCode status = U_ZERO_ERROR;
753 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
754 if (U_FAILURE(status)) {
755 errln("RBT constructor failed");
756 } else {
757 expect(*t, DATA[i+1], DATA[i+2]);
758 }
759 delete t;
760 }
761 }
762
763 /**
764 * Regression test for bugs found in Greek transliteration.
765 */
766 void TransliteratorTest::TestJ277(void) {
767 UErrorCode status = U_ZERO_ERROR;
768 UParseError parseError;
769 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
770 if (gl == NULL) {
771 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
772 return;
773 }
774
775 UChar sigma = 0x3C3;
776 UChar upsilon = 0x3C5;
777 UChar nu = 0x3BD;
778 // UChar PHI = 0x3A6;
779 UChar alpha = 0x3B1;
780 // UChar omega = 0x3C9;
781 // UChar omicron = 0x3BF;
782 // UChar epsilon = 0x3B5;
783
784 // sigma upsilon nu -> syn
785 UnicodeString syn;
786 syn.append(sigma).append(upsilon).append(nu);
787 expect(*gl, syn, "syn");
788
789 // sigma alpha upsilon nu -> saun
790 UnicodeString sayn;
791 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
792 expect(*gl, sayn, "saun");
793
794 // Again, using a smaller rule set
795 UnicodeString rules(
796 "$alpha = \\u03B1;"
797 "$nu = \\u03BD;"
798 "$sigma = \\u03C3;"
799 "$ypsilon = \\u03C5;"
800 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
801 "s <> $sigma;"
802 "a <> $alpha;"
803 "u <> $vowel { $ypsilon;"
804 "y <> $ypsilon;"
805 "n <> $nu;",
806 "");
807 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
808 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
809 expect(*mini, syn, "syn");
810 expect(*mini, sayn, "saun");
811 delete mini;
812 mini = NULL;
813
814 #if !UCONFIG_NO_FORMATTING
815 // Transliterate the Greek locale data
816 Locale el("el");
817 DateFormatSymbols syms(el, status);
818 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
819 int32_t i, count;
820 const UnicodeString* data = syms.getMonths(count);
821 for (i=0; i<count; ++i) {
822 if (data[i].length() == 0) {
823 continue;
824 }
825 UnicodeString out(data[i]);
826 gl->transliterate(out);
827 UBool ok = TRUE;
828 if (data[i].length() >= 2 && out.length() >= 2 &&
829 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
830 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
831 ok = FALSE;
832 }
833 }
834 if (ok) {
835 logln(prettify(data[i] + " -> " + out));
836 } else {
837 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
838 }
839 }
840 #endif
841
842 delete gl;
843 }
844
845 /**
846 * Prefix, suffix support in hex transliterators
847 */
848 void TransliteratorTest::TestJ243(void) {
849 UErrorCode ec = U_ZERO_ERROR;
850
851 // Test default Hex-Any, which should handle
852 // \u, \U, u+, and U+
853 Transliterator *hex =
854 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
855 if (assertSuccess("getInstance", ec)) {
856 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
857 }
858 delete hex;
859
860 // // Try a custom Hex-Unicode
861 // // \uXXXX and &#xXXXX;
862 // ec = U_ZERO_ERROR;
863 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
864 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
865 // "abcd5fx012&#x00033;");
866 // // Try custom Any-Hex (default is tested elsewhere)
867 // ec = U_ZERO_ERROR;
868 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
869 // expect(hex3, "012", "&#x30;&#x31;&#x32;");
870 }
871
872 /**
873 * Parsers need better syntax error messages.
874 */
875 void TransliteratorTest::TestJ329(void) {
876
877 struct { UBool containsErrors; const char* rule; } DATA[] = {
878 { FALSE, "a > b; c > d" },
879 { TRUE, "a > b; no operator; c > d" },
880 };
881 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
882
883 for (int32_t i=0; i<DATA_length; ++i) {
884 UErrorCode status = U_ZERO_ERROR;
885 UParseError parseError;
886 Transliterator *rbt = Transliterator::createFromRules("<ID>",
887 DATA[i].rule,
888 UTRANS_FORWARD,
889 parseError,
890 status);
891 UBool gotError = U_FAILURE(status);
892 UnicodeString desc(DATA[i].rule);
893 desc.append(gotError ? " -> error" : " -> no error");
894 if (gotError) {
895 desc = desc + ", ParseError code=" + u_errorName(status) +
896 " line=" + parseError.line +
897 " offset=" + parseError.offset +
898 " context=" + parseError.preContext;
899 }
900 if (gotError == DATA[i].containsErrors) {
901 logln(UnicodeString("Ok: ") + desc);
902 } else {
903 errln(UnicodeString("FAIL: ") + desc);
904 }
905 delete rbt;
906 }
907 }
908
909 /**
910 * Test segments and segment references.
911 */
912 void TransliteratorTest::TestSegments(void) {
913 // Array of 3n items
914 // Each item is <rules>, <input>, <expected output>
915 UnicodeString DATA[] = {
916 "([a-z]) '.' ([0-9]) > $2 '-' $1",
917 "abc.123.xyz.456",
918 "ab1-c23.xy4-z56",
919
920 // nested
921 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
922 "a1 b2",
923 "a1.a.1 b2.b.2",
924 };
925 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
926
927 for (int32_t i=0; i<DATA_length; i+=3) {
928 logln("Pattern: " + prettify(DATA[i]));
929 UParseError parseError;
930 UErrorCode status = U_ZERO_ERROR;
931 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
932 if (U_FAILURE(status)) {
933 errln("FAIL: RBT constructor");
934 } else {
935 expect(*t, DATA[i+1], DATA[i+2]);
936 }
937 delete t;
938 }
939 }
940
941 /**
942 * Test cursor positioning outside of the key
943 */
944 void TransliteratorTest::TestCursorOffset(void) {
945 // Array of 3n items
946 // Each item is <rules>, <input>, <expected output>
947 UnicodeString DATA[] = {
948 "pre {alpha} post > | @ ALPHA ;"
949 "eALPHA > beta ;"
950 "pre {beta} post > BETA @@ | ;"
951 "post > xyz",
952
953 "prealphapost prebetapost",
954
955 "prbetaxyz preBETApost",
956 };
957 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
958
959 for (int32_t i=0; i<DATA_length; i+=3) {
960 logln("Pattern: " + prettify(DATA[i]));
961 UParseError parseError;
962 UErrorCode status = U_ZERO_ERROR;
963 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
964 if (U_FAILURE(status)) {
965 errln("FAIL: RBT constructor");
966 } else {
967 expect(*t, DATA[i+1], DATA[i+2]);
968 }
969 delete t;
970 }
971 }
972
973 /**
974 * Test zero length and > 1 char length variable values. Test
975 * use of variable refs in UnicodeSets.
976 */
977 void TransliteratorTest::TestArbitraryVariableValues(void) {
978 // Array of 3n items
979 // Each item is <rules>, <input>, <expected output>
980 UnicodeString DATA[] = {
981 "$abe = ab;"
982 "$pat = x[yY]z;"
983 "$ll = 'a-z';"
984 "$llZ = [$ll];"
985 "$llY = [$ll$pat];"
986 "$emp = ;"
987
988 "$abe > ABE;"
989 "$pat > END;"
990 "$llZ > 1;"
991 "$llY > 2;"
992 "7$emp 8 > 9;"
993 "",
994
995 "ab xYzxyz stY78",
996 "ABE ENDEND 1129",
997 };
998 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
999
1000 for (int32_t i=0; i<DATA_length; i+=3) {
1001 logln("Pattern: " + prettify(DATA[i]));
1002 UParseError parseError;
1003 UErrorCode status = U_ZERO_ERROR;
1004 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1005 if (U_FAILURE(status)) {
1006 errln("FAIL: RBT constructor");
1007 } else {
1008 expect(*t, DATA[i+1], DATA[i+2]);
1009 }
1010 delete t;
1011 }
1012 }
1013
1014 /**
1015 * Confirm that the contextStart, contextLimit, start, and limit
1016 * behave correctly. J474.
1017 */
1018 void TransliteratorTest::TestPositionHandling(void) {
1019 // Array of 3n items
1020 // Each item is <rules>, <input>, <expected output>
1021 const char* DATA[] = {
1022 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1023 "xtat txtb", // pos 0,9,0,9
1024 "xTTaSS TTxUUb",
1025
1026 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1027 "xtat txtb", // pos 2,9,3,8
1028 "xtaSS TTxUUb",
1029
1030 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1031 "xtat txtb", // pos 3,8,3,8
1032 "xtaTT TTxTTb",
1033 };
1034
1035 // Array of 4n positions -- these go with the DATA array
1036 // They are: contextStart, contextLimit, start, limit
1037 int32_t POS[] = {
1038 0, 9, 0, 9,
1039 2, 9, 3, 8,
1040 3, 8, 3, 8,
1041 };
1042
1043 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1044 for (int32_t i=0; i<n; i++) {
1045 UErrorCode status = U_ZERO_ERROR;
1046 UParseError parseError;
1047 Transliterator *t = Transliterator::createFromRules("<ID>",
1048 DATA[3*i], UTRANS_FORWARD, parseError, status);
1049 if (U_FAILURE(status)) {
1050 delete t;
1051 errln("FAIL: RBT constructor");
1052 return;
1053 }
1054 UTransPosition pos;
1055 pos.contextStart= POS[4*i];
1056 pos.contextLimit = POS[4*i+1];
1057 pos.start = POS[4*i+2];
1058 pos.limit = POS[4*i+3];
1059 UnicodeString rsource(DATA[3*i+1]);
1060 t->transliterate(rsource, pos, status);
1061 if (U_FAILURE(status)) {
1062 delete t;
1063 errln("FAIL: transliterate");
1064 return;
1065 }
1066 t->finishTransliteration(rsource, pos);
1067 expectAux(DATA[3*i],
1068 DATA[3*i+1],
1069 rsource,
1070 DATA[3*i+2]);
1071 delete t;
1072 }
1073 }
1074
1075 /**
1076 * Test the Hiragana-Katakana transliterator.
1077 */
1078 void TransliteratorTest::TestHiraganaKatakana(void) {
1079 UParseError parseError;
1080 UErrorCode status = U_ZERO_ERROR;
1081 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1082 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1083 if (hk == 0 || kh == 0) {
1084 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1085 delete hk;
1086 delete kh;
1087 return;
1088 }
1089
1090 // Array of 3n items
1091 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1092 const char* DATA[] = {
1093 "both",
1094 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1095 "\\u30A2\\u30F8\\u30F2\\u30B0",
1096
1097 "kh",
1098 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1099 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1100 };
1101 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1102
1103 for (int32_t i=0; i<DATA_length; i+=3) {
1104 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1105 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1106 switch (*DATA[i]) {
1107 case 0x68: //'h': // Hiragana-Katakana
1108 expect(*hk, h, k);
1109 break;
1110 case 0x6B: //'k': // Katakana-Hiragana
1111 expect(*kh, k, h);
1112 break;
1113 case 0x62: //'b': // both
1114 expect(*hk, h, k);
1115 expect(*kh, k, h);
1116 break;
1117 }
1118 }
1119 delete hk;
1120 delete kh;
1121 }
1122
1123 /**
1124 * Test cloning / copy constructor of RBT.
1125 */
1126 void TransliteratorTest::TestCopyJ476(void) {
1127 // The real test here is what happens when the destructors are
1128 // called. So we let one object get destructed, and check to
1129 // see that its copy still works.
1130 Transliterator *t2 = 0;
1131 {
1132 UParseError parseError;
1133 UErrorCode status = U_ZERO_ERROR;
1134 Transliterator *t1 = Transliterator::createFromRules("t1",
1135 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1136 if (U_FAILURE(status)) {
1137 errln("FAIL: RBT constructor");
1138 return;
1139 }
1140 t2 = t1->clone(); // Call copy constructor under the covers.
1141 expect(*t1, "abcfoofoo", "ABcbar");
1142 delete t1;
1143 }
1144 expect(*t2, "abcfoofoo", "ABcbar");
1145 delete t2;
1146 }
1147
1148 /**
1149 * Test inter-Indic transliterators. These are composed.
1150 * ICU4C Jitterbug 483.
1151 */
1152 void TransliteratorTest::TestInterIndic(void) {
1153 UnicodeString ID("Devanagari-Gujarati", "");
1154 UErrorCode status = U_ZERO_ERROR;
1155 UParseError parseError;
1156 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1157 if (dg == 0) {
1158 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1159 return;
1160 }
1161 UnicodeString id = dg->getID();
1162 if (id != ID) {
1163 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1164 }
1165 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1166 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1167 expect(*dg, dev, guj);
1168 delete dg;
1169 }
1170
1171 /**
1172 * Test filter syntax in IDs. (J918)
1173 */
1174 void TransliteratorTest::TestFilterIDs(void) {
1175 // Array of 3n strings:
1176 // <id>, <inverse id>, <input>, <expected output>
1177 const char* DATA[] = {
1178 "[aeiou]Any-Hex", // ID
1179 "[aeiou]Hex-Any", // expected inverse ID
1180 "quizzical", // src
1181 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1182
1183 "[aeiou]Any-Hex;[^5]Hex-Any",
1184 "[^5]Any-Hex;[aeiou]Hex-Any",
1185 "quizzical",
1186 "q\\u0075izzical",
1187
1188 "[abc]Null",
1189 "[abc]Null",
1190 "xyz",
1191 "xyz",
1192 };
1193 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1194
1195 for (int i=0; i<DATA_length; i+=4) {
1196 UnicodeString ID(DATA[i], "");
1197 UnicodeString uID(DATA[i+1], "");
1198 UnicodeString data2(DATA[i+2], "");
1199 UnicodeString data3(DATA[i+3], "");
1200 UParseError parseError;
1201 UErrorCode status = U_ZERO_ERROR;
1202 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1203 if (t == 0) {
1204 errln("FAIL: createInstance(" + ID + ") returned NULL");
1205 return;
1206 }
1207 expect(*t, data2, data3);
1208
1209 // Check the ID
1210 if (ID != t->getID()) {
1211 errln("FAIL: createInstance(" + ID + ").getID() => " +
1212 t->getID());
1213 }
1214
1215 // Check the inverse
1216 Transliterator *u = t->createInverse(status);
1217 if (u == 0) {
1218 errln("FAIL: " + ID + ".createInverse() returned NULL");
1219 } else if (u->getID() != uID) {
1220 errln("FAIL: " + ID + ".createInverse().getID() => " +
1221 u->getID() + ", expected " + uID);
1222 }
1223
1224 delete t;
1225 delete u;
1226 }
1227 }
1228
1229 /**
1230 * Test the case mapping transliterators.
1231 */
1232 void TransliteratorTest::TestCaseMap(void) {
1233 UParseError parseError;
1234 UErrorCode status = U_ZERO_ERROR;
1235 Transliterator* toUpper =
1236 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1237 Transliterator* toLower =
1238 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239 Transliterator* toTitle =
1240 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241 if (toUpper==0 || toLower==0 || toTitle==0) {
1242 errln("FAIL: createInstance returned NULL");
1243 delete toUpper;
1244 delete toLower;
1245 delete toTitle;
1246 return;
1247 }
1248
1249 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1250 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1251 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1252 "the quick brown foX jumped over the lazY dogs.");
1253 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1254 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1255
1256 delete toUpper;
1257 delete toLower;
1258 delete toTitle;
1259 }
1260
1261 /**
1262 * Test the name mapping transliterators.
1263 */
1264 void TransliteratorTest::TestNameMap(void) {
1265 UParseError parseError;
1266 UErrorCode status = U_ZERO_ERROR;
1267 Transliterator* uni2name =
1268 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1269 Transliterator* name2uni =
1270 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1271 if (uni2name==0 || name2uni==0) {
1272 errln("FAIL: createInstance returned NULL");
1273 delete uni2name;
1274 delete name2uni;
1275 return;
1276 }
1277
1278 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1279 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1280 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1281 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1282 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1283
1284 delete uni2name;
1285 delete name2uni;
1286
1287 // round trip
1288 Transliterator* t =
1289 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1290 if (t==0) {
1291 errln("FAIL: createInstance returned NULL");
1292 delete t;
1293 return;
1294 }
1295
1296 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1297 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1298 expect(*t, s, s);
1299 delete t;
1300 }
1301
1302 /**
1303 * Test liberalized ID syntax. 1006c
1304 */
1305 void TransliteratorTest::TestLiberalizedID(void) {
1306 // Some test cases have an expected getID() value of NULL. This
1307 // means I have disabled the test case for now. This stuff is
1308 // still under development, and I haven't decided whether to make
1309 // getID() return canonical case yet. It will all get rewritten
1310 // with the move to Source-Target/Variant IDs anyway. [aliu]
1311 const char* DATA[] = {
1312 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1313 " Null ", "Null", "whitespace",
1314 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1315 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1316 };
1317 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1318 UParseError parseError;
1319 UErrorCode status= U_ZERO_ERROR;
1320 for (int32_t i=0; i<DATA_length; i+=3) {
1321 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1322 if (t == 0) {
1323 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1324 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1325 } else {
1326 UnicodeString exp;
1327 if (DATA[i+1]) {
1328 exp = UnicodeString(DATA[i+1], "");
1329 }
1330 // Don't worry about getID() if the expected char*
1331 // is NULL -- see above.
1332 if (exp.length() == 0 || exp == t->getID()) {
1333 logln(UnicodeString("Ok: ") + DATA[i+2] +
1334 " create ID \"" + DATA[i] + "\" => \"" +
1335 exp + "\"");
1336 } else {
1337 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1338 " create ID \"" + DATA[i] + "\" => \"" +
1339 t->getID() + "\", exp \"" + exp + "\"");
1340 }
1341 delete t;
1342 }
1343 }
1344 }
1345
1346 /* test for Jitterbug 912 */
1347 void TransliteratorTest::TestCreateInstance(){
1348 const char* FORWARD = "F";
1349 const char* REVERSE = "R";
1350 const char* DATA[] = {
1351 // Column 1: id
1352 // Column 2: direction
1353 // Column 3: expected ID, or "" if expect failure
1354 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1355
1356 // JB#2689: bad compound causes crash
1357 "InvalidSource-InvalidTarget", FORWARD, "",
1358 "InvalidSource-InvalidTarget", REVERSE, "",
1359 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1360 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1361 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1362 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1363
1364 NULL
1365 };
1366
1367 for (int32_t i=0; DATA[i]; i+=3) {
1368 UParseError err;
1369 UErrorCode ec = U_ZERO_ERROR;
1370 UnicodeString id(DATA[i]);
1371 UTransDirection dir = (DATA[i+1]==FORWARD)?
1372 UTRANS_FORWARD:UTRANS_REVERSE;
1373 UnicodeString expID(DATA[i+2]);
1374 Transliterator* t =
1375 Transliterator::createInstance(id,dir,err,ec);
1376 UnicodeString newID;
1377 if (t) {
1378 newID = t->getID();
1379 }
1380 UBool ok = (newID == expID);
1381 if (!t) {
1382 newID = u_errorName(ec);
1383 }
1384 if (ok) {
1385 logln((UnicodeString)"Ok: createInstance(" +
1386 id + "," + DATA[i+1] + ") => " + newID);
1387 } else {
1388 dataerrln((UnicodeString)"FAIL: createInstance(" +
1389 id + "," + DATA[i+1] + ") => " + newID +
1390 ", expected " + expID);
1391 }
1392 delete t;
1393 }
1394 }
1395
1396 /**
1397 * Test the normalization transliterator.
1398 */
1399 void TransliteratorTest::TestNormalizationTransliterator() {
1400 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1401 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1402 const char* CANON[] = {
1403 // Input Decomposed Composed
1404 "cat", "cat", "cat" ,
1405 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1406
1407 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1408 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1409
1410 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1411 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1412 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1413
1414 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1415 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1416
1417 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1418 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1419 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1420
1421 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1422 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1423
1424 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1425 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1426
1427 "Henry IV", "Henry IV", "Henry IV" ,
1428 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1429
1430 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1431 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1432 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1433 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1434 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1435
1436 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1437 0 // end
1438 };
1439
1440 const char* COMPAT[] = {
1441 // Input Decomposed Composed
1442 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1443
1444 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1445 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1446
1447 "Henry IV", "Henry IV", "Henry IV" ,
1448 "Henry \\u2163", "Henry IV", "Henry IV" ,
1449
1450 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1451 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1452
1453 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1454 0 // end
1455 };
1456
1457 int32_t i;
1458 UParseError parseError;
1459 UErrorCode status = U_ZERO_ERROR;
1460 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1461 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1462 if (!NFD || !NFC) {
1463 dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1464 delete NFD;
1465 delete NFC;
1466 return;
1467 }
1468 for (i=0; CANON[i]; i+=3) {
1469 UnicodeString in = CharsToUnicodeString(CANON[i]);
1470 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1471 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1472 expect(*NFD, in, expd);
1473 expect(*NFC, in, expc);
1474 }
1475 delete NFD;
1476 delete NFC;
1477
1478 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1479 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1480 if (!NFKD || !NFKC) {
1481 errln("FAIL: createInstance failed");
1482 delete NFKD;
1483 delete NFKC;
1484 return;
1485 }
1486 for (i=0; COMPAT[i]; i+=3) {
1487 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1488 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1489 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1490 expect(*NFKD, in, expkd);
1491 expect(*NFKC, in, expkc);
1492 }
1493 delete NFKD;
1494 delete NFKC;
1495
1496 UParseError pe;
1497 status = U_ZERO_ERROR;
1498 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1499 UTRANS_FORWARD,
1500 pe, status);
1501 if (t == 0) {
1502 errln("FAIL: createInstance failed");
1503 }
1504 expect(*t, CharsToUnicodeString("\\u010dx"),
1505 CharsToUnicodeString("c\\u030C"));
1506 delete t;
1507 }
1508
1509 /**
1510 * Test compound RBT rules.
1511 */
1512 void TransliteratorTest::TestCompoundRBT(void) {
1513 // Careful with spacing and ';' here: Phrase this exactly
1514 // as toRules() is going to return it. If toRules() changes
1515 // with regard to spacing or ';', then adjust this string.
1516 UnicodeString rule("::Hex-Any;\n"
1517 "::Any-Lower;\n"
1518 "a > '.A.';\n"
1519 "b > '.B.';\n"
1520 "::[^t]Any-Upper;", "");
1521 UParseError parseError;
1522 UErrorCode status = U_ZERO_ERROR;
1523 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1524 if (t == 0) {
1525 errln("FAIL: createFromRules failed");
1526 return;
1527 }
1528 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1529 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1530 UnicodeString r;
1531 t->toRules(r, TRUE);
1532 if (r == rule) {
1533 logln((UnicodeString)"OK: toRules() => " + r);
1534 } else {
1535 errln((UnicodeString)"FAIL: toRules() => " + r +
1536 ", expected " + rule);
1537 }
1538 delete t;
1539
1540 // Now test toRules
1541 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1542 if (t == 0) {
1543 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1544 return;
1545 }
1546 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1547 t->toRules(r, TRUE);
1548 if (r != exp) {
1549 errln((UnicodeString)"FAIL: toRules() => " + r +
1550 ", expected " + exp);
1551 } else {
1552 logln((UnicodeString)"OK: toRules() => " + r);
1553 }
1554 delete t;
1555
1556 // Round trip the result of toRules
1557 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1558 if (t == 0) {
1559 errln("FAIL: createFromRules #2 failed");
1560 return;
1561 } else {
1562 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1563 }
1564
1565 // Test toRules again
1566 t->toRules(r, TRUE);
1567 if (r != exp) {
1568 errln((UnicodeString)"FAIL: toRules() => " + r +
1569 ", expected " + exp);
1570 } else {
1571 logln((UnicodeString)"OK: toRules() => " + r);
1572 }
1573
1574 delete t;
1575
1576 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1577 // to what the regenerated ID will look like.
1578 UnicodeString id("Upper(Lower);(NFKC)", "");
1579 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1580 if (t == 0) {
1581 errln("FAIL: createInstance #2 failed");
1582 return;
1583 }
1584 if (t->getID() == id) {
1585 logln((UnicodeString)"OK: created " + id);
1586 } else {
1587 errln((UnicodeString)"FAIL: createInstance(" + id +
1588 ").getID() => " + t->getID());
1589 }
1590
1591 Transliterator *u = t->createInverse(status);
1592 if (u == 0) {
1593 errln("FAIL: createInverse failed");
1594 delete t;
1595 return;
1596 }
1597 exp = "NFKC();Lower(Upper)";
1598 if (u->getID() == exp) {
1599 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1600 u->getID());
1601 } else {
1602 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1603 u->getID());
1604 }
1605 delete t;
1606 delete u;
1607 }
1608
1609 /**
1610 * Compound filter semantics were orginially not implemented
1611 * correctly. Originally, each component filter f(i) is replaced by
1612 * f'(i) = f(i) && g, where g is the filter for the compound
1613 * transliterator.
1614 *
1615 * From Mark:
1616 *
1617 * Suppose and I have a transliterator X. Internally X is
1618 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1619 *
1620 * The compound should convert all greek characters (through latin) to
1621 * cyrillic, then lowercase the result. The filter should say "don't
1622 * touch 'A' in the original". But because an intermediate result
1623 * happens to go through "A", the Greek Alpha gets hung up.
1624 */
1625 void TransliteratorTest::TestCompoundFilter(void) {
1626 UParseError parseError;
1627 UErrorCode status = U_ZERO_ERROR;
1628 Transliterator *t = Transliterator::createInstance
1629 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1630 if (t == 0) {
1631 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1632 return;
1633 }
1634 t->adoptFilter(new UnicodeSet("[^A]", status));
1635 if (U_FAILURE(status)) {
1636 errln("FAIL: UnicodeSet ct failed");
1637 delete t;
1638 return;
1639 }
1640
1641 // Only the 'A' at index 1 should remain unchanged
1642 expect(*t,
1643 CharsToUnicodeString("BA\\u039A\\u0391"),
1644 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1645 delete t;
1646 }
1647
1648 void TransliteratorTest::TestRemove(void) {
1649 UParseError parseError;
1650 UErrorCode status = U_ZERO_ERROR;
1651 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1652 if (t == 0) {
1653 errln("FAIL: createInstance failed");
1654 return;
1655 }
1656
1657 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1658
1659 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1660 // duplicating the filter
1661 Transliterator* t2 = t->clone();
1662 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1663
1664 delete t;
1665 delete t2;
1666 }
1667
1668 void TransliteratorTest::TestToRules(void) {
1669 const char* RBT = "rbt";
1670 const char* SET = "set";
1671 static const char* DATA[] = {
1672 RBT,
1673 "$a=\\u4E61; [$a] > A;",
1674 "[\\u4E61] > A;",
1675
1676 RBT,
1677 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1678 "[[:Zs:][:Zl:]]{a} > A;",
1679
1680 SET,
1681 "[[:Zs:][:Zl:]]",
1682 "[[:Zs:][:Zl:]]",
1683
1684 SET,
1685 "[:Ps:]",
1686 "[:Ps:]",
1687
1688 SET,
1689 "[:L:]",
1690 "[:L:]",
1691
1692 SET,
1693 "[[:L:]-[A]]",
1694 "[[:L:]-[A]]",
1695
1696 SET,
1697 "[~[:Lu:][:Ll:]]",
1698 "[~[:Lu:][:Ll:]]",
1699
1700 SET,
1701 "[~[a-z]]",
1702 "[~[a-z]]",
1703
1704 RBT,
1705 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1706 "[^[:Zs:]]{a} > A;",
1707
1708 RBT,
1709 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1710 "[[a-z]-[:Zs:]]{a} > A;",
1711
1712 RBT,
1713 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1714 "[[:Zs:]&[a-z]]{a} > A;",
1715
1716 RBT,
1717 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1718 "[x[:Zs:]]{a} > A;",
1719
1720 RBT,
1721 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1722 "$macron = \\u0304 ;"
1723 "$evowel = [aeiouyAEIOUY] ;"
1724 "$iotasub = \\u0345 ;"
1725 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1726 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1727
1728 RBT,
1729 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1731 };
1732 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1733
1734 for (int32_t d=0; d < DATA_length; d+=3) {
1735 if (DATA[d] == RBT) {
1736 // Transliterator test
1737 UParseError parseError;
1738 UErrorCode status = U_ZERO_ERROR;
1739 Transliterator *t = Transliterator::createFromRules("ID",
1740 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1741 if (t == 0) {
1742 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1743 return;
1744 }
1745 UnicodeString rules, escapedRules;
1746 t->toRules(rules, FALSE);
1747 t->toRules(escapedRules, TRUE);
1748 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1749 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1750 if (rules == expRules) {
1751 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1752 " => " + rules);
1753 } else {
1754 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1755 " => " + rules + ", exp " + expRules);
1756 }
1757 if (escapedRules == expEscapedRules) {
1758 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1759 " => " + escapedRules);
1760 } else {
1761 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1762 " => " + escapedRules + ", exp " + expEscapedRules);
1763 }
1764 delete t;
1765
1766 } else {
1767 // UnicodeSet test
1768 UErrorCode status = U_ZERO_ERROR;
1769 UnicodeString pat(DATA[d+1], -1, US_INV);
1770 UnicodeString expToPat(DATA[d+2], -1, US_INV);
1771 UnicodeSet set(pat, status);
1772 if (U_FAILURE(status)) {
1773 errln("FAIL: UnicodeSet ct failed");
1774 return;
1775 }
1776 // Adjust spacing etc. as necessary.
1777 UnicodeString toPat;
1778 set.toPattern(toPat);
1779 if (expToPat == toPat) {
1780 logln((UnicodeString)"Ok: " + pat +
1781 " => " + toPat);
1782 } else {
1783 errln((UnicodeString)"FAIL: " + pat +
1784 " => " + prettify(toPat, TRUE) +
1785 ", exp " + prettify(pat, TRUE));
1786 }
1787 }
1788 }
1789 }
1790
1791 void TransliteratorTest::TestContext() {
1792 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1793 expect("de > x; {d}e > y;",
1794 "de",
1795 "ye",
1796 &pos);
1797
1798 expect("ab{c} > z;",
1799 "xadabdabcy",
1800 "xadabdabzy");
1801 }
1802
1803 void TransliteratorTest::TestSupplemental() {
1804
1805 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1806 "a > $a; $s > i;"),
1807 CharsToUnicodeString("ab\\U0001030Fx"),
1808 CharsToUnicodeString("\\U00010300bix"));
1809
1810 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1811 "$b=[A-Z\\U00010400-\\U0001044D];"
1812 "($a)($b) > $2 $1;"),
1813 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1814 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1815
1816 // k|ax\\U00010300xm
1817
1818 // k|a\\U00010400\\U00010300xm
1819 // ky|\\U00010400\\U00010300xm
1820 // ky\\U00010400|\\U00010300xm
1821
1822 // ky\\U00010400|\\U00010300\\U00010400m
1823 // ky\\U00010400y|\\U00010400m
1824 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1825 "$a {x} > | @ \\U00010400;"
1826 "{$a} [^\\u0000-\\uFFFF] > y;"),
1827 CharsToUnicodeString("kax\\U00010300xm"),
1828 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1829
1830 expectT("Any-Name",
1831 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1832 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1833
1834 expectT("Any-Hex/Unicode",
1835 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1837
1838 expectT("Any-Hex/C",
1839 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1841
1842 expectT("Any-Hex/Perl",
1843 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1845
1846 expectT("Any-Hex/Java",
1847 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1849
1850 expectT("Any-Hex/XML",
1851 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852 "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1853
1854 expectT("Any-Hex/XML10",
1855 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1856 "&#66352;&#1113856;&#917601;&#160;");
1857
1858 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1859 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1860 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1861 }
1862
1863 void TransliteratorTest::TestQuantifier() {
1864
1865 // Make sure @ in a quantified anteContext works
1866 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1867 "AAAAAb",
1868 "aaa(aac)");
1869
1870 // Make sure @ in a quantified postContext works
1871 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1872 "baaaaa",
1873 "caa(aaa)");
1874
1875 // Make sure @ in a quantified postContext with seg ref works
1876 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1877 "baaaaa",
1878 "baa(aaa)");
1879
1880 // Make sure @ past ante context doesn't enter ante context
1881 UTransPosition pos = {0, 5, 3, 5};
1882 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1883 "xxxab",
1884 "xxx(ac)",
1885 &pos);
1886
1887 // Make sure @ past post context doesn't pass limit
1888 UTransPosition pos2 = {0, 4, 0, 2};
1889 expect("{b} a+ > c @@ |; x > y; a > A;",
1890 "baxx",
1891 "caxx",
1892 &pos2);
1893
1894 // Make sure @ past post context doesn't enter post context
1895 expect("{b} a+ > c @@ |; x > y; a > A;",
1896 "baxx",
1897 "cayy");
1898
1899 expect("(ab)? c > d;",
1900 "c abc ababc",
1901 "d d abd");
1902
1903 // NOTE: The (ab)+ when referenced just yields a single "ab",
1904 // not the full sequence of them. This accords with perl behavior.
1905 expect("(ab)+ {x} > '(' $1 ')';",
1906 "x abx ababxy",
1907 "x ab(ab) abab(ab)y");
1908
1909 expect("b+ > x;",
1910 "ac abc abbc abbbc",
1911 "ac axc axc axc");
1912
1913 expect("[abc]+ > x;",
1914 "qac abrc abbcs abtbbc",
1915 "qx xrx xs xtx");
1916
1917 expect("q{(ab)+} > x;",
1918 "qa qab qaba qababc qaba",
1919 "qa qx qxa qxc qxa");
1920
1921 expect("q(ab)* > x;",
1922 "qa qab qaba qababc",
1923 "xa x xa xc");
1924
1925 // NOTE: The (ab)+ when referenced just yields a single "ab",
1926 // not the full sequence of them. This accords with perl behavior.
1927 expect("q(ab)* > '(' $1 ')';",
1928 "qa qab qaba qababc",
1929 "()a (ab) (ab)a (ab)c");
1930
1931 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1932 // quoted string
1933 expect("'ab'+ > x;",
1934 "bb ab ababb",
1935 "bb x xb");
1936
1937 // $foo+ and $foo* -- the quantifier should apply to the entire
1938 // variable reference
1939 expect("$var = ab; $var+ > x;",
1940 "bb ab ababb",
1941 "bb x xb");
1942 }
1943
1944 class TestTrans : public Transliterator {
1945 public:
1946 TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1947 }
1948 virtual Transliterator* clone(void) const {
1949 return new TestTrans(getID());
1950 }
1951 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1952 UBool /*isIncremental*/) const
1953 {
1954 offsets.start = offsets.limit;
1955 }
1956 virtual UClassID getDynamicClassID() const;
1957 static UClassID U_EXPORT2 getStaticClassID();
1958 };
1959 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1960
1961 /**
1962 * Test Source-Target/Variant.
1963 */
1964 void TransliteratorTest::TestSTV(void) {
1965 int32_t ns = Transliterator::countAvailableSources();
1966 if (ns < 0 || ns > 255) {
1967 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1968 return;
1969 }
1970 int32_t i, j;
1971 for (i=0; i<ns; ++i) {
1972 UnicodeString source;
1973 Transliterator::getAvailableSource(i, source);
1974 logln((UnicodeString)"" + i + ": " + source);
1975 if (source.length() == 0) {
1976 errln("FAIL: empty source");
1977 continue;
1978 }
1979 int32_t nt = Transliterator::countAvailableTargets(source);
1980 if (nt < 0 || nt > 255) {
1981 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1982 continue;
1983 }
1984 for (int32_t j=0; j<nt; ++j) {
1985 UnicodeString target;
1986 Transliterator::getAvailableTarget(j, source, target);
1987 logln((UnicodeString)" " + j + ": " + target);
1988 if (target.length() == 0) {
1989 errln("FAIL: empty target");
1990 continue;
1991 }
1992 int32_t nv = Transliterator::countAvailableVariants(source, target);
1993 if (nv < 0 || nv > 255) {
1994 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1995 continue;
1996 }
1997 for (int32_t k=0; k<nv; ++k) {
1998 UnicodeString variant;
1999 Transliterator::getAvailableVariant(k, source, target, variant);
2000 if (variant.length() == 0) {
2001 logln((UnicodeString)" " + k + ": <empty>");
2002 } else {
2003 logln((UnicodeString)" " + k + ": " + variant);
2004 }
2005 }
2006 }
2007 }
2008
2009 // Test registration
2010 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2012 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2013 for (i=0; i<3; ++i) {
2014 Transliterator *t = new TestTrans(IDS[i]);
2015 if (t == 0) {
2016 errln("FAIL: out of memory");
2017 return;
2018 }
2019 if (t->getID() != IDS[i]) {
2020 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2021 delete t;
2022 return;
2023 }
2024 Transliterator::registerInstance(t);
2025 UErrorCode status = U_ZERO_ERROR;
2026 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2027 if (t == NULL) {
2028 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2029 IDS[i]);
2030 } else {
2031 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2032 IDS[i]);
2033 delete t;
2034 }
2035 Transliterator::unregister(IDS[i]);
2036 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2037 if (t != NULL) {
2038 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2039 IDS[i]);
2040 delete t;
2041 }
2042 }
2043
2044 // Make sure getAvailable API reflects removal
2045 int32_t n = Transliterator::countAvailableIDs();
2046 for (i=0; i<n; ++i) {
2047 UnicodeString id = Transliterator::getAvailableID(i);
2048 for (j=0; j<3; ++j) {
2049 if (id.caseCompare(FULL_IDS[j],0)==0) {
2050 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2051 }
2052 }
2053 }
2054 n = Transliterator::countAvailableTargets("Any");
2055 for (i=0; i<n; ++i) {
2056 UnicodeString t;
2057 Transliterator::getAvailableTarget(i, "Any", t);
2058 if (t.caseCompare(IDS[0],0)==0) {
2059 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2060 }
2061 }
2062 n = Transliterator::countAvailableSources();
2063 for (i=0; i<n; ++i) {
2064 UnicodeString s;
2065 Transliterator::getAvailableSource(i, s);
2066 for (j=0; j<3; ++j) {
2067 if (SOURCES[j] == NULL) continue;
2068 if (s.caseCompare(SOURCES[j],0)==0) {
2069 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2070 }
2071 }
2072 }
2073 }
2074
2075 /**
2076 * Test inverse of Greek-Latin; Title()
2077 */
2078 void TransliteratorTest::TestCompoundInverse(void) {
2079 UParseError parseError;
2080 UErrorCode status = U_ZERO_ERROR;
2081 Transliterator *t = Transliterator::createInstance
2082 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2083 if (t == 0) {
2084 dataerrln("FAIL: createInstance - %s", u_errorName(status));
2085 return;
2086 }
2087 UnicodeString exp("(Title);Latin-Greek");
2088 if (t->getID() == exp) {
2089 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2090 t->getID());
2091 } else {
2092 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2093 t->getID() + "\", expected \"" + exp + "\"");
2094 }
2095 delete t;
2096 }
2097
2098 /**
2099 * Test NFD chaining with RBT
2100 */
2101 void TransliteratorTest::TestNFDChainRBT() {
2102 UParseError pe;
2103 UErrorCode ec = U_ZERO_ERROR;
2104 Transliterator* t = Transliterator::createFromRules(
2105 "TEST", "::NFD; aa > Q; a > q;",
2106 UTRANS_FORWARD, pe, ec);
2107 if (t == NULL || U_FAILURE(ec)) {
2108 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2109 return;
2110 }
2111 expect(*t, "aa", "Q");
2112 delete t;
2113
2114 // TEMPORARY TESTS -- BEING DEBUGGED
2115 //=- UnicodeString s, s2;
2116 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2117 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2118 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2119 //=- expect(*t, s, s2);
2120 //=- delete t;
2121 //=-
2122 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2123 //=- expect(*t, s2, s);
2124 //=- delete t;
2125 //=-
2126 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2127 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2128 //=- expect(*t, s, s);
2129 //=- delete t;
2130
2131 // const char* source[] = {
2132 // /*
2133 // "\\u015Br\\u012Bmad",
2134 // "bhagavadg\\u012Bt\\u0101",
2135 // "adhy\\u0101ya",
2136 // "arjuna",
2137 // "vi\\u1E63\\u0101da",
2138 // "y\\u014Dga",
2139 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2140 // "uv\\u0101cr\\u0325",
2141 // */
2142 // "rmk\\u1E63\\u0113t",
2143 // //"dharmak\\u1E63\\u0113tr\\u0113",
2144 // /*
2145 // "kuruk\\u1E63\\u0113tr\\u0113",
2146 // "samav\\u0113t\\u0101",
2147 // "yuyutsava-\\u1E25",
2148 // "m\\u0101mak\\u0101-\\u1E25",
2149 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2150 // "kimakurvata",
2151 // "san\\u0304java",
2152 // */
2153 //
2154 // 0
2155 // };
2156 // const char* expected[] = {
2157 // /*
2158 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2159 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2160 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2161 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2162 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2163 // "\\u092f\\u094b\\u0917",
2164 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2165 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2166 // */
2167 // "\\u0927",
2168 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2169 // /*
2170 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2172 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2173 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2174 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2175 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2176 // "\\u0938\\u0902\\u091c\\u0935",
2177 // */
2178 // 0
2179 // };
2180 // UErrorCode status = U_ZERO_ERROR;
2181 // UParseError parseError;
2182 // UnicodeString message;
2183 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2184 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2185 // if(U_FAILURE(status)){
2186 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2187 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2188 // delete latinToDevToLatin;
2189 // delete devToLatinToDev;
2190 // return;
2191 // }
2192 // UnicodeString gotResult;
2193 // for(int i= 0; source[i] != 0; i++){
2194 // gotResult = source[i];
2195 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2196 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2197 // }
2198 // delete latinToDevToLatin;
2199 // delete devToLatinToDev;
2200 }
2201
2202 /**
2203 * Inverse of "Null" should be "Null". (J21)
2204 */
2205 void TransliteratorTest::TestNullInverse() {
2206 UParseError pe;
2207 UErrorCode ec = U_ZERO_ERROR;
2208 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2209 if (t == 0 || U_FAILURE(ec)) {
2210 errln("FAIL: createInstance");
2211 return;
2212 }
2213 Transliterator *u = t->createInverse(ec);
2214 if (u == 0 || U_FAILURE(ec)) {
2215 errln("FAIL: createInverse");
2216 delete t;
2217 return;
2218 }
2219 if (u->getID() != "Null") {
2220 errln("FAIL: Inverse of Null should be Null");
2221 }
2222 delete t;
2223 delete u;
2224 }
2225
2226 /**
2227 * Check ID of inverse of alias. (J22)
2228 */
2229 void TransliteratorTest::TestAliasInverseID() {
2230 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2231 UParseError pe;
2232 UErrorCode ec = U_ZERO_ERROR;
2233 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2234 if (t == 0 || U_FAILURE(ec)) {
2235 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2236 return;
2237 }
2238 Transliterator *u = t->createInverse(ec);
2239 if (u == 0 || U_FAILURE(ec)) {
2240 errln("FAIL: createInverse");
2241 delete t;
2242 return;
2243 }
2244 UnicodeString exp = "Hangul-Latin";
2245 UnicodeString got = u->getID();
2246 if (got != exp) {
2247 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2248 ", expected " + exp);
2249 }
2250 delete t;
2251 delete u;
2252 }
2253
2254 /**
2255 * Test IDs of inverses of compound transliterators. (J20)
2256 */
2257 void TransliteratorTest::TestCompoundInverseID() {
2258 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2259 UParseError pe;
2260 UErrorCode ec = U_ZERO_ERROR;
2261 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2262 if (t == 0 || U_FAILURE(ec)) {
2263 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2264 return;
2265 }
2266 Transliterator *u = t->createInverse(ec);
2267 if (u == 0 || U_FAILURE(ec)) {
2268 errln("FAIL: createInverse");
2269 delete t;
2270 return;
2271 }
2272 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2273 UnicodeString got = u->getID();
2274 if (got != exp) {
2275 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2276 ", expected " + exp);
2277 }
2278 delete t;
2279 delete u;
2280 }
2281
2282 /**
2283 * Test undefined variable.
2284
2285 */
2286 void TransliteratorTest::TestUndefinedVariable() {
2287 UnicodeString rule = "$initial } a <> \\u1161;";
2288 UParseError pe;
2289 UErrorCode ec = U_ZERO_ERROR;
2290 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2291 delete t;
2292 if (U_FAILURE(ec)) {
2293 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2294 u_errorName(ec));
2295 return;
2296 }
2297 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2298 u_errorName(ec));
2299 }
2300
2301 /**
2302 * Test empty context.
2303 */
2304 void TransliteratorTest::TestEmptyContext() {
2305 expect(" { a } > b;", "xay a ", "xby b ");
2306 }
2307
2308 /**
2309 * Test compound filter ID syntax
2310 */
2311 void TransliteratorTest::TestCompoundFilterID(void) {
2312 static const char* DATA[] = {
2313 // Col. 1 = ID or rule set (latter must start with #)
2314
2315 // = columns > 1 are null if expect col. 1 to be illegal =
2316
2317 // Col. 2 = direction, "F..." or "R..."
2318 // Col. 3 = source string
2319 // Col. 4 = exp result
2320
2321 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2322 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2323 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2324 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2325 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2326 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327 NULL,
2328 };
2329
2330 for (int32_t i=0; DATA[i]; i+=4) {
2331 UnicodeString id = CharsToUnicodeString(DATA[i]);
2332 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2333 UTRANS_REVERSE : UTRANS_FORWARD;
2334 UnicodeString source;
2335 UnicodeString exp;
2336 if (DATA[i+2] != NULL) {
2337 source = CharsToUnicodeString(DATA[i+2]);
2338 exp = CharsToUnicodeString(DATA[i+3]);
2339 }
2340 UBool expOk = (DATA[i+1] != NULL);
2341 Transliterator* t = NULL;
2342 UParseError pe;
2343 UErrorCode ec = U_ZERO_ERROR;
2344 if (id.charAt(0) == 0x23/*#*/) {
2345 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2346 } else {
2347 t = Transliterator::createInstance(id, direction, pe, ec);
2348 }
2349 UBool ok = (t != NULL && U_SUCCESS(ec));
2350 UnicodeString transID;
2351 if (t!=0) {
2352 transID = t->getID();
2353 }
2354 else {
2355 transID = UnicodeString("NULL", "");
2356 }
2357 if (ok == expOk) {
2358 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2359 u_errorName(ec));
2360 if (source.length() != 0) {
2361 expect(*t, source, exp);
2362 }
2363 delete t;
2364 } else {
2365 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2366 u_errorName(ec));
2367 }
2368 }
2369 }
2370
2371 /**
2372 * Test new property set syntax
2373 */
2374 void TransliteratorTest::TestPropertySet() {
2375 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2376 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2377 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2378 }
2379
2380 /**
2381 * Test various failure points of the new 2.0 engine.
2382 */
2383 void TransliteratorTest::TestNewEngine() {
2384 UParseError pe;
2385 UErrorCode ec = U_ZERO_ERROR;
2386 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2387 if (t == 0 || U_FAILURE(ec)) {
2388 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2389 return;
2390 }
2391 // Katakana should be untouched
2392 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2393 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2394
2395 delete t;
2396
2397 #if 1
2398 // This test will only work if Transliterator.ROLLBACK is
2399 // true. Otherwise, this test will fail, revealing a
2400 // limitation of global filters in incremental mode.
2401 Transliterator *a =
2402 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2403 Transliterator *A =
2404 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2405 if (U_FAILURE(ec)) {
2406 delete a;
2407 delete A;
2408 return;
2409 }
2410
2411 Transliterator* array[3];
2412 array[0] = a;
2413 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2414 array[2] = A;
2415 if (U_FAILURE(ec)) {
2416 errln("FAIL: createInstance NFD");
2417 delete a;
2418 delete A;
2419 delete array[1];
2420 return;
2421 }
2422
2423 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2424 if (U_FAILURE(ec)) {
2425 errln("FAIL: UnicodeSet constructor");
2426 delete a;
2427 delete A;
2428 delete array[1];
2429 delete t;
2430 return;
2431 }
2432
2433 expect(*t, "aAaA", "bAbA");
2434
2435 assertTrue("countElements", t->countElements() == 3);
2436 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2437 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2438 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2439 assertSuccess("getElement", ec);
2440
2441 delete a;
2442 delete A;
2443 delete array[1];
2444 delete t;
2445 #endif
2446
2447 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2448 "a",
2449 "ax");
2450
2451 UnicodeString gr = CharsToUnicodeString(
2452 "$ddot = \\u0308 ;"
2453 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2454 "$rough = \\u0314 ;"
2455 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2456 "\\u03b1 <> a ;"
2457 "$rough <> h ;");
2458
2459 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2460 }
2461
2462 /**
2463 * Test quantified segment behavior. We want:
2464 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2465 */
2466 void TransliteratorTest::TestQuantifiedSegment(void) {
2467 // The normal case
2468 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2469
2470 // The tricky case; the quantifier is around the segment
2471 expect("([abc])+ > x $1 x;", "cba", "xax");
2472
2473 // Tricky case in reverse direction
2474 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2475
2476 // Check post-context segment
2477 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2478
2479 // Test toRule/toPattern for non-quantified segment.
2480 // Careful with spacing here.
2481 UnicodeString r("([a-c]){q} > x $1 x;");
2482 UParseError pe;
2483 UErrorCode ec = U_ZERO_ERROR;
2484 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2485 if (U_FAILURE(ec)) {
2486 errln("FAIL: createFromRules");
2487 delete t;
2488 return;
2489 }
2490 UnicodeString rr;
2491 t->toRules(rr, TRUE);
2492 if (r != rr) {
2493 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2494 } else {
2495 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496 }
2497 delete t;
2498
2499 // Test toRule/toPattern for quantified segment.
2500 // Careful with spacing here.
2501 r = "([a-c])+{q} > x $1 x;";
2502 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2503 if (U_FAILURE(ec)) {
2504 errln("FAIL: createFromRules");
2505 delete t;
2506 return;
2507 }
2508 t->toRules(rr, TRUE);
2509 if (r != rr) {
2510 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2511 } else {
2512 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513 }
2514 delete t;
2515 }
2516
2517 //======================================================================
2518 // Ram's tests
2519 //======================================================================
2520 void TransliteratorTest::TestDevanagariLatinRT(){
2521 const int MAX_LEN= 52;
2522 const char* const source[MAX_LEN] = {
2523 "bh\\u0101rata",
2524 "kra",
2525 "k\\u1E63a",
2526 "khra",
2527 "gra",
2528 "\\u1E45ra",
2529 "cra",
2530 "chra",
2531 "j\\u00F1a",
2532 "jhra",
2533 "\\u00F1ra",
2534 "\\u1E6Dya",
2535 "\\u1E6Dhra",
2536 "\\u1E0Dya",
2537 //"r\\u0323ya", // \u095c is not valid in Devanagari
2538 "\\u1E0Dhya",
2539 "\\u1E5Bhra",
2540 "\\u1E47ra",
2541 "tta",
2542 "thra",
2543 "dda",
2544 "dhra",
2545 "nna",
2546 "pra",
2547 "phra",
2548 "bra",
2549 "bhra",
2550 "mra",
2551 "\\u1E49ra",
2552 //"l\\u0331ra",
2553 "yra",
2554 "\\u1E8Fra",
2555 //"l-",
2556 "vra",
2557 "\\u015Bra",
2558 "\\u1E63ra",
2559 "sra",
2560 "hma",
2561 "\\u1E6D\\u1E6Da",
2562 "\\u1E6D\\u1E6Dha",
2563 "\\u1E6Dh\\u1E6Dha",
2564 "\\u1E0D\\u1E0Da",
2565 "\\u1E0D\\u1E0Dha",
2566 "\\u1E6Dya",
2567 "\\u1E6Dhya",
2568 "\\u1E0Dya",
2569 "\\u1E0Dhya",
2570 // Not roundtrippable --
2571 // \\u0939\\u094d\\u094d\\u092E - hma
2572 // \\u0939\\u094d\\u092E - hma
2573 // CharsToUnicodeString("hma"),
2574 "hya",
2575 "\\u015Br\\u0325",
2576 "\\u015Bca",
2577 "\\u0115",
2578 "san\\u0304j\\u012Bb s\\u0113nagupta",
2579 "\\u0101nand vaddir\\u0101ju",
2580 "\\u0101",
2581 "a"
2582 };
2583 const char* const expected[MAX_LEN] = {
2584 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2585 "\\u0915\\u094D\\u0930", /* kra */
2586 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2587 "\\u0916\\u094D\\u0930", /* khra */
2588 "\\u0917\\u094D\\u0930", /* gra */
2589 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2590 "\\u091A\\u094D\\u0930", /* cra */
2591 "\\u091B\\u094D\\u0930", /* chra */
2592 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2593 "\\u091D\\u094D\\u0930", /* jhra */
2594 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2595 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2596 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2597 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2598 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2599 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2600 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2601 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2602 "\\u0924\\u094D\\u0924", /* tta */
2603 "\\u0925\\u094D\\u0930", /* thra */
2604 "\\u0926\\u094D\\u0926", /* dda */
2605 "\\u0927\\u094D\\u0930", /* dhra */
2606 "\\u0928\\u094D\\u0928", /* nna */
2607 "\\u092A\\u094D\\u0930", /* pra */
2608 "\\u092B\\u094D\\u0930", /* phra */
2609 "\\u092C\\u094D\\u0930", /* bra */
2610 "\\u092D\\u094D\\u0930", /* bhra */
2611 "\\u092E\\u094D\\u0930", /* mra */
2612 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2613 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2614 "\\u092F\\u094D\\u0930", /* yra */
2615 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2616 //"l-",
2617 "\\u0935\\u094D\\u0930", /* vra */
2618 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2619 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2620 "\\u0938\\u094D\\u0930", /* sra */
2621 "\\u0939\\u094d\\u092E", /* hma */
2622 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2623 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2624 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2625 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2626 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2627 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2628 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2629 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2630 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2631 // "hma", /* hma */
2632 "\\u0939\\u094D\\u092F", /* hya */
2633 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2634 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2635 "\\u090d", /* e\\u0306 */
2636 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2637 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2638 "\\u0906",
2639 "\\u0905",
2640 };
2641 UErrorCode status = U_ZERO_ERROR;
2642 UParseError parseError;
2643 UnicodeString message;
2644 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2645 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2646 if(U_FAILURE(status)){
2647 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2648 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2649 return;
2650 }
2651 UnicodeString gotResult;
2652 for(int i= 0; i<MAX_LEN; i++){
2653 gotResult = source[i];
2654 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2655 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2656 }
2657 delete latinToDev;
2658 delete devToLatin;
2659 }
2660
2661 void TransliteratorTest::TestTeluguLatinRT(){
2662 const int MAX_LEN=10;
2663 const char* const source[MAX_LEN] = {
2664 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2665 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2666 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2667 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2668 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2669 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2670 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2671 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2672 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2673 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2674 };
2675
2676 const char* const expected[MAX_LEN] = {
2677 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2679 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2682 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2683 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2684 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2685 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2687 };
2688
2689 UErrorCode status = U_ZERO_ERROR;
2690 UParseError parseError;
2691 UnicodeString message;
2692 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2693 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2694 if(U_FAILURE(status)){
2695 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2696 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2697 return;
2698 }
2699 UnicodeString gotResult;
2700 for(int i= 0; i<MAX_LEN; i++){
2701 gotResult = source[i];
2702 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2703 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2704 }
2705 delete latinToDev;
2706 delete devToLatin;
2707 }
2708
2709 void TransliteratorTest::TestSanskritLatinRT(){
2710 const int MAX_LEN =16;
2711 const char* const source[MAX_LEN] = {
2712 "rmk\\u1E63\\u0113t",
2713 "\\u015Br\\u012Bmad",
2714 "bhagavadg\\u012Bt\\u0101",
2715 "adhy\\u0101ya",
2716 "arjuna",
2717 "vi\\u1E63\\u0101da",
2718 "y\\u014Dga",
2719 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2720 "uv\\u0101cr\\u0325",
2721 "dharmak\\u1E63\\u0113tr\\u0113",
2722 "kuruk\\u1E63\\u0113tr\\u0113",
2723 "samav\\u0113t\\u0101",
2724 "yuyutsava\\u1E25",
2725 "m\\u0101mak\\u0101\\u1E25",
2726 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2727 "kimakurvata",
2728 "san\\u0304java",
2729 };
2730 const char* const expected[MAX_LEN] = {
2731 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2732 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2733 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2734 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2735 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2736 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2737 "\\u092f\\u094b\\u0917",
2738 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2739 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2740 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2743 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2744 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2745 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2746 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2747 "\\u0938\\u0902\\u091c\\u0935",
2748 };
2749 UErrorCode status = U_ZERO_ERROR;
2750 UParseError parseError;
2751 UnicodeString message;
2752 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2753 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2754 if(U_FAILURE(status)){
2755 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2756 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2757 return;
2758 }
2759 UnicodeString gotResult;
2760 for(int i= 0; i<MAX_LEN; i++){
2761 gotResult = source[i];
2762 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2763 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2764 }
2765 delete latinToDev;
2766 delete devToLatin;
2767 }
2768
2769
2770 void TransliteratorTest::TestCompoundLatinRT(){
2771 const char* const source[] = {
2772 "rmk\\u1E63\\u0113t",
2773 "\\u015Br\\u012Bmad",
2774 "bhagavadg\\u012Bt\\u0101",
2775 "adhy\\u0101ya",
2776 "arjuna",
2777 "vi\\u1E63\\u0101da",
2778 "y\\u014Dga",
2779 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2780 "uv\\u0101cr\\u0325",
2781 "dharmak\\u1E63\\u0113tr\\u0113",
2782 "kuruk\\u1E63\\u0113tr\\u0113",
2783 "samav\\u0113t\\u0101",
2784 "yuyutsava\\u1E25",
2785 "m\\u0101mak\\u0101\\u1E25",
2786 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2787 "kimakurvata",
2788 "san\\u0304java"
2789 };
2790 const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2791 const char* const expected[MAX_LEN] = {
2792 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2793 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2794 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2795 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2796 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2797 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2798 "\\u092f\\u094b\\u0917",
2799 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2800 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2801 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2804 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2805 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2806 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2807 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2808 "\\u0938\\u0902\\u091c\\u0935"
2809 };
2810 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2811 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2812 return;
2813 }
2814
2815 UErrorCode status = U_ZERO_ERROR;
2816 UParseError parseError;
2817 UnicodeString message;
2818 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2819 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2820 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2821 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2822
2823 if(U_FAILURE(status)){
2824 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2825 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2826 return;
2827 }
2828 UnicodeString gotResult;
2829 for(int i= 0; i<MAX_LEN; i++){
2830 gotResult = source[i];
2831 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2832 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2833 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2834
2835 }
2836 delete(latinToDevToLatin);
2837 delete(devToLatinToDev);
2838 delete(devToTelToDev);
2839 delete(latinToTelToLatin);
2840 }
2841
2842 /**
2843 * Test Gurmukhi-Devanagari Tippi and Bindi
2844 */
2845 void TransliteratorTest::TestGurmukhiDevanagari(){
2846 // the rule says:
2847 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2848 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2849 UErrorCode status = U_ZERO_ERROR;
2850 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2851 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2852 UParseError parseError;
2853
2854 UnicodeSetIterator vIter(vowel);
2855 UnicodeSetIterator nvIter(non_vowel);
2856 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2857 if(U_FAILURE(status)) {
2858 dataerrln("Error creating transliterator %s", u_errorName(status));
2859 delete trans;
2860 return;
2861 }
2862 UnicodeString src (" \\u0902", -1, US_INV);
2863 UnicodeString expected(" \\u0A02", -1, US_INV);
2864 src = src.unescape();
2865 expected= expected.unescape();
2866
2867 while(vIter.next()){
2868 src.setCharAt(0,(UChar) vIter.getCodepoint());
2869 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2870 expect(*trans,src,expected);
2871 }
2872
2873 expected.setCharAt(1,0x0A70);
2874 while(nvIter.next()){
2875 //src.setCharAt(0,(char) nvIter.codepoint);
2876 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2877 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2878 expect(*trans,src,expected);
2879 }
2880 delete trans;
2881 }
2882 /**
2883 * Test instantiation from a locale.
2884 */
2885 void TransliteratorTest::TestLocaleInstantiation(void) {
2886 UParseError pe;
2887 UErrorCode ec = U_ZERO_ERROR;
2888 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2889 if (U_FAILURE(ec)) {
2890 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2891 delete t;
2892 return;
2893 }
2894 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2895 delete t;
2896
2897 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2898 if (U_FAILURE(ec)) {
2899 errln("FAIL: createInstance(en-el)");
2900 delete t;
2901 return;
2902 }
2903 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2904 delete t;
2905 }
2906
2907 /**
2908 * Test title case handling of accent (should ignore accents)
2909 */
2910 void TransliteratorTest::TestTitleAccents(void) {
2911 UParseError pe;
2912 UErrorCode ec = U_ZERO_ERROR;
2913 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2914 if (U_FAILURE(ec)) {
2915 errln("FAIL: createInstance(Title)");
2916 delete t;
2917 return;
2918 }
2919 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2920 delete t;
2921 }
2922
2923 /**
2924 * Basic test of a locale resource based rule.
2925 */
2926 void TransliteratorTest::TestLocaleResource() {
2927 const char* DATA[] = {
2928 // id from to
2929 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2930 "Latin-el", "b", "\\u03bc\\u03c0",
2931 "Latin-Greek", "b", "\\u03B2",
2932 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2933 "el-Latin", "\\u03B2", "v",
2934 "Greek-Latin", "\\u03B2", "b",
2935 };
2936 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2937 for (int32_t i=0; i<DATA_length; i+=3) {
2938 UParseError pe;
2939 UErrorCode ec = U_ZERO_ERROR;
2940 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2941 if (U_FAILURE(ec)) {
2942 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2943 delete t;
2944 continue;
2945 }
2946 expect(*t, CharsToUnicodeString(DATA[i+1]),
2947 CharsToUnicodeString(DATA[i+2]));
2948 delete t;
2949 }
2950 }
2951
2952 /**
2953 * Make sure parse errors reference the right line.
2954 */
2955 void TransliteratorTest::TestParseError() {
2956 static const char* rule =
2957 "a > b;\n"
2958 "# more stuff\n"
2959 "d << b;";
2960 UErrorCode ec = U_ZERO_ERROR;
2961 UParseError pe;
2962 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2963 delete t;
2964 if (U_FAILURE(ec)) {
2965 UnicodeString err(pe.preContext);
2966 err.append((UChar)124/*|*/).append(pe.postContext);
2967 if (err.indexOf("d << b") >= 0) {
2968 logln("Ok: " + err);
2969 } else {
2970 errln("FAIL: " + err);
2971 }
2972 }
2973 else {
2974 errln("FAIL: no syntax error");
2975 }
2976 static const char* maskingRule =
2977 "a>x;\n"
2978 "# more stuff\n"
2979 "ab>y;";
2980 ec = U_ZERO_ERROR;
2981 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2982 if (ec != U_RULE_MASK_ERROR) {
2983 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2984 }
2985 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2986 errln("FAIL: did not get expected precontext");
2987 }
2988 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2989 errln("FAIL: did not get expected postcontext");
2990 }
2991 }
2992
2993 /**
2994 * Make sure sets on output are disallowed.
2995 */
2996 void TransliteratorTest::TestOutputSet() {
2997 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2998 UErrorCode ec = U_ZERO_ERROR;
2999 UParseError pe;
3000 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3001 delete t;
3002 if (U_FAILURE(ec)) {
3003 UnicodeString err(pe.preContext);
3004 err.append((UChar)124/*|*/).append(pe.postContext);
3005 logln("Ok: " + err);
3006 return;
3007 }
3008 errln("FAIL: No syntax error");
3009 }
3010
3011 /**
3012 * Test the use variable range pragma, making sure that use of
3013 * variable range characters is detected and flagged as an error.
3014 */
3015 void TransliteratorTest::TestVariableRange() {
3016 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3017 UErrorCode ec = U_ZERO_ERROR;
3018 UParseError pe;
3019 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3020 delete t;
3021 if (U_FAILURE(ec)) {
3022 UnicodeString err(pe.preContext);
3023 err.append((UChar)124/*|*/).append(pe.postContext);
3024 logln("Ok: " + err);
3025 return;
3026 }
3027 errln("FAIL: No syntax error");
3028 }
3029
3030 /**
3031 * Test invalid post context error handling
3032 */
3033 void TransliteratorTest::TestInvalidPostContext() {
3034 UnicodeString rule = "a}b{c>d;";
3035 UErrorCode ec = U_ZERO_ERROR;
3036 UParseError pe;
3037 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3038 delete t;
3039 if (U_FAILURE(ec)) {
3040 UnicodeString err(pe.preContext);
3041 err.append((UChar)124/*|*/).append(pe.postContext);
3042 if (err.indexOf("a}b{c") >= 0) {
3043 logln("Ok: " + err);
3044 } else {
3045 errln("FAIL: " + err);
3046 }
3047 return;
3048 }
3049 errln("FAIL: No syntax error");
3050 }
3051
3052 /**
3053 * Test ID form variants
3054 */
3055 void TransliteratorTest::TestIDForms() {
3056 const char* DATA[] = {
3057 "NFC", NULL, "NFD",
3058 "nfd", NULL, "NFC", // make sure case is ignored
3059 "Any-NFKD", NULL, "Any-NFKC",
3060 "Null", NULL, "Null",
3061 "-nfkc", "nfkc", "NFKD",
3062 "-nfkc/", "nfkc", "NFKD",
3063 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3064 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3065 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3066 "Source-", NULL, NULL,
3067 "Source/Variant-", NULL, NULL,
3068 "Source-/Variant", NULL, NULL,
3069 "/Variant", NULL, NULL,
3070 "/Variant-", NULL, NULL,
3071 "-/Variant", NULL, NULL,
3072 "-/", NULL, NULL,
3073 "-", NULL, NULL,
3074 "/", NULL, NULL,
3075 };
3076 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3077
3078 for (int32_t i=0; i<DATA_length; i+=3) {
3079 const char* ID = DATA[i];
3080 const char* expID = DATA[i+1];
3081 const char* expInvID = DATA[i+2];
3082 UBool expValid = (expInvID != NULL);
3083 if (expID == NULL) {
3084 expID = ID;
3085 }
3086 UParseError pe;
3087 UErrorCode ec = U_ZERO_ERROR;
3088 Transliterator *t =
3089 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3090 if (U_FAILURE(ec)) {
3091 if (!expValid) {
3092 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3093 } else {
3094 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3095 }
3096 delete t;
3097 continue;
3098 }
3099 Transliterator *u = t->createInverse(ec);
3100 if (U_FAILURE(ec)) {
3101 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3102 delete t;
3103 delete u;
3104 continue;
3105 }
3106 if (t->getID() == expID &&
3107 u->getID() == expInvID) {
3108 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3109 } else {
3110 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3111 t->getID() + " x getInverse() => " + u->getID() +
3112 ", expected " + expInvID);
3113 }
3114 delete t;
3115 delete u;
3116 }
3117 }
3118
3119 static const UChar SPACE[] = {32,0};
3120 static const UChar NEWLINE[] = {10,0};
3121 static const UChar RETURN[] = {13,0};
3122 static const UChar EMPTY[] = {0};
3123
3124 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3125 const UnicodeString& testRulesForward) {
3126 UnicodeString rules2; t2.toRules(rules2, TRUE);
3127 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3128 rules2.findAndReplace(SPACE, EMPTY);
3129 rules2.findAndReplace(NEWLINE, EMPTY);
3130 rules2.findAndReplace(RETURN, EMPTY);
3131
3132 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3133
3134 if (rules2 != testRules) {
3135 errln(label);
3136 logln((UnicodeString)"GENERATED RULES: " + rules2);
3137 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3138 }
3139 }
3140
3141 /**
3142 * Mark's toRules test.
3143 */
3144 void TransliteratorTest::TestToRulesMark() {
3145 const char* testRules =
3146 "::[[:Latin:][:Mark:]];"
3147 "::NFKD (NFC);"
3148 "::Lower (Lower);"
3149 "a <> \\u03B1;" // alpha
3150 "::NFKC (NFD);"
3151 "::Upper (Lower);"
3152 "::Lower ();"
3153 "::([[:Greek:][:Mark:]]);"
3154 ;
3155 const char* testRulesForward =
3156 "::[[:Latin:][:Mark:]];"
3157 "::NFKD(NFC);"
3158 "::Lower(Lower);"
3159 "a > \\u03B1;"
3160 "::NFKC(NFD);"
3161 "::Upper (Lower);"
3162 "::Lower ();"
3163 ;
3164 const char* testRulesBackward =
3165 "::[[:Greek:][:Mark:]];"
3166 "::Lower (Upper);"
3167 "::NFD(NFKC);"
3168 "\\u03B1 > a;"
3169 "::Lower(Lower);"
3170 "::NFC(NFKD);"
3171 ;
3172 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3173 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3174
3175 UParseError pe;
3176 UErrorCode ec = U_ZERO_ERROR;
3177 Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3178 Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3179
3180 if (U_FAILURE(ec)) {
3181 delete t2;
3182 delete t3;
3183 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3184 return;
3185 }
3186
3187 expect(*t2, source, target);
3188 expect(*t3, target, source);
3189
3190 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3191 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3192
3193 delete t2;
3194 delete t3;
3195 }
3196
3197 /**
3198 * Test Escape and Unescape transliterators.
3199 */
3200 void TransliteratorTest::TestEscape() {
3201 UParseError pe;
3202 UErrorCode ec;
3203 Transliterator *t;
3204
3205 ec = U_ZERO_ERROR;
3206 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3207 if (U_FAILURE(ec)) {
3208 errln((UnicodeString)"FAIL: createInstance");
3209 } else {
3210 expect(*t,
3211 UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3212 "@12Q");
3213 }
3214 delete t;
3215
3216 ec = U_ZERO_ERROR;
3217 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3218 if (U_FAILURE(ec)) {
3219 errln((UnicodeString)"FAIL: createInstance");
3220 } else {
3221 expect(*t,
3222 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3223 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3224 }
3225 delete t;
3226
3227 ec = U_ZERO_ERROR;
3228 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3229 if (U_FAILURE(ec)) {
3230 errln((UnicodeString)"FAIL: createInstance");
3231 } else {
3232 expect(*t,
3233 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3234 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3235 }
3236 delete t;
3237
3238 ec = U_ZERO_ERROR;
3239 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3240 if (U_FAILURE(ec)) {
3241 errln((UnicodeString)"FAIL: createInstance");
3242 } else {
3243 expect(*t,
3244 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3245 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3246 }
3247 delete t;
3248 }
3249
3250
3251 void TransliteratorTest::TestAnchorMasking(){
3252 UnicodeString rule ("^a > Q; a > q;");
3253 UErrorCode status= U_ZERO_ERROR;
3254 UParseError parseError;
3255
3256 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3257 if(U_FAILURE(status)){
3258 errln(UnicodeString("FAIL: ") + "ID" +
3259 ".createFromRules() => bad rules" +
3260 /*", parse error " + parseError.code +*/
3261 ", line " + parseError.line +
3262 ", offset " + parseError.offset +
3263 ", context " + prettify(parseError.preContext, TRUE) +
3264 ", rules: " + prettify(rule, TRUE));
3265 }
3266 delete t;
3267 }
3268
3269 /**
3270 * Make sure display names of variants look reasonable.
3271 */
3272 void TransliteratorTest::TestDisplayName() {
3273 #if UCONFIG_NO_FORMATTING
3274 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3275 return;
3276 #else
3277 static const char* DATA[] = {
3278 // ID, forward name, reverse name
3279 // Update the text as necessary -- the important thing is
3280 // not the text itself, but how various cases are handled.
3281
3282 // Basic test
3283 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3284
3285 // Variants
3286 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3287
3288 // Target-only IDs
3289 "NFC", "Any to NFC", "Any to NFD",
3290 };
3291
3292 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3293
3294 Locale US("en", "US");
3295
3296 for (int32_t i=0; i<DATA_length; i+=3) {
3297 UnicodeString name;
3298 Transliterator::getDisplayName(DATA[i], US, name);
3299 if (name != DATA[i+1]) {
3300 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3301 name + ", expected " + DATA[i+1]);
3302 } else {
3303 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3304 }
3305 UErrorCode ec = U_ZERO_ERROR;
3306 UParseError pe;
3307 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3308 if (U_FAILURE(ec)) {
3309 delete t;
3310 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3311 continue;
3312 }
3313 name = Transliterator::getDisplayName(t->getID(), US, name);
3314 if (name != DATA[i+2]) {
3315 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3316 name + ", expected " + DATA[i+2]);
3317 } else {
3318 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3319 }
3320 delete t;
3321 }
3322 #endif
3323 }
3324
3325 void TransliteratorTest::TestSpecialCases(void) {
3326 const UnicodeString registerRules[] = {
3327 "Any-Dev1", "x > X; y > Y;",
3328 "Any-Dev2", "XY > Z",
3329 "Greek-Latin/FAKE",
3330 CharsToUnicodeString
3331 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3332 "" // END MARKER
3333 };
3334
3335 const UnicodeString testCases[] = {
3336 // NORMALIZATION
3337 // should add more test cases
3338 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342
3343 // mp -> b BUG
3344 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346
3347 // check for devanagari bug
3348 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3349
3350 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3351 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3352 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3353
3354 //TODO: enable this test once Titlecase works right
3355 /*
3356 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3357 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3358 */
3359 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3360 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3361 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3363
3364 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3366
3367 // FORMS OF S
3368 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3370 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3371 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3372 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3373 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3374 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3375 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3376 // Tatiana bug
3377 // Upper: TAT\\u02B9\\u00C2NA
3378 // Lower: tat\\u02B9\\u00E2na
3379 // Title: Tat\\u02B9\\u00E2na
3380 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3381 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3383 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3385 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3386
3387 "" // END MARKER
3388 };
3389
3390 UParseError pos;
3391 int32_t i;
3392 for (i = 0; registerRules[i].length()!=0; i+=2) {
3393 UErrorCode status = U_ZERO_ERROR;
3394
3395 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3396 registerRules[i+1], UTRANS_FORWARD, pos, status);
3397 if (U_FAILURE(status)) {
3398 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3399 } else {
3400 Transliterator::registerInstance(t);
3401 }
3402 }
3403 for (i = 0; testCases[i].length()!=0; i+=3) {
3404 UErrorCode ec = U_ZERO_ERROR;
3405 UParseError pe;
3406 const UnicodeString& name = testCases[i];
3407 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3408 if (U_FAILURE(ec)) {
3409 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3410 delete t;
3411 continue;
3412 }
3413 const UnicodeString& id = t->getID();
3414 const UnicodeString& source = testCases[i+1];
3415 UnicodeString target;
3416
3417 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3418
3419 if (testCases[i+2].length() > 0) {
3420 target = testCases[i+2];
3421 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3422 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3423 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3424 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3425 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3426 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3427 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3428 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3429 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3430 target = source;
3431 target.toLower(Locale::getUS());
3432 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3433 target = source;
3434 target.toUpper(Locale::getUS());
3435 }
3436 if (U_FAILURE(ec)) {
3437 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3438 continue;
3439 }
3440
3441 expect(*t, source, target);
3442 delete t;
3443 }
3444 for (i = 0; registerRules[i].length()!=0; i+=2) {
3445 Transliterator::unregister(registerRules[i]);
3446 }
3447 }
3448
3449 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3450 if (ch <= 0xFFFF) {
3451 sprintf(buffer, "\\u%04x", (int)ch);
3452 } else {
3453 sprintf(buffer, "\\U%08x", (int)ch);
3454 }
3455 return buffer;
3456 }
3457
3458 void TransliteratorTest::TestSurrogateCasing (void) {
3459 // check that casing handles surrogates
3460 // titlecase is currently defective
3461 char buffer[20];
3462 UChar buffer2[20];
3463 UChar32 dee;
3464 U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3465 UnicodeString DEE(u_totitle(dee));
3466 if (DEE != DESERET_DEE) {
3467 err("Fails titlecase of surrogates");
3468 err(Char32ToEscapedChars(dee, buffer));
3469 err(", ");
3470 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3471 }
3472
3473 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3474 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3475 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3476 UErrorCode status= U_ZERO_ERROR;
3477
3478 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3479 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3480 errln("Fails: Can't uppercase surrogates.");
3481 }
3482
3483 status= U_ZERO_ERROR;
3484 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3485 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3486 errln("Fails: Can't lowercase surrogates.");
3487 }
3488 }
3489
3490 static void _trans(Transliterator& t, const UnicodeString& src,
3491 UnicodeString& result) {
3492 result = src;
3493 t.transliterate(result);
3494 }
3495
3496 static void _trans(const UnicodeString& id, const UnicodeString& src,
3497 UnicodeString& result, UErrorCode ec) {
3498 UParseError pe;
3499 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3500 if (U_SUCCESS(ec)) {
3501 _trans(*t, src, result);
3502 }
3503 delete t;
3504 }
3505
3506 static UnicodeString _findMatch(const UnicodeString& source,
3507 const UnicodeString* pairs) {
3508 UnicodeString empty;
3509 for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3510 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3511 return pairs[i+1];
3512 }
3513 }
3514 return empty;
3515 }
3516
3517 // Check to see that incremental gets at least part way through a reasonable string.
3518
3519 void TransliteratorTest::TestIncrementalProgress(void) {
3520 UErrorCode ec = U_ZERO_ERROR;
3521 UnicodeString latinTest = "The Quick Brown Fox.";
3522 UnicodeString devaTest;
3523 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3524 UnicodeString kataTest;
3525 _trans("Latin-Katakana", latinTest, kataTest, ec);
3526 if (U_FAILURE(ec)) {
3527 errln("FAIL: Internal error");
3528 return;
3529 }
3530 const UnicodeString tests[] = {
3531 "Any", latinTest,
3532 "Latin", latinTest,
3533 "Halfwidth", latinTest,
3534 "Devanagari", devaTest,
3535 "Katakana", kataTest,
3536 "" // END MARKER
3537 };
3538
3539 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3540 int32_t i = 0, j=0, k=0;
3541 int32_t sources = Transliterator::countAvailableSources();
3542 for (i = 0; i < sources; i++) {
3543 UnicodeString source;
3544 Transliterator::getAvailableSource(i, source);
3545 UnicodeString test = _findMatch(source, tests);
3546 if (test.length() == 0) {
3547 logln((UnicodeString)"Skipping " + source + "-X");
3548 continue;
3549 }
3550 int32_t targets = Transliterator::countAvailableTargets(source);
3551 for (j = 0; j < targets; j++) {
3552 UnicodeString target;
3553 Transliterator::getAvailableTarget(j, source, target);
3554 int32_t variants = Transliterator::countAvailableVariants(source, target);
3555 for (k =0; k< variants; k++) {
3556 UnicodeString variant;
3557 UParseError err;
3558 UErrorCode status = U_ZERO_ERROR;
3559
3560 Transliterator::getAvailableVariant(k, source, target, variant);
3561 UnicodeString id = source + "-" + target + "/" + variant;
3562
3563 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3564 if (U_FAILURE(status)) {
3565 dataerrln((UnicodeString)"FAIL: Could not create " + id);
3566 delete t;
3567 continue;
3568 }
3569 status = U_ZERO_ERROR;
3570 CheckIncrementalAux(t, test);
3571
3572 UnicodeString rev;
3573 _trans(*t, test, rev);
3574 Transliterator *inv = t->createInverse(status);
3575 if (U_FAILURE(status)) {
3576 #if UCONFIG_NO_BREAK_ITERATION
3577 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3578 if (id.compare((UnicodeString)"Latin-Thai/") != 0)
3579 #endif
3580 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3581
3582 delete t;
3583 delete inv;
3584 continue;
3585 }
3586 CheckIncrementalAux(inv, rev);
3587 delete t;
3588 delete inv;
3589 }
3590 }
3591 }
3592 }
3593
3594 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3595 const UnicodeString& input) {
3596 UErrorCode ec = U_ZERO_ERROR;
3597 UTransPosition pos;
3598 UnicodeString test = input;
3599
3600 pos.contextStart = 0;
3601 pos.contextLimit = input.length();
3602 pos.start = 0;
3603 pos.limit = input.length();
3604
3605 t->transliterate(test, pos, ec);
3606 if (U_FAILURE(ec)) {
3607 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3608 return;
3609 }
3610 UBool gotError = FALSE;
3611
3612 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3613
3614 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3615 errln((UnicodeString)"No Progress, " +
3616 t->getID() + ": " + formatInput(test, input, pos));
3617 gotError = TRUE;
3618 } else {
3619 logln((UnicodeString)"PASS Progress, " +
3620 t->getID() + ": " + formatInput(test, input, pos));
3621 }
3622 t->finishTransliteration(test, pos);
3623 if (pos.start != pos.limit) {
3624 errln((UnicodeString)"Incomplete, " +
3625 t->getID() + ": " + formatInput(test, input, pos));
3626 gotError = TRUE;
3627 }
3628 }
3629
3630 void TransliteratorTest::TestFunction() {
3631 // Careful with spacing and ';' here: Phrase this exactly
3632 // as toRules() is going to return it. If toRules() changes
3633 // with regard to spacing or ';', then adjust this string.
3634 UnicodeString rule =
3635 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3636
3637 UParseError pe;
3638 UErrorCode ec = U_ZERO_ERROR;
3639 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3640 if (t == NULL) {
3641 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3642 return;
3643 }
3644
3645 UnicodeString r;
3646 t->toRules(r, TRUE);
3647 if (r == rule) {
3648 logln((UnicodeString)"OK: toRules() => " + r);
3649 } else {
3650 errln((UnicodeString)"FAIL: toRules() => " + r +
3651 ", expected " + rule);
3652 }
3653
3654 expect(*t, "The Quick Brown Fox",
3655 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3656
3657 delete t;
3658 }
3659
3660 void TransliteratorTest::TestInvalidBackRef(void) {
3661 UnicodeString rule = ". > $1;";
3662 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3663 UParseError pe;
3664 UErrorCode ec = U_ZERO_ERROR;
3665 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3666 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3667
3668 if (t != NULL) {
3669 errln("FAIL: createFromRules should have returned NULL");
3670 delete t;
3671 }
3672
3673 if (t2 != NULL) {
3674 errln("FAIL: createFromRules should have returned NULL");
3675 delete t2;
3676 }
3677
3678 if (U_SUCCESS(ec)) {
3679 errln("FAIL: Ok: . > $1; => no error");
3680 } else {
3681 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3682 }
3683 }
3684
3685 void TransliteratorTest::TestMulticharStringSet() {
3686 // Basic testing
3687 const char* rule =
3688 " [{aa}] > x;"
3689 " a > y;"
3690 " [b{bc}] > z;"
3691 "[{gd}] { e > q;"
3692 " e } [{fg}] > r;" ;
3693
3694 UParseError pe;
3695 UErrorCode ec = U_ZERO_ERROR;
3696 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3697 if (t == NULL || U_FAILURE(ec)) {
3698 delete t;
3699 errln("FAIL: createFromRules failed");
3700 return;
3701 }
3702
3703 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3704 "y x yz z d gd de gdq gdqfg ddrfg");
3705 delete t;
3706
3707 // Overlapped string test. Make sure that when multiple
3708 // strings can match that the longest one is matched.
3709 rule =
3710 " [a {ab} {abc}] > x;"
3711 " b > y;"
3712 " c > z;"
3713 " q [t {st} {rst}] { e > p;" ;
3714
3715 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3716 if (t == NULL || U_FAILURE(ec)) {
3717 delete t;
3718 errln("FAIL: createFromRules failed");
3719 return;
3720 }
3721
3722 expect(*t, "a ab abc qte qste qrste",
3723 "x x x qtp qstp qrstp");
3724 delete t;
3725 }
3726
3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3728 // BEGIN TestUserFunction support factory
3729
3730 Transliterator* _TUFF[4];
3731 UnicodeString* _TUFID[4];
3732
3733 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3734 Transliterator::Token context) {
3735 return _TUFF[context.integer]->clone();
3736 }
3737
3738 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3739 _TUFF[n] = t;
3740 _TUFID[n] = new UnicodeString(ID);
3741 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3742 }
3743
3744 static void _TUFUnreg(int32_t n) {
3745 if (_TUFF[n] != NULL) {
3746 Transliterator::unregister(*_TUFID[n]);
3747 delete _TUFF[n];
3748 delete _TUFID[n];
3749 }
3750 }
3751
3752 // END TestUserFunction support factory
3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3754
3755 /**
3756 * Test that user-registered transliterators can be used under function
3757 * syntax.
3758 */
3759 void TransliteratorTest::TestUserFunction() {
3760
3761 Transliterator* t;
3762 UParseError pe;
3763 UErrorCode ec = U_ZERO_ERROR;
3764
3765 // Setup our factory
3766 int32_t i;
3767 for (i=0; i<4; ++i) {
3768 _TUFF[i] = NULL;
3769 }
3770
3771 // There's no need to register inverses if we don't use them
3772 t = Transliterator::createFromRules("gif",
3773 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3774 UTRANS_FORWARD, pe, ec);
3775 if (t == NULL || U_FAILURE(ec)) {
3776 dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3777 return;
3778 }
3779 _TUFReg("Any-gif", t, 0);
3780
3781 t = Transliterator::createFromRules("RemoveCurly",
3782 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3783 UTRANS_FORWARD, pe, ec);
3784 if (t == NULL || U_FAILURE(ec)) {
3785 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3786 goto FAIL;
3787 }
3788 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3789 _TUFReg("Any-RemoveCurly", t, 1);
3790
3791 logln("Trying &hex");
3792 t = Transliterator::createFromRules("hex2",
3793 "(.) > &hex($1);",
3794 UTRANS_FORWARD, pe, ec);
3795 if (t == NULL || U_FAILURE(ec)) {
3796 errln("FAIL: createFromRules");
3797 goto FAIL;
3798 }
3799 logln("Registering");
3800 _TUFReg("Any-hex2", t, 2);
3801 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3802 if (t == NULL || U_FAILURE(ec)) {
3803 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3804 goto FAIL;
3805 }
3806 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3807 delete t;
3808
3809 logln("Trying &gif");
3810 t = Transliterator::createFromRules("gif2",
3811 "(.) > &Gif(&Hex2($1));",
3812 UTRANS_FORWARD, pe, ec);
3813 if (t == NULL || U_FAILURE(ec)) {
3814 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3815 goto FAIL;
3816 }
3817 logln("Registering");
3818 _TUFReg("Any-gif2", t, 3);
3819 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3820 if (t == NULL || U_FAILURE(ec)) {
3821 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3822 goto FAIL;
3823 }
3824 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3825 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3826 delete t;
3827
3828 // Test that filters are allowed after &
3829 t = Transliterator::createFromRules("test",
3830 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3831 UTRANS_FORWARD, pe, ec);
3832 if (t == NULL || U_FAILURE(ec)) {
3833 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3834 goto FAIL;
3835 }
3836 expect(*t, "abc",
3837 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3838 delete t;
3839
3840 FAIL:
3841 for (i=0; i<4; ++i) {
3842 _TUFUnreg(i);
3843 }
3844 }
3845
3846 /**
3847 * Test the Any-X transliterators.
3848 */
3849 void TransliteratorTest::TestAnyX(void) {
3850 UParseError parseError;
3851 UErrorCode status = U_ZERO_ERROR;
3852 Transliterator* anyLatin =
3853 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3854 if (anyLatin==0) {
3855 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3856 delete anyLatin;
3857 return;
3858 }
3859
3860 expect(*anyLatin,
3861 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3862 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3863
3864 delete anyLatin;
3865 }
3866
3867 /**
3868 * Test Any-X transliterators with sample letters from all scripts.
3869 */
3870 void TransliteratorTest::TestAny(void) {
3871 UErrorCode status = U_ZERO_ERROR;
3872 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3873 // function call parameters going on in this test.
3874 UnicodeSet alphabetic("[:alphabetic:]", status);
3875 if (U_FAILURE(status)) {
3876 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3877 return;
3878 }
3879 alphabetic.freeze();
3880
3881 UnicodeString testString;
3882 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3883 const char *scriptName = uscript_getShortName((UScriptCode)i);
3884 if (scriptName == NULL) {
3885 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3886 return;
3887 }
3888
3889 UnicodeSet sample;
3890 sample.applyPropertyAlias("script", scriptName, status);
3891 if (U_FAILURE(status)) {
3892 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3893 return;
3894 }
3895 sample.retainAll(alphabetic);
3896 for (int32_t count=0; count<5; count++) {
3897 UChar32 c = sample.charAt(count);
3898 if (c == -1) {
3899 break;
3900 }
3901 testString.append(c);
3902 }
3903 }
3904
3905 UParseError parseError;
3906 Transliterator* anyLatin =
3907 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3908 if (U_FAILURE(status)) {
3909 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3910 return;
3911 }
3912
3913 logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3914 anyLatin->transliterate(testString);
3915 logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3916 delete anyLatin;
3917 }
3918
3919
3920 /**
3921 * Test the source and target set API. These are only implemented
3922 * for RBT and CompoundTransliterator at this time.
3923 */
3924 void TransliteratorTest::TestSourceTargetSet() {
3925 UErrorCode ec = U_ZERO_ERROR;
3926
3927 // Rules
3928 const char* r =
3929 "a > b; "
3930 "r [x{lu}] > q;";
3931
3932 // Expected source
3933 UnicodeSet expSrc("[arx{lu}]", ec);
3934
3935 // Expected target
3936 UnicodeSet expTrg("[bq]", ec);
3937
3938 UParseError pe;
3939 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3940
3941 if (U_FAILURE(ec)) {
3942 delete t;
3943 errln("FAIL: Couldn't set up test");
3944 return;
3945 }
3946
3947 UnicodeSet src; t->getSourceSet(src);
3948 UnicodeSet trg; t->getTargetSet(trg);
3949
3950 if (src == expSrc && trg == expTrg) {
3951 UnicodeString a, b;
3952 logln((UnicodeString)"Ok: " +
3953 r + " => source = " + src.toPattern(a, TRUE) +
3954 ", target = " + trg.toPattern(b, TRUE));
3955 } else {
3956 UnicodeString a, b, c, d;
3957 errln((UnicodeString)"FAIL: " +
3958 r + " => source = " + src.toPattern(a, TRUE) +
3959 ", expected " + expSrc.toPattern(b, TRUE) +
3960 "; target = " + trg.toPattern(c, TRUE) +
3961 ", expected " + expTrg.toPattern(d, TRUE));
3962 }
3963
3964 delete t;
3965 }
3966
3967 /**
3968 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3969 */
3970 void TransliteratorTest::TestPatternWhiteSpace() {
3971 // Rules
3972 const char* r = "a > \\u200E b;";
3973
3974 UErrorCode ec = U_ZERO_ERROR;
3975 UParseError pe;
3976 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3977
3978 if (U_FAILURE(ec)) {
3979 errln("FAIL: Couldn't set up test");
3980 } else {
3981 expect(*t, "a", "b");
3982 }
3983 delete t;
3984
3985 // UnicodeSet
3986 ec = U_ZERO_ERROR;
3987 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3988
3989 if (U_FAILURE(ec)) {
3990 errln("FAIL: Couldn't set up test");
3991 } else {
3992 if (set.contains(0x200E)) {
3993 errln("FAIL: U+200E not being ignored by UnicodeSet");
3994 }
3995 }
3996 }
3997 //======================================================================
3998 // this method is in TestUScript.java
3999 //======================================================================
4000 void TransliteratorTest::TestAllCodepoints(){
4001 UScriptCode code= USCRIPT_INVALID_CODE;
4002 char id[256]={'\0'};
4003 char abbr[256]={'\0'};
4004 char newId[256]={'\0'};
4005 char newAbbrId[256]={'\0'};
4006 char oldId[256]={'\0'};
4007 char oldAbbrId[256]={'\0'};
4008
4009 UErrorCode status =U_ZERO_ERROR;
4010 UParseError pe;
4011
4012 for(uint32_t i = 0; i<=0x10ffff; i++){
4013 code = uscript_getScript(i,&status);
4014 if(code == USCRIPT_INVALID_CODE){
4015 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4016 }
4017 const char* myId = uscript_getName(code);
4018 if(!myId) {
4019 dataerrln("Valid script code returned NULL name. Check your data!");
4020 return;
4021 }
4022 uprv_strcpy(id,myId);
4023 uprv_strcpy(abbr,uscript_getShortName(code));
4024
4025 uprv_strcpy(newId,"[:");
4026 uprv_strcat(newId,id);
4027 uprv_strcat(newId,":];NFD");
4028
4029 uprv_strcpy(newAbbrId,"[:");
4030 uprv_strcat(newAbbrId,abbr);
4031 uprv_strcat(newAbbrId,":];NFD");
4032
4033 if(uprv_strcmp(newId,oldId)!=0){
4034 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4035 if(t==NULL || U_FAILURE(status)){
4036 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4037 }
4038 delete t;
4039 }
4040 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4041 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4042 if(t==NULL || U_FAILURE(status)){
4043 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4044 }
4045 delete t;
4046 }
4047 uprv_strcpy(oldId,newId);
4048 uprv_strcpy(oldAbbrId, newAbbrId);
4049
4050 }
4051
4052 }
4053
// Creates the forward transliterator for xlitId and verifies that its dynamic
// class ID equals expectedClass::getStaticClassID().  xlitId is also passed to
// a "%s" format, so it has to be a C string.
#define TEST_TRANSLIT_ID(xlitId, expectedClass) { \
    UErrorCode idStatus = U_ZERO_ERROR; \
    Transliterator* created = Transliterator::createInstance(xlitId, UTRANS_FORWARD, idStatus); \
    if (U_SUCCESS(idStatus)) { \
        /* can't add an assignment-operator coverage check (*created = *created) here */ \
        if (created->getDynamicClassID() != expectedClass::getStaticClassID()) { \
            errln("FAIL: " #expectedClass " dynamic and static class ID mismatch"); \
        } \
    } else { \
        dataerrln("FAIL: Couldn't create %s - %s", xlitId, u_errorName(idStatus)); \
    } \
    delete created; \
}
4067
// Builds a transliterator named "_" from ruleText and verifies that its
// dynamic class ID equals expectedClass::getStaticClassID().  ruleText must be
// a string literal because it is pasted onto the failure message.
#define TEST_TRANSLIT_RULE(ruleText, expectedClass) { \
    UParseError ruleParseError; \
    UErrorCode ruleStatus = U_ZERO_ERROR; \
    Transliterator* created = Transliterator::createFromRules("_", ruleText, UTRANS_FORWARD, ruleParseError, ruleStatus); \
    if (U_SUCCESS(ruleStatus)) { \
        /* can't add an assignment-operator coverage check (*created = *created) here */ \
        if (created->getDynamicClassID() != expectedClass ::getStaticClassID()) { \
            errln("FAIL: " #expectedClass " dynamic and static class ID mismatch"); \
        } \
    } else { \
        errln("FAIL: Couldn't create " ruleText); \
    } \
    delete created; \
}
4082
4083 void TransliteratorTest::TestBoilerplate() {
4084 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4085 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4086 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4087 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4088 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4089 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4090 TEST_TRANSLIT_ID("Null", NullTransliterator);
4091 TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4092 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4093 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4094 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4095 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4096 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4097 }
4098
4099 void TransliteratorTest::TestAlternateSyntax() {
4100 // U+2206 == &
4101 // U+2190 == <
4102 // U+2192 == >
4103 // U+2194 == <>
4104 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4105 "abc",
4106 "xbz");
4107 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4108 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4109 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4110 }
4111
// Rule sets referenced by index from the test-case table and the tests below.
// Entries whose ::BEGIN/::END rules are disabled are kept as empty strings so
// that the remaining indexes do not shift; the disabled rule text is preserved
// in the line comments next to each placeholder.
static const char* BEGIN_END_RULES[] = {
    /* [0] */
    "abc > xy;"
    "aba > z;",

    /* [1] -- disabled:
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    */
    "",

    /* [2] -- disabled:
    //    "abc > xy;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    */
    "",

    /* [3] -- disabled:
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "aba > z;",
    */
    "",

    /* [4] */
    "abc > xy;"
    "::Null;"
    "aba > z;",

    /* [5] */
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    /* [6] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    /* [7] */
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    /* [8] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    /* [9] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10] -- disabled:
    //    "::BEGIN;"
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::END;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';",
    "",

    // [11] -- disabled:
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;",
    "",

    // [12] -- disabled:
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "$ab = [ab];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;"
    //    "::BEGIN;"
    //    "$ab { ' ' } $ab > '-';"
    //    "c { ' ' > ;"
    //    "::END;"
    //    "::BEGIN;"
    //    "'a-a' > a\\%|a;"
    //    "::END;",
    "",

    /* [13] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    /* [14] -- disabled:
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > yz;"
    //    "::END;"
    //    "::Upper;",
    */
    "",

    /* [15] */
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    /* [16] -- disabled:
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc <> xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba <> yz;"
    //    "::END;"
    //    "::Upper(Lower);"
    //    "::([XYZ]);"
    */
    "",

    /* [17] */
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
static const int32_t BEGIN_END_RULES_length =
    static_cast<int32_t>(sizeof(BEGIN_END_RULES) / sizeof(*BEGIN_END_RULES));
4286
4287 /*
4288 (This entire test is commented out below and will need some heavy revision when we re-add
4289 the ::BEGIN/::END stuff)
4290 static const char* BOGUS_BEGIN_END_RULES[] = {
4291 // [7]
4292 "::BEGIN;"
4293 "abc > xy;"
4294 "::BEGIN;"
4295 "aba > z;"
4296 "::END;"
4297 "::END;",
4298
4299 // [8]
4300 "abc > xy;"
4301 " aba > z;"
4302 "::END;",
4303
4304 // [9]
4305 "::BEGIN;"
4306 "::Upper;"
4307 "::END;"
4308 };
4309 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4310 */
4311
4312 static const char* BEGIN_END_TEST_CASES[] = {
4313 // rules input expected output
4314 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
4315 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4316 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4317 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4318 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
4319 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
4320
4321 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
4322 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
4323 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
4324 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
4325 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4326 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4327 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4328 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4329 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4330 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
4331 BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
4332 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
4333
4334 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4335 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4336 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4337 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4338 };
4339 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4340
4341 void TransliteratorTest::TestBeginEnd() {
4342 // run through the list of test cases above
4343 int32_t i = 0;
4344 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4345 expect((UnicodeString)"Test case #" + (i / 3),
4346 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4347 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4348 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4349 }
4350
4351 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4352 UParseError parseError;
4353 UErrorCode status = U_ZERO_ERROR;
4354 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4355 UTRANS_REVERSE, parseError, status);
4356 if (reversed == 0 || U_FAILURE(status)) {
4357 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4358 } else {
4359 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4360 }
4361 delete reversed;
4362
4363 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4364 // that all of them cause errors
4365 /*
4366 (commented out until we have the real ::BEGIN/::END stuff in place
4367 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4368 UParseError parseError;
4369 UErrorCode status = U_ZERO_ERROR;
4370 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4371 UTRANS_FORWARD, parseError, status);
4372 if (!U_FAILURE(status)) {
4373 delete t;
4374 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4375 }
4376 }
4377 */
4378 }
4379
4380 void TransliteratorTest::TestBeginEndToRules() {
4381 // run through the same list of test cases we used above, but this time, instead of just
4382 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4383 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4384 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4385 // to (i.e., does the same thing as) the original rule set
4386 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4387 UParseError parseError;
4388 UErrorCode status = U_ZERO_ERROR;
4389 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4390 UTRANS_FORWARD, parseError, status);
4391 if (U_FAILURE(status)) {
4392 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4393 } else {
4394 UnicodeString rules;
4395 t->toRules(rules, TRUE);
4396 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4397 UTRANS_FORWARD, parseError, status);
4398 if (U_FAILURE(status)) {
4399 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4400 parseError, status);
4401 delete t;
4402 } else {
4403 expect(*t2,
4404 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4405 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4406 delete t;
4407 delete t2;
4408 }
4409 }
4410 }
4411
4412 // do the same thing for the reversible test case
4413 UParseError parseError;
4414 UErrorCode status = U_ZERO_ERROR;
4415 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4416 UTRANS_REVERSE, parseError, status);
4417 if (U_FAILURE(status)) {
4418 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4419 } else {
4420 UnicodeString rules;
4421 reversed->toRules(rules, FALSE);
4422 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4423 parseError, status);
4424 if (U_FAILURE(status)) {
4425 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4426 parseError, status);
4427 delete reversed;
4428 } else {
4429 expect(*reversed2,
4430 UnicodeString("xy XY XYZ yz YZ"),
4431 UnicodeString("xy abc xaba yz aba"));
4432 delete reversed;
4433 delete reversed2;
4434 }
4435 }
4436 }
4437
4438 void TransliteratorTest::TestRegisterAlias() {
4439 UnicodeString longID("Lower;[aeiou]Upper");
4440 UnicodeString shortID("Any-CapVowels");
4441 UnicodeString reallyShortID("CapVowels");
4442
4443 Transliterator::registerAlias(shortID, longID);
4444
4445 UErrorCode err = U_ZERO_ERROR;
4446 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4447 if (U_FAILURE(err)) {
4448 errln("Failed to instantiate transliterator with long ID");
4449 Transliterator::unregister(shortID);
4450 return;
4451 }
4452 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4453 if (U_FAILURE(err)) {
4454 errln("Failed to instantiate transliterator with short ID");
4455 delete t1;
4456 Transliterator::unregister(shortID);
4457 return;
4458 }
4459
4460 if (t1->getID() != longID)
4461 errln("Transliterator instantiated with long ID doesn't have long ID");
4462 if (t2->getID() != reallyShortID)
4463 errln("Transliterator instantiated with short ID doesn't have short ID");
4464
4465 UnicodeString rules1;
4466 UnicodeString rules2;
4467
4468 t1->toRules(rules1, TRUE);
4469 t2->toRules(rules2, TRUE);
4470 if (rules1 != rules2)
4471 errln("Alias transliterators aren't the same");
4472
4473 delete t1;
4474 delete t2;
4475 Transliterator::unregister(shortID);
4476
4477 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4478 if (U_SUCCESS(err)) {
4479 errln("Instantiation with short ID succeeded after short ID was unregistered");
4480 delete t1;
4481 }
4482
4483 // try the same thing again, but this time with something other than
4484 // an instance of CompoundTransliterator
4485 UnicodeString realID("Latin-Greek");
4486 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4487 Transliterator::registerAlias(fakeID, realID);
4488
4489 err = U_ZERO_ERROR;
4490 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4491 if (U_FAILURE(err)) {
4492 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4493 Transliterator::unregister(realID);
4494 return;
4495 }
4496 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4497 if (U_FAILURE(err)) {
4498 errln("Failed to instantiate transliterator with fake ID");
4499 delete t1;
4500 Transliterator::unregister(realID);
4501 return;
4502 }
4503
4504 t1->toRules(rules1, TRUE);
4505 t2->toRules(rules2, TRUE);
4506 if (rules1 != rules2)
4507 errln("Alias transliterators aren't the same");
4508
4509 delete t1;
4510 delete t2;
4511 Transliterator::unregister(fakeID);
4512 }
4513
4514 void TransliteratorTest::TestRuleStripping() {
4515 /*
4516 #
4517 \uE001>\u0C01; # SIGN
4518 */
4519 static const UChar rule[] = {
4520 0x0023,0x0020,0x000D,0x000A,
4521 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4522 };
4523 static const UChar expectedRule[] = {
4524 0xE001,0x003E,0x0C01,0x003B,0
4525 };
4526 UChar result[sizeof(rule)/sizeof(rule[0])];
4527 UErrorCode status = U_ZERO_ERROR;
4528 int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4529 if (len != u_strlen(expectedRule)) {
4530 errln("utrans_stripRules return len = %d", len);
4531 }
4532 if (u_strncmp(expectedRule, result, len) != 0) {
4533 errln("utrans_stripRules did not return expected string");
4534 }
4535 }
4536
4537 /**
4538 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4539 */
4540 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4541 UParseError parseError;
4542 UErrorCode status = U_ZERO_ERROR;
4543 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4544 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4545 if (hf == 0 || fh == 0) {
4546 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4547 delete hf;
4548 delete fh;
4549 return;
4550 }
4551
4552 // Array of 2n items
4553 // Each item is
4554 // "hf"|"fh"|"both",
4555 // <Halfwidth>,
4556 // <Fullwidth>
4557 const char* DATA[] = {
4558 "both",
4559 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4560 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4561 };
4562 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4563
4564 for (int32_t i=0; i<DATA_length; i+=3) {
4565 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4566 UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4567 switch (*DATA[i]) {
4568 case 0x68: //'h': // Halfwidth-Fullwidth only
4569 expect(*hf, h, f);
4570 break;
4571 case 0x66: //'f': // Fullwidth-Halfwidth only
4572 expect(*fh, f, h);
4573 break;
4574 case 0x62: //'b': // both directions
4575 expect(*hf, h, f);
4576 expect(*fh, f, h);
4577 break;
4578 }
4579 }
4580 delete hf;
4581 delete fh;
4582 }
4583
4584
4585 /**
4586 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4587 * TODO: confirm that the expected results are correct.
4588 * For now, test just confirms that C++ and Java give identical results.
4589 */
4590 void TransliteratorTest::TestThai(void) {
4591 #if !UCONFIG_NO_BREAK_ITERATION
4592 UParseError parseError;
4593 UErrorCode status = U_ZERO_ERROR;
4594 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4595 if (tr == 0) {
4596 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4597 return;
4598 }
4599 if (U_FAILURE(status)) {
4600 errln("FAIL: createInstance failed with %s", u_errorName(status));
4601 return;
4602 }
4603 const char *thaiText =
4604 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4605 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4606 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4607 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4608 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4609 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4610 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4611 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4612 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4613 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4614 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4615 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4616 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4617 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4618 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4619 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4620 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4621 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4622 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4623 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4624 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4625 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4626 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4627 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4628 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4629 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4630 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4631 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4632 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4633 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4634
4635 const char *latinText =
4636 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4637 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4638 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4639 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4640 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4641 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4642 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4643 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4644 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4645 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4646 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4647 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4648 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4649 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4650 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4651 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4652 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4653 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4654
4655
4656 UnicodeString xlitText(thaiText);
4657 xlitText = xlitText.unescape();
4658 tr->transliterate(xlitText);
4659
4660 UnicodeString expectedText(latinText);
4661 expectedText = expectedText.unescape();
4662 expect(*tr, xlitText, expectedText);
4663
4664 delete tr;
4665 #endif
4666 }
4667
4668
4669 //======================================================================
4670 // Support methods
4671 //======================================================================
4672 void TransliteratorTest::expectT(const UnicodeString& id,
4673 const UnicodeString& source,
4674 const UnicodeString& expectedResult) {
4675 UErrorCode ec = U_ZERO_ERROR;
4676 UParseError pe;
4677 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4678 if (U_FAILURE(ec)) {
4679 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
4680 delete t;
4681 return;
4682 }
4683 expect(*t, source, expectedResult);
4684 delete t;
4685 }
4686
4687 void TransliteratorTest::reportParseError(const UnicodeString& message,
4688 const UParseError& parseError,
4689 const UErrorCode& status) {
4690 dataerrln(message +
4691 /*", parse error " + parseError.code +*/
4692 ", line " + parseError.line +
4693 ", offset " + parseError.offset +
4694 ", pre-context " + prettify(parseError.preContext, TRUE) +
4695 ", post-context " + prettify(parseError.postContext,TRUE) +
4696 ", Error: " + u_errorName(status));
4697 }
4698
4699 void TransliteratorTest::expect(const UnicodeString& rules,
4700 const UnicodeString& source,
4701 const UnicodeString& expectedResult,
4702 UTransPosition *pos) {
4703 expect("<ID>", rules, source, expectedResult, pos);
4704 }
4705
4706 void TransliteratorTest::expect(const UnicodeString& id,
4707 const UnicodeString& rules,
4708 const UnicodeString& source,
4709 const UnicodeString& expectedResult,
4710 UTransPosition *pos) {
4711 UErrorCode status = U_ZERO_ERROR;
4712 UParseError parseError;
4713 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4714 if (U_FAILURE(status)) {
4715 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4716 } else {
4717 expect(*t, source, expectedResult, pos);
4718 }
4719 delete t;
4720 }
4721
4722 void TransliteratorTest::expect(const Transliterator& t,
4723 const UnicodeString& source,
4724 const UnicodeString& expectedResult,
4725 const Transliterator& reverseTransliterator) {
4726 expect(t, source, expectedResult);
4727 expect(reverseTransliterator, expectedResult, source);
4728 }
4729
4730 void TransliteratorTest::expect(const Transliterator& t,
4731 const UnicodeString& source,
4732 const UnicodeString& expectedResult,
4733 UTransPosition *pos) {
4734 if (pos == 0) {
4735 UnicodeString result(source);
4736 t.transliterate(result);
4737 expectAux(t.getID() + ":String", source, result, expectedResult);
4738 }
4739 UTransPosition index={0, 0, 0, 0};
4740 if (pos != 0) {
4741 index = *pos;
4742 }
4743
4744 UnicodeString rsource(source);
4745 if (pos == 0) {
4746 t.transliterate(rsource);
4747 } else {
4748 // Do it all at once -- below we do it incrementally
4749 t.finishTransliteration(rsource, *pos);
4750 }
4751 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4752
4753 // Test keyboard (incremental) transliteration -- this result
4754 // must be the same after we finalize (see below).
4755 UnicodeString log;
4756 rsource.remove();
4757 if (pos != 0) {
4758 rsource = source;
4759 formatInput(log, rsource, index);
4760 log.append(" -> ");
4761 UErrorCode status = U_ZERO_ERROR;
4762 t.transliterate(rsource, index, status);
4763 formatInput(log, rsource, index);
4764 } else {
4765 for (int32_t i=0; i<source.length(); ++i) {
4766 if (i != 0) {
4767 log.append(" + ");
4768 }
4769 log.append(source.charAt(i)).append(" -> ");
4770 UErrorCode status = U_ZERO_ERROR;
4771 t.transliterate(rsource, index, source.charAt(i), status);
4772 formatInput(log, rsource, index);
4773 }
4774 }
4775
4776 // As a final step in keyboard transliteration, we must call
4777 // transliterate to finish off any pending partial matches that
4778 // were waiting for more input.
4779 t.finishTransliteration(rsource, index);
4780 log.append(" => ").append(rsource);
4781
4782 expectAux(t.getID() + ":Keyboard", log,
4783 rsource == expectedResult,
4784 expectedResult);
4785 }
4786
4787
4788 /**
4789 * @param appendTo result is appended to this param.
4790 * @param input the string being transliterated
4791 * @param pos the index struct
4792 */
4793 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4794 const UnicodeString& input,
4795 const UTransPosition& pos) {
4796 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4797 // the {} indicate the context start and limit, and the ||
4798 // indicate the start and limit.
4799 if (0 <= pos.contextStart &&
4800 pos.contextStart <= pos.start &&
4801 pos.start <= pos.limit &&
4802 pos.limit <= pos.contextLimit &&
4803 pos.contextLimit <= input.length()) {
4804
4805 UnicodeString a, b, c, d, e;
4806 input.extractBetween(0, pos.contextStart, a);
4807 input.extractBetween(pos.contextStart, pos.start, b);
4808 input.extractBetween(pos.start, pos.limit, c);
4809 input.extractBetween(pos.limit, pos.contextLimit, d);
4810 input.extractBetween(pos.contextLimit, input.length(), e);
4811 appendTo.append(a).append((UChar)123/*{*/).append(b).
4812 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4813 append((UChar)125/*}*/).append(e);
4814 } else {
4815 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4816 pos.contextStart + ", s=" + pos.start + ", l=" +
4817 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4818 input);
4819 }
4820 return appendTo;
4821 }
4822
4823 void TransliteratorTest::expectAux(const UnicodeString& tag,
4824 const UnicodeString& source,
4825 const UnicodeString& result,
4826 const UnicodeString& expectedResult) {
4827 expectAux(tag, source + " -> " + result,
4828 result == expectedResult,
4829 expectedResult);
4830 }
4831
4832 void TransliteratorTest::expectAux(const UnicodeString& tag,
4833 const UnicodeString& summary, UBool pass,
4834 const UnicodeString& expectedResult) {
4835 if (pass) {
4836 logln(UnicodeString("(")+tag+") " + prettify(summary));
4837 } else {
4838 dataerrln(UnicodeString("FAIL: (")+tag+") "
4839 + prettify(summary)
4840 + ", expected " + prettify(expectedResult));
4841 }
4842 }
4843
4844 #endif /* #if !UCONFIG_NO_TRANSLITERATION */