]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/transtst.cpp
ICU-6.2.6.tar.gz
[apple/icu.git] / icuSources / test / intltest / transtst.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/10/99 aliu Creation.
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_TRANSLITERATION
14
15 #include "transtst.h"
16 #include "unicode/locid.h"
17 #include "unicode/dtfmtsym.h"
18 #include "unicode/normlzr.h"
19 #include "unicode/translit.h"
20 #include "unicode/uchar.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/usetiter.h"
25 #include "unicode/uscript.h"
26 #include "cpdtrans.h"
27 #include "nultrans.h"
28 #include "rbt.h"
29 #include "anytrans.h"
30 #include "esctrn.h"
31 #include "name2uni.h"
32 #include "nortrans.h"
33 #include "remtrans.h"
34 #include "titletrn.h"
35 #include "tolowtrn.h"
36 #include "toupptrn.h"
37 #include "unesctrn.h"
38 #include "uni2name.h"
39 #include "cstring.h"
40 #include "cmemory.h"
41 #include <stdio.h>
42
43 /***********************************************************************
44
45 HOW TO USE THIS TEST FILE
46 -or-
47 How I developed on two platforms
48 without losing (too much of) my mind
49
50
51 1. Add new tests by copying/pasting/changing existing tests. On Java,
52 any public void method named Test...() taking no parameters becomes
53 a test. On C++, you need to modify the header and add a line to
54 the runIndexedTest() dispatch method.
55
56 2. Make liberal use of the expect() method; it is your friend.
57
58 3. The tests in this file exactly match those in a sister file on the
59 other side. The two files are:
60
61 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
62 icu4c: source/test/intltest/transtst.cpp
63
64 ==> THIS IS THE IMPORTANT PART <==
65
66 When you add a test in this file, add it in TransliteratorTest.java
67 too. Give it the same name and put it in the same relative place.
68 This makes maintenance a lot simpler for any poor soul who ends up
69 trying to synchronize the tests between icu4j and icu4c.
70
71 4. If you MUST enter a test that is NOT paralleled in the sister file,
72 then add it in the special non-mirrored section. These are
73 labeled
74
75 "icu4j ONLY"
76
77 or
78
79 "icu4c ONLY"
80
81 Make sure you document the reason the test is here and not there.
82
83
84 Thank you.
85 The Management
86 ***********************************************************************/
87
88 // Define character constants thusly to be EBCDIC-friendly
89 enum {
90 LEFT_BRACE=((UChar)0x007B), /*{*/
91 PIPE =((UChar)0x007C), /*|*/
92 ZERO =((UChar)0x0030), /*0*/
93 UPPER_A =((UChar)0x0041) /*A*/
94 };
95
96 TransliteratorTest::TransliteratorTest()
97 : DESERET_DEE((UChar32)0x10414),
98 DESERET_dee((UChar32)0x1043C)
99 {
100 }
101
102 TransliteratorTest::~TransliteratorTest() {}
103
104 void
105 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
106 const char* &name, char* /*par*/) {
107 switch (index) {
108 TESTCASE(0,TestInstantiation);
109 TESTCASE(1,TestSimpleRules);
110 TESTCASE(2,TestRuleBasedInverse);
111 TESTCASE(3,TestKeyboard);
112 TESTCASE(4,TestKeyboard2);
113 TESTCASE(5,TestKeyboard3);
114 TESTCASE(6,TestArabic);
115 TESTCASE(7,TestCompoundKana);
116 TESTCASE(8,TestCompoundHex);
117 TESTCASE(9,TestFiltering);
118 TESTCASE(10,TestInlineSet);
119 TESTCASE(11,TestPatternQuoting);
120 TESTCASE(12,TestJ277);
121 TESTCASE(13,TestJ243);
122 TESTCASE(14,TestJ329);
123 TESTCASE(15,TestSegments);
124 TESTCASE(16,TestCursorOffset);
125 TESTCASE(17,TestArbitraryVariableValues);
126 TESTCASE(18,TestPositionHandling);
127 TESTCASE(19,TestHiraganaKatakana);
128 TESTCASE(20,TestCopyJ476);
129 TESTCASE(21,TestAnchors);
130 TESTCASE(22,TestInterIndic);
131 TESTCASE(23,TestFilterIDs);
132 TESTCASE(24,TestCaseMap);
133 TESTCASE(25,TestNameMap);
134 TESTCASE(26,TestLiberalizedID);
135 TESTCASE(27,TestCreateInstance);
136 TESTCASE(28,TestNormalizationTransliterator);
137 TESTCASE(29,TestCompoundRBT);
138 TESTCASE(30,TestCompoundFilter);
139 TESTCASE(31,TestRemove);
140 TESTCASE(32,TestToRules);
141 TESTCASE(33,TestContext);
142 TESTCASE(34,TestSupplemental);
143 TESTCASE(35,TestQuantifier);
144 TESTCASE(36,TestSTV);
145 TESTCASE(37,TestCompoundInverse);
146 TESTCASE(38,TestNFDChainRBT);
147 TESTCASE(39,TestNullInverse);
148 TESTCASE(40,TestAliasInverseID);
149 TESTCASE(41,TestCompoundInverseID);
150 TESTCASE(42,TestUndefinedVariable);
151 TESTCASE(43,TestEmptyContext);
152 TESTCASE(44,TestCompoundFilterID);
153 TESTCASE(45,TestPropertySet);
154 TESTCASE(46,TestNewEngine);
155 TESTCASE(47,TestQuantifiedSegment);
156 TESTCASE(48,TestDevanagariLatinRT);
157 TESTCASE(49,TestTeluguLatinRT);
158 TESTCASE(50,TestCompoundLatinRT);
159 TESTCASE(51,TestSanskritLatinRT);
160 TESTCASE(52,TestLocaleInstantiation);
161 TESTCASE(53,TestTitleAccents);
162 TESTCASE(54,TestLocaleResource);
163 TESTCASE(55,TestParseError);
164 TESTCASE(56,TestOutputSet);
165 TESTCASE(57,TestVariableRange);
166 TESTCASE(58,TestInvalidPostContext);
167 TESTCASE(59,TestIDForms);
168 TESTCASE(60,TestToRulesMark);
169 TESTCASE(61,TestEscape);
170 TESTCASE(62,TestAnchorMasking);
171 TESTCASE(63,TestDisplayName);
172 TESTCASE(64,TestSpecialCases);
173 TESTCASE(65,TestIncrementalProgress);
174 TESTCASE(66,TestSurrogateCasing);
175 TESTCASE(67,TestFunction);
176 TESTCASE(68,TestInvalidBackRef);
177 TESTCASE(69,TestMulticharStringSet);
178 TESTCASE(70,TestUserFunction);
179 TESTCASE(71,TestAnyX);
180 TESTCASE(72,TestSourceTargetSet);
181 TESTCASE(73,TestGurmukhiDevanagari);
182 TESTCASE(74,TestRuleWhitespace);
183 TESTCASE(75,TestAllCodepoints);
184 TESTCASE(76,TestBoilerplate);
185 TESTCASE(77,TestAlternateSyntax);
186 default: name = ""; break;
187 }
188 }
189
190 static const UVersionInfo ICU_31 = {3,1,0,0};
191 /**
192 * Make sure every system transliterator can be instantiated.
193 *
194 * ALSO test that the result of toRules() for each rule is a valid
195 * rule. Do this here so we don't have to have another test that
196 * instantiates everything as well.
197 */
198 void TransliteratorTest::TestInstantiation() {
199 UErrorCode ec = U_ZERO_ERROR;
200 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
201 assertSuccess("getAvailableIDs()", ec);
202 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
203 int32_t n = Transliterator::countAvailableIDs();
204 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
205 avail->count(ec) == n);
206 assertSuccess("count()", ec);
207 UnicodeString name;
208 for (int32_t i=0; i<n; ++i) {
209 const UnicodeString& id = *avail->snext(ec);
210 if (!assertSuccess("snext()", ec) ||
211 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
212 break;
213 }
214 UnicodeString id2 = Transliterator::getAvailableID(i);
215 if (id.length() < 1) {
216 errln(UnicodeString("FAIL: getAvailableID(") +
217 i + ") returned empty string");
218 continue;
219 }
220 if (id != id2) {
221 errln(UnicodeString("FAIL: getAvailableID(") +
222 i + ") != getAvailableIDs().snext()");
223 continue;
224 }
225 if(id2.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){
226 continue;
227 }
228 UParseError parseError;
229 UErrorCode status = U_ZERO_ERROR;
230 Transliterator* t = Transliterator::createInstance(id,
231 UTRANS_FORWARD, parseError,status);
232 name.truncate(0);
233 Transliterator::getDisplayName(id, name);
234 if (t == 0) {
235 errln(UnicodeString("FAIL: Couldn't create ") + id +
236 /*", parse error " + parseError.code +*/
237 ", line " + parseError.line +
238 ", offset " + parseError.offset +
239 ", pre-context " + prettify(parseError.preContext, TRUE) +
240 ", post-context " +prettify(parseError.postContext,TRUE) +
241 ", Error: " + u_errorName(status));
242 // When createInstance fails, it deletes the failing
243 // entry from the available ID list. We detect this
244 // here by looking for a change in countAvailableIDs.
245 int32_t nn = Transliterator::countAvailableIDs();
246 if (nn == (n - 1)) {
247 n = nn;
248 --i; // Compensate for deleted entry
249 }
250 } else {
251 logln(UnicodeString("OK: ") + name + " (" + id + ")");
252
253 // Now test toRules
254 UnicodeString rules;
255 t->toRules(rules, TRUE);
256 Transliterator *u = Transliterator::createFromRules("x",
257 rules, UTRANS_FORWARD, parseError,status);
258 if (u == 0) {
259 errln(UnicodeString("FAIL: ") + id +
260 ".createFromRules() => bad rules" +
261 /*", parse error " + parseError.code +*/
262 ", line " + parseError.line +
263 ", offset " + parseError.offset +
264 ", context " + prettify(parseError.preContext, TRUE) +
265 ", rules: " + prettify(rules, TRUE));
266 } else {
267 delete u;
268 }
269 delete t;
270 }
271 }
272 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
273 assertSuccess("snext()", ec);
274 delete avail;
275
276 // Now test the failure path
277 UParseError parseError;
278 UErrorCode status = U_ZERO_ERROR;
279 UnicodeString id("<Not a valid Transliterator ID>");
280 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
281 if (t != 0) {
282 errln("FAIL: " + id + " returned a transliterator");
283 delete t;
284 } else {
285 logln("OK: Bogus ID handled properly");
286 }
287 }
288
289 void TransliteratorTest::TestSimpleRules(void) {
290 /* Example: rules 1. ab>x|y
291 * 2. yc>z
292 *
293 * []|eabcd start - no match, copy e to tranlated buffer
294 * [e]|abcd match rule 1 - copy output & adjust cursor
295 * [ex|y]cd match rule 2 - copy output & adjust cursor
296 * [exz]|d no match, copy d to transliterated buffer
297 * [exzd]| done
298 */
299 expect(UnicodeString("ab>x|y;", "") +
300 "yc>z",
301 "eabcd", "exzd");
302
303 /* Another set of rules:
304 * 1. ab>x|yzacw
305 * 2. za>q
306 * 3. qc>r
307 * 4. cw>n
308 *
309 * []|ab Rule 1
310 * [x|yzacw] No match
311 * [xy|zacw] Rule 2
312 * [xyq|cw] Rule 4
313 * [xyqn]| Done
314 */
315 expect(UnicodeString("ab>x|yzacw;") +
316 "za>q;" +
317 "qc>r;" +
318 "cw>n",
319 "ab", "xyqn");
320
321 /* Test categories
322 */
323 UErrorCode status = U_ZERO_ERROR;
324 RuleBasedTransliterator t(
325 "<ID>",
326 UnicodeString("$dummy=").append((UChar)0xE100) +
327 UnicodeString(";"
328 "$vowel=[aeiouAEIOU];"
329 "$lu=[:Lu:];"
330 "$vowel } $lu > '!';"
331 "$vowel > '&';"
332 "'!' { $lu > '^';"
333 "$lu > '*';"
334 "a > ERROR", ""),
335 status);
336 if (U_FAILURE(status)) {
337 errln("FAIL: RBT constructor failed");
338 return;
339 }
340 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
341 }
342
343 /**
344 * Test inline set syntax and set variable syntax.
345 */
346 void TransliteratorTest::TestInlineSet(void) {
347 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
348 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
349
350 expect(UnicodeString(
351 "$digit = [0-9];"
352 "$alpha = [a-zA-Z];"
353 "$alphanumeric = [$digit $alpha];" // ***
354 "$special = [^$alphanumeric];" // ***
355 "$alphanumeric > '-';"
356 "$special > '*';", ""),
357
358 "thx-1138", "---*----");
359 }
360
361 /**
362 * Create some inverses and confirm that they work. We have to be
363 * careful how we do this, since the inverses will not be true
364 * inverses -- we can't throw any random string at the composition
365 * of the transliterators and expect the identity function. F x
366 * F' != I. However, if we are careful about the input, we will
367 * get the expected results.
368 */
369 void TransliteratorTest::TestRuleBasedInverse(void) {
370 UnicodeString RULES =
371 UnicodeString("abc>zyx;") +
372 "ab>yz;" +
373 "bc>zx;" +
374 "ca>xy;" +
375 "a>x;" +
376 "b>y;" +
377 "c>z;" +
378
379 "abc<zyx;" +
380 "ab<yz;" +
381 "bc<zx;" +
382 "ca<xy;" +
383 "a<x;" +
384 "b<y;" +
385 "c<z;" +
386
387 "";
388
389 const char* DATA[] = {
390 // Careful here -- random strings will not work. If we keep
391 // the left side to the domain and the right side to the range
392 // we will be okay though (left, abc; right xyz).
393 "a", "x",
394 "abcacab", "zyxxxyy",
395 "caccb", "xyzzy",
396 };
397
398 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
399
400 UErrorCode status = U_ZERO_ERROR;
401 RuleBasedTransliterator fwd("<ID>", RULES, status);
402 RuleBasedTransliterator rev("<ID>", RULES,
403 UTRANS_REVERSE, status);
404 if (U_FAILURE(status)) {
405 errln("FAIL: RBT constructor failed");
406 return;
407 }
408 for (int32_t i=0; i<DATA_length; i+=2) {
409 expect(fwd, DATA[i], DATA[i+1]);
410 expect(rev, DATA[i+1], DATA[i]);
411 }
412 }
413
414 /**
415 * Basic test of keyboard.
416 */
417 void TransliteratorTest::TestKeyboard(void) {
418 UErrorCode status = U_ZERO_ERROR;
419 RuleBasedTransliterator t("<ID>",
420 UnicodeString("psch>Y;")
421 +"ps>y;"
422 +"ch>x;"
423 +"a>A;",
424 status);
425 if (U_FAILURE(status)) {
426 errln("FAIL: RBT constructor failed");
427 return;
428 }
429 const char* DATA[] = {
430 // insertion, buffer
431 "a", "A",
432 "p", "Ap",
433 "s", "Aps",
434 "c", "Apsc",
435 "a", "AycA",
436 "psch", "AycAY",
437 0, "AycAY", // null means finishKeyboardTransliteration
438 };
439
440 keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
441 }
442
443 /**
444 * Basic test of keyboard with cursor.
445 */
446 void TransliteratorTest::TestKeyboard2(void) {
447 UErrorCode status = U_ZERO_ERROR;
448 RuleBasedTransliterator t("<ID>",
449 UnicodeString("ych>Y;")
450 +"ps>|y;"
451 +"ch>x;"
452 +"a>A;",
453 status);
454 if (U_FAILURE(status)) {
455 errln("FAIL: RBT constructor failed");
456 return;
457 }
458 const char* DATA[] = {
459 // insertion, buffer
460 "a", "A",
461 "p", "Ap",
462 "s", "Aps", // modified for rollback - "Ay",
463 "c", "Apsc", // modified for rollback - "Ayc",
464 "a", "AycA",
465 "p", "AycAp",
466 "s", "AycAps", // modified for rollback - "AycAy",
467 "c", "AycApsc", // modified for rollback - "AycAyc",
468 "h", "AycAY",
469 0, "AycAY", // null means finishKeyboardTransliteration
470 };
471
472 keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
473 }
474
475 /**
476 * Test keyboard transliteration with back-replacement.
477 */
478 void TransliteratorTest::TestKeyboard3(void) {
479 // We want th>z but t>y. Furthermore, during keyboard
480 // transliteration we want t>y then yh>z if t, then h are
481 // typed.
482 UnicodeString RULES("t>|y;"
483 "yh>z;");
484
485 const char* DATA[] = {
486 // Column 1: characters to add to buffer (as if typed)
487 // Column 2: expected appearance of buffer after
488 // keyboard xliteration.
489 "a", "a",
490 "b", "ab",
491 "t", "abt", // modified for rollback - "aby",
492 "c", "abyc",
493 "t", "abyct", // modified for rollback - "abycy",
494 "h", "abycz",
495 0, "abycz", // null means finishKeyboardTransliteration
496 };
497
498 UErrorCode status = U_ZERO_ERROR;
499 RuleBasedTransliterator t("<ID>", RULES, status);
500 if (U_FAILURE(status)) {
501 errln("FAIL: RBT constructor failed");
502 return;
503 }
504 keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
505 }
506
507 void TransliteratorTest::keyboardAux(const Transliterator& t,
508 const char* DATA[], int32_t DATA_length) {
509 UErrorCode status = U_ZERO_ERROR;
510 UTransPosition index={0, 0, 0, 0};
511 UnicodeString s;
512 for (int32_t i=0; i<DATA_length; i+=2) {
513 UnicodeString log;
514 if (DATA[i] != 0) {
515 log = s + " + "
516 + DATA[i]
517 + " -> ";
518 t.transliterate(s, index, DATA[i], status);
519 } else {
520 log = s + " => ";
521 t.finishTransliteration(s, index);
522 }
523 // Show the start index '{' and the cursor '|'
524 UnicodeString a, b, c;
525 s.extractBetween(0, index.contextStart, a);
526 s.extractBetween(index.contextStart, index.start, b);
527 s.extractBetween(index.start, s.length(), c);
528 log.append(a).
529 append((UChar)LEFT_BRACE).
530 append(b).
531 append((UChar)PIPE).
532 append(c);
533 if (s == DATA[i+1] && U_SUCCESS(status)) {
534 logln(log);
535 } else {
536 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
537 }
538 }
539 }
540
541 void TransliteratorTest::TestArabic(void) {
542 // Test disabled for 2.0 until new Arabic transliterator can be written.
543 // /*
544 // const char* DATA[] = {
545 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
546 // "\u0627\u0644\u0644\u063a\u0629\u0020"+
547 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
548 // "\u0628\u0628\u0646\u0638\u0645\u0020"+
549 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
550 // "\u062c\u0645\u064a\u0644\u0629",
551 // };
552 // */
553 //
554 // UChar ar_raw[] = {
555 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
556 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
557 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
558 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
559 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
560 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
561 // };
562 // UnicodeString ar(ar_raw);
563 // UErrorCode status=U_ZERO_ERROR;
564 // UParseError parseError;
565 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
566 // if (t == 0) {
567 // errln("FAIL: createInstance failed");
568 // return;
569 // }
570 // expect(*t, "Arabic", ar);
571 // delete t;
572 }
573
574 /**
575 * Compose the Kana transliterator forward and reverse and try
576 * some strings that should come out unchanged.
577 */
578 void TransliteratorTest::TestCompoundKana(void) {
579 UParseError parseError;
580 UErrorCode status = U_ZERO_ERROR;
581 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
582 if (t == 0) {
583 errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
584 } else {
585 expect(*t, "aaaaa", "aaaaa");
586 delete t;
587 }
588 }
589
590 /**
591 * Compose the hex transliterators forward and reverse.
592 */
593 void TransliteratorTest::TestCompoundHex(void) {
594 UParseError parseError;
595 UErrorCode status = U_ZERO_ERROR;
596 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
597 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
598 Transliterator* transab[] = { a, b };
599 Transliterator* transba[] = { b, a };
600 if (a == 0 || b == 0) {
601 errln("FAIL: construction failed");
602 delete a;
603 delete b;
604 return;
605 }
606 // Do some basic tests of a
607 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
608 // Do some basic tests of b
609 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
610
611 Transliterator* ab = new CompoundTransliterator(transab, 2);
612 UnicodeString s("abcde", "");
613 expect(*ab, s, s);
614
615 UnicodeString str(s);
616 a->transliterate(str);
617 Transliterator* ba = new CompoundTransliterator(transba, 2);
618 expect(*ba, str, str);
619
620 delete ab;
621 delete ba;
622 delete a;
623 delete b;
624 }
625
626 int gTestFilterClassID = 0;
627 /**
628 * Used by TestFiltering().
629 */
630 class TestFilter : public UnicodeFilter {
631 virtual UnicodeFunctor* clone() const {
632 return new TestFilter(*this);
633 }
634 virtual UBool contains(UChar32 c) const {
635 return c != (UChar)0x0063 /*c*/;
636 }
637 // Stubs
638 virtual UnicodeString& toPattern(UnicodeString& result,
639 UBool /*escapeUnprintable*/) const {
640 return result;
641 }
642 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
643 return FALSE;
644 }
645 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
646 public:
647 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
648 };
649
650 /**
651 * Do some basic tests of filtering.
652 */
653 void TransliteratorTest::TestFiltering(void) {
654 UParseError parseError;
655 UErrorCode status = U_ZERO_ERROR;
656 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
657 if (hex == 0) {
658 errln("FAIL: createInstance(Any-Hex) failed");
659 return;
660 }
661 hex->adoptFilter(new TestFilter());
662 UnicodeString s("abcde");
663 hex->transliterate(s);
664 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
665 if (s == exp) {
666 logln(UnicodeString("Ok: \"") + exp + "\"");
667 } else {
668 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
669 }
670 delete hex;
671 }
672
673 /**
674 * Test anchors
675 */
676 void TransliteratorTest::TestAnchors(void) {
677 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
678 "aaa",
679 "012");
680 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
681 "aaa",
682 "012");
683 expect(UnicodeString("^ab > 01 ;"
684 " ab > |8 ;"
685 " b > k ;"
686 " 8x$ > 45 ;"
687 " 8x > 77 ;", ""),
688
689 "ababbabxabx",
690 "018k7745");
691 expect(UnicodeString("$s = [z$] ;"
692 "$s{ab > 01 ;"
693 " ab > |8 ;"
694 " b > k ;"
695 " 8x}$s > 45 ;"
696 " 8x > 77 ;", ""),
697
698 "abzababbabxzabxabx",
699 "01z018k45z01x45");
700 }
701
702 /**
703 * Test pattern quoting and escape mechanisms.
704 */
705 void TransliteratorTest::TestPatternQuoting(void) {
706 // Array of 3n items
707 // Each item is <rules>, <input>, <expected output>
708 const UnicodeString DATA[] = {
709 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
710 UnicodeString(UChar(0x4E01)),
711 "[male adult]"
712 };
713
714 for (int32_t i=0; i<3; i+=3) {
715 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
716 UErrorCode status = U_ZERO_ERROR;
717 RuleBasedTransliterator t("<ID>", DATA[i], status);
718 if (U_FAILURE(status)) {
719 errln("RBT constructor failed");
720 } else {
721 expect(t, DATA[i+1], DATA[i+2]);
722 }
723 }
724 }
725
726 /**
727 * Regression test for bugs found in Greek transliteration.
728 */
729 void TransliteratorTest::TestJ277(void) {
730 UErrorCode status = U_ZERO_ERROR;
731 UParseError parseError;
732 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
733 if (gl == NULL) {
734 errln("FAIL: createInstance(Greek-Latin) returned NULL");
735 return;
736 }
737
738 UChar sigma = 0x3C3;
739 UChar upsilon = 0x3C5;
740 UChar nu = 0x3BD;
741 // UChar PHI = 0x3A6;
742 UChar alpha = 0x3B1;
743 // UChar omega = 0x3C9;
744 // UChar omicron = 0x3BF;
745 // UChar epsilon = 0x3B5;
746
747 // sigma upsilon nu -> syn
748 UnicodeString syn;
749 syn.append(sigma).append(upsilon).append(nu);
750 expect(*gl, syn, "syn");
751
752 // sigma alpha upsilon nu -> saun
753 UnicodeString sayn;
754 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
755 expect(*gl, sayn, "saun");
756
757 // Again, using a smaller rule set
758 UnicodeString rules(
759 "$alpha = \\u03B1;"
760 "$nu = \\u03BD;"
761 "$sigma = \\u03C3;"
762 "$ypsilon = \\u03C5;"
763 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
764 "s <> $sigma;"
765 "a <> $alpha;"
766 "u <> $vowel { $ypsilon;"
767 "y <> $ypsilon;"
768 "n <> $nu;",
769 "");
770 RuleBasedTransliterator mini("mini", rules, UTRANS_REVERSE, status);
771 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
772 expect(mini, syn, "syn");
773 expect(mini, sayn, "saun");
774
775 #if !UCONFIG_NO_FORMATTING
776 // Transliterate the Greek locale data
777 Locale el("el");
778 DateFormatSymbols syms(el, status);
779 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
780 int32_t i, count;
781 const UnicodeString* data = syms.getMonths(count);
782 for (i=0; i<count; ++i) {
783 if (data[i].length() == 0) {
784 continue;
785 }
786 UnicodeString out(data[i]);
787 gl->transliterate(out);
788 UBool ok = TRUE;
789 if (data[i].length() >= 2 && out.length() >= 2 &&
790 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
791 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
792 ok = FALSE;
793 }
794 }
795 if (ok) {
796 logln(prettify(data[i] + " -> " + out));
797 } else {
798 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
799 }
800 }
801 #endif
802
803 delete gl;
804 }
805
806 /**
807 * Prefix, suffix support in hex transliterators
808 */
809 void TransliteratorTest::TestJ243(void) {
810 UErrorCode ec = U_ZERO_ERROR;
811
812 // Test default Hex-Any, which should handle
813 // \u, \U, u+, and U+
814 Transliterator *hex =
815 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
816 if (assertSuccess("getInstance", ec)) {
817 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
818 }
819 delete hex;
820
821 // // Try a custom Hex-Unicode
822 // // \uXXXX and &#xXXXX;
823 // ec = U_ZERO_ERROR;
824 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
825 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
826 // "abcd5fx012&#x00033;");
827 // // Try custom Any-Hex (default is tested elsewhere)
828 // ec = U_ZERO_ERROR;
829 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
830 // expect(hex3, "012", "&#x30;&#x31;&#x32;");
831 }
832
833 /**
834 * Parsers need better syntax error messages.
835 */
836 void TransliteratorTest::TestJ329(void) {
837
838 struct { UBool containsErrors; const char* rule; } DATA[] = {
839 { FALSE, "a > b; c > d" },
840 { TRUE, "a > b; no operator; c > d" },
841 };
842 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
843
844 for (int32_t i=0; i<DATA_length; ++i) {
845 UErrorCode status = U_ZERO_ERROR;
846 UParseError parseError;
847 RuleBasedTransliterator rbt("<ID>",
848 DATA[i].rule,
849 UTRANS_FORWARD,
850 0,
851 parseError,
852 status);
853 UBool gotError = U_FAILURE(status);
854 UnicodeString desc(DATA[i].rule);
855 desc.append(gotError ? " -> error" : " -> no error");
856 if (gotError) {
857 desc = desc + ", ParseError code=" + u_errorName(status) +
858 " line=" + parseError.line +
859 " offset=" + parseError.offset +
860 " context=" + parseError.preContext;
861 }
862 if (gotError == DATA[i].containsErrors) {
863 logln(UnicodeString("Ok: ") + desc);
864 } else {
865 errln(UnicodeString("FAIL: ") + desc);
866 }
867 }
868 }
869
870 /**
871 * Test segments and segment references.
872 */
873 void TransliteratorTest::TestSegments(void) {
874 // Array of 3n items
875 // Each item is <rules>, <input>, <expected output>
876 UnicodeString DATA[] = {
877 "([a-z]) '.' ([0-9]) > $2 '-' $1",
878 "abc.123.xyz.456",
879 "ab1-c23.xy4-z56",
880
881 // nested
882 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
883 "a1 b2",
884 "a1.a.1 b2.b.2",
885 };
886 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
887
888 for (int32_t i=0; i<DATA_length; i+=3) {
889 logln("Pattern: " + prettify(DATA[i]));
890 UErrorCode status = U_ZERO_ERROR;
891 RuleBasedTransliterator t("ID", DATA[i], status);
892 if (U_FAILURE(status)) {
893 errln("FAIL: RBT constructor");
894 } else {
895 expect(t, DATA[i+1], DATA[i+2]);
896 }
897 }
898 }
899
900 /**
901 * Test cursor positioning outside of the key
902 */
903 void TransliteratorTest::TestCursorOffset(void) {
904 // Array of 3n items
905 // Each item is <rules>, <input>, <expected output>
906 UnicodeString DATA[] = {
907 "pre {alpha} post > | @ ALPHA ;"
908 "eALPHA > beta ;"
909 "pre {beta} post > BETA @@ | ;"
910 "post > xyz",
911
912 "prealphapost prebetapost",
913
914 "prbetaxyz preBETApost",
915 };
916 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
917
918 for (int32_t i=0; i<DATA_length; i+=3) {
919 logln("Pattern: " + prettify(DATA[i]));
920 UErrorCode status = U_ZERO_ERROR;
921 RuleBasedTransliterator t("<ID>", DATA[i], status);
922 if (U_FAILURE(status)) {
923 errln("FAIL: RBT constructor");
924 } else {
925 expect(t, DATA[i+1], DATA[i+2]);
926 }
927 }
928 }
929
930 /**
931 * Test zero length and > 1 char length variable values. Test
932 * use of variable refs in UnicodeSets.
933 */
934 void TransliteratorTest::TestArbitraryVariableValues(void) {
935 // Array of 3n items
936 // Each item is <rules>, <input>, <expected output>
937 UnicodeString DATA[] = {
938 "$abe = ab;"
939 "$pat = x[yY]z;"
940 "$ll = 'a-z';"
941 "$llZ = [$ll];"
942 "$llY = [$ll$pat];"
943 "$emp = ;"
944
945 "$abe > ABE;"
946 "$pat > END;"
947 "$llZ > 1;"
948 "$llY > 2;"
949 "7$emp 8 > 9;"
950 "",
951
952 "ab xYzxyz stY78",
953 "ABE ENDEND 1129",
954 };
955 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
956
957 for (int32_t i=0; i<DATA_length; i+=3) {
958 logln("Pattern: " + prettify(DATA[i]));
959 UErrorCode status = U_ZERO_ERROR;
960 RuleBasedTransliterator t("<ID>", DATA[i], status);
961 if (U_FAILURE(status)) {
962 errln("FAIL: RBT constructor");
963 } else {
964 expect(t, DATA[i+1], DATA[i+2]);
965 }
966 }
967 }
968
969 /**
970 * Confirm that the contextStart, contextLimit, start, and limit
971 * behave correctly. J474.
972 */
973 void TransliteratorTest::TestPositionHandling(void) {
974 // Array of 3n items
975 // Each item is <rules>, <input>, <expected output>
976 const char* DATA[] = {
977 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
978 "xtat txtb", // pos 0,9,0,9
979 "xTTaSS TTxUUb",
980
981 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
982 "xtat txtb", // pos 2,9,3,8
983 "xtaSS TTxUUb",
984
985 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
986 "xtat txtb", // pos 3,8,3,8
987 "xtaTT TTxTTb",
988 };
989
990 // Array of 4n positions -- these go with the DATA array
991 // They are: contextStart, contextLimit, start, limit
992 int32_t POS[] = {
993 0, 9, 0, 9,
994 2, 9, 3, 8,
995 3, 8, 3, 8,
996 };
997
998 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
999 for (int32_t i=0; i<n; i++) {
1000 UErrorCode status = U_ZERO_ERROR;
1001 Transliterator *t = new RuleBasedTransliterator("<ID>",
1002 DATA[3*i], status);
1003 if (U_FAILURE(status)) {
1004 delete t;
1005 errln("FAIL: RBT constructor");
1006 return;
1007 }
1008 UTransPosition pos;
1009 pos.contextStart= POS[4*i];
1010 pos.contextLimit = POS[4*i+1];
1011 pos.start = POS[4*i+2];
1012 pos.limit = POS[4*i+3];
1013 UnicodeString rsource(DATA[3*i+1]);
1014 t->transliterate(rsource, pos, status);
1015 if (U_FAILURE(status)) {
1016 delete t;
1017 errln("FAIL: transliterate");
1018 return;
1019 }
1020 t->finishTransliteration(rsource, pos);
1021 expectAux(DATA[3*i],
1022 DATA[3*i+1],
1023 rsource,
1024 DATA[3*i+2]);
1025 delete t;
1026 }
1027 }
1028
1029 /**
1030 * Test the Hiragana-Katakana transliterator.
1031 */
1032 void TransliteratorTest::TestHiraganaKatakana(void) {
1033 UParseError parseError;
1034 UErrorCode status = U_ZERO_ERROR;
1035 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1036 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1037 if (hk == 0 || kh == 0) {
1038 errln("FAIL: createInstance failed");
1039 delete hk;
1040 delete kh;
1041 return;
1042 }
1043
1044 // Array of 3n items
1045 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1046 const char* DATA[] = {
1047 "both",
1048 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1049 "\\u30A2\\u30F8\\u30F2\\u30B0",
1050
1051 "kh",
1052 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1053 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1054 };
1055 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1056
1057 for (int32_t i=0; i<DATA_length; i+=3) {
1058 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1059 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1060 switch (*DATA[i]) {
1061 case 0x68: //'h': // Hiragana-Katakana
1062 expect(*hk, h, k);
1063 break;
1064 case 0x6B: //'k': // Katakana-Hiragana
1065 expect(*kh, k, h);
1066 break;
1067 case 0x62: //'b': // both
1068 expect(*hk, h, k);
1069 expect(*kh, k, h);
1070 break;
1071 }
1072 }
1073 delete hk;
1074 delete kh;
1075 }
1076
1077 /**
1078 * Test cloning / copy constructor of RBT.
1079 */
1080 void TransliteratorTest::TestCopyJ476(void) {
1081 // The real test here is what happens when the destructors are
1082 // called. So we let one object get destructed, and check to
1083 // see that its copy still works.
1084 RuleBasedTransliterator *t2 = 0;
1085 {
1086 UErrorCode status = U_ZERO_ERROR;
1087 RuleBasedTransliterator t1("t1", "a>A;b>B;", status);
1088 if (U_FAILURE(status)) {
1089 errln("FAIL: RBT constructor");
1090 return;
1091 }
1092 t2 = new RuleBasedTransliterator(t1);
1093 expect(t1, "abc", "ABc");
1094 }
1095 expect(*t2, "abc", "ABc");
1096 delete t2;
1097 }
1098
1099 /**
1100 * Test inter-Indic transliterators. These are composed.
1101 * ICU4C Jitterbug 483.
1102 */
1103 void TransliteratorTest::TestInterIndic(void) {
1104 UnicodeString ID("Devanagari-Gujarati", "");
1105 UErrorCode status = U_ZERO_ERROR;
1106 UParseError parseError;
1107 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1108 if (dg == 0) {
1109 errln("FAIL: createInstance(" + ID + ") returned NULL");
1110 return;
1111 }
1112 UnicodeString id = dg->getID();
1113 if (id != ID) {
1114 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1115 }
1116 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1117 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1118 expect(*dg, dev, guj);
1119 delete dg;
1120 }
1121
1122 /**
1123 * Test filter syntax in IDs. (J918)
1124 */
1125 void TransliteratorTest::TestFilterIDs(void) {
1126 // Array of 3n strings:
1127 // <id>, <inverse id>, <input>, <expected output>
1128 const char* DATA[] = {
1129 "[aeiou]Any-Hex", // ID
1130 "[aeiou]Hex-Any", // expected inverse ID
1131 "quizzical", // src
1132 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1133
1134 "[aeiou]Any-Hex;[^5]Hex-Any",
1135 "[^5]Any-Hex;[aeiou]Hex-Any",
1136 "quizzical",
1137 "q\\u0075izzical",
1138
1139 "[abc]Null",
1140 "[abc]Null",
1141 "xyz",
1142 "xyz",
1143 };
1144 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1145
1146 for (int i=0; i<DATA_length; i+=4) {
1147 UnicodeString ID(DATA[i], "");
1148 UnicodeString uID(DATA[i+1], "");
1149 UnicodeString data2(DATA[i+2], "");
1150 UnicodeString data3(DATA[i+3], "");
1151 UParseError parseError;
1152 UErrorCode status = U_ZERO_ERROR;
1153 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1154 if (t == 0) {
1155 errln("FAIL: createInstance(" + ID + ") returned NULL");
1156 return;
1157 }
1158 expect(*t, data2, data3);
1159
1160 // Check the ID
1161 if (ID != t->getID()) {
1162 errln("FAIL: createInstance(" + ID + ").getID() => " +
1163 t->getID());
1164 }
1165
1166 // Check the inverse
1167 Transliterator *u = t->createInverse(status);
1168 if (u == 0) {
1169 errln("FAIL: " + ID + ".createInverse() returned NULL");
1170 } else if (u->getID() != uID) {
1171 errln("FAIL: " + ID + ".createInverse().getID() => " +
1172 u->getID() + ", expected " + uID);
1173 }
1174
1175 delete t;
1176 delete u;
1177 }
1178 }
1179
1180 /**
1181 * Test the case mapping transliterators.
1182 */
1183 void TransliteratorTest::TestCaseMap(void) {
1184 UParseError parseError;
1185 UErrorCode status = U_ZERO_ERROR;
1186 Transliterator* toUpper =
1187 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1188 Transliterator* toLower =
1189 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1190 Transliterator* toTitle =
1191 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1192 if (toUpper==0 || toLower==0 || toTitle==0) {
1193 errln("FAIL: createInstance returned NULL");
1194 delete toUpper;
1195 delete toLower;
1196 delete toTitle;
1197 return;
1198 }
1199
1200 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1201 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1202 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1203 "the quick brown foX jumped over the lazY dogs.");
1204 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1205 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1206
1207 delete toUpper;
1208 delete toLower;
1209 delete toTitle;
1210 }
1211
1212 /**
1213 * Test the name mapping transliterators.
1214 */
1215 void TransliteratorTest::TestNameMap(void) {
1216 UParseError parseError;
1217 UErrorCode status = U_ZERO_ERROR;
1218 Transliterator* uni2name =
1219 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1220 Transliterator* name2uni =
1221 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1222 if (uni2name==0 || name2uni==0) {
1223 errln("FAIL: createInstance returned NULL");
1224 delete uni2name;
1225 delete name2uni;
1226 return;
1227 }
1228
1229 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1230 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1231 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1232 expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
1233 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1234
1235 delete uni2name;
1236 delete name2uni;
1237
1238 // round trip
1239 Transliterator* t =
1240 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1241 if (t==0) {
1242 errln("FAIL: createInstance returned NULL");
1243 delete t;
1244 return;
1245 }
1246
1247 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1248 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1249 expect(*t, s, s);
1250 delete t;
1251 }
1252
1253 /**
1254 * Test liberalized ID syntax. 1006c
1255 */
1256 void TransliteratorTest::TestLiberalizedID(void) {
1257 // Some test cases have an expected getID() value of NULL. This
1258 // means I have disabled the test case for now. This stuff is
1259 // still under development, and I haven't decided whether to make
1260 // getID() return canonical case yet. It will all get rewritten
1261 // with the move to Source-Target/Variant IDs anyway. [aliu]
1262 const char* DATA[] = {
1263 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1264 " Null ", "Null", "whitespace",
1265 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1266 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1267 };
1268 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1269 UParseError parseError;
1270 UErrorCode status= U_ZERO_ERROR;
1271 for (int32_t i=0; i<DATA_length; i+=3) {
1272 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1273 if (t == 0) {
1274 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1275 " cannot create ID \"" + DATA[i] + "\"");
1276 } else {
1277 UnicodeString exp;
1278 if (DATA[i+1]) {
1279 exp = UnicodeString(DATA[i+1], "");
1280 }
1281 // Don't worry about getID() if the expected char*
1282 // is NULL -- see above.
1283 if (exp.length() == 0 || exp == t->getID()) {
1284 logln(UnicodeString("Ok: ") + DATA[i+2] +
1285 " create ID \"" + DATA[i] + "\" => \"" +
1286 exp + "\"");
1287 } else {
1288 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1289 " create ID \"" + DATA[i] + "\" => \"" +
1290 t->getID() + "\", exp \"" + exp + "\"");
1291 }
1292 delete t;
1293 }
1294 }
1295 }
1296
1297 /* test for Jitterbug 912 */
1298 void TransliteratorTest::TestCreateInstance(){
1299 const char* FORWARD = "F";
1300 const char* REVERSE = "R";
1301 const char* DATA[] = {
1302 // Column 1: id
1303 // Column 2: direction
1304 // Column 3: expected ID, or "" if expect failure
1305 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1306
1307 // JB#2689: bad compound causes crash
1308 "InvalidSource-InvalidTarget", FORWARD, "",
1309 "InvalidSource-InvalidTarget", REVERSE, "",
1310 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1311 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1312 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1313 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1314
1315 NULL
1316 };
1317
1318 for (int32_t i=0; DATA[i]; i+=3) {
1319 UParseError err;
1320 UErrorCode ec = U_ZERO_ERROR;
1321 UnicodeString id(DATA[i]);
1322 UTransDirection dir = (DATA[i+1]==FORWARD)?
1323 UTRANS_FORWARD:UTRANS_REVERSE;
1324 UnicodeString expID(DATA[i+2]);
1325 Transliterator* t =
1326 Transliterator::createInstance(id,dir,err,ec);
1327 UnicodeString newID;
1328 if (t) {
1329 newID = t->getID();
1330 }
1331 UBool ok = (newID == expID);
1332 if (!t) {
1333 newID = u_errorName(ec);
1334 }
1335 if (ok) {
1336 logln((UnicodeString)"Ok: createInstance(" +
1337 id + "," + DATA[i+1] + ") => " + newID);
1338 } else {
1339 errln((UnicodeString)"FAIL: createInstance(" +
1340 id + "," + DATA[i+1] + ") => " + newID +
1341 ", expected " + expID);
1342 }
1343 delete t;
1344 }
1345 }
1346
1347 /**
1348 * Test the normalization transliterator.
1349 */
1350 void TransliteratorTest::TestNormalizationTransliterator() {
1351 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1352 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1353 const char* CANON[] = {
1354 // Input Decomposed Composed
1355 "cat", "cat", "cat" ,
1356 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1357
1358 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1359 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1360
1361 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1362 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1363 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1364
1365 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1366 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1367
1368 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1369 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1370 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1371
1372 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1373 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1374
1375 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1376 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1377
1378 "Henry IV", "Henry IV", "Henry IV" ,
1379 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1380
1381 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1382 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1383 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1384 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1385 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1386
1387 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1388 0 // end
1389 };
1390
1391 const char* COMPAT[] = {
1392 // Input Decomposed Composed
1393 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1394
1395 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1396 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1397
1398 "Henry IV", "Henry IV", "Henry IV" ,
1399 "Henry \\u2163", "Henry IV", "Henry IV" ,
1400
1401 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1402 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1403
1404 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1405 0 // end
1406 };
1407
1408 int32_t i;
1409 UParseError parseError;
1410 UErrorCode status = U_ZERO_ERROR;
1411 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1412 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1413 if (!NFD || !NFC) {
1414 errln("FAIL: createInstance failed");
1415 delete NFD;
1416 delete NFC;
1417 return;
1418 }
1419 for (i=0; CANON[i]; i+=3) {
1420 UnicodeString in = CharsToUnicodeString(CANON[i]);
1421 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1422 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1423 expect(*NFD, in, expd);
1424 expect(*NFC, in, expc);
1425 }
1426 delete NFD;
1427 delete NFC;
1428
1429 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1430 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1431 if (!NFKD || !NFKC) {
1432 errln("FAIL: createInstance failed");
1433 delete NFKD;
1434 delete NFKC;
1435 return;
1436 }
1437 for (i=0; COMPAT[i]; i+=3) {
1438 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1439 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1440 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1441 expect(*NFKD, in, expkd);
1442 expect(*NFKC, in, expkc);
1443 }
1444 delete NFKD;
1445 delete NFKC;
1446
1447 UParseError pe;
1448 status = U_ZERO_ERROR;
1449 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1450 UTRANS_FORWARD,
1451 pe, status);
1452 if (t == 0) {
1453 errln("FAIL: createInstance failed");
1454 }
1455 expect(*t, CharsToUnicodeString("\\u010dx"),
1456 CharsToUnicodeString("c\\u030C"));
1457 delete t;
1458 }
1459
1460 /**
1461 * Test compound RBT rules.
1462 */
1463 void TransliteratorTest::TestCompoundRBT(void) {
1464 // Careful with spacing and ';' here: Phrase this exactly
1465 // as toRules() is going to return it. If toRules() changes
1466 // with regard to spacing or ';', then adjust this string.
1467 UnicodeString rule("::Hex-Any;\n"
1468 "::Any-Lower;\n"
1469 "a > '.A.';\n"
1470 "b > '.B.';\n"
1471 "::[^t]Any-Upper;", "");
1472 UParseError parseError;
1473 UErrorCode status = U_ZERO_ERROR;
1474 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1475 if (t == 0) {
1476 errln("FAIL: createFromRules failed");
1477 return;
1478 }
1479 expect(*t, "\\u0043at in the hat, bat on the mat",
1480 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1481 UnicodeString r;
1482 t->toRules(r, TRUE);
1483 if (r == rule) {
1484 logln((UnicodeString)"OK: toRules() => " + r);
1485 } else {
1486 errln((UnicodeString)"FAIL: toRules() => " + r +
1487 ", expected " + rule);
1488 }
1489 delete t;
1490
1491 // Now test toRules
1492 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1493 if (t == 0) {
1494 errln("FAIL: createInstance failed");
1495 return;
1496 }
1497 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1498 t->toRules(r, TRUE);
1499 if (r != exp) {
1500 errln((UnicodeString)"FAIL: toRules() => " + r +
1501 ", expected " + exp);
1502 } else {
1503 logln((UnicodeString)"OK: toRules() => " + r);
1504 }
1505 delete t;
1506
1507 // Round trip the result of toRules
1508 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1509 if (t == 0) {
1510 errln("FAIL: createFromRules #2 failed");
1511 return;
1512 } else {
1513 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1514 }
1515
1516 // Test toRules again
1517 t->toRules(r, TRUE);
1518 if (r != exp) {
1519 errln((UnicodeString)"FAIL: toRules() => " + r +
1520 ", expected " + exp);
1521 } else {
1522 logln((UnicodeString)"OK: toRules() => " + r);
1523 }
1524
1525 delete t;
1526
1527 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1528 // to what the regenerated ID will look like.
1529 UnicodeString id("Upper(Lower);(NFKC)", "");
1530 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1531 if (t == 0) {
1532 errln("FAIL: createInstance #2 failed");
1533 return;
1534 }
1535 if (t->getID() == id) {
1536 logln((UnicodeString)"OK: created " + id);
1537 } else {
1538 errln((UnicodeString)"FAIL: createInstance(" + id +
1539 ").getID() => " + t->getID());
1540 }
1541
1542 Transliterator *u = t->createInverse(status);
1543 if (u == 0) {
1544 errln("FAIL: createInverse failed");
1545 delete t;
1546 return;
1547 }
1548 exp = "NFKC();Lower(Upper)";
1549 if (u->getID() == exp) {
1550 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1551 u->getID());
1552 } else {
1553 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1554 u->getID());
1555 }
1556 delete t;
1557 delete u;
1558 }
1559
1560 /**
1561 * Compound filter semantics were orginially not implemented
1562 * correctly. Originally, each component filter f(i) is replaced by
1563 * f'(i) = f(i) && g, where g is the filter for the compound
1564 * transliterator.
1565 *
1566 * From Mark:
1567 *
1568 * Suppose and I have a transliterator X. Internally X is
1569 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1570 *
1571 * The compound should convert all greek characters (through latin) to
1572 * cyrillic, then lowercase the result. The filter should say "don't
1573 * touch 'A' in the original". But because an intermediate result
1574 * happens to go through "A", the Greek Alpha gets hung up.
1575 */
1576 void TransliteratorTest::TestCompoundFilter(void) {
1577 UParseError parseError;
1578 UErrorCode status = U_ZERO_ERROR;
1579 Transliterator *t = Transliterator::createInstance
1580 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1581 if (t == 0) {
1582 errln("FAIL: createInstance failed");
1583 return;
1584 }
1585 t->adoptFilter(new UnicodeSet("[^A]", status));
1586 if (U_FAILURE(status)) {
1587 errln("FAIL: UnicodeSet ct failed");
1588 delete t;
1589 return;
1590 }
1591
1592 // Only the 'A' at index 1 should remain unchanged
1593 expect(*t,
1594 CharsToUnicodeString("BA\\u039A\\u0391"),
1595 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1596 delete t;
1597 }
1598
1599 void TransliteratorTest::TestRemove(void) {
1600 UParseError parseError;
1601 UErrorCode status = U_ZERO_ERROR;
1602 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1603 if (t == 0) {
1604 errln("FAIL: createInstance failed");
1605 return;
1606 }
1607
1608 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1609 delete t;
1610 }
1611
1612 void TransliteratorTest::TestToRules(void) {
1613 const char* RBT = "rbt";
1614 const char* SET = "set";
1615 static const char* DATA[] = {
1616 RBT,
1617 "$a=\\u4E61; [$a] > A;",
1618 "[\\u4E61] > A;",
1619
1620 RBT,
1621 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1622 "[[:Zs:][:Zl:]]{a} > A;",
1623
1624 SET,
1625 "[[:Zs:][:Zl:]]",
1626 "[[:Zs:][:Zl:]]",
1627
1628 SET,
1629 "[:Ps:]",
1630 "[:Ps:]",
1631
1632 SET,
1633 "[:L:]",
1634 "[:L:]",
1635
1636 SET,
1637 "[[:L:]-[A]]",
1638 "[[:L:]-[A]]",
1639
1640 SET,
1641 "[~[:Lu:][:Ll:]]",
1642 "[~[:Lu:][:Ll:]]",
1643
1644 SET,
1645 "[~[a-z]]",
1646 "[~[a-z]]",
1647
1648 RBT,
1649 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1650 "[^[:Zs:]]{a} > A;",
1651
1652 RBT,
1653 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1654 "[[a-z]-[:Zs:]]{a} > A;",
1655
1656 RBT,
1657 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1658 "[[:Zs:]&[a-z]]{a} > A;",
1659
1660 RBT,
1661 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1662 "[x[:Zs:]]{a} > A;",
1663
1664 RBT,
1665 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1666 "$macron = \\u0304 ;"
1667 "$evowel = [aeiouyAEIOUY] ;"
1668 "$iotasub = \\u0345 ;"
1669 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1670 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1671
1672 RBT,
1673 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1674 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1675 };
1676 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1677
1678 for (int32_t d=0; d < DATA_length; d+=3) {
1679 if (DATA[d] == RBT) {
1680 // Transliterator test
1681 UParseError parseError;
1682 UErrorCode status = U_ZERO_ERROR;
1683 Transliterator *t = Transliterator::createFromRules("ID",
1684 DATA[d+1], UTRANS_FORWARD, parseError, status);
1685 if (t == 0) {
1686 errln("FAIL: createFromRules failed");
1687 return;
1688 }
1689 UnicodeString rules, escapedRules;
1690 t->toRules(rules, FALSE);
1691 t->toRules(escapedRules, TRUE);
1692 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1693 UnicodeString expEscapedRules(DATA[d+2]);
1694 if (rules == expRules) {
1695 logln((UnicodeString)"Ok: " + DATA[d+1] +
1696 " => " + rules);
1697 } else {
1698 errln((UnicodeString)"FAIL: " + DATA[d+1] +
1699 " => " + rules + ", exp " + expRules);
1700 }
1701 if (escapedRules == expEscapedRules) {
1702 logln((UnicodeString)"Ok: " + DATA[d+1] +
1703 " => " + escapedRules);
1704 } else {
1705 errln((UnicodeString)"FAIL: " + DATA[d+1] +
1706 " => " + escapedRules + ", exp " + expEscapedRules);
1707 }
1708 delete t;
1709
1710 } else {
1711 // UnicodeSet test
1712 UErrorCode status = U_ZERO_ERROR;
1713 UnicodeString pat(DATA[d+1]);
1714 UnicodeString expToPat(DATA[d+2]);
1715 UnicodeSet set(pat, status);
1716 if (U_FAILURE(status)) {
1717 errln("FAIL: UnicodeSet ct failed");
1718 return;
1719 }
1720 // Adjust spacing etc. as necessary.
1721 UnicodeString toPat;
1722 set.toPattern(toPat);
1723 if (expToPat == toPat) {
1724 logln((UnicodeString)"Ok: " + pat +
1725 " => " + toPat);
1726 } else {
1727 errln((UnicodeString)"FAIL: " + pat +
1728 " => " + prettify(toPat, TRUE) +
1729 ", exp " + prettify(pat, TRUE));
1730 }
1731 }
1732 }
1733 }
1734
1735 void TransliteratorTest::TestContext() {
1736 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1737 expect("de > x; {d}e > y;",
1738 "de",
1739 "ye",
1740 &pos);
1741
1742 expect("ab{c} > z;",
1743 "xadabdabcy",
1744 "xadabdabzy");
1745 }
1746
1747 void TransliteratorTest::TestSupplemental() {
1748
1749 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1750 "a > $a; $s > i;"),
1751 CharsToUnicodeString("ab\\U0001030Fx"),
1752 CharsToUnicodeString("\\U00010300bix"));
1753
1754 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1755 "$b=[A-Z\\U00010400-\\U0001044D];"
1756 "($a)($b) > $2 $1;"),
1757 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1758 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1759
1760 // k|ax\\U00010300xm
1761
1762 // k|a\\U00010400\\U00010300xm
1763 // ky|\\U00010400\\U00010300xm
1764 // ky\\U00010400|\\U00010300xm
1765
1766 // ky\\U00010400|\\U00010300\\U00010400m
1767 // ky\\U00010400y|\\U00010400m
1768 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1769 "$a {x} > | @ \\U00010400;"
1770 "{$a} [^\\u0000-\\uFFFF] > y;"),
1771 CharsToUnicodeString("kax\\U00010300xm"),
1772 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1773
1774 expectT("Any-Name",
1775 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1776 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1777
1778 expectT("Any-Hex/Unicode",
1779 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1780 "U+10330U+10FF00U+E0061U+00A0");
1781
1782 expectT("Any-Hex/C",
1783 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1784 "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1785
1786 expectT("Any-Hex/Perl",
1787 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1788 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1789
1790 expectT("Any-Hex/Java",
1791 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1792 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1793
1794 expectT("Any-Hex/XML",
1795 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1796 "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1797
1798 expectT("Any-Hex/XML10",
1799 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1800 "&#66352;&#1113856;&#917601;&#160;");
1801
1802 expectT("[\\U000E0000-\\U000E0FFF] Remove",
1803 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1804 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1805 }
1806
1807 void TransliteratorTest::TestQuantifier() {
1808
1809 // Make sure @ in a quantified anteContext works
1810 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1811 "AAAAAb",
1812 "aaa(aac)");
1813
1814 // Make sure @ in a quantified postContext works
1815 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1816 "baaaaa",
1817 "caa(aaa)");
1818
1819 // Make sure @ in a quantified postContext with seg ref works
1820 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1821 "baaaaa",
1822 "baa(aaa)");
1823
1824 // Make sure @ past ante context doesn't enter ante context
1825 UTransPosition pos = {0, 5, 3, 5};
1826 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1827 "xxxab",
1828 "xxx(ac)",
1829 &pos);
1830
1831 // Make sure @ past post context doesn't pass limit
1832 UTransPosition pos2 = {0, 4, 0, 2};
1833 expect("{b} a+ > c @@ |; x > y; a > A;",
1834 "baxx",
1835 "caxx",
1836 &pos2);
1837
1838 // Make sure @ past post context doesn't enter post context
1839 expect("{b} a+ > c @@ |; x > y; a > A;",
1840 "baxx",
1841 "cayy");
1842
1843 expect("(ab)? c > d;",
1844 "c abc ababc",
1845 "d d abd");
1846
1847 // NOTE: The (ab)+ when referenced just yields a single "ab",
1848 // not the full sequence of them. This accords with perl behavior.
1849 expect("(ab)+ {x} > '(' $1 ')';",
1850 "x abx ababxy",
1851 "x ab(ab) abab(ab)y");
1852
1853 expect("b+ > x;",
1854 "ac abc abbc abbbc",
1855 "ac axc axc axc");
1856
1857 expect("[abc]+ > x;",
1858 "qac abrc abbcs abtbbc",
1859 "qx xrx xs xtx");
1860
1861 expect("q{(ab)+} > x;",
1862 "qa qab qaba qababc qaba",
1863 "qa qx qxa qxc qxa");
1864
1865 expect("q(ab)* > x;",
1866 "qa qab qaba qababc",
1867 "xa x xa xc");
1868
1869 // NOTE: The (ab)+ when referenced just yields a single "ab",
1870 // not the full sequence of them. This accords with perl behavior.
1871 expect("q(ab)* > '(' $1 ')';",
1872 "qa qab qaba qababc",
1873 "()a (ab) (ab)a (ab)c");
1874
1875 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1876 // quoted string
1877 expect("'ab'+ > x;",
1878 "bb ab ababb",
1879 "bb x xb");
1880
1881 // $foo+ and $foo* -- the quantifier should apply to the entire
1882 // variable reference
1883 expect("$var = ab; $var+ > x;",
1884 "bb ab ababb",
1885 "bb x xb");
1886 }
1887
1888 class TestTrans : public NullTransliterator {
1889 public:
1890 TestTrans(const UnicodeString& id) {
1891 setID(id);
1892 }
1893 };
1894
1895 /**
1896 * Test Source-Target/Variant.
1897 */
1898 void TransliteratorTest::TestSTV(void) {
1899 int32_t ns = Transliterator::countAvailableSources();
1900 if (ns < 0 || ns > 255) {
1901 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1902 return;
1903 }
1904 int32_t i, j;
1905 for (i=0; i<ns; ++i) {
1906 UnicodeString source;
1907 Transliterator::getAvailableSource(i, source);
1908 logln((UnicodeString)"" + i + ": " + source);
1909 if (source.length() == 0) {
1910 errln("FAIL: empty source");
1911 continue;
1912 }
1913 int32_t nt = Transliterator::countAvailableTargets(source);
1914 if (nt < 0 || nt > 255) {
1915 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1916 continue;
1917 }
1918 for (int32_t j=0; j<nt; ++j) {
1919 UnicodeString target;
1920 Transliterator::getAvailableTarget(j, source, target);
1921 logln((UnicodeString)" " + j + ": " + target);
1922 if (target.length() == 0) {
1923 errln("FAIL: empty target");
1924 continue;
1925 }
1926 int32_t nv = Transliterator::countAvailableVariants(source, target);
1927 if (nv < 0 || nv > 255) {
1928 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1929 continue;
1930 }
1931 for (int32_t k=0; k<nv; ++k) {
1932 UnicodeString variant;
1933 Transliterator::getAvailableVariant(k, source, target, variant);
1934 if (variant.length() == 0) {
1935 logln((UnicodeString)" " + k + ": <empty>");
1936 } else {
1937 logln((UnicodeString)" " + k + ": " + variant);
1938 }
1939 }
1940 }
1941 }
1942
1943 // Test registration
1944 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1945 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1946 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
1947 for (i=0; i<3; ++i) {
1948 Transliterator *t = new TestTrans(IDS[i]);
1949 if (t == 0) {
1950 errln("FAIL: out of memory");
1951 return;
1952 }
1953 if (t->getID() != IDS[i]) {
1954 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
1955 delete t;
1956 return;
1957 }
1958 Transliterator::registerInstance(t);
1959 UErrorCode status = U_ZERO_ERROR;
1960 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
1961 if (t == NULL) {
1962 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
1963 IDS[i]);
1964 } else {
1965 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
1966 IDS[i]);
1967 delete t;
1968 }
1969 Transliterator::unregister(IDS[i]);
1970 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
1971 if (t != NULL) {
1972 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
1973 IDS[i]);
1974 delete t;
1975 }
1976 }
1977
1978 // Make sure getAvailable API reflects removal
1979 int32_t n = Transliterator::countAvailableIDs();
1980 for (i=0; i<n; ++i) {
1981 UnicodeString id = Transliterator::getAvailableID(i);
1982 for (j=0; j<3; ++j) {
1983 if (id.caseCompare(FULL_IDS[j],0)==0) {
1984 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
1985 }
1986 }
1987 }
1988 n = Transliterator::countAvailableTargets("Any");
1989 for (i=0; i<n; ++i) {
1990 UnicodeString t;
1991 Transliterator::getAvailableTarget(i, "Any", t);
1992 if (t.caseCompare(IDS[0],0)==0) {
1993 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
1994 }
1995 }
1996 n = Transliterator::countAvailableSources();
1997 for (i=0; i<n; ++i) {
1998 UnicodeString s;
1999 Transliterator::getAvailableSource(i, s);
2000 for (j=0; j<3; ++j) {
2001 if (SOURCES[j] == NULL) continue;
2002 if (s.caseCompare(SOURCES[j],0)==0) {
2003 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2004 }
2005 }
2006 }
2007 }
2008
2009 /**
2010 * Test inverse of Greek-Latin; Title()
2011 */
2012 void TransliteratorTest::TestCompoundInverse(void) {
2013 UParseError parseError;
2014 UErrorCode status = U_ZERO_ERROR;
2015 Transliterator *t = Transliterator::createInstance
2016 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2017 if (t == 0) {
2018 errln("FAIL: createInstance");
2019 return;
2020 }
2021 UnicodeString exp("(Title);Latin-Greek");
2022 if (t->getID() == exp) {
2023 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2024 t->getID());
2025 } else {
2026 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2027 t->getID() + "\", expected \"" + exp + "\"");
2028 }
2029 delete t;
2030 }
2031
2032 /**
2033 * Test NFD chaining with RBT
2034 */
2035 void TransliteratorTest::TestNFDChainRBT() {
2036 UParseError pe;
2037 UErrorCode ec = U_ZERO_ERROR;
2038 Transliterator* t = Transliterator::createFromRules(
2039 "TEST", "::NFD; aa > Q; a > q;",
2040 UTRANS_FORWARD, pe, ec);
2041 if (t == NULL || U_FAILURE(ec)) {
2042 errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2043 return;
2044 }
2045 expect(*t, "aa", "Q");
2046 delete t;
2047
2048 // TEMPORARY TESTS -- BEING DEBUGGED
2049 //=- UnicodeString s, s2;
2050 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2051 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2052 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2053 //=- expect(*t, s, s2);
2054 //=- delete t;
2055 //=-
2056 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2057 //=- expect(*t, s2, s);
2058 //=- delete t;
2059 //=-
2060 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2061 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2062 //=- expect(*t, s, s);
2063 //=- delete t;
2064
2065 // const char* source[] = {
2066 // /*
2067 // "\\u015Br\\u012Bmad",
2068 // "bhagavadg\\u012Bt\\u0101",
2069 // "adhy\\u0101ya",
2070 // "arjuna",
2071 // "vi\\u1E63\\u0101da",
2072 // "y\\u014Dga",
2073 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2074 // "uv\\u0101cr\\u0325",
2075 // */
2076 // "rmk\\u1E63\\u0113t",
2077 // //"dharmak\\u1E63\\u0113tr\\u0113",
2078 // /*
2079 // "kuruk\\u1E63\\u0113tr\\u0113",
2080 // "samav\\u0113t\\u0101",
2081 // "yuyutsava-\\u1E25",
2082 // "m\\u0101mak\\u0101-\\u1E25",
2083 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2084 // "kimakurvata",
2085 // "san\\u0304java",
2086 // */
2087 //
2088 // 0
2089 // };
2090 // const char* expected[] = {
2091 // /*
2092 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2093 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2094 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2095 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2096 // "\\u0935\\u093f\\u0937\\u093e\\u0926",
2097 // "\\u092f\\u094b\\u0917",
2098 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2099 // "\\u0909\\u0935\\u093E\\u091A\\u0943",
2100 // */
2101 // "\\u0927",
2102 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2103 // /*
2104 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2105 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2106 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2107 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2108 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2109 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2110 // "\\u0938\\u0902\\u091c\\u0935",
2111 // */
2112 // 0
2113 // };
2114 // UErrorCode status = U_ZERO_ERROR;
2115 // UParseError parseError;
2116 // UnicodeString message;
2117 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2118 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2119 // if(U_FAILURE(status)){
2120 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2121 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2122 // delete latinToDevToLatin;
2123 // delete devToLatinToDev;
2124 // return;
2125 // }
2126 // UnicodeString gotResult;
2127 // for(int i= 0; source[i] != 0; i++){
2128 // gotResult = source[i];
2129 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2130 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2131 // }
2132 // delete latinToDevToLatin;
2133 // delete devToLatinToDev;
2134 }
2135
2136 /**
2137 * Inverse of "Null" should be "Null". (J21)
2138 */
2139 void TransliteratorTest::TestNullInverse() {
2140 UParseError pe;
2141 UErrorCode ec = U_ZERO_ERROR;
2142 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2143 if (t == 0 || U_FAILURE(ec)) {
2144 errln("FAIL: createInstance");
2145 return;
2146 }
2147 Transliterator *u = t->createInverse(ec);
2148 if (u == 0 || U_FAILURE(ec)) {
2149 errln("FAIL: createInverse");
2150 delete t;
2151 return;
2152 }
2153 if (u->getID() != "Null") {
2154 errln("FAIL: Inverse of Null should be Null");
2155 }
2156 delete t;
2157 delete u;
2158 }
2159
2160 /**
2161 * Check ID of inverse of alias. (J22)
2162 */
2163 void TransliteratorTest::TestAliasInverseID() {
2164 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2165 UParseError pe;
2166 UErrorCode ec = U_ZERO_ERROR;
2167 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2168 if (t == 0 || U_FAILURE(ec)) {
2169 errln("FAIL: createInstance");
2170 return;
2171 }
2172 Transliterator *u = t->createInverse(ec);
2173 if (u == 0 || U_FAILURE(ec)) {
2174 errln("FAIL: createInverse");
2175 delete t;
2176 return;
2177 }
2178 UnicodeString exp = "Hangul-Latin";
2179 UnicodeString got = u->getID();
2180 if (got != exp) {
2181 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2182 ", expected " + exp);
2183 }
2184 delete t;
2185 delete u;
2186 }
2187
2188 /**
2189 * Test IDs of inverses of compound transliterators. (J20)
2190 */
2191 void TransliteratorTest::TestCompoundInverseID() {
2192 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2193 UParseError pe;
2194 UErrorCode ec = U_ZERO_ERROR;
2195 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2196 if (t == 0 || U_FAILURE(ec)) {
2197 errln("FAIL: createInstance");
2198 return;
2199 }
2200 Transliterator *u = t->createInverse(ec);
2201 if (u == 0 || U_FAILURE(ec)) {
2202 errln("FAIL: createInverse");
2203 delete t;
2204 return;
2205 }
2206 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2207 UnicodeString got = u->getID();
2208 if (got != exp) {
2209 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2210 ", expected " + exp);
2211 }
2212 delete t;
2213 delete u;
2214 }
2215
2216 /**
2217 * Test undefined variable.
2218
2219 */
2220 void TransliteratorTest::TestUndefinedVariable() {
2221 UnicodeString rule = "$initial } a <> \\u1161;";
2222 UParseError pe;
2223 UErrorCode ec = U_ZERO_ERROR;
2224 Transliterator *t = new RuleBasedTransliterator("<ID>", rule, UTRANS_FORWARD, 0, pe, ec);
2225 delete t;
2226 if (U_FAILURE(ec)) {
2227 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2228 u_errorName(ec));
2229 return;
2230 }
2231 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2232 u_errorName(ec));
2233 }
2234
2235 /**
2236 * Test empty context.
2237 */
2238 void TransliteratorTest::TestEmptyContext() {
2239 expect(" { a } > b;", "xay a ", "xby b ");
2240 }
2241
2242 /**
2243 * Test compound filter ID syntax
2244 */
2245 void TransliteratorTest::TestCompoundFilterID(void) {
2246 static const char* DATA[] = {
2247 // Col. 1 = ID or rule set (latter must start with #)
2248
2249 // = columns > 1 are null if expect col. 1 to be illegal =
2250
2251 // Col. 2 = direction, "F..." or "R..."
2252 // Col. 3 = source string
2253 // Col. 4 = exp result
2254
2255 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2256 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2257 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2258 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2259 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2260 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2261 NULL,
2262 };
2263
2264 for (int32_t i=0; DATA[i]; i+=4) {
2265 UnicodeString id = CharsToUnicodeString(DATA[i]);
2266 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2267 UTRANS_REVERSE : UTRANS_FORWARD;
2268 UnicodeString source;
2269 UnicodeString exp;
2270 if (DATA[i+2] != NULL) {
2271 source = CharsToUnicodeString(DATA[i+2]);
2272 exp = CharsToUnicodeString(DATA[i+3]);
2273 }
2274 UBool expOk = (DATA[i+1] != NULL);
2275 Transliterator* t = NULL;
2276 UParseError pe;
2277 UErrorCode ec = U_ZERO_ERROR;
2278 if (id.charAt(0) == 0x23/*#*/) {
2279 t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2280 } else {
2281 t = Transliterator::createInstance(id, direction, pe, ec);
2282 }
2283 UBool ok = (t != NULL && U_SUCCESS(ec));
2284 UnicodeString transID;
2285 if (t!=0) {
2286 transID = t->getID();
2287 }
2288 else {
2289 transID = UnicodeString("NULL", "");
2290 }
2291 if (ok == expOk) {
2292 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2293 u_errorName(ec));
2294 if (source.length() != 0) {
2295 expect(*t, source, exp);
2296 }
2297 delete t;
2298 } else {
2299 errln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2300 u_errorName(ec));
2301 }
2302 }
2303 }
2304
2305 /**
2306 * Test new property set syntax
2307 */
2308 void TransliteratorTest::TestPropertySet() {
2309 expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
2310 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2311 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2312 }
2313
2314 /**
2315 * Test various failure points of the new 2.0 engine.
2316 */
2317 void TransliteratorTest::TestNewEngine() {
2318 UParseError pe;
2319 UErrorCode ec = U_ZERO_ERROR;
2320 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2321 if (t == 0 || U_FAILURE(ec)) {
2322 errln("FAIL: createInstance Latin-Hiragana");
2323 return;
2324 }
2325 // Katakana should be untouched
2326 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2327 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2328
2329 delete t;
2330
2331 #if 1
2332 // This test will only work if Transliterator.ROLLBACK is
2333 // true. Otherwise, this test will fail, revealing a
2334 // limitation of global filters in incremental mode.
2335 Transliterator *a =
2336 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2337 Transliterator *A =
2338 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2339 if (U_FAILURE(ec)) {
2340 delete a;
2341 delete A;
2342 return;
2343 }
2344
2345 Transliterator* array[3];
2346 array[0] = a;
2347 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2348 array[2] = A;
2349 if (U_FAILURE(ec)) {
2350 errln("FAIL: createInstance NFD");
2351 delete a;
2352 delete A;
2353 delete array[1];
2354 return;
2355 }
2356
2357 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2358 if (U_FAILURE(ec)) {
2359 errln("FAIL: UnicodeSet constructor");
2360 delete a;
2361 delete A;
2362 delete array[1];
2363 delete t;
2364 return;
2365 }
2366
2367 expect(*t, "aAaA", "bAbA");
2368
2369 assertTrue("countElements", t->countElements() == 3);
2370 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2371 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2372 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2373 assertSuccess("getElement", ec);
2374
2375 delete a;
2376 delete A;
2377 delete array[1];
2378 delete t;
2379 #endif
2380
2381 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2382 "a",
2383 "ax");
2384
2385 UnicodeString gr = CharsToUnicodeString(
2386 "$ddot = \\u0308 ;"
2387 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2388 "$rough = \\u0314 ;"
2389 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2390 "\\u03b1 <> a ;"
2391 "$rough <> h ;");
2392
2393 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2394 }
2395
2396 /**
2397 * Test quantified segment behavior. We want:
2398 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2399 */
2400 void TransliteratorTest::TestQuantifiedSegment(void) {
2401 // The normal case
2402 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2403
2404 // The tricky case; the quantifier is around the segment
2405 expect("([abc])+ > x $1 x;", "cba", "xax");
2406
2407 // Tricky case in reverse direction
2408 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2409
2410 // Check post-context segment
2411 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2412
2413 // Test toRule/toPattern for non-quantified segment.
2414 // Careful with spacing here.
2415 UnicodeString r("([a-c]){q} > x $1 x;");
2416 UParseError pe;
2417 UErrorCode ec = U_ZERO_ERROR;
2418 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2419 if (U_FAILURE(ec)) {
2420 errln("FAIL: createFromRules");
2421 delete t;
2422 return;
2423 }
2424 UnicodeString rr;
2425 t->toRules(rr, TRUE);
2426 if (r != rr) {
2427 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2428 } else {
2429 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2430 }
2431 delete t;
2432
2433 // Test toRule/toPattern for quantified segment.
2434 // Careful with spacing here.
2435 r = "([a-c])+{q} > x $1 x;";
2436 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2437 if (U_FAILURE(ec)) {
2438 errln("FAIL: createFromRules");
2439 delete t;
2440 return;
2441 }
2442 t->toRules(rr, TRUE);
2443 if (r != rr) {
2444 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2445 } else {
2446 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2447 }
2448 delete t;
2449 }
2450
2451 //======================================================================
2452 // Ram's tests
2453 //======================================================================
2454 void TransliteratorTest::TestDevanagariLatinRT(){
2455 const int MAX_LEN= 52;
2456 const char* const source[MAX_LEN] = {
2457 "bh\\u0101rata",
2458 "kra",
2459 "k\\u1E63a",
2460 "khra",
2461 "gra",
2462 "\\u1E45ra",
2463 "cra",
2464 "chra",
2465 "j\\u00F1a",
2466 "jhra",
2467 "\\u00F1ra",
2468 "\\u1E6Dya",
2469 "\\u1E6Dhra",
2470 "\\u1E0Dya",
2471 //"r\\u0323ya", // \u095c is not valid in Devanagari
2472 "\\u1E0Dhya",
2473 "\\u1E5Bhra",
2474 "\\u1E47ra",
2475 "tta",
2476 "thra",
2477 "dda",
2478 "dhra",
2479 "nna",
2480 "pra",
2481 "phra",
2482 "bra",
2483 "bhra",
2484 "mra",
2485 "\\u1E49ra",
2486 //"l\\u0331ra",
2487 "yra",
2488 "\\u1E8Fra",
2489 //"l-",
2490 "vra",
2491 "\\u015Bra",
2492 "\\u1E63ra",
2493 "sra",
2494 "hma",
2495 "\\u1E6D\\u1E6Da",
2496 "\\u1E6D\\u1E6Dha",
2497 "\\u1E6Dh\\u1E6Dha",
2498 "\\u1E0D\\u1E0Da",
2499 "\\u1E0D\\u1E0Dha",
2500 "\\u1E6Dya",
2501 "\\u1E6Dhya",
2502 "\\u1E0Dya",
2503 "\\u1E0Dhya",
2504 // Not roundtrippable --
2505 // \\u0939\\u094d\\u094d\\u092E - hma
2506 // \\u0939\\u094d\\u092E - hma
2507 // CharsToUnicodeString("hma"),
2508 "hya",
2509 "\\u015Br\\u0325",
2510 "\\u015Bca",
2511 "\\u0115",
2512 "san\\u0304j\\u012Bb s\\u0113nagupta",
2513 "\\u0101nand vaddir\\u0101ju",
2514 "\\u0101",
2515 "a"
2516 };
2517 const char* const expected[MAX_LEN] = {
2518 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2519 "\\u0915\\u094D\\u0930", /* kra */
2520 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2521 "\\u0916\\u094D\\u0930", /* khra */
2522 "\\u0917\\u094D\\u0930", /* gra */
2523 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2524 "\\u091A\\u094D\\u0930", /* cra */
2525 "\\u091B\\u094D\\u0930", /* chra */
2526 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2527 "\\u091D\\u094D\\u0930", /* jhra */
2528 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2529 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2530 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2531 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2532 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2533 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2534 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2535 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2536 "\\u0924\\u094D\\u0924", /* tta */
2537 "\\u0925\\u094D\\u0930", /* thra */
2538 "\\u0926\\u094D\\u0926", /* dda */
2539 "\\u0927\\u094D\\u0930", /* dhra */
2540 "\\u0928\\u094D\\u0928", /* nna */
2541 "\\u092A\\u094D\\u0930", /* pra */
2542 "\\u092B\\u094D\\u0930", /* phra */
2543 "\\u092C\\u094D\\u0930", /* bra */
2544 "\\u092D\\u094D\\u0930", /* bhra */
2545 "\\u092E\\u094D\\u0930", /* mra */
2546 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2547 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2548 "\\u092F\\u094D\\u0930", /* yra */
2549 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2550 //"l-",
2551 "\\u0935\\u094D\\u0930", /* vra */
2552 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2553 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2554 "\\u0938\\u094D\\u0930", /* sra */
2555 "\\u0939\\u094d\\u092E", /* hma */
2556 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2557 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2558 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2559 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2560 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2561 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2562 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2563 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2564 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2565 // "hma", /* hma */
2566 "\\u0939\\u094D\\u092F", /* hya */
2567 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2568 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2569 "\\u090d", /* e\\u0306 */
2570 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2571 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2572 "\\u0906",
2573 "\\u0905",
2574 };
2575 UErrorCode status = U_ZERO_ERROR;
2576 UParseError parseError;
2577 UnicodeString message;
2578 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2579 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2580 if(U_FAILURE(status)){
2581 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2582 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2583 return;
2584 }
2585 UnicodeString gotResult;
2586 for(int i= 0; i<MAX_LEN; i++){
2587 gotResult = source[i];
2588 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2589 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2590 }
2591 delete latinToDev;
2592 delete devToLatin;
2593 }
2594
2595 void TransliteratorTest::TestTeluguLatinRT(){
2596 const int MAX_LEN=10;
2597 const char* const source[MAX_LEN] = {
2598 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2599 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2600 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2601 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2602 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2603 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2604 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2605 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2606 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2607 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2608 };
2609
2610 const char* const expected[MAX_LEN] = {
2611 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2612 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2613 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2614 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2615 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2616 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2617 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2618 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2619 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2620 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2621 };
2622
2623 UErrorCode status = U_ZERO_ERROR;
2624 UParseError parseError;
2625 UnicodeString message;
2626 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2627 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2628 if(U_FAILURE(status)){
2629 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2630 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2631 return;
2632 }
2633 UnicodeString gotResult;
2634 for(int i= 0; i<MAX_LEN; i++){
2635 gotResult = source[i];
2636 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2637 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2638 }
2639 delete latinToDev;
2640 delete devToLatin;
2641 }
2642
2643 void TransliteratorTest::TestSanskritLatinRT(){
2644 const int MAX_LEN =16;
2645 const char* const source[MAX_LEN] = {
2646 "rmk\\u1E63\\u0113t",
2647 "\\u015Br\\u012Bmad",
2648 "bhagavadg\\u012Bt\\u0101",
2649 "adhy\\u0101ya",
2650 "arjuna",
2651 "vi\\u1E63\\u0101da",
2652 "y\\u014Dga",
2653 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2654 "uv\\u0101cr\\u0325",
2655 "dharmak\\u1E63\\u0113tr\\u0113",
2656 "kuruk\\u1E63\\u0113tr\\u0113",
2657 "samav\\u0113t\\u0101",
2658 "yuyutsava\\u1E25",
2659 "m\\u0101mak\\u0101\\u1E25",
2660 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2661 "kimakurvata",
2662 "san\\u0304java",
2663 };
2664 const char* const expected[MAX_LEN] = {
2665 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2666 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2667 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2668 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2669 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2670 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2671 "\\u092f\\u094b\\u0917",
2672 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2673 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2674 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2675 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2676 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2677 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2678 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2679 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2680 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2681 "\\u0938\\u0902\\u091c\\u0935",
2682 };
2683 UErrorCode status = U_ZERO_ERROR;
2684 UParseError parseError;
2685 UnicodeString message;
2686 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2687 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2688 if(U_FAILURE(status)){
2689 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2690 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2691 return;
2692 }
2693 UnicodeString gotResult;
2694 for(int i= 0; i<MAX_LEN; i++){
2695 gotResult = source[i];
2696 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2697 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2698 }
2699 delete latinToDev;
2700 delete devToLatin;
2701 }
2702
2703
2704 void TransliteratorTest::TestCompoundLatinRT(){
2705 const char* const source[] = {
2706 "rmk\\u1E63\\u0113t",
2707 "\\u015Br\\u012Bmad",
2708 "bhagavadg\\u012Bt\\u0101",
2709 "adhy\\u0101ya",
2710 "arjuna",
2711 "vi\\u1E63\\u0101da",
2712 "y\\u014Dga",
2713 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2714 "uv\\u0101cr\\u0325",
2715 "dharmak\\u1E63\\u0113tr\\u0113",
2716 "kuruk\\u1E63\\u0113tr\\u0113",
2717 "samav\\u0113t\\u0101",
2718 "yuyutsava\\u1E25",
2719 "m\\u0101mak\\u0101\\u1E25",
2720 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2721 "kimakurvata",
2722 "san\\u0304java"
2723 };
2724 const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2725 const char* const expected[MAX_LEN] = {
2726 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2727 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2728 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2729 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2730 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2731 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2732 "\\u092f\\u094b\\u0917",
2733 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2734 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2735 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2736 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2737 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2738 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2739 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2740 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2741 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2742 "\\u0938\\u0902\\u091c\\u0935"
2743 };
2744 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2745 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2746 return;
2747 }
2748
2749 UErrorCode status = U_ZERO_ERROR;
2750 UParseError parseError;
2751 UnicodeString message;
2752 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2753 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2754 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2755 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2756
2757 if(U_FAILURE(status)){
2758 errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2759 errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2760 return;
2761 }
2762 UnicodeString gotResult;
2763 for(int i= 0; i<MAX_LEN; i++){
2764 gotResult = source[i];
2765 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2766 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2767 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2768
2769 }
2770 delete(latinToDevToLatin);
2771 delete(devToLatinToDev);
2772 delete(devToTelToDev);
2773 delete(latinToTelToLatin);
2774 }
2775
2776 /**
2777 * Test Gurmukhi-Devanagari Tippi and Bindi
2778 */
2779 void TransliteratorTest::TestGurmukhiDevanagari(){
2780 // the rule says:
2781 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2782 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2783 UErrorCode status = U_ZERO_ERROR;
2784 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status);
2785 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status);
2786 UParseError parseError;
2787
2788 UnicodeSetIterator vIter(vowel);
2789 UnicodeSetIterator nvIter(non_vowel);
2790 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2791 if(U_FAILURE(status)) {
2792 errln("Error creating transliterator %s", u_errorName(status));
2793 delete trans;
2794 return;
2795 }
2796 UnicodeString src (" \\u0902");
2797 UnicodeString expected(" \\u0A02");
2798 src = src.unescape();
2799 expected= expected.unescape();
2800
2801 while(vIter.next()){
2802 src.setCharAt(0,(UChar) vIter.getCodepoint());
2803 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2804 expect(*trans,src,expected);
2805 }
2806
2807 expected.setCharAt(1,0x0A70);
2808 while(nvIter.next()){
2809 //src.setCharAt(0,(char) nvIter.codepoint);
2810 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2811 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2812 expect(*trans,src,expected);
2813 }
2814 delete trans;
2815 }
2816 /**
2817 * Test instantiation from a locale.
2818 */
2819 void TransliteratorTest::TestLocaleInstantiation(void) {
2820 UParseError pe;
2821 UErrorCode ec = U_ZERO_ERROR;
2822 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2823 if (U_FAILURE(ec)) {
2824 errln("FAIL: createInstance(ru_RU-Latin)");
2825 delete t;
2826 return;
2827 }
2828 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2829 delete t;
2830
2831 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2832 if (U_FAILURE(ec)) {
2833 errln("FAIL: createInstance(en-el)");
2834 delete t;
2835 return;
2836 }
2837 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2838 delete t;
2839 }
2840
2841 /**
2842 * Test title case handling of accent (should ignore accents)
2843 */
2844 void TransliteratorTest::TestTitleAccents(void) {
2845 UParseError pe;
2846 UErrorCode ec = U_ZERO_ERROR;
2847 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2848 if (U_FAILURE(ec)) {
2849 errln("FAIL: createInstance(Title)");
2850 delete t;
2851 return;
2852 }
2853 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2854 delete t;
2855 }
2856
2857 /**
2858 * Basic test of a locale resource based rule.
2859 */
2860 void TransliteratorTest::TestLocaleResource() {
2861 const char* DATA[] = {
2862 // id from to
2863 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2864 "Latin-el", "b", "\\u03bc\\u03c0",
2865 "Latin-Greek", "b", "\\u03B2",
2866 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2867 "el-Latin", "\\u03B2", "v",
2868 "Greek-Latin", "\\u03B2", "b",
2869 };
2870 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2871 for (int32_t i=0; i<DATA_length; i+=3) {
2872 UParseError pe;
2873 UErrorCode ec = U_ZERO_ERROR;
2874 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2875 if (U_FAILURE(ec)) {
2876 errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");
2877 delete t;
2878 continue;
2879 }
2880 expect(*t, CharsToUnicodeString(DATA[i+1]),
2881 CharsToUnicodeString(DATA[i+2]));
2882 delete t;
2883 }
2884 }
2885
2886 /**
2887 * Make sure parse errors reference the right line.
2888 */
2889 void TransliteratorTest::TestParseError() {
2890 const char* rule =
2891 "a > b;\n"
2892 "# more stuff\n"
2893 "d << b;";
2894 UErrorCode ec = U_ZERO_ERROR;
2895 UParseError pe;
2896 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2897 delete t;
2898 if (U_FAILURE(ec)) {
2899 UnicodeString err(pe.preContext);
2900 err.append((UChar)124/*|*/).append(pe.postContext);
2901 if (err.indexOf("d << b") >= 0) {
2902 logln("Ok: " + err);
2903 } else {
2904 errln("FAIL: " + err);
2905 }
2906 return;
2907 }
2908 errln("FAIL: no syntax error");
2909 }
2910
2911 /**
2912 * Make sure sets on output are disallowed.
2913 */
2914 void TransliteratorTest::TestOutputSet() {
2915 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2916 UErrorCode ec = U_ZERO_ERROR;
2917 UParseError pe;
2918 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2919 delete t;
2920 if (U_FAILURE(ec)) {
2921 UnicodeString err(pe.preContext);
2922 err.append((UChar)124/*|*/).append(pe.postContext);
2923 logln("Ok: " + err);
2924 return;
2925 }
2926 errln("FAIL: No syntax error");
2927 }
2928
2929 /**
2930 * Test the use variable range pragma, making sure that use of
2931 * variable range characters is detected and flagged as an error.
2932 */
2933 void TransliteratorTest::TestVariableRange() {
2934 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2935 UErrorCode ec = U_ZERO_ERROR;
2936 UParseError pe;
2937 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2938 delete t;
2939 if (U_FAILURE(ec)) {
2940 UnicodeString err(pe.preContext);
2941 err.append((UChar)124/*|*/).append(pe.postContext);
2942 logln("Ok: " + err);
2943 return;
2944 }
2945 errln("FAIL: No syntax error");
2946 }
2947
2948 /**
2949 * Test invalid post context error handling
2950 */
2951 void TransliteratorTest::TestInvalidPostContext() {
2952 UnicodeString rule = "a}b{c>d;";
2953 UErrorCode ec = U_ZERO_ERROR;
2954 UParseError pe;
2955 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2956 delete t;
2957 if (U_FAILURE(ec)) {
2958 UnicodeString err(pe.preContext);
2959 err.append((UChar)124/*|*/).append(pe.postContext);
2960 if (err.indexOf("a}b{c") >= 0) {
2961 logln("Ok: " + err);
2962 } else {
2963 errln("FAIL: " + err);
2964 }
2965 return;
2966 }
2967 errln("FAIL: No syntax error");
2968 }
2969
2970 /**
2971 * Test ID form variants
2972 */
2973 void TransliteratorTest::TestIDForms() {
2974 const char* DATA[] = {
2975 "NFC", NULL, "NFD",
2976 "nfd", NULL, "NFC", // make sure case is ignored
2977 "Any-NFKD", NULL, "Any-NFKC",
2978 "Null", NULL, "Null",
2979 "-nfkc", "nfkc", "NFKD",
2980 "-nfkc/", "nfkc", "NFKD",
2981 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
2982 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2983 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2984 "Source-", NULL, NULL,
2985 "Source/Variant-", NULL, NULL,
2986 "Source-/Variant", NULL, NULL,
2987 "/Variant", NULL, NULL,
2988 "/Variant-", NULL, NULL,
2989 "-/Variant", NULL, NULL,
2990 "-/", NULL, NULL,
2991 "-", NULL, NULL,
2992 "/", NULL, NULL,
2993 };
2994 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
2995
2996 for (int32_t i=0; i<DATA_length; i+=3) {
2997 const char* ID = DATA[i];
2998 const char* expID = DATA[i+1];
2999 const char* expInvID = DATA[i+2];
3000 UBool expValid = (expInvID != NULL);
3001 if (expID == NULL) {
3002 expID = ID;
3003 }
3004 UParseError pe;
3005 UErrorCode ec = U_ZERO_ERROR;
3006 Transliterator *t =
3007 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3008 if (U_FAILURE(ec)) {
3009 if (!expValid) {
3010 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3011 } else {
3012 errln((UnicodeString)"FAIL: Couldn't create " + ID);
3013 }
3014 delete t;
3015 continue;
3016 }
3017 Transliterator *u = t->createInverse(ec);
3018 if (U_FAILURE(ec)) {
3019 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3020 delete t;
3021 delete u;
3022 continue;
3023 }
3024 if (t->getID() == expID &&
3025 u->getID() == expInvID) {
3026 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3027 } else {
3028 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3029 t->getID() + " x getInverse() => " + u->getID() +
3030 ", expected " + expInvID);
3031 }
3032 delete t;
3033 delete u;
3034 }
3035 }
3036
3037 static const UChar SPACE[] = {32,0};
3038 static const UChar NEWLINE[] = {10,0};
3039 static const UChar RETURN[] = {13,0};
3040 static const UChar EMPTY[] = {0};
3041
3042 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3043 const UnicodeString& testRulesForward) {
3044 UnicodeString rules2; t2.toRules(rules2, TRUE);
3045 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3046 rules2.findAndReplace(SPACE, EMPTY);
3047 rules2.findAndReplace(NEWLINE, EMPTY);
3048 rules2.findAndReplace(RETURN, EMPTY);
3049
3050 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3051
3052 if (rules2 != testRules) {
3053 errln(label);
3054 logln((UnicodeString)"GENERATED RULES: " + rules2);
3055 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3056 }
3057 }
3058
3059 /**
3060 * Mark's toRules test.
3061 */
3062 void TransliteratorTest::TestToRulesMark() {
3063 const char* testRules =
3064 "::[[:Latin:][:Mark:]];"
3065 "::NFKD (NFC);"
3066 "::Lower (Lower);"
3067 "a <> \\u03B1;" // alpha
3068 "::NFKC (NFD);"
3069 "::Upper (Lower);"
3070 "::Lower ();"
3071 "::([[:Greek:][:Mark:]]);"
3072 ;
3073 const char* testRulesForward =
3074 "::[[:Latin:][:Mark:]];"
3075 "::NFKD(NFC);"
3076 "::Lower(Lower);"
3077 "a > \\u03B1;"
3078 "::NFKC(NFD);"
3079 "::Upper (Lower);"
3080 "::Lower ();"
3081 ;
3082 const char* testRulesBackward =
3083 "::[[:Greek:][:Mark:]];"
3084 "::Lower (Upper);"
3085 "::NFD(NFKC);"
3086 "\\u03B1 > a;"
3087 "::Lower(Lower);"
3088 "::NFC(NFKD);"
3089 ;
3090 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3091 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3092
3093 UParseError pe;
3094 UErrorCode ec = U_ZERO_ERROR;
3095 Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
3096 Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
3097
3098 if (U_FAILURE(ec)) {
3099 delete t2;
3100 delete t3;
3101 errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3102 return;
3103 }
3104
3105 expect(*t2, source, target);
3106 expect(*t3, target, source);
3107
3108 checkRules("Failed toRules FORWARD", *t2, testRulesForward);
3109 checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
3110
3111 delete t2;
3112 delete t3;
3113 }
3114
3115 /**
3116 * Test Escape and Unescape transliterators.
3117 */
3118 void TransliteratorTest::TestEscape() {
3119 UParseError pe;
3120 UErrorCode ec;
3121 Transliterator *t;
3122
3123 ec = U_ZERO_ERROR;
3124 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3125 if (U_FAILURE(ec)) {
3126 errln((UnicodeString)"FAIL: createInstance");
3127 } else {
3128 expect(*t,
3129 "\\x{40}\\U00000031&#x32;&#81;",
3130 "@12Q");
3131 }
3132 delete t;
3133
3134 ec = U_ZERO_ERROR;
3135 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3136 if (U_FAILURE(ec)) {
3137 errln((UnicodeString)"FAIL: createInstance");
3138 } else {
3139 expect(*t,
3140 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3141 "\\u0041\\U0010BEEF\\uFEED");
3142 }
3143 delete t;
3144
3145 ec = U_ZERO_ERROR;
3146 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3147 if (U_FAILURE(ec)) {
3148 errln((UnicodeString)"FAIL: createInstance");
3149 } else {
3150 expect(*t,
3151 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3152 "\\u0041\\uDBEF\\uDEEF\\uFEED");
3153 }
3154 delete t;
3155
3156 ec = U_ZERO_ERROR;
3157 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3158 if (U_FAILURE(ec)) {
3159 errln((UnicodeString)"FAIL: createInstance");
3160 } else {
3161 expect(*t,
3162 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3163 "\\x{41}\\x{10BEEF}\\x{FEED}");
3164 }
3165 delete t;
3166 }
3167
3168
3169 void TransliteratorTest::TestAnchorMasking(){
3170 UnicodeString rule ("^a > Q; a > q;");
3171 UErrorCode status= U_ZERO_ERROR;
3172 UParseError parseError;
3173
3174 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3175 if(U_FAILURE(status)){
3176 errln(UnicodeString("FAIL: ") + "ID" +
3177 ".createFromRules() => bad rules" +
3178 /*", parse error " + parseError.code +*/
3179 ", line " + parseError.line +
3180 ", offset " + parseError.offset +
3181 ", context " + prettify(parseError.preContext, TRUE) +
3182 ", rules: " + prettify(rule, TRUE));
3183 }
3184 delete t;
3185 }
3186
3187 /**
3188 * Make sure display names of variants look reasonable.
3189 */
3190 void TransliteratorTest::TestDisplayName() {
3191 #if UCONFIG_NO_FORMATTING
3192 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3193 return;
3194 #else
3195 static const char* DATA[] = {
3196 // ID, forward name, reverse name
3197 // Update the text as necessary -- the important thing is
3198 // not the text itself, but how various cases are handled.
3199
3200 // Basic test
3201 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3202
3203 // Variants
3204 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3205
3206 // Target-only IDs
3207 "NFC", "Any to NFC", "Any to NFD",
3208 };
3209
3210 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3211
3212 Locale US("en", "US");
3213
3214 for (int32_t i=0; i<DATA_length; i+=3) {
3215 UnicodeString name;
3216 Transliterator::getDisplayName(DATA[i], US, name);
3217 if (name != DATA[i+1]) {
3218 errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3219 name + ", expected " + DATA[i+1]);
3220 } else {
3221 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3222 }
3223 UErrorCode ec = U_ZERO_ERROR;
3224 UParseError pe;
3225 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3226 if (U_FAILURE(ec)) {
3227 delete t;
3228 errln("FAIL: createInstance failed");
3229 continue;
3230 }
3231 name = Transliterator::getDisplayName(t->getID(), US, name);
3232 if (name != DATA[i+2]) {
3233 errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3234 name + ", expected " + DATA[i+2]);
3235 } else {
3236 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3237 }
3238 delete t;
3239 }
3240 #endif
3241 }
3242
3243 void TransliteratorTest::TestSpecialCases(void) {
3244 const UnicodeString registerRules[] = {
3245 "Any-Dev1", "x > X; y > Y;",
3246 "Any-Dev2", "XY > Z",
3247 "Greek-Latin/FAKE",
3248 CharsToUnicodeString
3249 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3250 "" // END MARKER
3251 };
3252
3253 const UnicodeString testCases[] = {
3254 // NORMALIZATION
3255 // should add more test cases
3256 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3257 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3258 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3259 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3260
3261 // mp -> b BUG
3262 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3263 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3264
3265 // check for devanagari bug
3266 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3267
3268 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3269 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3270 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3271
3272 //TODO: enable this test once Titlecase works right
3273 /*
3274 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3275 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3276 */
3277 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3278 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3279 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3280 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3281
3282 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3283 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3284
3285 // FORMS OF S
3286 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3287 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3288 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3289 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3290 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3291 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3292 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3293 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3294 // Tatiana bug
3295 // Upper: TAT\\u02B9\\u00C2NA
3296 // Lower: tat\\u02B9\\u00E2na
3297 // Title: Tat\\u02B9\\u00E2na
3298 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3299 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3300 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3301 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3302 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3303 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3304
3305 "" // END MARKER
3306 };
3307
3308 UParseError pos;
3309 int32_t i;
3310 for (i = 0; registerRules[i].length()!=0; i+=2) {
3311 UErrorCode status = U_ZERO_ERROR;
3312
3313 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3314 registerRules[i+1], UTRANS_FORWARD, pos, status);
3315 if (U_FAILURE(status)) {
3316 errln("Fails: Unable to create the transliterator from rules.");
3317 } else {
3318 Transliterator::registerInstance(t);
3319 }
3320 }
3321 for (i = 0; testCases[i].length()!=0; i+=3) {
3322 UErrorCode ec = U_ZERO_ERROR;
3323 UParseError pe;
3324 const UnicodeString& name = testCases[i];
3325 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3326 if (U_FAILURE(ec)) {
3327 errln((UnicodeString)"FAIL: Couldn't create " + name);
3328 delete t;
3329 continue;
3330 }
3331 const UnicodeString& id = t->getID();
3332 const UnicodeString& source = testCases[i+1];
3333 UnicodeString target;
3334
3335 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3336
3337 if (testCases[i+2].length() > 0) {
3338 target = testCases[i+2];
3339 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3340 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3341 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3342 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3343 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3344 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3345 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3346 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3347 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3348 target = source;
3349 target.toLower(Locale::getUS());
3350 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3351 target = source;
3352 target.toUpper(Locale::getUS());
3353 }
3354 if (U_FAILURE(ec)) {
3355 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3356 continue;
3357 }
3358
3359 expect(*t, source, target);
3360 delete t;
3361 }
3362 for (i = 0; registerRules[i].length()!=0; i+=2) {
3363 Transliterator::unregister(registerRules[i]);
3364 }
3365 }
3366
3367 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3368 if (ch <= 0xFFFF) {
3369 sprintf(buffer, "\\u%04x", (int)ch);
3370 } else {
3371 sprintf(buffer, "\\U%08x", (int)ch);
3372 }
3373 return buffer;
3374 }
3375
3376 void TransliteratorTest::TestSurrogateCasing (void) {
3377 // check that casing handles surrogates
3378 // titlecase is currently defective
3379 char buffer[20];
3380 UChar buffer2[20];
3381 UChar32 dee;
3382 UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3383 UnicodeString DEE(u_totitle(dee));
3384 if (DEE != DESERET_DEE) {
3385 err("Fails titlecase of surrogates");
3386 err(Char32ToEscapedChars(dee, buffer));
3387 err(", ");
3388 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3389 }
3390
3391 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3392 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3393 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3394 UErrorCode status= U_ZERO_ERROR;
3395
3396 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3397 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3398 errln("Fails: Can't uppercase surrogates.");
3399 }
3400
3401 status= U_ZERO_ERROR;
3402 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3403 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3404 errln("Fails: Can't lowercase surrogates.");
3405 }
3406 }
3407
3408 static void _trans(Transliterator& t, const UnicodeString& src,
3409 UnicodeString& result) {
3410 result = src;
3411 t.transliterate(result);
3412 }
3413
3414 static void _trans(const UnicodeString& id, const UnicodeString& src,
3415 UnicodeString& result, UErrorCode ec) {
3416 UParseError pe;
3417 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3418 if (U_SUCCESS(ec)) {
3419 _trans(*t, src, result);
3420 }
3421 delete t;
3422 }
3423
3424 static UnicodeString _findMatch(const UnicodeString& source,
3425 const UnicodeString* pairs) {
3426 UnicodeString empty;
3427 for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3428 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3429 return pairs[i+1];
3430 }
3431 }
3432 return empty;
3433 }
3434
3435 // Check to see that incremental gets at least part way through a reasonable string.
3436
3437 void TransliteratorTest::TestIncrementalProgress(void) {
3438 UErrorCode ec = U_ZERO_ERROR;
3439 UnicodeString latinTest = "The Quick Brown Fox.";
3440 UnicodeString devaTest;
3441 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3442 UnicodeString kataTest;
3443 _trans("Latin-Katakana", latinTest, kataTest, ec);
3444 if (U_FAILURE(ec)) {
3445 errln("FAIL: Internal error");
3446 return;
3447 }
3448 const UnicodeString tests[] = {
3449 "Any", latinTest,
3450 "Latin", latinTest,
3451 "Halfwidth", latinTest,
3452 "Devanagari", devaTest,
3453 "Katakana", kataTest,
3454 "" // END MARKER
3455 };
3456
3457 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3458 int32_t i = 0, j=0, k=0;
3459 int32_t sources = Transliterator::countAvailableSources();
3460 for (i = 0; i < sources; i++) {
3461 UnicodeString source;
3462 Transliterator::getAvailableSource(i, source);
3463 UnicodeString test = _findMatch(source, tests);
3464 if (test.length() == 0) {
3465 logln((UnicodeString)"Skipping " + source + "-X");
3466 continue;
3467 }
3468 int32_t targets = Transliterator::countAvailableTargets(source);
3469 for (j = 0; j < targets; j++) {
3470 UnicodeString target;
3471 Transliterator::getAvailableTarget(j, source, target);
3472 int32_t variants = Transliterator::countAvailableVariants(source, target);
3473 for (k =0; k< variants; k++) {
3474 UnicodeString variant;
3475 UParseError err;
3476 UErrorCode status = U_ZERO_ERROR;
3477
3478 Transliterator::getAvailableVariant(k, source, target, variant);
3479 UnicodeString id = source + "-" + target + "/" + variant;
3480
3481 if(id.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){
3482 continue;
3483 }
3484 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3485 if (U_FAILURE(status)) {
3486 errln((UnicodeString)"FAIL: Could not create " + id);
3487 delete t;
3488 continue;
3489 }
3490 status = U_ZERO_ERROR;
3491 CheckIncrementalAux(t, test);
3492
3493 UnicodeString rev;
3494 _trans(*t, test, rev);
3495 Transliterator *inv = t->createInverse(status);
3496 if (U_FAILURE(status)) {
3497 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3498 delete t;
3499 delete inv;
3500 continue;
3501 }
3502 CheckIncrementalAux(inv, rev);
3503 delete t;
3504 delete inv;
3505 }
3506 }
3507 }
3508 }
3509
3510 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3511 const UnicodeString& input) {
3512 UErrorCode ec = U_ZERO_ERROR;
3513 UTransPosition pos;
3514 UnicodeString test = input;
3515
3516 pos.contextStart = 0;
3517 pos.contextLimit = input.length();
3518 pos.start = 0;
3519 pos.limit = input.length();
3520
3521 t->transliterate(test, pos, ec);
3522 if (U_FAILURE(ec)) {
3523 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3524 return;
3525 }
3526 UBool gotError = FALSE;
3527
3528 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3529
3530 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3531 errln((UnicodeString)"No Progress, " +
3532 t->getID() + ": " + formatInput(test, input, pos));
3533 gotError = TRUE;
3534 } else {
3535 logln((UnicodeString)"PASS Progress, " +
3536 t->getID() + ": " + formatInput(test, input, pos));
3537 }
3538 t->finishTransliteration(test, pos);
3539 if (pos.start != pos.limit) {
3540 errln((UnicodeString)"Incomplete, " +
3541 t->getID() + ": " + formatInput(test, input, pos));
3542 gotError = TRUE;
3543 }
3544 }
3545
3546 void TransliteratorTest::TestFunction() {
3547 // Careful with spacing and ';' here: Phrase this exactly
3548 // as toRules() is going to return it. If toRules() changes
3549 // with regard to spacing or ';', then adjust this string.
3550 UnicodeString rule =
3551 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3552
3553 UParseError pe;
3554 UErrorCode ec = U_ZERO_ERROR;
3555 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3556 if (t == NULL) {
3557 errln("FAIL: createFromRules failed");
3558 return;
3559 }
3560
3561 UnicodeString r;
3562 t->toRules(r, TRUE);
3563 if (r == rule) {
3564 logln((UnicodeString)"OK: toRules() => " + r);
3565 } else {
3566 errln((UnicodeString)"FAIL: toRules() => " + r +
3567 ", expected " + rule);
3568 }
3569
3570 expect(*t, "The Quick Brown Fox",
3571 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
3572
3573 delete t;
3574 }
3575
3576 void TransliteratorTest::TestInvalidBackRef(void) {
3577 UnicodeString rule = ". > $1;";
3578 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3579 UParseError pe;
3580 UErrorCode ec = U_ZERO_ERROR;
3581 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3582 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3583
3584 if (t != NULL) {
3585 errln("FAIL: createFromRules should have returned NULL");
3586 delete t;
3587 }
3588
3589 if (t2 != NULL) {
3590 errln("FAIL: createFromRules should have returned NULL");
3591 delete t2;
3592 }
3593
3594 if (U_SUCCESS(ec)) {
3595 errln("FAIL: Ok: . > $1; => no error");
3596 } else {
3597 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3598 }
3599 }
3600
3601 void TransliteratorTest::TestMulticharStringSet() {
3602 // Basic testing
3603 const char* rule =
3604 " [{aa}] > x;"
3605 " a > y;"
3606 " [b{bc}] > z;"
3607 "[{gd}] { e > q;"
3608 " e } [{fg}] > r;" ;
3609
3610 UParseError pe;
3611 UErrorCode ec = U_ZERO_ERROR;
3612 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3613 if (t == NULL || U_FAILURE(ec)) {
3614 delete t;
3615 errln("FAIL: createFromRules failed");
3616 return;
3617 }
3618
3619 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3620 "y x yz z d gd de gdq gdqfg ddrfg");
3621 delete t;
3622
3623 // Overlapped string test. Make sure that when multiple
3624 // strings can match that the longest one is matched.
3625 rule =
3626 " [a {ab} {abc}] > x;"
3627 " b > y;"
3628 " c > z;"
3629 " q [t {st} {rst}] { e > p;" ;
3630
3631 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3632 if (t == NULL || U_FAILURE(ec)) {
3633 delete t;
3634 errln("FAIL: createFromRules failed");
3635 return;
3636 }
3637
3638 expect(*t, "a ab abc qte qste qrste",
3639 "x x x qtp qstp qrstp");
3640 delete t;
3641 }
3642
3643 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3644 // BEGIN TestUserFunction support factory
3645
3646 Transliterator* _TUFF[4];
3647 UnicodeString* _TUFID[4];
3648
3649 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3650 Transliterator::Token context) {
3651 return _TUFF[context.integer]->clone();
3652 }
3653
3654 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3655 _TUFF[n] = t;
3656 _TUFID[n] = new UnicodeString(ID);
3657 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3658 }
3659
3660 static void _TUFUnreg(int32_t n) {
3661 if (_TUFF[n] != NULL) {
3662 Transliterator::unregister(*_TUFID[n]);
3663 delete _TUFF[n];
3664 delete _TUFID[n];
3665 }
3666 }
3667
3668 // END TestUserFunction support factory
3669 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3670
3671 /**
3672 * Test that user-registered transliterators can be used under function
3673 * syntax.
3674 */
3675 void TransliteratorTest::TestUserFunction() {
3676
3677 Transliterator* t;
3678 UParseError pe;
3679 UErrorCode ec = U_ZERO_ERROR;
3680
3681 // Setup our factory
3682 int32_t i;
3683 for (i=0; i<4; ++i) {
3684 _TUFF[i] = NULL;
3685 }
3686
3687 // There's no need to register inverses if we don't use them
3688 t = Transliterator::createFromRules("gif",
3689 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
3690 UTRANS_FORWARD, pe, ec);
3691 if (t == NULL || U_FAILURE(ec)) {
3692 errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3693 return;
3694 }
3695 _TUFReg("Any-gif", t, 0);
3696
3697 t = Transliterator::createFromRules("RemoveCurly",
3698 "[\\{\\}] > ; '\\N' > ;",
3699 UTRANS_FORWARD, pe, ec);
3700 if (t == NULL || U_FAILURE(ec)) {
3701 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3702 goto FAIL;
3703 }
3704 expect(*t, "\\N{name}", "name");
3705 _TUFReg("Any-RemoveCurly", t, 1);
3706
3707 logln("Trying &hex");
3708 t = Transliterator::createFromRules("hex2",
3709 "(.) > &hex($1);",
3710 UTRANS_FORWARD, pe, ec);
3711 if (t == NULL || U_FAILURE(ec)) {
3712 errln("FAIL: createFromRules");
3713 goto FAIL;
3714 }
3715 logln("Registering");
3716 _TUFReg("Any-hex2", t, 2);
3717 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3718 if (t == NULL || U_FAILURE(ec)) {
3719 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3720 goto FAIL;
3721 }
3722 expect(*t, "abc", "\\u0061\\u0062\\u0063");
3723 delete t;
3724
3725 logln("Trying &gif");
3726 t = Transliterator::createFromRules("gif2",
3727 "(.) > &Gif(&Hex2($1));",
3728 UTRANS_FORWARD, pe, ec);
3729 if (t == NULL || U_FAILURE(ec)) {
3730 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3731 goto FAIL;
3732 }
3733 logln("Registering");
3734 _TUFReg("Any-gif2", t, 3);
3735 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3736 if (t == NULL || U_FAILURE(ec)) {
3737 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3738 goto FAIL;
3739 }
3740 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3741 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3742 delete t;
3743
3744 // Test that filters are allowed after &
3745 t = Transliterator::createFromRules("test",
3746 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3747 UTRANS_FORWARD, pe, ec);
3748 if (t == NULL || U_FAILURE(ec)) {
3749 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3750 goto FAIL;
3751 }
3752 expect(*t, "abc",
3753 "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
3754 delete t;
3755
3756 FAIL:
3757 for (i=0; i<4; ++i) {
3758 _TUFUnreg(i);
3759 }
3760 }
3761
3762 /**
3763 * Test the Any-X transliterators.
3764 */
3765 void TransliteratorTest::TestAnyX(void) {
3766 UParseError parseError;
3767 UErrorCode status = U_ZERO_ERROR;
3768 Transliterator* anyLatin =
3769 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3770 if (anyLatin==0) {
3771 errln("FAIL: createInstance returned NULL");
3772 delete anyLatin;
3773 return;
3774 }
3775
3776 expect(*anyLatin,
3777 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3778 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3779
3780 delete anyLatin;
3781 }
3782
3783 /**
3784 * Test the source and target set API. These are only implemented
3785 * for RBT and CompoundTransliterator at this time.
3786 */
3787 void TransliteratorTest::TestSourceTargetSet() {
3788 UErrorCode ec = U_ZERO_ERROR;
3789
3790 // Rules
3791 const char* r =
3792 "a > b; "
3793 "r [x{lu}] > q;";
3794
3795 // Expected source
3796 UnicodeSet expSrc("[arx{lu}]", ec);
3797
3798 // Expected target
3799 UnicodeSet expTrg("[bq]", ec);
3800
3801 UParseError pe;
3802 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3803
3804 if (U_FAILURE(ec)) {
3805 delete t;
3806 errln("FAIL: Couldn't set up test");
3807 return;
3808 }
3809
3810 UnicodeSet src; t->getSourceSet(src);
3811 UnicodeSet trg; t->getTargetSet(trg);
3812
3813 if (src == expSrc && trg == expTrg) {
3814 UnicodeString a, b;
3815 logln((UnicodeString)"Ok: " +
3816 r + " => source = " + src.toPattern(a, TRUE) +
3817 ", target = " + trg.toPattern(b, TRUE));
3818 } else {
3819 UnicodeString a, b, c, d;
3820 errln((UnicodeString)"FAIL: " +
3821 r + " => source = " + src.toPattern(a, TRUE) +
3822 ", expected " + expSrc.toPattern(b, TRUE) +
3823 "; target = " + trg.toPattern(c, TRUE) +
3824 ", expected " + expTrg.toPattern(d, TRUE));
3825 }
3826
3827 delete t;
3828 }
3829
3830 /**
3831 * Test handling of rule whitespace, for both RBT and UnicodeSet.
3832 */
3833 void TransliteratorTest::TestRuleWhitespace() {
3834 // Rules
3835 const char* r = "a > \\u200E b;";
3836
3837 UErrorCode ec = U_ZERO_ERROR;
3838 UParseError pe;
3839 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3840
3841 if (U_FAILURE(ec)) {
3842 errln("FAIL: Couldn't set up test");
3843 } else {
3844 expect(*t, "a", "b");
3845 }
3846 delete t;
3847
3848 // UnicodeSet
3849 ec = U_ZERO_ERROR;
3850 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3851
3852 if (U_FAILURE(ec)) {
3853 errln("FAIL: Couldn't set up test");
3854 } else {
3855 if (set.contains(0x200E)) {
3856 errln("FAIL: U+200E not being ignored by UnicodeSet");
3857 }
3858 }
3859 }
3860 //======================================================================
3861 // this method is in TestUScript.java
3862 //======================================================================
3863 void TransliteratorTest::TestAllCodepoints(){
3864 UScriptCode code= USCRIPT_INVALID_CODE;
3865 char id[256]={'\0'};
3866 char abbr[256]={'\0'};
3867 char newId[256]={'\0'};
3868 char newAbbrId[256]={'\0'};
3869 char oldId[256]={'\0'};
3870 char oldAbbrId[256]={'\0'};
3871
3872 UErrorCode status =U_ZERO_ERROR;
3873 UParseError pe;
3874
3875 for(uint32_t i = 0; i<=0x10ffff; i++){
3876 code = uscript_getScript(i,&status);
3877 if(code == USCRIPT_INVALID_CODE){
3878 errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
3879 }
3880 const char* myId = uscript_getName(code);
3881 if(!myId) {
3882 errln("Valid script code returned NULL name. Check your data!");
3883 return;
3884 }
3885 uprv_strcpy(id,myId);
3886 uprv_strcpy(abbr,uscript_getShortName(code));
3887
3888 uprv_strcpy(newId,"[:");
3889 uprv_strcat(newId,id);
3890 uprv_strcat(newId,":];NFD");
3891
3892 uprv_strcpy(newAbbrId,"[:");
3893 uprv_strcat(newAbbrId,abbr);
3894 uprv_strcat(newAbbrId,":];NFD");
3895
3896 if(uprv_strcmp(newId,oldId)!=0){
3897 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
3898 if(t==NULL || U_FAILURE(status)){
3899 errln((UnicodeString)"FAIL: Could not create " + id);
3900 }
3901 delete t;
3902 }
3903 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
3904 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
3905 if(t==NULL || U_FAILURE(status)){
3906 errln((UnicodeString)"FAIL: Could not create " + id);
3907 }
3908 delete t;
3909 }
3910 uprv_strcpy(oldId,newId);
3911 uprv_strcpy(oldAbbrId, newAbbrId);
3912
3913 }
3914
3915 }
3916
/*
 * Boilerplate check for a transliterator obtained by ID.
 *
 * Creates a forward instance for `id` and reports a failure when the
 * object's dynamic class ID differs from the static class ID of `cls`.
 * `id` must be a string literal because it is pasted onto the failure
 * message by literal concatenation.  The instance is deleted on every path.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
    UErrorCode idStatus = U_ZERO_ERROR; \
    Transliterator* idTrans = Transliterator::createInstance(id, UTRANS_FORWARD, idStatus); \
    if (U_SUCCESS(idStatus)) { \
        if (idTrans->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* Self-assignment (coverage for the assignment operator) can't be done here. */ \
    } else { \
        errln("FAIL: Couldn't create " id); \
    } \
    delete idTrans; \
}
3930
/*
 * Boilerplate check for a transliterator built from rules.
 *
 * Compiles `rule` into a forward transliterator named "_" and reports a
 * failure when the object's dynamic class ID differs from the static class
 * ID of `cls`.  `rule` must be a string literal because it is pasted onto
 * the failure message by literal concatenation.  The instance is deleted on
 * every path.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
    UErrorCode ruleStatus = U_ZERO_ERROR; \
    UParseError ruleParseError; \
    Transliterator* ruleTrans = \
        Transliterator::createFromRules("_", rule, UTRANS_FORWARD, ruleParseError, ruleStatus); \
    if (U_SUCCESS(ruleStatus)) { \
        if (ruleTrans->getDynamicClassID() != cls::getStaticClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
        /* Self-assignment (coverage for the assignment operator) can't be done here. */ \
    } else { \
        errln("FAIL: Couldn't create " rule); \
    } \
    delete ruleTrans; \
}
3945
3946 void TransliteratorTest::TestBoilerplate() {
3947 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
3948 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
3949 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
3950 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
3951 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
3952 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
3953 TEST_TRANSLIT_ID("Null", NullTransliterator);
3954 TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
3955 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
3956 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
3957 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
3958 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
3959 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
3960 }
3961
3962 void TransliteratorTest::TestAlternateSyntax() {
3963 // U+2206 == &
3964 // U+2190 == <
3965 // U+2192 == >
3966 // U+2194 == <>
3967 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
3968 "abc",
3969 "xbz");
3970 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
3971 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
3972 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3973 }
3974
3975 //======================================================================
3976 // Support methods
3977 //======================================================================
3978 void TransliteratorTest::expectT(const UnicodeString& id,
3979 const UnicodeString& source,
3980 const UnicodeString& expectedResult) {
3981 UErrorCode ec = U_ZERO_ERROR;
3982 UParseError pe;
3983 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3984 if (U_FAILURE(ec)) {
3985 errln((UnicodeString)"FAIL: Could not create " + id);
3986 delete t;
3987 return;
3988 }
3989 expect(*t, source, expectedResult);
3990 delete t;
3991 }
3992
3993 void TransliteratorTest::expect(const UnicodeString& rules,
3994 const UnicodeString& source,
3995 const UnicodeString& expectedResult,
3996 UTransPosition *pos) {
3997 UErrorCode status = U_ZERO_ERROR;
3998 Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status);
3999 if (U_FAILURE(status)) {
4000 errln("FAIL: Transliterator constructor failed");
4001 } else {
4002 expect(*t, source, expectedResult, pos);
4003 }
4004 delete t;
4005 }
4006
4007 void TransliteratorTest::expect(const Transliterator& t,
4008 const UnicodeString& source,
4009 const UnicodeString& expectedResult,
4010 const Transliterator& reverseTransliterator) {
4011 expect(t, source, expectedResult);
4012 expect(reverseTransliterator, expectedResult, source);
4013 }
4014
4015 void TransliteratorTest::expect(const Transliterator& t,
4016 const UnicodeString& source,
4017 const UnicodeString& expectedResult,
4018 UTransPosition *pos) {
4019 if (pos == 0) {
4020 UnicodeString result(source);
4021 t.transliterate(result);
4022 expectAux(t.getID() + ":String", source, result, expectedResult);
4023 }
4024
4025 UTransPosition index={0, 0, 0, 0};
4026 if (pos != 0) {
4027 index = *pos;
4028 }
4029
4030 UnicodeString rsource(source);
4031 if (pos == 0) {
4032 t.transliterate(rsource);
4033 } else {
4034 // Do it all at once -- below we do it incrementally
4035 t.finishTransliteration(rsource, *pos);
4036 }
4037 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4038
4039 // Test keyboard (incremental) transliteration -- this result
4040 // must be the same after we finalize (see below).
4041 UnicodeString log;
4042 rsource.remove();
4043 if (pos != 0) {
4044 rsource = source;
4045 formatInput(log, rsource, index);
4046 log.append(" -> ");
4047 UErrorCode status = U_ZERO_ERROR;
4048 t.transliterate(rsource, index, status);
4049 formatInput(log, rsource, index);
4050 } else {
4051 for (int32_t i=0; i<source.length(); ++i) {
4052 if (i != 0) {
4053 log.append(" + ");
4054 }
4055 log.append(source.charAt(i)).append(" -> ");
4056 UErrorCode status = U_ZERO_ERROR;
4057 t.transliterate(rsource, index, source.charAt(i), status);
4058 formatInput(log, rsource, index);
4059 }
4060 }
4061
4062 // As a final step in keyboard transliteration, we must call
4063 // transliterate to finish off any pending partial matches that
4064 // were waiting for more input.
4065 t.finishTransliteration(rsource, index);
4066 log.append(" => ").append(rsource);
4067
4068 expectAux(t.getID() + ":Keyboard", log,
4069 rsource == expectedResult,
4070 expectedResult);
4071 }
4072
4073
4074 /**
4075 * @param appendTo result is appended to this param.
4076 * @param input the string being transliterated
4077 * @param pos the index struct
4078 */
4079 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4080 const UnicodeString& input,
4081 const UTransPosition& pos) {
4082 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4083 // the {} indicate the context start and limit, and the ||
4084 // indicate the start and limit.
4085 if (0 <= pos.contextStart &&
4086 pos.contextStart <= pos.start &&
4087 pos.start <= pos.limit &&
4088 pos.limit <= pos.contextLimit &&
4089 pos.contextLimit <= input.length()) {
4090
4091 UnicodeString a, b, c, d, e;
4092 input.extractBetween(0, pos.contextStart, a);
4093 input.extractBetween(pos.contextStart, pos.start, b);
4094 input.extractBetween(pos.start, pos.limit, c);
4095 input.extractBetween(pos.limit, pos.contextLimit, d);
4096 input.extractBetween(pos.contextLimit, input.length(), e);
4097 appendTo.append(a).append((UChar)123/*{*/).append(b).
4098 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4099 append((UChar)125/*}*/).append(e);
4100 } else {
4101 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4102 pos.contextStart + ", s=" + pos.start + ", l=" +
4103 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4104 input);
4105 }
4106 return appendTo;
4107 }
4108
4109 void TransliteratorTest::expectAux(const UnicodeString& tag,
4110 const UnicodeString& source,
4111 const UnicodeString& result,
4112 const UnicodeString& expectedResult) {
4113 expectAux(tag, source + " -> " + result,
4114 result == expectedResult,
4115 expectedResult);
4116 }
4117
4118 void TransliteratorTest::expectAux(const UnicodeString& tag,
4119 const UnicodeString& summary, UBool pass,
4120 const UnicodeString& expectedResult) {
4121 if (pass) {
4122 logln(UnicodeString("(")+tag+") " + prettify(summary));
4123 } else {
4124 errln(UnicodeString("FAIL: (")+tag+") "
4125 + prettify(summary)
4126 + ", expected " + prettify(expectedResult));
4127 }
4128 }
4129
4130 #endif /* #if !UCONFIG_NO_TRANSLITERATION */