]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/transtst.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / intltest / transtst.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4**********************************************************************
2ca993e8 5* Copyright (C) 1999-2016, International Business Machines
b75a7d8f
A
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* Date Name Description
9* 11/10/99 aliu Creation.
10**********************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_TRANSLITERATION
16
17#include "transtst.h"
18#include "unicode/locid.h"
19#include "unicode/dtfmtsym.h"
20#include "unicode/normlzr.h"
21#include "unicode/translit.h"
b75a7d8f
A
22#include "unicode/uchar.h"
23#include "unicode/unifilt.h"
24#include "unicode/uniset.h"
b75a7d8f
A
25#include "unicode/ustring.h"
26#include "unicode/usetiter.h"
27#include "unicode/uscript.h"
4388f060 28#include "unicode/utf16.h"
b75a7d8f
A
29#include "cpdtrans.h"
30#include "nultrans.h"
31#include "rbt.h"
46f4442e 32#include "rbt_pars.h"
b75a7d8f
A
33#include "anytrans.h"
34#include "esctrn.h"
35#include "name2uni.h"
36#include "nortrans.h"
37#include "remtrans.h"
38#include "titletrn.h"
39#include "tolowtrn.h"
40#include "toupptrn.h"
41#include "unesctrn.h"
42#include "uni2name.h"
43#include "cstring.h"
374ca955
A
44#include "cmemory.h"
45#include <stdio.h>
b75a7d8f
A
46
47/***********************************************************************
48
49 HOW TO USE THIS TEST FILE
50 -or-
51 How I developed on two platforms
52 without losing (too much of) my mind
53
54
551. Add new tests by copying/pasting/changing existing tests. On Java,
56 any public void method named Test...() taking no parameters becomes
57 a test. On C++, you need to modify the header and add a line to
58 the runIndexedTest() dispatch method.
59
602. Make liberal use of the expect() method; it is your friend.
61
623. The tests in this file exactly match those in a sister file on the
63 other side. The two files are:
64
65 icu4j: src/com/ibm/test/translit/TransliteratorTest.java
66 icu4c: source/test/intltest/transtst.cpp
67
68 ==> THIS IS THE IMPORTANT PART <==
69
70 When you add a test in this file, add it in TransliteratorTest.java
71 too. Give it the same name and put it in the same relative place.
72 This makes maintenance a lot simpler for any poor soul who ends up
73 trying to synchronize the tests between icu4j and icu4c.
74
754. If you MUST enter a test that is NOT paralleled in the sister file,
76 then add it in the special non-mirrored section. These are
77 labeled
78
79 "icu4j ONLY"
80
81 or
82
83 "icu4c ONLY"
84
85 Make sure you document the reason the test is here and not there.
86
87
88Thank you.
89The Management
90***********************************************************************/
91
92// Define character constants thusly to be EBCDIC-friendly
93enum {
94 LEFT_BRACE=((UChar)0x007B), /*{*/
95 PIPE =((UChar)0x007C), /*|*/
96 ZERO =((UChar)0x0030), /*0*/
97 UPPER_A =((UChar)0x0041) /*A*/
98};
99
100TransliteratorTest::TransliteratorTest()
101: DESERET_DEE((UChar32)0x10414),
102 DESERET_dee((UChar32)0x1043C)
103{
104}
105
374ca955
A
106TransliteratorTest::~TransliteratorTest() {}
107
b75a7d8f
A
108void
109TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
110 const char* &name, char* /*par*/) {
111 switch (index) {
112 TESTCASE(0,TestInstantiation);
113 TESTCASE(1,TestSimpleRules);
114 TESTCASE(2,TestRuleBasedInverse);
115 TESTCASE(3,TestKeyboard);
116 TESTCASE(4,TestKeyboard2);
117 TESTCASE(5,TestKeyboard3);
118 TESTCASE(6,TestArabic);
119 TESTCASE(7,TestCompoundKana);
120 TESTCASE(8,TestCompoundHex);
121 TESTCASE(9,TestFiltering);
122 TESTCASE(10,TestInlineSet);
123 TESTCASE(11,TestPatternQuoting);
124 TESTCASE(12,TestJ277);
125 TESTCASE(13,TestJ243);
126 TESTCASE(14,TestJ329);
127 TESTCASE(15,TestSegments);
128 TESTCASE(16,TestCursorOffset);
129 TESTCASE(17,TestArbitraryVariableValues);
130 TESTCASE(18,TestPositionHandling);
131 TESTCASE(19,TestHiraganaKatakana);
132 TESTCASE(20,TestCopyJ476);
133 TESTCASE(21,TestAnchors);
134 TESTCASE(22,TestInterIndic);
135 TESTCASE(23,TestFilterIDs);
136 TESTCASE(24,TestCaseMap);
137 TESTCASE(25,TestNameMap);
138 TESTCASE(26,TestLiberalizedID);
139 TESTCASE(27,TestCreateInstance);
140 TESTCASE(28,TestNormalizationTransliterator);
141 TESTCASE(29,TestCompoundRBT);
142 TESTCASE(30,TestCompoundFilter);
143 TESTCASE(31,TestRemove);
144 TESTCASE(32,TestToRules);
145 TESTCASE(33,TestContext);
146 TESTCASE(34,TestSupplemental);
147 TESTCASE(35,TestQuantifier);
148 TESTCASE(36,TestSTV);
149 TESTCASE(37,TestCompoundInverse);
150 TESTCASE(38,TestNFDChainRBT);
151 TESTCASE(39,TestNullInverse);
152 TESTCASE(40,TestAliasInverseID);
153 TESTCASE(41,TestCompoundInverseID);
154 TESTCASE(42,TestUndefinedVariable);
155 TESTCASE(43,TestEmptyContext);
156 TESTCASE(44,TestCompoundFilterID);
157 TESTCASE(45,TestPropertySet);
158 TESTCASE(46,TestNewEngine);
159 TESTCASE(47,TestQuantifiedSegment);
160 TESTCASE(48,TestDevanagariLatinRT);
161 TESTCASE(49,TestTeluguLatinRT);
162 TESTCASE(50,TestCompoundLatinRT);
163 TESTCASE(51,TestSanskritLatinRT);
164 TESTCASE(52,TestLocaleInstantiation);
165 TESTCASE(53,TestTitleAccents);
166 TESTCASE(54,TestLocaleResource);
167 TESTCASE(55,TestParseError);
168 TESTCASE(56,TestOutputSet);
169 TESTCASE(57,TestVariableRange);
170 TESTCASE(58,TestInvalidPostContext);
171 TESTCASE(59,TestIDForms);
172 TESTCASE(60,TestToRulesMark);
173 TESTCASE(61,TestEscape);
174 TESTCASE(62,TestAnchorMasking);
175 TESTCASE(63,TestDisplayName);
176 TESTCASE(64,TestSpecialCases);
729e4ab9 177#if !UCONFIG_NO_FILE_IO
b75a7d8f 178 TESTCASE(65,TestIncrementalProgress);
729e4ab9 179#endif
b75a7d8f
A
180 TESTCASE(66,TestSurrogateCasing);
181 TESTCASE(67,TestFunction);
182 TESTCASE(68,TestInvalidBackRef);
183 TESTCASE(69,TestMulticharStringSet);
184 TESTCASE(70,TestUserFunction);
185 TESTCASE(71,TestAnyX);
186 TESTCASE(72,TestSourceTargetSet);
187 TESTCASE(73,TestGurmukhiDevanagari);
4388f060 188 TESTCASE(74,TestPatternWhiteSpace);
b75a7d8f
A
189 TESTCASE(75,TestAllCodepoints);
190 TESTCASE(76,TestBoilerplate);
191 TESTCASE(77,TestAlternateSyntax);
73c04bcf
A
192 TESTCASE(78,TestBeginEnd);
193 TESTCASE(79,TestBeginEndToRules);
194 TESTCASE(80,TestRegisterAlias);
46f4442e
A
195 TESTCASE(81,TestRuleStripping);
196 TESTCASE(82,TestHalfwidthFullwidth);
197 TESTCASE(83,TestThai);
729e4ab9 198 TESTCASE(84,TestAny);
340931cb 199 TESTCASE(85,TestHansHant);
b75a7d8f
A
200 default: name = ""; break;
201 }
202}
203
204/**
205 * Make sure every system transliterator can be instantiated.
206 *
207 * ALSO test that the result of toRules() for each rule is a valid
208 * rule. Do this here so we don't have to have another test that
209 * instantiates everything as well.
210 */
211void TransliteratorTest::TestInstantiation() {
374ca955
A
212 UErrorCode ec = U_ZERO_ERROR;
213 StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
214 assertSuccess("getAvailableIDs()", ec);
215 assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
b75a7d8f 216 int32_t n = Transliterator::countAvailableIDs();
374ca955
A
217 assertTrue("getAvailableIDs().count()==countAvailableIDs()",
218 avail->count(ec) == n);
219 assertSuccess("count()", ec);
b75a7d8f
A
220 UnicodeString name;
221 for (int32_t i=0; i<n; ++i) {
374ca955
A
222 const UnicodeString& id = *avail->snext(ec);
223 if (!assertSuccess("snext()", ec) ||
224 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
225 break;
226 }
227 UnicodeString id2 = Transliterator::getAvailableID(i);
b75a7d8f
A
228 if (id.length() < 1) {
229 errln(UnicodeString("FAIL: getAvailableID(") +
230 i + ") returned empty string");
231 continue;
232 }
374ca955
A
233 if (id != id2) {
234 errln(UnicodeString("FAIL: getAvailableID(") +
235 i + ") != getAvailableIDs().snext()");
236 continue;
237 }
b75a7d8f
A
238 UParseError parseError;
239 UErrorCode status = U_ZERO_ERROR;
240 Transliterator* t = Transliterator::createInstance(id,
241 UTRANS_FORWARD, parseError,status);
242 name.truncate(0);
243 Transliterator::getDisplayName(id, name);
244 if (t == 0) {
729e4ab9
A
245#if UCONFIG_NO_BREAK_ITERATION
246 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
f3c0d7a5
A
247 if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
248 id.compare((UnicodeString)"Thai-Latin") != 0)
729e4ab9
A
249#endif
250 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
251 /*", parse error " + parseError.code +*/
252 ", line " + parseError.line +
253 ", offset " + parseError.offset +
254 ", pre-context " + prettify(parseError.preContext, TRUE) +
255 ", post-context " +prettify(parseError.postContext,TRUE) +
256 ", Error: " + u_errorName(status));
257 // When createInstance fails, it deletes the failing
258 // entry from the available ID list. We detect this
259 // here by looking for a change in countAvailableIDs.
b75a7d8f
A
260 int32_t nn = Transliterator::countAvailableIDs();
261 if (nn == (n - 1)) {
262 n = nn;
263 --i; // Compensate for deleted entry
264 }
265 } else {
266 logln(UnicodeString("OK: ") + name + " (" + id + ")");
267
268 // Now test toRules
269 UnicodeString rules;
270 t->toRules(rules, TRUE);
271 Transliterator *u = Transliterator::createFromRules("x",
272 rules, UTRANS_FORWARD, parseError,status);
273 if (u == 0) {
274 errln(UnicodeString("FAIL: ") + id +
275 ".createFromRules() => bad rules" +
276 /*", parse error " + parseError.code +*/
277 ", line " + parseError.line +
278 ", offset " + parseError.offset +
279 ", context " + prettify(parseError.preContext, TRUE) +
280 ", rules: " + prettify(rules, TRUE));
281 } else {
282 delete u;
283 }
284 delete t;
285 }
286 }
374ca955
A
287 assertTrue("snext()==NULL", avail->snext(ec)==NULL);
288 assertSuccess("snext()", ec);
289 delete avail;
b75a7d8f
A
290
291 // Now test the failure path
292 UParseError parseError;
293 UErrorCode status = U_ZERO_ERROR;
294 UnicodeString id("<Not a valid Transliterator ID>");
295 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
296 if (t != 0) {
297 errln("FAIL: " + id + " returned a transliterator");
298 delete t;
299 } else {
300 logln("OK: Bogus ID handled properly");
301 }
302}
303
304void TransliteratorTest::TestSimpleRules(void) {
305 /* Example: rules 1. ab>x|y
306 * 2. yc>z
307 *
308 * []|eabcd start - no match, copy e to tranlated buffer
309 * [e]|abcd match rule 1 - copy output & adjust cursor
310 * [ex|y]cd match rule 2 - copy output & adjust cursor
311 * [exz]|d no match, copy d to transliterated buffer
312 * [exzd]| done
313 */
314 expect(UnicodeString("ab>x|y;", "") +
315 "yc>z",
316 "eabcd", "exzd");
317
318 /* Another set of rules:
319 * 1. ab>x|yzacw
320 * 2. za>q
321 * 3. qc>r
322 * 4. cw>n
323 *
324 * []|ab Rule 1
325 * [x|yzacw] No match
326 * [xy|zacw] Rule 2
327 * [xyq|cw] Rule 4
328 * [xyqn]| Done
329 */
330 expect(UnicodeString("ab>x|yzacw;") +
331 "za>q;" +
332 "qc>r;" +
333 "cw>n",
334 "ab", "xyqn");
335
336 /* Test categories
337 */
338 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
339 UParseError parseError;
340 Transliterator *t = Transliterator::createFromRules(
b75a7d8f
A
341 "<ID>",
342 UnicodeString("$dummy=").append((UChar)0xE100) +
343 UnicodeString(";"
344 "$vowel=[aeiouAEIOU];"
345 "$lu=[:Lu:];"
346 "$vowel } $lu > '!';"
347 "$vowel > '&';"
348 "'!' { $lu > '^';"
349 "$lu > '*';"
350 "a > ERROR", ""),
46f4442e 351 UTRANS_FORWARD, parseError,
b75a7d8f
A
352 status);
353 if (U_FAILURE(status)) {
729e4ab9 354 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
b75a7d8f
A
355 return;
356 }
46f4442e
A
357 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
358 delete t;
b75a7d8f
A
359}
360
361/**
362 * Test inline set syntax and set variable syntax.
363 */
364void TransliteratorTest::TestInlineSet(void) {
365 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
366 expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
367
368 expect(UnicodeString(
369 "$digit = [0-9];"
370 "$alpha = [a-zA-Z];"
371 "$alphanumeric = [$digit $alpha];" // ***
372 "$special = [^$alphanumeric];" // ***
373 "$alphanumeric > '-';"
374 "$special > '*';", ""),
375
376 "thx-1138", "---*----");
377}
378
379/**
380 * Create some inverses and confirm that they work. We have to be
381 * careful how we do this, since the inverses will not be true
382 * inverses -- we can't throw any random string at the composition
383 * of the transliterators and expect the identity function. F x
384 * F' != I. However, if we are careful about the input, we will
385 * get the expected results.
386 */
387void TransliteratorTest::TestRuleBasedInverse(void) {
388 UnicodeString RULES =
389 UnicodeString("abc>zyx;") +
390 "ab>yz;" +
391 "bc>zx;" +
392 "ca>xy;" +
393 "a>x;" +
394 "b>y;" +
395 "c>z;" +
396
397 "abc<zyx;" +
398 "ab<yz;" +
399 "bc<zx;" +
400 "ca<xy;" +
401 "a<x;" +
402 "b<y;" +
403 "c<z;" +
404
405 "";
406
407 const char* DATA[] = {
408 // Careful here -- random strings will not work. If we keep
409 // the left side to the domain and the right side to the range
410 // we will be okay though (left, abc; right xyz).
411 "a", "x",
412 "abcacab", "zyxxxyy",
413 "caccb", "xyzzy",
414 };
415
2ca993e8 416 int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
417
418 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
419 UParseError parseError;
420 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
421 UTRANS_FORWARD, parseError, status);
422 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
423 UTRANS_REVERSE, parseError, status);
b75a7d8f
A
424 if (U_FAILURE(status)) {
425 errln("FAIL: RBT constructor failed");
426 return;
427 }
428 for (int32_t i=0; i<DATA_length; i+=2) {
46f4442e
A
429 expect(*fwd, DATA[i], DATA[i+1]);
430 expect(*rev, DATA[i+1], DATA[i]);
b75a7d8f 431 }
46f4442e
A
432 delete fwd;
433 delete rev;
b75a7d8f
A
434}
435
436/**
437 * Basic test of keyboard.
438 */
439void TransliteratorTest::TestKeyboard(void) {
46f4442e 440 UParseError parseError;
b75a7d8f 441 UErrorCode status = U_ZERO_ERROR;
46f4442e 442 Transliterator *t = Transliterator::createFromRules("<ID>",
b75a7d8f
A
443 UnicodeString("psch>Y;")
444 +"ps>y;"
445 +"ch>x;"
446 +"a>A;",
46f4442e 447 UTRANS_FORWARD, parseError,
b75a7d8f
A
448 status);
449 if (U_FAILURE(status)) {
450 errln("FAIL: RBT constructor failed");
451 return;
452 }
453 const char* DATA[] = {
454 // insertion, buffer
455 "a", "A",
456 "p", "Ap",
457 "s", "Aps",
458 "c", "Apsc",
459 "a", "AycA",
460 "psch", "AycAY",
461 0, "AycAY", // null means finishKeyboardTransliteration
462 };
463
2ca993e8 464 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
46f4442e 465 delete t;
b75a7d8f
A
466}
467
468/**
469 * Basic test of keyboard with cursor.
470 */
471void TransliteratorTest::TestKeyboard2(void) {
46f4442e 472 UParseError parseError;
b75a7d8f 473 UErrorCode status = U_ZERO_ERROR;
46f4442e 474 Transliterator *t = Transliterator::createFromRules("<ID>",
b75a7d8f
A
475 UnicodeString("ych>Y;")
476 +"ps>|y;"
477 +"ch>x;"
478 +"a>A;",
46f4442e 479 UTRANS_FORWARD, parseError,
b75a7d8f
A
480 status);
481 if (U_FAILURE(status)) {
482 errln("FAIL: RBT constructor failed");
483 return;
484 }
485 const char* DATA[] = {
486 // insertion, buffer
487 "a", "A",
488 "p", "Ap",
489 "s", "Aps", // modified for rollback - "Ay",
490 "c", "Apsc", // modified for rollback - "Ayc",
491 "a", "AycA",
492 "p", "AycAp",
493 "s", "AycAps", // modified for rollback - "AycAy",
494 "c", "AycApsc", // modified for rollback - "AycAyc",
495 "h", "AycAY",
496 0, "AycAY", // null means finishKeyboardTransliteration
497 };
498
2ca993e8 499 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
46f4442e 500 delete t;
b75a7d8f
A
501}
502
503/**
504 * Test keyboard transliteration with back-replacement.
505 */
506void TransliteratorTest::TestKeyboard3(void) {
507 // We want th>z but t>y. Furthermore, during keyboard
508 // transliteration we want t>y then yh>z if t, then h are
509 // typed.
510 UnicodeString RULES("t>|y;"
511 "yh>z;");
512
513 const char* DATA[] = {
514 // Column 1: characters to add to buffer (as if typed)
515 // Column 2: expected appearance of buffer after
516 // keyboard xliteration.
517 "a", "a",
518 "b", "ab",
519 "t", "abt", // modified for rollback - "aby",
520 "c", "abyc",
521 "t", "abyct", // modified for rollback - "abycy",
522 "h", "abycz",
523 0, "abycz", // null means finishKeyboardTransliteration
524 };
525
46f4442e 526 UParseError parseError;
b75a7d8f 527 UErrorCode status = U_ZERO_ERROR;
46f4442e 528 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
b75a7d8f
A
529 if (U_FAILURE(status)) {
530 errln("FAIL: RBT constructor failed");
531 return;
532 }
2ca993e8 533 keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
46f4442e 534 delete t;
b75a7d8f
A
535}
536
537void TransliteratorTest::keyboardAux(const Transliterator& t,
538 const char* DATA[], int32_t DATA_length) {
539 UErrorCode status = U_ZERO_ERROR;
540 UTransPosition index={0, 0, 0, 0};
541 UnicodeString s;
542 for (int32_t i=0; i<DATA_length; i+=2) {
543 UnicodeString log;
544 if (DATA[i] != 0) {
545 log = s + " + "
546 + DATA[i]
547 + " -> ";
548 t.transliterate(s, index, DATA[i], status);
549 } else {
550 log = s + " => ";
551 t.finishTransliteration(s, index);
552 }
553 // Show the start index '{' and the cursor '|'
554 UnicodeString a, b, c;
555 s.extractBetween(0, index.contextStart, a);
556 s.extractBetween(index.contextStart, index.start, b);
557 s.extractBetween(index.start, s.length(), c);
558 log.append(a).
559 append((UChar)LEFT_BRACE).
560 append(b).
561 append((UChar)PIPE).
562 append(c);
563 if (s == DATA[i+1] && U_SUCCESS(status)) {
564 logln(log);
565 } else {
566 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
567 }
568 }
569}
570
571void TransliteratorTest::TestArabic(void) {
572// Test disabled for 2.0 until new Arabic transliterator can be written.
573// /*
574// const char* DATA[] = {
575// "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
576// "\u0627\u0644\u0644\u063a\u0629\u0020"+
577// "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
578// "\u0628\u0628\u0646\u0638\u0645\u0020"+
579// "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
580// "\u062c\u0645\u064a\u0644\u0629",
581// };
582// */
583//
584// UChar ar_raw[] = {
585// 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
586// 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
587// 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
588// 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
589// 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
590// 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
591// };
592// UnicodeString ar(ar_raw);
593// UErrorCode status=U_ZERO_ERROR;
594// UParseError parseError;
595// Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
596// if (t == 0) {
597// errln("FAIL: createInstance failed");
598// return;
599// }
600// expect(*t, "Arabic", ar);
601// delete t;
602}
603
604/**
605 * Compose the Kana transliterator forward and reverse and try
606 * some strings that should come out unchanged.
607 */
608void TransliteratorTest::TestCompoundKana(void) {
609 UParseError parseError;
610 UErrorCode status = U_ZERO_ERROR;
611 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
612 if (t == 0) {
729e4ab9 613 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
b75a7d8f
A
614 } else {
615 expect(*t, "aaaaa", "aaaaa");
616 delete t;
617 }
618}
619
620/**
621 * Compose the hex transliterators forward and reverse.
622 */
623void TransliteratorTest::TestCompoundHex(void) {
624 UParseError parseError;
625 UErrorCode status = U_ZERO_ERROR;
626 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
627 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
628 Transliterator* transab[] = { a, b };
629 Transliterator* transba[] = { b, a };
630 if (a == 0 || b == 0) {
631 errln("FAIL: construction failed");
632 delete a;
633 delete b;
634 return;
635 }
636 // Do some basic tests of a
637 expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
638 // Do some basic tests of b
639 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
640
641 Transliterator* ab = new CompoundTransliterator(transab, 2);
642 UnicodeString s("abcde", "");
643 expect(*ab, s, s);
644
645 UnicodeString str(s);
646 a->transliterate(str);
647 Transliterator* ba = new CompoundTransliterator(transba, 2);
648 expect(*ba, str, str);
649
650 delete ab;
651 delete ba;
652 delete a;
653 delete b;
654}
655
656int gTestFilterClassID = 0;
657/**
658 * Used by TestFiltering().
659 */
660class TestFilter : public UnicodeFilter {
340931cb 661 virtual TestFilter* clone() const {
b75a7d8f
A
662 return new TestFilter(*this);
663 }
664 virtual UBool contains(UChar32 c) const {
665 return c != (UChar)0x0063 /*c*/;
666 }
667 // Stubs
668 virtual UnicodeString& toPattern(UnicodeString& result,
669 UBool /*escapeUnprintable*/) const {
670 return result;
671 }
672 virtual UBool matchesIndexValue(uint8_t /*v*/) const {
673 return FALSE;
674 }
675 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
676public:
677 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
678};
679
680/**
681 * Do some basic tests of filtering.
682 */
683void TransliteratorTest::TestFiltering(void) {
684 UParseError parseError;
685 UErrorCode status = U_ZERO_ERROR;
686 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
687 if (hex == 0) {
688 errln("FAIL: createInstance(Any-Hex) failed");
689 return;
690 }
691 hex->adoptFilter(new TestFilter());
692 UnicodeString s("abcde");
693 hex->transliterate(s);
694 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
695 if (s == exp) {
696 logln(UnicodeString("Ok: \"") + exp + "\"");
697 } else {
698 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
699 }
73c04bcf
A
700
701 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
702 UnicodeFilter *f = hex->orphanFilter();
703 if (f == NULL){
704 errln("FAIL: orphanFilter() should get a UnicodeFilter");
705 } else {
706 delete f;
707 }
b75a7d8f
A
708 delete hex;
709}
710
711/**
712 * Test anchors
713 */
714void TransliteratorTest::TestAnchors(void) {
715 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
716 "aaa",
717 "012");
718 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
719 "aaa",
720 "012");
721 expect(UnicodeString("^ab > 01 ;"
722 " ab > |8 ;"
723 " b > k ;"
724 " 8x$ > 45 ;"
725 " 8x > 77 ;", ""),
726
727 "ababbabxabx",
728 "018k7745");
729 expect(UnicodeString("$s = [z$] ;"
730 "$s{ab > 01 ;"
731 " ab > |8 ;"
732 " b > k ;"
733 " 8x}$s > 45 ;"
734 " 8x > 77 ;", ""),
735
736 "abzababbabxzabxabx",
737 "01z018k45z01x45");
738}
739
740/**
741 * Test pattern quoting and escape mechanisms.
742 */
743void TransliteratorTest::TestPatternQuoting(void) {
744 // Array of 3n items
745 // Each item is <rules>, <input>, <expected output>
746 const UnicodeString DATA[] = {
747 UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
748 UnicodeString(UChar(0x4E01)),
749 "[male adult]"
750 };
751
752 for (int32_t i=0; i<3; i+=3) {
753 logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
46f4442e 754 UParseError parseError;
b75a7d8f 755 UErrorCode status = U_ZERO_ERROR;
46f4442e 756 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
b75a7d8f
A
757 if (U_FAILURE(status)) {
758 errln("RBT constructor failed");
759 } else {
46f4442e 760 expect(*t, DATA[i+1], DATA[i+2]);
b75a7d8f 761 }
46f4442e 762 delete t;
b75a7d8f
A
763 }
764}
765
766/**
767 * Regression test for bugs found in Greek transliteration.
768 */
769void TransliteratorTest::TestJ277(void) {
770 UErrorCode status = U_ZERO_ERROR;
771 UParseError parseError;
772 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
773 if (gl == NULL) {
729e4ab9 774 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
b75a7d8f
A
775 return;
776 }
777
778 UChar sigma = 0x3C3;
779 UChar upsilon = 0x3C5;
780 UChar nu = 0x3BD;
781// UChar PHI = 0x3A6;
782 UChar alpha = 0x3B1;
783// UChar omega = 0x3C9;
784// UChar omicron = 0x3BF;
785// UChar epsilon = 0x3B5;
786
787 // sigma upsilon nu -> syn
788 UnicodeString syn;
789 syn.append(sigma).append(upsilon).append(nu);
790 expect(*gl, syn, "syn");
791
792 // sigma alpha upsilon nu -> saun
793 UnicodeString sayn;
794 sayn.append(sigma).append(alpha).append(upsilon).append(nu);
795 expect(*gl, sayn, "saun");
796
797 // Again, using a smaller rule set
798 UnicodeString rules(
799 "$alpha = \\u03B1;"
800 "$nu = \\u03BD;"
801 "$sigma = \\u03C3;"
802 "$ypsilon = \\u03C5;"
803 "$vowel = [aeiouAEIOU$alpha$ypsilon];"
804 "s <> $sigma;"
805 "a <> $alpha;"
806 "u <> $vowel { $ypsilon;"
807 "y <> $ypsilon;"
808 "n <> $nu;",
809 "");
46f4442e 810 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
b75a7d8f 811 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
46f4442e
A
812 expect(*mini, syn, "syn");
813 expect(*mini, sayn, "saun");
814 delete mini;
815 mini = NULL;
b75a7d8f
A
816
817#if !UCONFIG_NO_FORMATTING
818 // Transliterate the Greek locale data
819 Locale el("el");
820 DateFormatSymbols syms(el, status);
821 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
822 int32_t i, count;
823 const UnicodeString* data = syms.getMonths(count);
824 for (i=0; i<count; ++i) {
825 if (data[i].length() == 0) {
826 continue;
827 }
828 UnicodeString out(data[i]);
829 gl->transliterate(out);
830 UBool ok = TRUE;
831 if (data[i].length() >= 2 && out.length() >= 2 &&
832 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
833 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
834 ok = FALSE;
835 }
836 }
837 if (ok) {
838 logln(prettify(data[i] + " -> " + out));
839 } else {
840 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
841 }
842 }
843#endif
844
845 delete gl;
846}
847
848/**
849 * Prefix, suffix support in hex transliterators
850 */
851void TransliteratorTest::TestJ243(void) {
374ca955 852 UErrorCode ec = U_ZERO_ERROR;
b75a7d8f
A
853
854 // Test default Hex-Any, which should handle
855 // \u, \U, u+, and U+
374ca955
A
856 Transliterator *hex =
857 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
858 if (assertSuccess("getInstance", ec)) {
859 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
860 }
861 delete hex;
862
863// // Try a custom Hex-Unicode
864// // \uXXXX and &#xXXXX;
865// ec = U_ZERO_ERROR;
866// HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
867// expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
868// "abcd5fx012&#x00033;");
869// // Try custom Any-Hex (default is tested elsewhere)
870// ec = U_ZERO_ERROR;
871// UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
872// expect(hex3, "012", "&#x30;&#x31;&#x32;");
b75a7d8f
A
873}
874
875/**
876 * Parsers need better syntax error messages.
877 */
878void TransliteratorTest::TestJ329(void) {
879
880 struct { UBool containsErrors; const char* rule; } DATA[] = {
881 { FALSE, "a > b; c > d" },
882 { TRUE, "a > b; no operator; c > d" },
883 };
2ca993e8 884 int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
885
886 for (int32_t i=0; i<DATA_length; ++i) {
887 UErrorCode status = U_ZERO_ERROR;
888 UParseError parseError;
46f4442e 889 Transliterator *rbt = Transliterator::createFromRules("<ID>",
b75a7d8f
A
890 DATA[i].rule,
891 UTRANS_FORWARD,
b75a7d8f
A
892 parseError,
893 status);
894 UBool gotError = U_FAILURE(status);
895 UnicodeString desc(DATA[i].rule);
896 desc.append(gotError ? " -> error" : " -> no error");
897 if (gotError) {
898 desc = desc + ", ParseError code=" + u_errorName(status) +
899 " line=" + parseError.line +
900 " offset=" + parseError.offset +
901 " context=" + parseError.preContext;
902 }
903 if (gotError == DATA[i].containsErrors) {
904 logln(UnicodeString("Ok: ") + desc);
905 } else {
906 errln(UnicodeString("FAIL: ") + desc);
907 }
46f4442e 908 delete rbt;
b75a7d8f
A
909 }
910}
911
912/**
913 * Test segments and segment references.
914 */
915void TransliteratorTest::TestSegments(void) {
916 // Array of 3n items
917 // Each item is <rules>, <input>, <expected output>
918 UnicodeString DATA[] = {
919 "([a-z]) '.' ([0-9]) > $2 '-' $1",
920 "abc.123.xyz.456",
921 "ab1-c23.xy4-z56",
922
923 // nested
924 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
925 "a1 b2",
926 "a1.a.1 b2.b.2",
927 };
2ca993e8 928 int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
929
930 for (int32_t i=0; i<DATA_length; i+=3) {
931 logln("Pattern: " + prettify(DATA[i]));
46f4442e 932 UParseError parseError;
b75a7d8f 933 UErrorCode status = U_ZERO_ERROR;
46f4442e 934 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
b75a7d8f
A
935 if (U_FAILURE(status)) {
936 errln("FAIL: RBT constructor");
937 } else {
46f4442e 938 expect(*t, DATA[i+1], DATA[i+2]);
b75a7d8f 939 }
46f4442e 940 delete t;
b75a7d8f
A
941 }
942}
943
944/**
945 * Test cursor positioning outside of the key
946 */
947void TransliteratorTest::TestCursorOffset(void) {
948 // Array of 3n items
949 // Each item is <rules>, <input>, <expected output>
950 UnicodeString DATA[] = {
951 "pre {alpha} post > | @ ALPHA ;"
952 "eALPHA > beta ;"
953 "pre {beta} post > BETA @@ | ;"
954 "post > xyz",
955
956 "prealphapost prebetapost",
957
958 "prbetaxyz preBETApost",
959 };
2ca993e8 960 int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
961
962 for (int32_t i=0; i<DATA_length; i+=3) {
963 logln("Pattern: " + prettify(DATA[i]));
46f4442e 964 UParseError parseError;
b75a7d8f 965 UErrorCode status = U_ZERO_ERROR;
46f4442e 966 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
b75a7d8f
A
967 if (U_FAILURE(status)) {
968 errln("FAIL: RBT constructor");
969 } else {
46f4442e 970 expect(*t, DATA[i+1], DATA[i+2]);
b75a7d8f 971 }
46f4442e 972 delete t;
b75a7d8f
A
973 }
974}
975
976/**
977 * Test zero length and > 1 char length variable values. Test
978 * use of variable refs in UnicodeSets.
979 */
980void TransliteratorTest::TestArbitraryVariableValues(void) {
981 // Array of 3n items
982 // Each item is <rules>, <input>, <expected output>
983 UnicodeString DATA[] = {
984 "$abe = ab;"
985 "$pat = x[yY]z;"
986 "$ll = 'a-z';"
987 "$llZ = [$ll];"
988 "$llY = [$ll$pat];"
989 "$emp = ;"
990
991 "$abe > ABE;"
992 "$pat > END;"
993 "$llZ > 1;"
994 "$llY > 2;"
995 "7$emp 8 > 9;"
996 "",
997
998 "ab xYzxyz stY78",
999 "ABE ENDEND 1129",
1000 };
2ca993e8 1001 int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
1002
1003 for (int32_t i=0; i<DATA_length; i+=3) {
1004 logln("Pattern: " + prettify(DATA[i]));
46f4442e 1005 UParseError parseError;
b75a7d8f 1006 UErrorCode status = U_ZERO_ERROR;
46f4442e 1007 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
b75a7d8f
A
1008 if (U_FAILURE(status)) {
1009 errln("FAIL: RBT constructor");
1010 } else {
46f4442e 1011 expect(*t, DATA[i+1], DATA[i+2]);
b75a7d8f 1012 }
46f4442e 1013 delete t;
b75a7d8f
A
1014 }
1015}
1016
1017/**
1018 * Confirm that the contextStart, contextLimit, start, and limit
1019 * behave correctly. J474.
1020 */
1021void TransliteratorTest::TestPositionHandling(void) {
1022 // Array of 3n items
1023 // Each item is <rules>, <input>, <expected output>
1024 const char* DATA[] = {
1025 "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1026 "xtat txtb", // pos 0,9,0,9
1027 "xTTaSS TTxUUb",
1028
1029 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1030 "xtat txtb", // pos 2,9,3,8
1031 "xtaSS TTxUUb",
1032
1033 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1034 "xtat txtb", // pos 3,8,3,8
1035 "xtaTT TTxTTb",
1036 };
1037
1038 // Array of 4n positions -- these go with the DATA array
1039 // They are: contextStart, contextLimit, start, limit
1040 int32_t POS[] = {
1041 0, 9, 0, 9,
1042 2, 9, 3, 8,
1043 3, 8, 3, 8,
1044 };
1045
2ca993e8 1046 int32_t n = UPRV_LENGTHOF(DATA) / 3;
b75a7d8f
A
1047 for (int32_t i=0; i<n; i++) {
1048 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
1049 UParseError parseError;
1050 Transliterator *t = Transliterator::createFromRules("<ID>",
1051 DATA[3*i], UTRANS_FORWARD, parseError, status);
b75a7d8f
A
1052 if (U_FAILURE(status)) {
1053 delete t;
1054 errln("FAIL: RBT constructor");
1055 return;
1056 }
1057 UTransPosition pos;
1058 pos.contextStart= POS[4*i];
1059 pos.contextLimit = POS[4*i+1];
1060 pos.start = POS[4*i+2];
1061 pos.limit = POS[4*i+3];
1062 UnicodeString rsource(DATA[3*i+1]);
1063 t->transliterate(rsource, pos, status);
1064 if (U_FAILURE(status)) {
1065 delete t;
1066 errln("FAIL: transliterate");
1067 return;
1068 }
1069 t->finishTransliteration(rsource, pos);
1070 expectAux(DATA[3*i],
1071 DATA[3*i+1],
1072 rsource,
1073 DATA[3*i+2]);
1074 delete t;
1075 }
1076}
1077
1078/**
1079 * Test the Hiragana-Katakana transliterator.
1080 */
1081void TransliteratorTest::TestHiraganaKatakana(void) {
1082 UParseError parseError;
1083 UErrorCode status = U_ZERO_ERROR;
1084 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1085 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1086 if (hk == 0 || kh == 0) {
729e4ab9 1087 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
b75a7d8f
A
1088 delete hk;
1089 delete kh;
1090 return;
1091 }
1092
1093 // Array of 3n items
1094 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1095 const char* DATA[] = {
1096 "both",
1097 "\\u3042\\u3090\\u3099\\u3092\\u3050",
1098 "\\u30A2\\u30F8\\u30F2\\u30B0",
1099
1100 "kh",
1101 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1102 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1103 };
2ca993e8 1104 int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
1105
1106 for (int32_t i=0; i<DATA_length; i+=3) {
1107 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1108 UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1109 switch (*DATA[i]) {
1110 case 0x68: //'h': // Hiragana-Katakana
1111 expect(*hk, h, k);
1112 break;
1113 case 0x6B: //'k': // Katakana-Hiragana
1114 expect(*kh, k, h);
1115 break;
1116 case 0x62: //'b': // both
1117 expect(*hk, h, k);
1118 expect(*kh, k, h);
1119 break;
1120 }
1121 }
1122 delete hk;
1123 delete kh;
1124}
1125
1126/**
1127 * Test cloning / copy constructor of RBT.
1128 */
1129void TransliteratorTest::TestCopyJ476(void) {
1130 // The real test here is what happens when the destructors are
1131 // called. So we let one object get destructed, and check to
1132 // see that its copy still works.
46f4442e 1133 Transliterator *t2 = 0;
b75a7d8f 1134 {
46f4442e 1135 UParseError parseError;
b75a7d8f 1136 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
1137 Transliterator *t1 = Transliterator::createFromRules("t1",
1138 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
b75a7d8f
A
1139 if (U_FAILURE(status)) {
1140 errln("FAIL: RBT constructor");
1141 return;
1142 }
46f4442e
A
1143 t2 = t1->clone(); // Call copy constructor under the covers.
1144 expect(*t1, "abcfoofoo", "ABcbar");
1145 delete t1;
b75a7d8f 1146 }
46f4442e 1147 expect(*t2, "abcfoofoo", "ABcbar");
b75a7d8f
A
1148 delete t2;
1149}
1150
1151/**
1152 * Test inter-Indic transliterators. These are composed.
1153 * ICU4C Jitterbug 483.
1154 */
1155void TransliteratorTest::TestInterIndic(void) {
1156 UnicodeString ID("Devanagari-Gujarati", "");
1157 UErrorCode status = U_ZERO_ERROR;
1158 UParseError parseError;
1159 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1160 if (dg == 0) {
729e4ab9 1161 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
b75a7d8f
A
1162 return;
1163 }
1164 UnicodeString id = dg->getID();
1165 if (id != ID) {
1166 errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1167 }
1168 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1169 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1170 expect(*dg, dev, guj);
1171 delete dg;
1172}
1173
1174/**
1175 * Test filter syntax in IDs. (J918)
1176 */
1177void TransliteratorTest::TestFilterIDs(void) {
1178 // Array of 3n strings:
1179 // <id>, <inverse id>, <input>, <expected output>
1180 const char* DATA[] = {
1181 "[aeiou]Any-Hex", // ID
1182 "[aeiou]Hex-Any", // expected inverse ID
1183 "quizzical", // src
1184 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1185
1186 "[aeiou]Any-Hex;[^5]Hex-Any",
1187 "[^5]Any-Hex;[aeiou]Hex-Any",
1188 "quizzical",
1189 "q\\u0075izzical",
1190
1191 "[abc]Null",
1192 "[abc]Null",
1193 "xyz",
1194 "xyz",
1195 };
2ca993e8 1196 enum { DATA_length = UPRV_LENGTHOF(DATA) };
b75a7d8f
A
1197
1198 for (int i=0; i<DATA_length; i+=4) {
1199 UnicodeString ID(DATA[i], "");
1200 UnicodeString uID(DATA[i+1], "");
1201 UnicodeString data2(DATA[i+2], "");
1202 UnicodeString data3(DATA[i+3], "");
1203 UParseError parseError;
1204 UErrorCode status = U_ZERO_ERROR;
1205 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1206 if (t == 0) {
1207 errln("FAIL: createInstance(" + ID + ") returned NULL");
1208 return;
1209 }
1210 expect(*t, data2, data3);
1211
1212 // Check the ID
1213 if (ID != t->getID()) {
1214 errln("FAIL: createInstance(" + ID + ").getID() => " +
1215 t->getID());
1216 }
1217
1218 // Check the inverse
1219 Transliterator *u = t->createInverse(status);
1220 if (u == 0) {
1221 errln("FAIL: " + ID + ".createInverse() returned NULL");
1222 } else if (u->getID() != uID) {
1223 errln("FAIL: " + ID + ".createInverse().getID() => " +
1224 u->getID() + ", expected " + uID);
1225 }
1226
1227 delete t;
1228 delete u;
1229 }
1230}
1231
1232/**
1233 * Test the case mapping transliterators.
1234 */
1235void TransliteratorTest::TestCaseMap(void) {
1236 UParseError parseError;
1237 UErrorCode status = U_ZERO_ERROR;
1238 Transliterator* toUpper =
1239 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1240 Transliterator* toLower =
1241 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1242 Transliterator* toTitle =
1243 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1244 if (toUpper==0 || toLower==0 || toTitle==0) {
1245 errln("FAIL: createInstance returned NULL");
1246 delete toUpper;
1247 delete toLower;
1248 delete toTitle;
1249 return;
1250 }
1251
1252 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1253 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1254 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1255 "the quick brown foX jumped over the lazY dogs.");
1256 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1257 "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1258
1259 delete toUpper;
1260 delete toLower;
1261 delete toTitle;
1262}
1263
1264/**
1265 * Test the name mapping transliterators.
1266 */
1267void TransliteratorTest::TestNameMap(void) {
1268 UParseError parseError;
1269 UErrorCode status = U_ZERO_ERROR;
1270 Transliterator* uni2name =
1271 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1272 Transliterator* name2uni =
1273 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1274 if (uni2name==0 || name2uni==0) {
1275 errln("FAIL: createInstance returned NULL");
1276 delete uni2name;
1277 delete name2uni;
1278 return;
1279 }
1280
1281 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1282 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
4388f060
A
1283 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1284 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
b75a7d8f
A
1285 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1286
1287 delete uni2name;
1288 delete name2uni;
1289
1290 // round trip
1291 Transliterator* t =
1292 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1293 if (t==0) {
1294 errln("FAIL: createInstance returned NULL");
1295 delete t;
1296 return;
1297 }
1298
1299 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1300 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1301 expect(*t, s, s);
1302 delete t;
1303}
1304
1305/**
1306 * Test liberalized ID syntax. 1006c
1307 */
1308void TransliteratorTest::TestLiberalizedID(void) {
1309 // Some test cases have an expected getID() value of NULL. This
1310 // means I have disabled the test case for now. This stuff is
1311 // still under development, and I haven't decided whether to make
1312 // getID() return canonical case yet. It will all get rewritten
1313 // with the move to Source-Target/Variant IDs anyway. [aliu]
1314 const char* DATA[] = {
1315 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1316 " Null ", "Null", "whitespace",
1317 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
1318 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1319 };
2ca993e8 1320 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
1321 UParseError parseError;
1322 UErrorCode status= U_ZERO_ERROR;
1323 for (int32_t i=0; i<DATA_length; i+=3) {
1324 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1325 if (t == 0) {
729e4ab9
A
1326 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1327 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
b75a7d8f
A
1328 } else {
1329 UnicodeString exp;
1330 if (DATA[i+1]) {
1331 exp = UnicodeString(DATA[i+1], "");
1332 }
1333 // Don't worry about getID() if the expected char*
1334 // is NULL -- see above.
1335 if (exp.length() == 0 || exp == t->getID()) {
1336 logln(UnicodeString("Ok: ") + DATA[i+2] +
1337 " create ID \"" + DATA[i] + "\" => \"" +
1338 exp + "\"");
1339 } else {
1340 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1341 " create ID \"" + DATA[i] + "\" => \"" +
1342 t->getID() + "\", exp \"" + exp + "\"");
1343 }
1344 delete t;
1345 }
1346 }
1347}
1348
1349/* test for Jitterbug 912 */
1350void TransliteratorTest::TestCreateInstance(){
1351 const char* FORWARD = "F";
1352 const char* REVERSE = "R";
1353 const char* DATA[] = {
1354 // Column 1: id
1355 // Column 2: direction
1356 // Column 3: expected ID, or "" if expect failure
1357 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1358
1359 // JB#2689: bad compound causes crash
1360 "InvalidSource-InvalidTarget", FORWARD, "",
1361 "InvalidSource-InvalidTarget", REVERSE, "",
1362 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1363 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1364 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1365 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1366
1367 NULL
1368 };
1369
1370 for (int32_t i=0; DATA[i]; i+=3) {
1371 UParseError err;
1372 UErrorCode ec = U_ZERO_ERROR;
1373 UnicodeString id(DATA[i]);
1374 UTransDirection dir = (DATA[i+1]==FORWARD)?
1375 UTRANS_FORWARD:UTRANS_REVERSE;
1376 UnicodeString expID(DATA[i+2]);
1377 Transliterator* t =
1378 Transliterator::createInstance(id,dir,err,ec);
374ca955
A
1379 UnicodeString newID;
1380 if (t) {
1381 newID = t->getID();
1382 }
b75a7d8f
A
1383 UBool ok = (newID == expID);
1384 if (!t) {
1385 newID = u_errorName(ec);
1386 }
1387 if (ok) {
1388 logln((UnicodeString)"Ok: createInstance(" +
1389 id + "," + DATA[i+1] + ") => " + newID);
1390 } else {
729e4ab9 1391 dataerrln((UnicodeString)"FAIL: createInstance(" +
b75a7d8f
A
1392 id + "," + DATA[i+1] + ") => " + newID +
1393 ", expected " + expID);
1394 }
1395 delete t;
1396 }
1397}
1398
1399/**
1400 * Test the normalization transliterator.
1401 */
1402void TransliteratorTest::TestNormalizationTransliterator() {
1403 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1404 // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1405 const char* CANON[] = {
1406 // Input Decomposed Composed
1407 "cat", "cat", "cat" ,
1408 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
1409
1410 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
1411 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
1412
1413 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
1414 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
1415 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
1416
1417 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1418 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1419
1420 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
1421 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
1422 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
1423
1424 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
1425 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
1426
1427 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
1428 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
1429
1430 "Henry IV", "Henry IV", "Henry IV" ,
1431 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
1432
1433 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1434 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1435 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
1436 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
1437 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
1438
1439 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
1440 0 // end
1441 };
1442
1443 const char* COMPAT[] = {
1444 // Input Decomposed Composed
1445 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
1446
1447 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
1448 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
1449
1450 "Henry IV", "Henry IV", "Henry IV" ,
1451 "Henry \\u2163", "Henry IV", "Henry IV" ,
1452
1453 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
1454 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
1455
1456 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
1457 0 // end
1458 };
1459
1460 int32_t i;
1461 UParseError parseError;
1462 UErrorCode status = U_ZERO_ERROR;
1463 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1464 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1465 if (!NFD || !NFC) {
729e4ab9 1466 dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
b75a7d8f
A
1467 delete NFD;
1468 delete NFC;
1469 return;
1470 }
1471 for (i=0; CANON[i]; i+=3) {
1472 UnicodeString in = CharsToUnicodeString(CANON[i]);
1473 UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1474 UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1475 expect(*NFD, in, expd);
1476 expect(*NFC, in, expc);
1477 }
1478 delete NFD;
1479 delete NFC;
1480
1481 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1482 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1483 if (!NFKD || !NFKC) {
b331163b 1484 dataerrln("FAIL: createInstance failed");
b75a7d8f
A
1485 delete NFKD;
1486 delete NFKC;
1487 return;
1488 }
1489 for (i=0; COMPAT[i]; i+=3) {
1490 UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1491 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1492 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1493 expect(*NFKD, in, expkd);
1494 expect(*NFKC, in, expkc);
1495 }
1496 delete NFKD;
1497 delete NFKC;
1498
1499 UParseError pe;
1500 status = U_ZERO_ERROR;
1501 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1502 UTRANS_FORWARD,
1503 pe, status);
1504 if (t == 0) {
1505 errln("FAIL: createInstance failed");
1506 }
1507 expect(*t, CharsToUnicodeString("\\u010dx"),
1508 CharsToUnicodeString("c\\u030C"));
1509 delete t;
1510}
1511
1512/**
1513 * Test compound RBT rules.
1514 */
1515void TransliteratorTest::TestCompoundRBT(void) {
1516 // Careful with spacing and ';' here: Phrase this exactly
1517 // as toRules() is going to return it. If toRules() changes
1518 // with regard to spacing or ';', then adjust this string.
1519 UnicodeString rule("::Hex-Any;\n"
1520 "::Any-Lower;\n"
1521 "a > '.A.';\n"
1522 "b > '.B.';\n"
1523 "::[^t]Any-Upper;", "");
1524 UParseError parseError;
1525 UErrorCode status = U_ZERO_ERROR;
1526 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1527 if (t == 0) {
1528 errln("FAIL: createFromRules failed");
1529 return;
1530 }
46f4442e 1531 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
b75a7d8f
A
1532 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1533 UnicodeString r;
1534 t->toRules(r, TRUE);
1535 if (r == rule) {
1536 logln((UnicodeString)"OK: toRules() => " + r);
1537 } else {
1538 errln((UnicodeString)"FAIL: toRules() => " + r +
1539 ", expected " + rule);
1540 }
1541 delete t;
1542
1543 // Now test toRules
1544 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1545 if (t == 0) {
729e4ab9 1546 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
b75a7d8f
A
1547 return;
1548 }
1549 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1550 t->toRules(r, TRUE);
1551 if (r != exp) {
1552 errln((UnicodeString)"FAIL: toRules() => " + r +
1553 ", expected " + exp);
1554 } else {
1555 logln((UnicodeString)"OK: toRules() => " + r);
1556 }
1557 delete t;
1558
1559 // Round trip the result of toRules
1560 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1561 if (t == 0) {
1562 errln("FAIL: createFromRules #2 failed");
1563 return;
1564 } else {
1565 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1566 }
1567
1568 // Test toRules again
1569 t->toRules(r, TRUE);
1570 if (r != exp) {
1571 errln((UnicodeString)"FAIL: toRules() => " + r +
1572 ", expected " + exp);
1573 } else {
1574 logln((UnicodeString)"OK: toRules() => " + r);
1575 }
1576
1577 delete t;
1578
1579 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
1580 // to what the regenerated ID will look like.
1581 UnicodeString id("Upper(Lower);(NFKC)", "");
1582 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1583 if (t == 0) {
1584 errln("FAIL: createInstance #2 failed");
1585 return;
1586 }
1587 if (t->getID() == id) {
1588 logln((UnicodeString)"OK: created " + id);
1589 } else {
1590 errln((UnicodeString)"FAIL: createInstance(" + id +
1591 ").getID() => " + t->getID());
1592 }
1593
1594 Transliterator *u = t->createInverse(status);
1595 if (u == 0) {
1596 errln("FAIL: createInverse failed");
1597 delete t;
1598 return;
1599 }
1600 exp = "NFKC();Lower(Upper)";
1601 if (u->getID() == exp) {
1602 logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1603 u->getID());
1604 } else {
1605 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1606 u->getID());
1607 }
1608 delete t;
1609 delete u;
1610}
1611
1612/**
1613 * Compound filter semantics were orginially not implemented
1614 * correctly. Originally, each component filter f(i) is replaced by
1615 * f'(i) = f(i) && g, where g is the filter for the compound
1616 * transliterator.
1617 *
1618 * From Mark:
1619 *
1620 * Suppose and I have a transliterator X. Internally X is
1621 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1622 *
1623 * The compound should convert all greek characters (through latin) to
1624 * cyrillic, then lowercase the result. The filter should say "don't
1625 * touch 'A' in the original". But because an intermediate result
1626 * happens to go through "A", the Greek Alpha gets hung up.
1627 */
1628void TransliteratorTest::TestCompoundFilter(void) {
1629 UParseError parseError;
1630 UErrorCode status = U_ZERO_ERROR;
1631 Transliterator *t = Transliterator::createInstance
1632 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1633 if (t == 0) {
729e4ab9 1634 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
b75a7d8f
A
1635 return;
1636 }
1637 t->adoptFilter(new UnicodeSet("[^A]", status));
1638 if (U_FAILURE(status)) {
1639 errln("FAIL: UnicodeSet ct failed");
1640 delete t;
1641 return;
1642 }
1643
1644 // Only the 'A' at index 1 should remain unchanged
1645 expect(*t,
1646 CharsToUnicodeString("BA\\u039A\\u0391"),
1647 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1648 delete t;
1649}
1650
1651void TransliteratorTest::TestRemove(void) {
1652 UParseError parseError;
1653 UErrorCode status = U_ZERO_ERROR;
1654 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1655 if (t == 0) {
1656 errln("FAIL: createInstance failed");
1657 return;
1658 }
1659
1660 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
73c04bcf
A
1661
1662 // extra test for RemoveTransliterator::clone(), which at one point wasn't
1663 // duplicating the filter
1664 Transliterator* t2 = t->clone();
1665 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1666
b75a7d8f 1667 delete t;
73c04bcf 1668 delete t2;
b75a7d8f
A
1669}
1670
1671void TransliteratorTest::TestToRules(void) {
1672 const char* RBT = "rbt";
1673 const char* SET = "set";
1674 static const char* DATA[] = {
1675 RBT,
1676 "$a=\\u4E61; [$a] > A;",
1677 "[\\u4E61] > A;",
1678
1679 RBT,
1680 "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1681 "[[:Zs:][:Zl:]]{a} > A;",
1682
1683 SET,
1684 "[[:Zs:][:Zl:]]",
1685 "[[:Zs:][:Zl:]]",
1686
1687 SET,
1688 "[:Ps:]",
1689 "[:Ps:]",
1690
1691 SET,
1692 "[:L:]",
1693 "[:L:]",
1694
1695 SET,
1696 "[[:L:]-[A]]",
1697 "[[:L:]-[A]]",
1698
1699 SET,
1700 "[~[:Lu:][:Ll:]]",
1701 "[~[:Lu:][:Ll:]]",
1702
1703 SET,
1704 "[~[a-z]]",
1705 "[~[a-z]]",
1706
1707 RBT,
1708 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1709 "[^[:Zs:]]{a} > A;",
1710
1711 RBT,
1712 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1713 "[[a-z]-[:Zs:]]{a} > A;",
1714
1715 RBT,
1716 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1717 "[[:Zs:]&[a-z]]{a} > A;",
1718
1719 RBT,
1720 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1721 "[x[:Zs:]]{a} > A;",
1722
1723 RBT,
1724 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1725 "$macron = \\u0304 ;"
1726 "$evowel = [aeiouyAEIOUY] ;"
1727 "$iotasub = \\u0345 ;"
1728 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1729 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1730
1731 RBT,
1732 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1734 };
2ca993e8 1735 static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
1736
1737 for (int32_t d=0; d < DATA_length; d+=3) {
1738 if (DATA[d] == RBT) {
1739 // Transliterator test
1740 UParseError parseError;
1741 UErrorCode status = U_ZERO_ERROR;
1742 Transliterator *t = Transliterator::createFromRules("ID",
46f4442e 1743 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
b75a7d8f 1744 if (t == 0) {
729e4ab9 1745 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
b75a7d8f
A
1746 return;
1747 }
1748 UnicodeString rules, escapedRules;
1749 t->toRules(rules, FALSE);
1750 t->toRules(escapedRules, TRUE);
1751 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
46f4442e 1752 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
b75a7d8f 1753 if (rules == expRules) {
46f4442e 1754 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
b75a7d8f
A
1755 " => " + rules);
1756 } else {
46f4442e 1757 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
b75a7d8f
A
1758 " => " + rules + ", exp " + expRules);
1759 }
1760 if (escapedRules == expEscapedRules) {
46f4442e 1761 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
b75a7d8f
A
1762 " => " + escapedRules);
1763 } else {
46f4442e 1764 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
b75a7d8f
A
1765 " => " + escapedRules + ", exp " + expEscapedRules);
1766 }
1767 delete t;
1768
1769 } else {
1770 // UnicodeSet test
1771 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
1772 UnicodeString pat(DATA[d+1], -1, US_INV);
1773 UnicodeString expToPat(DATA[d+2], -1, US_INV);
b75a7d8f
A
1774 UnicodeSet set(pat, status);
1775 if (U_FAILURE(status)) {
1776 errln("FAIL: UnicodeSet ct failed");
1777 return;
1778 }
1779 // Adjust spacing etc. as necessary.
1780 UnicodeString toPat;
1781 set.toPattern(toPat);
1782 if (expToPat == toPat) {
1783 logln((UnicodeString)"Ok: " + pat +
1784 " => " + toPat);
1785 } else {
1786 errln((UnicodeString)"FAIL: " + pat +
1787 " => " + prettify(toPat, TRUE) +
1788 ", exp " + prettify(pat, TRUE));
1789 }
1790 }
1791 }
1792}
1793
1794void TransliteratorTest::TestContext() {
1795 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1796 expect("de > x; {d}e > y;",
1797 "de",
1798 "ye",
1799 &pos);
1800
1801 expect("ab{c} > z;",
1802 "xadabdabcy",
1803 "xadabdabzy");
1804}
1805
1806void TransliteratorTest::TestSupplemental() {
1807
1808 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1809 "a > $a; $s > i;"),
1810 CharsToUnicodeString("ab\\U0001030Fx"),
1811 CharsToUnicodeString("\\U00010300bix"));
1812
1813 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1814 "$b=[A-Z\\U00010400-\\U0001044D];"
1815 "($a)($b) > $2 $1;"),
1816 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1817 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1818
1819 // k|ax\\U00010300xm
1820
1821 // k|a\\U00010400\\U00010300xm
1822 // ky|\\U00010400\\U00010300xm
1823 // ky\\U00010400|\\U00010300xm
1824
1825 // ky\\U00010400|\\U00010300\\U00010400m
1826 // ky\\U00010400y|\\U00010400m
1827 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1828 "$a {x} > | @ \\U00010400;"
1829 "{$a} [^\\u0000-\\uFFFF] > y;"),
1830 CharsToUnicodeString("kax\\U00010300xm"),
1831 CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1832
1833 expectT("Any-Name",
1834 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
46f4442e 1835 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
b75a7d8f
A
1836
1837 expectT("Any-Hex/Unicode",
1838 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
46f4442e 1839 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
b75a7d8f
A
1840
1841 expectT("Any-Hex/C",
1842 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
46f4442e 1843 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
b75a7d8f
A
1844
1845 expectT("Any-Hex/Perl",
1846 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
46f4442e 1847 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
b75a7d8f
A
1848
1849 expectT("Any-Hex/Java",
1850 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
46f4442e 1851 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
b75a7d8f
A
1852
1853 expectT("Any-Hex/XML",
1854 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1855 "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1856
1857 expectT("Any-Hex/XML10",
1858 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1859 "&#66352;&#1113856;&#917601;&#160;");
1860
46f4442e 1861 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
b75a7d8f
A
1862 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1863 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1864}
1865
1866void TransliteratorTest::TestQuantifier() {
1867
1868 // Make sure @ in a quantified anteContext works
1869 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1870 "AAAAAb",
1871 "aaa(aac)");
1872
1873 // Make sure @ in a quantified postContext works
1874 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1875 "baaaaa",
1876 "caa(aaa)");
1877
1878 // Make sure @ in a quantified postContext with seg ref works
1879 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1880 "baaaaa",
1881 "baa(aaa)");
1882
1883 // Make sure @ past ante context doesn't enter ante context
1884 UTransPosition pos = {0, 5, 3, 5};
1885 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1886 "xxxab",
1887 "xxx(ac)",
1888 &pos);
1889
1890 // Make sure @ past post context doesn't pass limit
1891 UTransPosition pos2 = {0, 4, 0, 2};
1892 expect("{b} a+ > c @@ |; x > y; a > A;",
1893 "baxx",
1894 "caxx",
1895 &pos2);
1896
1897 // Make sure @ past post context doesn't enter post context
1898 expect("{b} a+ > c @@ |; x > y; a > A;",
1899 "baxx",
1900 "cayy");
1901
1902 expect("(ab)? c > d;",
1903 "c abc ababc",
1904 "d d abd");
1905
1906 // NOTE: The (ab)+ when referenced just yields a single "ab",
1907 // not the full sequence of them. This accords with perl behavior.
1908 expect("(ab)+ {x} > '(' $1 ')';",
1909 "x abx ababxy",
1910 "x ab(ab) abab(ab)y");
1911
1912 expect("b+ > x;",
1913 "ac abc abbc abbbc",
1914 "ac axc axc axc");
1915
1916 expect("[abc]+ > x;",
1917 "qac abrc abbcs abtbbc",
1918 "qx xrx xs xtx");
1919
1920 expect("q{(ab)+} > x;",
1921 "qa qab qaba qababc qaba",
1922 "qa qx qxa qxc qxa");
1923
1924 expect("q(ab)* > x;",
1925 "qa qab qaba qababc",
1926 "xa x xa xc");
1927
1928 // NOTE: The (ab)+ when referenced just yields a single "ab",
1929 // not the full sequence of them. This accords with perl behavior.
1930 expect("q(ab)* > '(' $1 ')';",
1931 "qa qab qaba qababc",
1932 "()a (ab) (ab)a (ab)c");
1933
1934 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1935 // quoted string
1936 expect("'ab'+ > x;",
1937 "bb ab ababb",
1938 "bb x xb");
1939
1940 // $foo+ and $foo* -- the quantifier should apply to the entire
1941 // variable reference
1942 expect("$var = ab; $var+ > x;",
1943 "bb ab ababb",
1944 "bb x xb");
1945}
1946
46f4442e 1947class TestTrans : public Transliterator {
b75a7d8f 1948public:
46f4442e
A
1949 TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1950 }
340931cb 1951 virtual TestTrans* clone(void) const {
46f4442e 1952 return new TestTrans(getID());
b75a7d8f 1953 }
46f4442e
A
1954 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1955 UBool /*isIncremental*/) const
1956 {
1957 offsets.start = offsets.limit;
1958 }
1959 virtual UClassID getDynamicClassID() const;
1960 static UClassID U_EXPORT2 getStaticClassID();
b75a7d8f 1961};
46f4442e 1962UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
b75a7d8f
A
1963
1964/**
1965 * Test Source-Target/Variant.
1966 */
1967void TransliteratorTest::TestSTV(void) {
1968 int32_t ns = Transliterator::countAvailableSources();
1969 if (ns < 0 || ns > 255) {
1970 errln((UnicodeString)"FAIL: Bad source count: " + ns);
1971 return;
1972 }
1973 int32_t i, j;
1974 for (i=0; i<ns; ++i) {
1975 UnicodeString source;
1976 Transliterator::getAvailableSource(i, source);
1977 logln((UnicodeString)"" + i + ": " + source);
1978 if (source.length() == 0) {
1979 errln("FAIL: empty source");
1980 continue;
1981 }
1982 int32_t nt = Transliterator::countAvailableTargets(source);
1983 if (nt < 0 || nt > 255) {
1984 errln((UnicodeString)"FAIL: Bad target count: " + nt);
1985 continue;
1986 }
1987 for (int32_t j=0; j<nt; ++j) {
1988 UnicodeString target;
1989 Transliterator::getAvailableTarget(j, source, target);
1990 logln((UnicodeString)" " + j + ": " + target);
1991 if (target.length() == 0) {
1992 errln("FAIL: empty target");
1993 continue;
1994 }
1995 int32_t nv = Transliterator::countAvailableVariants(source, target);
1996 if (nv < 0 || nv > 255) {
1997 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1998 continue;
1999 }
2000 for (int32_t k=0; k<nv; ++k) {
2001 UnicodeString variant;
2002 Transliterator::getAvailableVariant(k, source, target, variant);
2003 if (variant.length() == 0) {
2004 logln((UnicodeString)" " + k + ": <empty>");
2005 } else {
2006 logln((UnicodeString)" " + k + ": " + variant);
2007 }
2008 }
2009 }
2010 }
2011
2012 // Test registration
2013 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2015 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2016 for (i=0; i<3; ++i) {
2017 Transliterator *t = new TestTrans(IDS[i]);
2018 if (t == 0) {
2019 errln("FAIL: out of memory");
2020 return;
2021 }
2022 if (t->getID() != IDS[i]) {
2023 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2024 delete t;
2025 return;
2026 }
2027 Transliterator::registerInstance(t);
2028 UErrorCode status = U_ZERO_ERROR;
2029 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2030 if (t == NULL) {
2031 errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2032 IDS[i]);
2033 } else {
2034 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2035 IDS[i]);
2036 delete t;
2037 }
2038 Transliterator::unregister(IDS[i]);
2039 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2040 if (t != NULL) {
2041 errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2042 IDS[i]);
2043 delete t;
2044 }
2045 }
2046
2047 // Make sure getAvailable API reflects removal
2048 int32_t n = Transliterator::countAvailableIDs();
2049 for (i=0; i<n; ++i) {
2050 UnicodeString id = Transliterator::getAvailableID(i);
2051 for (j=0; j<3; ++j) {
2052 if (id.caseCompare(FULL_IDS[j],0)==0) {
2053 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2054 }
2055 }
2056 }
2057 n = Transliterator::countAvailableTargets("Any");
2058 for (i=0; i<n; ++i) {
2059 UnicodeString t;
2060 Transliterator::getAvailableTarget(i, "Any", t);
2061 if (t.caseCompare(IDS[0],0)==0) {
2062 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2063 }
2064 }
2065 n = Transliterator::countAvailableSources();
2066 for (i=0; i<n; ++i) {
2067 UnicodeString s;
2068 Transliterator::getAvailableSource(i, s);
2069 for (j=0; j<3; ++j) {
2070 if (SOURCES[j] == NULL) continue;
2071 if (s.caseCompare(SOURCES[j],0)==0) {
2072 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2073 }
2074 }
2075 }
2076}
2077
2078/**
2079 * Test inverse of Greek-Latin; Title()
2080 */
2081void TransliteratorTest::TestCompoundInverse(void) {
2082 UParseError parseError;
2083 UErrorCode status = U_ZERO_ERROR;
2084 Transliterator *t = Transliterator::createInstance
2085 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2086 if (t == 0) {
729e4ab9 2087 dataerrln("FAIL: createInstance - %s", u_errorName(status));
b75a7d8f
A
2088 return;
2089 }
2090 UnicodeString exp("(Title);Latin-Greek");
2091 if (t->getID() == exp) {
2092 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2093 t->getID());
2094 } else {
2095 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2096 t->getID() + "\", expected \"" + exp + "\"");
2097 }
2098 delete t;
2099}
2100
2101/**
2102 * Test NFD chaining with RBT
2103 */
2104void TransliteratorTest::TestNFDChainRBT() {
2105 UParseError pe;
2106 UErrorCode ec = U_ZERO_ERROR;
2107 Transliterator* t = Transliterator::createFromRules(
2108 "TEST", "::NFD; aa > Q; a > q;",
2109 UTRANS_FORWARD, pe, ec);
2110 if (t == NULL || U_FAILURE(ec)) {
729e4ab9 2111 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
b75a7d8f
A
2112 return;
2113 }
2114 expect(*t, "aa", "Q");
2115 delete t;
2116
2117 // TEMPORARY TESTS -- BEING DEBUGGED
2118//=- UnicodeString s, s2;
2119//=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2120//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2121//=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2122//=- expect(*t, s, s2);
2123//=- delete t;
2124//=-
2125//=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2126//=- expect(*t, s2, s);
2127//=- delete t;
2128//=-
2129//=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2130//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2131//=- expect(*t, s, s);
2132//=- delete t;
2133
2134// const char* source[] = {
2135// /*
2136// "\\u015Br\\u012Bmad",
2137// "bhagavadg\\u012Bt\\u0101",
2138// "adhy\\u0101ya",
2139// "arjuna",
2140// "vi\\u1E63\\u0101da",
2141// "y\\u014Dga",
2142// "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2143// "uv\\u0101cr\\u0325",
2144// */
2145// "rmk\\u1E63\\u0113t",
2146// //"dharmak\\u1E63\\u0113tr\\u0113",
2147// /*
2148// "kuruk\\u1E63\\u0113tr\\u0113",
2149// "samav\\u0113t\\u0101",
2150// "yuyutsava-\\u1E25",
2151// "m\\u0101mak\\u0101-\\u1E25",
2152// // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2153// "kimakurvata",
2154// "san\\u0304java",
2155// */
2156//
2157// 0
2158// };
2159// const char* expected[] = {
2160// /*
2161// "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2162// "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2163// "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2164// "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2165// "\\u0935\\u093f\\u0937\\u093e\\u0926",
2166// "\\u092f\\u094b\\u0917",
2167// "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2168// "\\u0909\\u0935\\u093E\\u091A\\u0943",
2169// */
2170// "\\u0927",
2171// //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2172// /*
2173// "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2174// "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2175// "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2176// "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2177// // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2178// "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2179// "\\u0938\\u0902\\u091c\\u0935",
2180// */
2181// 0
2182// };
2183// UErrorCode status = U_ZERO_ERROR;
2184// UParseError parseError;
2185// UnicodeString message;
2186// Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2187// Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2188// if(U_FAILURE(status)){
2189// errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2190// errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2191// delete latinToDevToLatin;
2192// delete devToLatinToDev;
2193// return;
2194// }
2195// UnicodeString gotResult;
2196// for(int i= 0; source[i] != 0; i++){
2197// gotResult = source[i];
2198// expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2199// expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2200// }
2201// delete latinToDevToLatin;
2202// delete devToLatinToDev;
2203}
2204
2205/**
2206 * Inverse of "Null" should be "Null". (J21)
2207 */
2208void TransliteratorTest::TestNullInverse() {
2209 UParseError pe;
2210 UErrorCode ec = U_ZERO_ERROR;
2211 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2212 if (t == 0 || U_FAILURE(ec)) {
2213 errln("FAIL: createInstance");
2214 return;
2215 }
2216 Transliterator *u = t->createInverse(ec);
2217 if (u == 0 || U_FAILURE(ec)) {
2218 errln("FAIL: createInverse");
2219 delete t;
2220 return;
2221 }
2222 if (u->getID() != "Null") {
2223 errln("FAIL: Inverse of Null should be Null");
2224 }
2225 delete t;
2226 delete u;
2227}
2228
2229/**
2230 * Check ID of inverse of alias. (J22)
2231 */
2232void TransliteratorTest::TestAliasInverseID() {
2233 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2234 UParseError pe;
2235 UErrorCode ec = U_ZERO_ERROR;
2236 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2237 if (t == 0 || U_FAILURE(ec)) {
729e4ab9 2238 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
b75a7d8f
A
2239 return;
2240 }
2241 Transliterator *u = t->createInverse(ec);
2242 if (u == 0 || U_FAILURE(ec)) {
2243 errln("FAIL: createInverse");
2244 delete t;
2245 return;
2246 }
2247 UnicodeString exp = "Hangul-Latin";
2248 UnicodeString got = u->getID();
2249 if (got != exp) {
2250 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2251 ", expected " + exp);
2252 }
2253 delete t;
2254 delete u;
2255}
2256
2257/**
2258 * Test IDs of inverses of compound transliterators. (J20)
2259 */
2260void TransliteratorTest::TestCompoundInverseID() {
2261 UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2262 UParseError pe;
2263 UErrorCode ec = U_ZERO_ERROR;
2264 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2265 if (t == 0 || U_FAILURE(ec)) {
729e4ab9 2266 dataerrln("FAIL: createInstance - %s", u_errorName(ec));
b75a7d8f
A
2267 return;
2268 }
2269 Transliterator *u = t->createInverse(ec);
2270 if (u == 0 || U_FAILURE(ec)) {
2271 errln("FAIL: createInverse");
2272 delete t;
2273 return;
2274 }
2275 UnicodeString exp = "NFD(NFC);Jamo-Latin";
2276 UnicodeString got = u->getID();
2277 if (got != exp) {
2278 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2279 ", expected " + exp);
2280 }
2281 delete t;
2282 delete u;
2283}
2284
2285/**
2286 * Test undefined variable.
2287
2288 */
2289void TransliteratorTest::TestUndefinedVariable() {
2290 UnicodeString rule = "$initial } a <> \\u1161;";
2291 UParseError pe;
2292 UErrorCode ec = U_ZERO_ERROR;
46f4442e 2293 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
b75a7d8f
A
2294 delete t;
2295 if (U_FAILURE(ec)) {
2296 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2297 u_errorName(ec));
2298 return;
2299 }
2300 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2301 u_errorName(ec));
2302}
2303
2304/**
2305 * Test empty context.
2306 */
2307void TransliteratorTest::TestEmptyContext() {
2308 expect(" { a } > b;", "xay a ", "xby b ");
2309}
2310
2311/**
2312* Test compound filter ID syntax
2313*/
2314void TransliteratorTest::TestCompoundFilterID(void) {
2315 static const char* DATA[] = {
2316 // Col. 1 = ID or rule set (latter must start with #)
2317
2318 // = columns > 1 are null if expect col. 1 to be illegal =
2319
2320 // Col. 2 = direction, "F..." or "R..."
2321 // Col. 3 = source string
2322 // Col. 4 = exp result
2323
2324 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2325 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2326 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2327 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2328 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2329 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2330 NULL,
2331 };
2332
2333 for (int32_t i=0; DATA[i]; i+=4) {
2334 UnicodeString id = CharsToUnicodeString(DATA[i]);
2335 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2336 UTRANS_REVERSE : UTRANS_FORWARD;
2337 UnicodeString source;
2338 UnicodeString exp;
2339 if (DATA[i+2] != NULL) {
2340 source = CharsToUnicodeString(DATA[i+2]);
2341 exp = CharsToUnicodeString(DATA[i+3]);
2342 }
2343 UBool expOk = (DATA[i+1] != NULL);
340931cb 2344 LocalPointer<Transliterator> t;
b75a7d8f
A
2345 UParseError pe;
2346 UErrorCode ec = U_ZERO_ERROR;
2347 if (id.charAt(0) == 0x23/*#*/) {
340931cb 2348 t.adoptInstead(Transliterator::createFromRules("ID", id, direction, pe, ec));
b75a7d8f 2349 } else {
340931cb 2350 t.adoptInstead(Transliterator::createInstance(id, direction, pe, ec));
b75a7d8f 2351 }
340931cb 2352 UBool ok = (t.isValid() && U_SUCCESS(ec));
374ca955 2353 UnicodeString transID;
340931cb 2354 if (t.isValid()) {
374ca955
A
2355 transID = t->getID();
2356 }
2357 else {
2358 transID = UnicodeString("NULL", "");
2359 }
b75a7d8f 2360 if (ok == expOk) {
374ca955 2361 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
b75a7d8f
A
2362 u_errorName(ec));
2363 if (source.length() != 0) {
2364 expect(*t, source, exp);
2365 }
b75a7d8f 2366 } else {
729e4ab9 2367 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
b75a7d8f
A
2368 u_errorName(ec));
2369 }
2370 }
2371}
2372
2373/**
2374 * Test new property set syntax
2375 */
2376void TransliteratorTest::TestPropertySet() {
46f4442e 2377 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
b75a7d8f
A
2378 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379 "[ a stitch ]\n[ in time ]\r[ saves 9]");
2380}
2381
2382/**
2383 * Test various failure points of the new 2.0 engine.
2384 */
2385void TransliteratorTest::TestNewEngine() {
2386 UParseError pe;
2387 UErrorCode ec = U_ZERO_ERROR;
2388 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2389 if (t == 0 || U_FAILURE(ec)) {
729e4ab9 2390 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
b75a7d8f
A
2391 return;
2392 }
2393 // Katakana should be untouched
2394 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2395 CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2396
2397 delete t;
2398
2399#if 1
2400 // This test will only work if Transliterator.ROLLBACK is
2401 // true. Otherwise, this test will fail, revealing a
2402 // limitation of global filters in incremental mode.
2403 Transliterator *a =
374ca955 2404 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
b75a7d8f 2405 Transliterator *A =
374ca955 2406 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
b75a7d8f
A
2407 if (U_FAILURE(ec)) {
2408 delete a;
2409 delete A;
2410 return;
2411 }
2412
2413 Transliterator* array[3];
2414 array[0] = a;
2415 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2416 array[2] = A;
2417 if (U_FAILURE(ec)) {
2418 errln("FAIL: createInstance NFD");
2419 delete a;
2420 delete A;
2421 delete array[1];
2422 return;
2423 }
2424
2425 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2426 if (U_FAILURE(ec)) {
2427 errln("FAIL: UnicodeSet constructor");
2428 delete a;
2429 delete A;
2430 delete array[1];
2431 delete t;
2432 return;
2433 }
2434
2435 expect(*t, "aAaA", "bAbA");
374ca955
A
2436
2437 assertTrue("countElements", t->countElements() == 3);
2438 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2439 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2440 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2441 assertSuccess("getElement", ec);
2442
b75a7d8f
A
2443 delete a;
2444 delete A;
2445 delete array[1];
2446 delete t;
2447#endif
2448
2449 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450 "a",
2451 "ax");
2452
2453 UnicodeString gr = CharsToUnicodeString(
2454 "$ddot = \\u0308 ;"
2455 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456 "$rough = \\u0314 ;"
2457 "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458 "\\u03b1 <> a ;"
2459 "$rough <> h ;");
2460
2461 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462}
2463
2464/**
2465 * Test quantified segment behavior. We want:
2466 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2467 */
2468void TransliteratorTest::TestQuantifiedSegment(void) {
2469 // The normal case
2470 expect("([abc]+) > x $1 x;", "cba", "xcbax");
2471
2472 // The tricky case; the quantifier is around the segment
2473 expect("([abc])+ > x $1 x;", "cba", "xax");
2474
2475 // Tricky case in reverse direction
2476 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2477
2478 // Check post-context segment
2479 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2480
2481 // Test toRule/toPattern for non-quantified segment.
2482 // Careful with spacing here.
2483 UnicodeString r("([a-c]){q} > x $1 x;");
2484 UParseError pe;
2485 UErrorCode ec = U_ZERO_ERROR;
2486 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2487 if (U_FAILURE(ec)) {
2488 errln("FAIL: createFromRules");
2489 delete t;
2490 return;
2491 }
2492 UnicodeString rr;
2493 t->toRules(rr, TRUE);
2494 if (r != rr) {
2495 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496 } else {
2497 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2498 }
2499 delete t;
2500
2501 // Test toRule/toPattern for quantified segment.
2502 // Careful with spacing here.
2503 r = "([a-c])+{q} > x $1 x;";
2504 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2505 if (U_FAILURE(ec)) {
2506 errln("FAIL: createFromRules");
2507 delete t;
2508 return;
2509 }
2510 t->toRules(rr, TRUE);
2511 if (r != rr) {
2512 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513 } else {
2514 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2515 }
2516 delete t;
2517}
2518
2519//======================================================================
2520// Ram's tests
2521//======================================================================
2522void TransliteratorTest::TestDevanagariLatinRT(){
2523 const int MAX_LEN= 52;
2524 const char* const source[MAX_LEN] = {
2525 "bh\\u0101rata",
2526 "kra",
2527 "k\\u1E63a",
2528 "khra",
2529 "gra",
2530 "\\u1E45ra",
2531 "cra",
2532 "chra",
2533 "j\\u00F1a",
2534 "jhra",
2535 "\\u00F1ra",
2536 "\\u1E6Dya",
2537 "\\u1E6Dhra",
2538 "\\u1E0Dya",
2539 //"r\\u0323ya", // \u095c is not valid in Devanagari
2540 "\\u1E0Dhya",
2541 "\\u1E5Bhra",
2542 "\\u1E47ra",
2543 "tta",
2544 "thra",
2545 "dda",
2546 "dhra",
2547 "nna",
2548 "pra",
2549 "phra",
2550 "bra",
2551 "bhra",
2552 "mra",
2553 "\\u1E49ra",
2554 //"l\\u0331ra",
2555 "yra",
2556 "\\u1E8Fra",
2557 //"l-",
2558 "vra",
2559 "\\u015Bra",
2560 "\\u1E63ra",
2561 "sra",
2562 "hma",
2563 "\\u1E6D\\u1E6Da",
2564 "\\u1E6D\\u1E6Dha",
2565 "\\u1E6Dh\\u1E6Dha",
2566 "\\u1E0D\\u1E0Da",
2567 "\\u1E0D\\u1E0Dha",
2568 "\\u1E6Dya",
2569 "\\u1E6Dhya",
2570 "\\u1E0Dya",
2571 "\\u1E0Dhya",
2572 // Not roundtrippable --
2573 // \\u0939\\u094d\\u094d\\u092E - hma
2574 // \\u0939\\u094d\\u092E - hma
2575 // CharsToUnicodeString("hma"),
2576 "hya",
2577 "\\u015Br\\u0325",
2578 "\\u015Bca",
2579 "\\u0115",
2580 "san\\u0304j\\u012Bb s\\u0113nagupta",
2581 "\\u0101nand vaddir\\u0101ju",
2582 "\\u0101",
2583 "a"
2584 };
2585 const char* const expected[MAX_LEN] = {
2586 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
2587 "\\u0915\\u094D\\u0930", /* kra */
2588 "\\u0915\\u094D\\u0937", /* ks\\u0323a */
2589 "\\u0916\\u094D\\u0930", /* khra */
2590 "\\u0917\\u094D\\u0930", /* gra */
2591 "\\u0919\\u094D\\u0930", /* n\\u0307ra */
2592 "\\u091A\\u094D\\u0930", /* cra */
2593 "\\u091B\\u094D\\u0930", /* chra */
2594 "\\u091C\\u094D\\u091E", /* jn\\u0303a */
2595 "\\u091D\\u094D\\u0930", /* jhra */
2596 "\\u091E\\u094D\\u0930", /* n\\u0303ra */
2597 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2598 "\\u0920\\u094D\\u0930", /* t\\u0323hra */
2599 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2600 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
2601 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2602 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
2603 "\\u0923\\u094D\\u0930", /* n\\u0323ra */
2604 "\\u0924\\u094D\\u0924", /* tta */
2605 "\\u0925\\u094D\\u0930", /* thra */
2606 "\\u0926\\u094D\\u0926", /* dda */
2607 "\\u0927\\u094D\\u0930", /* dhra */
2608 "\\u0928\\u094D\\u0928", /* nna */
2609 "\\u092A\\u094D\\u0930", /* pra */
2610 "\\u092B\\u094D\\u0930", /* phra */
2611 "\\u092C\\u094D\\u0930", /* bra */
2612 "\\u092D\\u094D\\u0930", /* bhra */
2613 "\\u092E\\u094D\\u0930", /* mra */
2614 "\\u0929\\u094D\\u0930", /* n\\u0331ra */
2615 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */
2616 "\\u092F\\u094D\\u0930", /* yra */
2617 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
2618 //"l-",
2619 "\\u0935\\u094D\\u0930", /* vra */
2620 "\\u0936\\u094D\\u0930", /* s\\u0301ra */
2621 "\\u0937\\u094D\\u0930", /* s\\u0323ra */
2622 "\\u0938\\u094D\\u0930", /* sra */
2623 "\\u0939\\u094d\\u092E", /* hma */
2624 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
2625 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
2626 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
2627 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
2628 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
2629 "\\u091F\\u094D\\u092F", /* t\\u0323ya */
2630 "\\u0920\\u094D\\u092F", /* t\\u0323hya */
2631 "\\u0921\\u094D\\u092F", /* d\\u0323ya */
2632 "\\u0922\\u094D\\u092F", /* d\\u0323hya */
2633 // "hma", /* hma */
2634 "\\u0939\\u094D\\u092F", /* hya */
2635 "\\u0936\\u0943", /* s\\u0301r\\u0325a */
2636 "\\u0936\\u094D\\u091A", /* s\\u0301ca */
2637 "\\u090d", /* e\\u0306 */
2638 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640 "\\u0906",
2641 "\\u0905",
2642 };
2643 UErrorCode status = U_ZERO_ERROR;
2644 UParseError parseError;
2645 UnicodeString message;
2646 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2647 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2648 if(U_FAILURE(status)){
729e4ab9
A
2649 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2650 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
b75a7d8f
A
2651 return;
2652 }
2653 UnicodeString gotResult;
2654 for(int i= 0; i<MAX_LEN; i++){
2655 gotResult = source[i];
2656 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2657 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2658 }
2659 delete latinToDev;
2660 delete devToLatin;
2661}
2662
2663void TransliteratorTest::TestTeluguLatinRT(){
2664 const int MAX_LEN=10;
2665 const char* const source[MAX_LEN] = {
2666 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
2667 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
2668 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
2669 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
2670 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
2671 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
2672 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
2673 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
2674 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
2675 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
2676 };
2677
2678 const char* const expected[MAX_LEN] = {
2679 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689 };
2690
2691 UErrorCode status = U_ZERO_ERROR;
2692 UParseError parseError;
2693 UnicodeString message;
2694 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2695 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2696 if(U_FAILURE(status)){
729e4ab9
A
2697 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2698 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
b75a7d8f
A
2699 return;
2700 }
2701 UnicodeString gotResult;
2702 for(int i= 0; i<MAX_LEN; i++){
2703 gotResult = source[i];
2704 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2705 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2706 }
2707 delete latinToDev;
2708 delete devToLatin;
2709}
2710
2711void TransliteratorTest::TestSanskritLatinRT(){
2712 const int MAX_LEN =16;
2713 const char* const source[MAX_LEN] = {
2714 "rmk\\u1E63\\u0113t",
2715 "\\u015Br\\u012Bmad",
2716 "bhagavadg\\u012Bt\\u0101",
2717 "adhy\\u0101ya",
2718 "arjuna",
2719 "vi\\u1E63\\u0101da",
2720 "y\\u014Dga",
2721 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722 "uv\\u0101cr\\u0325",
2723 "dharmak\\u1E63\\u0113tr\\u0113",
2724 "kuruk\\u1E63\\u0113tr\\u0113",
2725 "samav\\u0113t\\u0101",
2726 "yuyutsava\\u1E25",
2727 "m\\u0101mak\\u0101\\u1E25",
2728 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729 "kimakurvata",
2730 "san\\u0304java",
2731 };
2732 const char* const expected[MAX_LEN] = {
2733 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739 "\\u092f\\u094b\\u0917",
2740 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749 "\\u0938\\u0902\\u091c\\u0935",
2750 };
2751 UErrorCode status = U_ZERO_ERROR;
2752 UParseError parseError;
2753 UnicodeString message;
2754 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2755 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2756 if(U_FAILURE(status)){
729e4ab9
A
2757 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2758 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
b75a7d8f
A
2759 return;
2760 }
2761 UnicodeString gotResult;
2762 for(int i= 0; i<MAX_LEN; i++){
2763 gotResult = source[i];
2764 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2765 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2766 }
2767 delete latinToDev;
2768 delete devToLatin;
2769}
2770
2771
2772void TransliteratorTest::TestCompoundLatinRT(){
2773 const char* const source[] = {
2774 "rmk\\u1E63\\u0113t",
2775 "\\u015Br\\u012Bmad",
2776 "bhagavadg\\u012Bt\\u0101",
2777 "adhy\\u0101ya",
2778 "arjuna",
2779 "vi\\u1E63\\u0101da",
2780 "y\\u014Dga",
2781 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782 "uv\\u0101cr\\u0325",
2783 "dharmak\\u1E63\\u0113tr\\u0113",
2784 "kuruk\\u1E63\\u0113tr\\u0113",
2785 "samav\\u0113t\\u0101",
2786 "yuyutsava\\u1E25",
2787 "m\\u0101mak\\u0101\\u1E25",
2788 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789 "kimakurvata",
2790 "san\\u0304java"
2791 };
2ca993e8 2792 const int MAX_LEN = UPRV_LENGTHOF(source);
b75a7d8f
A
2793 const char* const expected[MAX_LEN] = {
2794 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799 "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800 "\\u092f\\u094b\\u0917",
2801 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802 "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810 "\\u0938\\u0902\\u091c\\u0935"
2811 };
2ca993e8 2812 if(MAX_LEN != UPRV_LENGTHOF(expected)) {
b75a7d8f
A
2813 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814 return;
2815 }
2816
2817 UErrorCode status = U_ZERO_ERROR;
2818 UParseError parseError;
2819 UnicodeString message;
2820 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2821 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2822 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2823 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2824
2825 if(U_FAILURE(status)){
729e4ab9
A
2826 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
2827 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
b75a7d8f
A
2828 return;
2829 }
2830 UnicodeString gotResult;
2831 for(int i= 0; i<MAX_LEN; i++){
2832 gotResult = source[i];
2833 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2834 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2836
2837 }
2838 delete(latinToDevToLatin);
2839 delete(devToLatinToDev);
2840 delete(devToTelToDev);
2841 delete(latinToTelToLatin);
2842}
2843
2844/**
2845 * Test Gurmukhi-Devanagari Tippi and Bindi
2846 */
2847void TransliteratorTest::TestGurmukhiDevanagari(){
2848 // the rule says:
2849 // (\u0902) (when preceded by vowel) ---> (\u0A02)
2850 // (\u0902) (when preceded by consonant) ---> (\u0A70)
2851 UErrorCode status = U_ZERO_ERROR;
46f4442e
A
2852 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2853 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
b75a7d8f
A
2854 UParseError parseError;
2855
2856 UnicodeSetIterator vIter(vowel);
2857 UnicodeSetIterator nvIter(non_vowel);
2858 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2859 if(U_FAILURE(status)) {
729e4ab9 2860 dataerrln("Error creating transliterator %s", u_errorName(status));
b75a7d8f
A
2861 delete trans;
2862 return;
2863 }
46f4442e
A
2864 UnicodeString src (" \\u0902", -1, US_INV);
2865 UnicodeString expected(" \\u0A02", -1, US_INV);
b75a7d8f
A
2866 src = src.unescape();
2867 expected= expected.unescape();
2868
2869 while(vIter.next()){
2870 src.setCharAt(0,(UChar) vIter.getCodepoint());
2871 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2872 expect(*trans,src,expected);
2873 }
2874
2875 expected.setCharAt(1,0x0A70);
2876 while(nvIter.next()){
2877 //src.setCharAt(0,(char) nvIter.codepoint);
2878 src.setCharAt(0,(UChar)nvIter.getCodepoint());
2879 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2880 expect(*trans,src,expected);
2881 }
2882 delete trans;
2883}
2884/**
2885 * Test instantiation from a locale.
2886 */
2887void TransliteratorTest::TestLocaleInstantiation(void) {
2888 UParseError pe;
2889 UErrorCode ec = U_ZERO_ERROR;
2890 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2891 if (U_FAILURE(ec)) {
729e4ab9 2892 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
b75a7d8f
A
2893 delete t;
2894 return;
2895 }
2896 expect(*t, CharsToUnicodeString("\\u0430"), "a");
2897 delete t;
2898
2899 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2900 if (U_FAILURE(ec)) {
2901 errln("FAIL: createInstance(en-el)");
2902 delete t;
2903 return;
2904 }
2905 expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2906 delete t;
2907}
2908
2909/**
2910 * Test title case handling of accent (should ignore accents)
2911 */
2912void TransliteratorTest::TestTitleAccents(void) {
2913 UParseError pe;
2914 UErrorCode ec = U_ZERO_ERROR;
2915 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2916 if (U_FAILURE(ec)) {
2917 errln("FAIL: createInstance(Title)");
2918 delete t;
2919 return;
2920 }
2921 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2922 delete t;
2923}
2924
2925/**
2926 * Basic test of a locale resource based rule.
2927 */
2928void TransliteratorTest::TestLocaleResource() {
2929 const char* DATA[] = {
2930 // id from to
2931 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
2932 "Latin-el", "b", "\\u03bc\\u03c0",
2933 "Latin-Greek", "b", "\\u03B2",
2934 "Greek-Latin/UNGEGN", "\\u03B2", "v",
2935 "el-Latin", "\\u03B2", "v",
2936 "Greek-Latin", "\\u03B2", "b",
2937 };
2ca993e8 2938 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
2939 for (int32_t i=0; i<DATA_length; i+=3) {
2940 UParseError pe;
2941 UErrorCode ec = U_ZERO_ERROR;
2942 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2943 if (U_FAILURE(ec)) {
729e4ab9 2944 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
b75a7d8f
A
2945 delete t;
2946 continue;
2947 }
2948 expect(*t, CharsToUnicodeString(DATA[i+1]),
2949 CharsToUnicodeString(DATA[i+2]));
2950 delete t;
2951 }
2952}
2953
2954/**
2955 * Make sure parse errors reference the right line.
2956 */
2957void TransliteratorTest::TestParseError() {
46f4442e 2958 static const char* rule =
b75a7d8f
A
2959 "a > b;\n"
2960 "# more stuff\n"
2961 "d << b;";
2962 UErrorCode ec = U_ZERO_ERROR;
2963 UParseError pe;
2964 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2965 delete t;
2966 if (U_FAILURE(ec)) {
2967 UnicodeString err(pe.preContext);
2968 err.append((UChar)124/*|*/).append(pe.postContext);
2969 if (err.indexOf("d << b") >= 0) {
2970 logln("Ok: " + err);
2971 } else {
2972 errln("FAIL: " + err);
2973 }
b75a7d8f 2974 }
46f4442e
A
2975 else {
2976 errln("FAIL: no syntax error");
2977 }
2978 static const char* maskingRule =
2979 "a>x;\n"
2980 "# more stuff\n"
2981 "ab>y;";
2982 ec = U_ZERO_ERROR;
2983 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2984 if (ec != U_RULE_MASK_ERROR) {
2985 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2986 }
2987 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2988 errln("FAIL: did not get expected precontext");
2989 }
2990 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2991 errln("FAIL: did not get expected postcontext");
2992 }
b75a7d8f
A
2993}
2994
2995/**
2996 * Make sure sets on output are disallowed.
2997 */
2998void TransliteratorTest::TestOutputSet() {
2999 UnicodeString rule = "$set = [a-cm-n]; b > $set;";
3000 UErrorCode ec = U_ZERO_ERROR;
3001 UParseError pe;
3002 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3003 delete t;
3004 if (U_FAILURE(ec)) {
3005 UnicodeString err(pe.preContext);
3006 err.append((UChar)124/*|*/).append(pe.postContext);
3007 logln("Ok: " + err);
3008 return;
3009 }
3010 errln("FAIL: No syntax error");
3011}
3012
3013/**
3014 * Test the use variable range pragma, making sure that use of
3015 * variable range characters is detected and flagged as an error.
3016 */
3017void TransliteratorTest::TestVariableRange() {
3018 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019 UErrorCode ec = U_ZERO_ERROR;
3020 UParseError pe;
3021 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3022 delete t;
3023 if (U_FAILURE(ec)) {
3024 UnicodeString err(pe.preContext);
3025 err.append((UChar)124/*|*/).append(pe.postContext);
3026 logln("Ok: " + err);
3027 return;
3028 }
3029 errln("FAIL: No syntax error");
3030}
3031
3032/**
3033 * Test invalid post context error handling
3034 */
3035void TransliteratorTest::TestInvalidPostContext() {
3036 UnicodeString rule = "a}b{c>d;";
3037 UErrorCode ec = U_ZERO_ERROR;
3038 UParseError pe;
3039 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3040 delete t;
3041 if (U_FAILURE(ec)) {
3042 UnicodeString err(pe.preContext);
3043 err.append((UChar)124/*|*/).append(pe.postContext);
3044 if (err.indexOf("a}b{c") >= 0) {
3045 logln("Ok: " + err);
3046 } else {
3047 errln("FAIL: " + err);
3048 }
3049 return;
3050 }
3051 errln("FAIL: No syntax error");
3052}
3053
3054/**
3055 * Test ID form variants
3056 */
3057void TransliteratorTest::TestIDForms() {
3058 const char* DATA[] = {
3059 "NFC", NULL, "NFD",
3060 "nfd", NULL, "NFC", // make sure case is ignored
3061 "Any-NFKD", NULL, "Any-NFKC",
3062 "Null", NULL, "Null",
3063 "-nfkc", "nfkc", "NFKD",
3064 "-nfkc/", "nfkc", "NFKD",
3065 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3066 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068 "Source-", NULL, NULL,
3069 "Source/Variant-", NULL, NULL,
3070 "Source-/Variant", NULL, NULL,
3071 "/Variant", NULL, NULL,
3072 "/Variant-", NULL, NULL,
3073 "-/Variant", NULL, NULL,
3074 "-/", NULL, NULL,
3075 "-", NULL, NULL,
3076 "/", NULL, NULL,
3077 };
2ca993e8 3078 const int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
3079
3080 for (int32_t i=0; i<DATA_length; i+=3) {
3081 const char* ID = DATA[i];
3082 const char* expID = DATA[i+1];
3083 const char* expInvID = DATA[i+2];
3084 UBool expValid = (expInvID != NULL);
3085 if (expID == NULL) {
3086 expID = ID;
3087 }
3088 UParseError pe;
3089 UErrorCode ec = U_ZERO_ERROR;
3090 Transliterator *t =
3091 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3092 if (U_FAILURE(ec)) {
3093 if (!expValid) {
3094 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3095 } else {
729e4ab9 3096 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
b75a7d8f
A
3097 }
3098 delete t;
3099 continue;
3100 }
3101 Transliterator *u = t->createInverse(ec);
3102 if (U_FAILURE(ec)) {
3103 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3104 delete t;
3105 delete u;
3106 continue;
3107 }
3108 if (t->getID() == expID &&
3109 u->getID() == expInvID) {
3110 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3111 } else {
3112 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3113 t->getID() + " x getInverse() => " + u->getID() +
3114 ", expected " + expInvID);
3115 }
3116 delete t;
3117 delete u;
3118 }
3119}
3120
3121static const UChar SPACE[] = {32,0};
3122static const UChar NEWLINE[] = {10,0};
3123static const UChar RETURN[] = {13,0};
3124static const UChar EMPTY[] = {0};
3125
3126void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3127 const UnicodeString& testRulesForward) {
3128 UnicodeString rules2; t2.toRules(rules2, TRUE);
3129 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130 rules2.findAndReplace(SPACE, EMPTY);
3131 rules2.findAndReplace(NEWLINE, EMPTY);
3132 rules2.findAndReplace(RETURN, EMPTY);
3133
3134 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3135
3136 if (rules2 != testRules) {
3137 errln(label);
3138 logln((UnicodeString)"GENERATED RULES: " + rules2);
3139 logln((UnicodeString)"SHOULD BE: " + testRulesForward);
3140 }
3141}
3142
3143/**
3144 * Mark's toRules test.
3145 */
3146void TransliteratorTest::TestToRulesMark() {
3147 const char* testRules =
3148 "::[[:Latin:][:Mark:]];"
3149 "::NFKD (NFC);"
3150 "::Lower (Lower);"
3151 "a <> \\u03B1;" // alpha
3152 "::NFKC (NFD);"
3153 "::Upper (Lower);"
3154 "::Lower ();"
3155 "::([[:Greek:][:Mark:]]);"
3156 ;
3157 const char* testRulesForward =
3158 "::[[:Latin:][:Mark:]];"
3159 "::NFKD(NFC);"
3160 "::Lower(Lower);"
3161 "a > \\u03B1;"
3162 "::NFKC(NFD);"
3163 "::Upper (Lower);"
3164 "::Lower ();"
3165 ;
3166 const char* testRulesBackward =
3167 "::[[:Greek:][:Mark:]];"
3168 "::Lower (Upper);"
3169 "::NFD(NFKC);"
3170 "\\u03B1 > a;"
3171 "::Lower(Lower);"
3172 "::NFC(NFKD);"
3173 ;
3174 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3175 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3176
3177 UParseError pe;
3178 UErrorCode ec = U_ZERO_ERROR;
340931cb
A
3179 LocalPointer<Transliterator> t2(
3180 Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec));
3181 LocalPointer<Transliterator> t3(
3182 Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec));
b75a7d8f
A
3183
3184 if (U_FAILURE(ec)) {
729e4ab9 3185 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
b75a7d8f
A
3186 return;
3187 }
3188
3189 expect(*t2, source, target);
3190 expect(*t3, target, source);
3191
46f4442e
A
3192 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3193 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
b75a7d8f
A
3194}
3195
3196/**
3197 * Test Escape and Unescape transliterators.
3198 */
3199void TransliteratorTest::TestEscape() {
3200 UParseError pe;
3201 UErrorCode ec;
3202 Transliterator *t;
3203
3204 ec = U_ZERO_ERROR;
3205 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3206 if (U_FAILURE(ec)) {
3207 errln((UnicodeString)"FAIL: createInstance");
3208 } else {
3209 expect(*t,
46f4442e 3210 UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
b75a7d8f
A
3211 "@12Q");
3212 }
3213 delete t;
3214
3215 ec = U_ZERO_ERROR;
3216 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3217 if (U_FAILURE(ec)) {
3218 errln((UnicodeString)"FAIL: createInstance");
3219 } else {
3220 expect(*t,
3221 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
46f4442e 3222 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
b75a7d8f
A
3223 }
3224 delete t;
3225
3226 ec = U_ZERO_ERROR;
3227 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3228 if (U_FAILURE(ec)) {
3229 errln((UnicodeString)"FAIL: createInstance");
3230 } else {
3231 expect(*t,
3232 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
46f4442e 3233 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
b75a7d8f
A
3234 }
3235 delete t;
3236
3237 ec = U_ZERO_ERROR;
3238 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3239 if (U_FAILURE(ec)) {
3240 errln((UnicodeString)"FAIL: createInstance");
3241 } else {
3242 expect(*t,
3243 CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
46f4442e 3244 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
b75a7d8f
A
3245 }
3246 delete t;
3247}
3248
3249
3250void TransliteratorTest::TestAnchorMasking(){
3251 UnicodeString rule ("^a > Q; a > q;");
3252 UErrorCode status= U_ZERO_ERROR;
3253 UParseError parseError;
3254
3255 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3256 if(U_FAILURE(status)){
3257 errln(UnicodeString("FAIL: ") + "ID" +
3258 ".createFromRules() => bad rules" +
3259 /*", parse error " + parseError.code +*/
3260 ", line " + parseError.line +
3261 ", offset " + parseError.offset +
3262 ", context " + prettify(parseError.preContext, TRUE) +
3263 ", rules: " + prettify(rule, TRUE));
3264 }
3265 delete t;
3266}
3267
3268/**
3269 * Make sure display names of variants look reasonable.
3270 */
3271void TransliteratorTest::TestDisplayName() {
3272#if UCONFIG_NO_FORMATTING
3273 logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3274 return;
3275#else
3276 static const char* DATA[] = {
3277 // ID, forward name, reverse name
3278 // Update the text as necessary -- the important thing is
3279 // not the text itself, but how various cases are handled.
3280
3281 // Basic test
3282 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3283
3284 // Variants
3285 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3286
3287 // Target-only IDs
3288 "NFC", "Any to NFC", "Any to NFD",
3289 };
3290
2ca993e8 3291 int32_t DATA_length = UPRV_LENGTHOF(DATA);
b75a7d8f
A
3292
3293 Locale US("en", "US");
3294
3295 for (int32_t i=0; i<DATA_length; i+=3) {
3296 UnicodeString name;
3297 Transliterator::getDisplayName(DATA[i], US, name);
3298 if (name != DATA[i+1]) {
729e4ab9 3299 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
b75a7d8f
A
3300 name + ", expected " + DATA[i+1]);
3301 } else {
3302 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3303 }
3304 UErrorCode ec = U_ZERO_ERROR;
3305 UParseError pe;
3306 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3307 if (U_FAILURE(ec)) {
3308 delete t;
729e4ab9 3309 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
b75a7d8f
A
3310 continue;
3311 }
3312 name = Transliterator::getDisplayName(t->getID(), US, name);
3313 if (name != DATA[i+2]) {
729e4ab9 3314 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
b75a7d8f
A
3315 name + ", expected " + DATA[i+2]);
3316 } else {
3317 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3318 }
3319 delete t;
3320 }
3321#endif
3322}
3323
3324void TransliteratorTest::TestSpecialCases(void) {
3325 const UnicodeString registerRules[] = {
3326 "Any-Dev1", "x > X; y > Y;",
3327 "Any-Dev2", "XY > Z",
3328 "Greek-Latin/FAKE",
3329 CharsToUnicodeString
3330 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3331 "" // END MARKER
3332 };
3333
3334 const UnicodeString testCases[] = {
3335 // NORMALIZATION
3336 // should add more test cases
3337 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3338 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341
3342 // mp -> b BUG
3343 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3344 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345
3346 // check for devanagari bug
3347 "nfd;Dev1;Dev2;nfc", "xy", "Z",
3348
3349 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3350 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3351 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3352
3353 //TODO: enable this test once Titlecase works right
3354 /*
3355 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3357 */
3358 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3360 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3361 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3362
3363 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3364 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365
3366 // FORMS OF S
3367 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3369 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3370 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3371 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3372 CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3373 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3374 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375 // Tatiana bug
3376 // Upper: TAT\\u02B9\\u00C2NA
3377 // Lower: tat\\u02B9\\u00E2na
3378 // Title: Tat\\u02B9\\u00E2na
3379 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3380 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3381 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3385
3386 "" // END MARKER
3387 };
3388
3389 UParseError pos;
3390 int32_t i;
3391 for (i = 0; registerRules[i].length()!=0; i+=2) {
3392 UErrorCode status = U_ZERO_ERROR;
3393
3394 Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3395 registerRules[i+1], UTRANS_FORWARD, pos, status);
3396 if (U_FAILURE(status)) {
729e4ab9 3397 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
b75a7d8f
A
3398 } else {
3399 Transliterator::registerInstance(t);
3400 }
3401 }
3402 for (i = 0; testCases[i].length()!=0; i+=3) {
3403 UErrorCode ec = U_ZERO_ERROR;
3404 UParseError pe;
3405 const UnicodeString& name = testCases[i];
3406 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3407 if (U_FAILURE(ec)) {
729e4ab9 3408 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
b75a7d8f
A
3409 delete t;
3410 continue;
3411 }
3412 const UnicodeString& id = t->getID();
3413 const UnicodeString& source = testCases[i+1];
3414 UnicodeString target;
3415
3416 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3417
3418 if (testCases[i+2].length() > 0) {
3419 target = testCases[i+2];
3420 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3421 Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3422 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3423 Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3424 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3425 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3426 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3427 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3428 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3429 target = source;
3430 target.toLower(Locale::getUS());
3431 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3432 target = source;
3433 target.toUpper(Locale::getUS());
3434 }
3435 if (U_FAILURE(ec)) {
3436 errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3437 continue;
3438 }
3439
3440 expect(*t, source, target);
3441 delete t;
3442 }
3443 for (i = 0; registerRules[i].length()!=0; i+=2) {
3444 Transliterator::unregister(registerRules[i]);
3445 }
3446}
3447
3448char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3449 if (ch <= 0xFFFF) {
374ca955 3450 sprintf(buffer, "\\u%04x", (int)ch);
b75a7d8f 3451 } else {
374ca955 3452 sprintf(buffer, "\\U%08x", (int)ch);
b75a7d8f
A
3453 }
3454 return buffer;
3455}
3456
3457void TransliteratorTest::TestSurrogateCasing (void) {
3458 // check that casing handles surrogates
3459 // titlecase is currently defective
3460 char buffer[20];
3461 UChar buffer2[20];
3462 UChar32 dee;
4388f060 3463 U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
b75a7d8f
A
3464 UnicodeString DEE(u_totitle(dee));
3465 if (DEE != DESERET_DEE) {
3466 err("Fails titlecase of surrogates");
3467 err(Char32ToEscapedChars(dee, buffer));
3468 err(", ");
3469 errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3470 }
3471
3472 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3473 UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3474 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3475 UErrorCode status= U_ZERO_ERROR;
3476
3477 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3478 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3479 errln("Fails: Can't uppercase surrogates.");
3480 }
3481
3482 status= U_ZERO_ERROR;
3483 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3484 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3485 errln("Fails: Can't lowercase surrogates.");
3486 }
3487}
3488
3489static void _trans(Transliterator& t, const UnicodeString& src,
3490 UnicodeString& result) {
3491 result = src;
3492 t.transliterate(result);
3493}
3494
3495static void _trans(const UnicodeString& id, const UnicodeString& src,
3496 UnicodeString& result, UErrorCode ec) {
3497 UParseError pe;
3498 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3499 if (U_SUCCESS(ec)) {
3500 _trans(*t, src, result);
3501 }
3502 delete t;
3503}
3504
3505static UnicodeString _findMatch(const UnicodeString& source,
3506 const UnicodeString* pairs) {
3507 UnicodeString empty;
3508 for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3509 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3510 return pairs[i+1];
3511 }
3512 }
3513 return empty;
3514}
3515
3516// Check to see that incremental gets at least part way through a reasonable string.
3517
3518void TransliteratorTest::TestIncrementalProgress(void) {
3519 UErrorCode ec = U_ZERO_ERROR;
3520 UnicodeString latinTest = "The Quick Brown Fox.";
3521 UnicodeString devaTest;
3522 _trans("Latin-Devanagari", latinTest, devaTest, ec);
3523 UnicodeString kataTest;
3524 _trans("Latin-Katakana", latinTest, kataTest, ec);
3525 if (U_FAILURE(ec)) {
3526 errln("FAIL: Internal error");
3527 return;
3528 }
3529 const UnicodeString tests[] = {
3530 "Any", latinTest,
3531 "Latin", latinTest,
3532 "Halfwidth", latinTest,
3533 "Devanagari", devaTest,
3534 "Katakana", kataTest,
3535 "" // END MARKER
3536 };
3537
3538 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3539 int32_t i = 0, j=0, k=0;
3540 int32_t sources = Transliterator::countAvailableSources();
3541 for (i = 0; i < sources; i++) {
3542 UnicodeString source;
3543 Transliterator::getAvailableSource(i, source);
3544 UnicodeString test = _findMatch(source, tests);
3545 if (test.length() == 0) {
3546 logln((UnicodeString)"Skipping " + source + "-X");
3547 continue;
3548 }
3549 int32_t targets = Transliterator::countAvailableTargets(source);
3550 for (j = 0; j < targets; j++) {
3551 UnicodeString target;
3552 Transliterator::getAvailableTarget(j, source, target);
3553 int32_t variants = Transliterator::countAvailableVariants(source, target);
3554 for (k =0; k< variants; k++) {
3555 UnicodeString variant;
3556 UParseError err;
3557 UErrorCode status = U_ZERO_ERROR;
3558
3559 Transliterator::getAvailableVariant(k, source, target, variant);
3560 UnicodeString id = source + "-" + target + "/" + variant;
374ca955 3561
b75a7d8f
A
3562 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3563 if (U_FAILURE(status)) {
729e4ab9 3564 dataerrln((UnicodeString)"FAIL: Could not create " + id);
b75a7d8f
A
3565 delete t;
3566 continue;
3567 }
3568 status = U_ZERO_ERROR;
3569 CheckIncrementalAux(t, test);
3570
3571 UnicodeString rev;
3572 _trans(*t, test, rev);
3573 Transliterator *inv = t->createInverse(status);
3574 if (U_FAILURE(status)) {
f3c0d7a5
A
3575 // The following are forward-only, it is OK that creating an inverse will not work:
3576 // 1. Devanagari-Arabic
3577 // 2. Any-*/BGN
0f5d89e8 3578 // 2a. Any-*/BGN_1981
f3c0d7a5 3579 // 3. Any-*/UNGEGN
0f5d89e8 3580 // 4. Any-*/MNS
f3c0d7a5
A
3581 // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3582 if ( id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3583 && !(id.startsWith((UnicodeString)"Any-") &&
0f5d89e8 3584 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
f3c0d7a5 3585 )
729e4ab9 3586#if UCONFIG_NO_BREAK_ITERATION
f3c0d7a5 3587 && id.compare((UnicodeString)"Latin-Thai/") != 0
729e4ab9 3588#endif
f3c0d7a5
A
3589 )
3590 {
729e4ab9 3591 errln((UnicodeString)"FAIL: Could not create inverse of " + id);
f3c0d7a5 3592 }
b75a7d8f
A
3593 delete t;
3594 delete inv;
3595 continue;
3596 }
3597 CheckIncrementalAux(inv, rev);
3598 delete t;
3599 delete inv;
3600 }
3601 }
3602 }
3603}
3604
3605void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3606 const UnicodeString& input) {
3607 UErrorCode ec = U_ZERO_ERROR;
3608 UTransPosition pos;
3609 UnicodeString test = input;
3610
3611 pos.contextStart = 0;
3612 pos.contextLimit = input.length();
3613 pos.start = 0;
3614 pos.limit = input.length();
3615
3616 t->transliterate(test, pos, ec);
3617 if (U_FAILURE(ec)) {
3618 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3619 return;
3620 }
3621 UBool gotError = FALSE;
57a6839d 3622 (void)gotError; // Suppress set but not used warning.
b75a7d8f
A
3623
3624 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3625
3626 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3627 errln((UnicodeString)"No Progress, " +
3628 t->getID() + ": " + formatInput(test, input, pos));
3629 gotError = TRUE;
3630 } else {
3631 logln((UnicodeString)"PASS Progress, " +
3632 t->getID() + ": " + formatInput(test, input, pos));
3633 }
3634 t->finishTransliteration(test, pos);
3635 if (pos.start != pos.limit) {
3636 errln((UnicodeString)"Incomplete, " +
3637 t->getID() + ": " + formatInput(test, input, pos));
3638 gotError = TRUE;
3639 }
3640}
3641
3642void TransliteratorTest::TestFunction() {
3643 // Careful with spacing and ';' here: Phrase this exactly
3644 // as toRules() is going to return it. If toRules() changes
3645 // with regard to spacing or ';', then adjust this string.
3646 UnicodeString rule =
3647 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3648
3649 UParseError pe;
3650 UErrorCode ec = U_ZERO_ERROR;
3651 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3652 if (t == NULL) {
729e4ab9 3653 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
b75a7d8f
A
3654 return;
3655 }
3656
3657 UnicodeString r;
3658 t->toRules(r, TRUE);
3659 if (r == rule) {
3660 logln((UnicodeString)"OK: toRules() => " + r);
3661 } else {
3662 errln((UnicodeString)"FAIL: toRules() => " + r +
3663 ", expected " + rule);
3664 }
3665
3666 expect(*t, "The Quick Brown Fox",
46f4442e 3667 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
b75a7d8f
A
3668
3669 delete t;
3670}
3671
3672void TransliteratorTest::TestInvalidBackRef(void) {
3673 UnicodeString rule = ". > $1;";
3674 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3675 UParseError pe;
3676 UErrorCode ec = U_ZERO_ERROR;
3677 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3678 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3679
3680 if (t != NULL) {
3681 errln("FAIL: createFromRules should have returned NULL");
3682 delete t;
3683 }
3684
3685 if (t2 != NULL) {
3686 errln("FAIL: createFromRules should have returned NULL");
3687 delete t2;
3688 }
3689
3690 if (U_SUCCESS(ec)) {
3691 errln("FAIL: Ok: . > $1; => no error");
3692 } else {
3693 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3694 }
3695}
3696
3697void TransliteratorTest::TestMulticharStringSet() {
3698 // Basic testing
3699 const char* rule =
3700 " [{aa}] > x;"
3701 " a > y;"
3702 " [b{bc}] > z;"
3703 "[{gd}] { e > q;"
3704 " e } [{fg}] > r;" ;
3705
3706 UParseError pe;
3707 UErrorCode ec = U_ZERO_ERROR;
3708 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3709 if (t == NULL || U_FAILURE(ec)) {
3710 delete t;
3711 errln("FAIL: createFromRules failed");
3712 return;
3713 }
3714
3715 expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3716 "y x yz z d gd de gdq gdqfg ddrfg");
3717 delete t;
3718
3719 // Overlapped string test. Make sure that when multiple
3720 // strings can match that the longest one is matched.
3721 rule =
3722 " [a {ab} {abc}] > x;"
3723 " b > y;"
3724 " c > z;"
3725 " q [t {st} {rst}] { e > p;" ;
3726
3727 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3728 if (t == NULL || U_FAILURE(ec)) {
3729 delete t;
3730 errln("FAIL: createFromRules failed");
3731 return;
3732 }
3733
3734 expect(*t, "a ab abc qte qste qrste",
3735 "x x x qtp qstp qrstp");
3736 delete t;
3737}
3738
3739// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3740// BEGIN TestUserFunction support factory
3741
3742Transliterator* _TUFF[4];
3743UnicodeString* _TUFID[4];
3744
374ca955 3745static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
b75a7d8f
A
3746 Transliterator::Token context) {
3747 return _TUFF[context.integer]->clone();
3748}
3749
3750static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3751 _TUFF[n] = t;
3752 _TUFID[n] = new UnicodeString(ID);
3753 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3754}
3755
3756static void _TUFUnreg(int32_t n) {
3757 if (_TUFF[n] != NULL) {
3758 Transliterator::unregister(*_TUFID[n]);
3759 delete _TUFF[n];
3760 delete _TUFID[n];
3761 }
3762}
3763
3764// END TestUserFunction support factory
3765// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3766
3767/**
3768 * Test that user-registered transliterators can be used under function
3769 * syntax.
3770 */
3771void TransliteratorTest::TestUserFunction() {
3772
3773 Transliterator* t;
3774 UParseError pe;
3775 UErrorCode ec = U_ZERO_ERROR;
3776
3777 // Setup our factory
3778 int32_t i;
3779 for (i=0; i<4; ++i) {
3780 _TUFF[i] = NULL;
3781 }
3782
3783 // There's no need to register inverses if we don't use them
3784 t = Transliterator::createFromRules("gif",
46f4442e 3785 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
b75a7d8f
A
3786 UTRANS_FORWARD, pe, ec);
3787 if (t == NULL || U_FAILURE(ec)) {
729e4ab9 3788 dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
b75a7d8f
A
3789 return;
3790 }
3791 _TUFReg("Any-gif", t, 0);
3792
3793 t = Transliterator::createFromRules("RemoveCurly",
46f4442e 3794 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
b75a7d8f
A
3795 UTRANS_FORWARD, pe, ec);
3796 if (t == NULL || U_FAILURE(ec)) {
3797 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3798 goto FAIL;
3799 }
46f4442e 3800 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
b75a7d8f
A
3801 _TUFReg("Any-RemoveCurly", t, 1);
3802
3803 logln("Trying &hex");
3804 t = Transliterator::createFromRules("hex2",
3805 "(.) > &hex($1);",
3806 UTRANS_FORWARD, pe, ec);
3807 if (t == NULL || U_FAILURE(ec)) {
3808 errln("FAIL: createFromRules");
3809 goto FAIL;
3810 }
3811 logln("Registering");
3812 _TUFReg("Any-hex2", t, 2);
3813 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3814 if (t == NULL || U_FAILURE(ec)) {
3815 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3816 goto FAIL;
3817 }
46f4442e 3818 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
b75a7d8f
A
3819 delete t;
3820
3821 logln("Trying &gif");
3822 t = Transliterator::createFromRules("gif2",
3823 "(.) > &Gif(&Hex2($1));",
3824 UTRANS_FORWARD, pe, ec);
3825 if (t == NULL || U_FAILURE(ec)) {
3826 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3827 goto FAIL;
3828 }
3829 logln("Registering");
3830 _TUFReg("Any-gif2", t, 3);
3831 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3832 if (t == NULL || U_FAILURE(ec)) {
3833 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3834 goto FAIL;
3835 }
3836 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3837 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3838 delete t;
3839
3840 // Test that filters are allowed after &
3841 t = Transliterator::createFromRules("test",
3842 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3843 UTRANS_FORWARD, pe, ec);
3844 if (t == NULL || U_FAILURE(ec)) {
3845 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3846 goto FAIL;
3847 }
3848 expect(*t, "abc",
46f4442e 3849 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
b75a7d8f
A
3850 delete t;
3851
3852 FAIL:
3853 for (i=0; i<4; ++i) {
3854 _TUFUnreg(i);
3855 }
3856}
3857
3858/**
3859 * Test the Any-X transliterators.
3860 */
3861void TransliteratorTest::TestAnyX(void) {
3862 UParseError parseError;
3863 UErrorCode status = U_ZERO_ERROR;
3864 Transliterator* anyLatin =
3865 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3866 if (anyLatin==0) {
729e4ab9 3867 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
b75a7d8f
A
3868 delete anyLatin;
3869 return;
3870 }
3871
3872 expect(*anyLatin,
3873 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3874 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3875
3876 delete anyLatin;
f3c0d7a5
A
3877
3878 status = U_ZERO_ERROR;
3879 Transliterator* anyASCII =
3880 Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD, parseError, status);
3881 if (U_FAILURE(status) || anyASCII==0) {
3882 dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status));
3883 delete anyASCII;
3884 return;
3885 }
3886
3887 expect(*anyASCII,
3888 CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
3889 CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
3890
3891 delete anyASCII;
b75a7d8f
A
3892}
3893
729e4ab9
A
3894/**
3895 * Test Any-X transliterators with sample letters from all scripts.
3896 */
3897void TransliteratorTest::TestAny(void) {
3898 UErrorCode status = U_ZERO_ERROR;
3899 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3900 // function call parameters going on in this test.
3901 UnicodeSet alphabetic("[:alphabetic:]", status);
3902 if (U_FAILURE(status)) {
3903 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3904 return;
3905 }
3906 alphabetic.freeze();
3907
3908 UnicodeString testString;
3909 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3910 const char *scriptName = uscript_getShortName((UScriptCode)i);
3911 if (scriptName == NULL) {
3912 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3913 return;
3914 }
3915
3916 UnicodeSet sample;
3917 sample.applyPropertyAlias("script", scriptName, status);
3918 if (U_FAILURE(status)) {
3919 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3920 return;
3921 }
3922 sample.retainAll(alphabetic);
3923 for (int32_t count=0; count<5; count++) {
3924 UChar32 c = sample.charAt(count);
3925 if (c == -1) {
3926 break;
3927 }
3928 testString.append(c);
3929 }
3930 }
3931
3932 UParseError parseError;
3933 Transliterator* anyLatin =
3934 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3935 if (U_FAILURE(status)) {
4388f060 3936 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
729e4ab9
A
3937 return;
3938 }
3939
3940 logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3941 anyLatin->transliterate(testString);
3942 logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3943 delete anyLatin;
3944}
3945
3946
b75a7d8f
A
3947/**
3948 * Test the source and target set API. These are only implemented
3949 * for RBT and CompoundTransliterator at this time.
3950 */
3951void TransliteratorTest::TestSourceTargetSet() {
3952 UErrorCode ec = U_ZERO_ERROR;
3953
3954 // Rules
3955 const char* r =
3956 "a > b; "
3957 "r [x{lu}] > q;";
3958
3959 // Expected source
3960 UnicodeSet expSrc("[arx{lu}]", ec);
3961
3962 // Expected target
3963 UnicodeSet expTrg("[bq]", ec);
3964
3965 UParseError pe;
3966 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3967
3968 if (U_FAILURE(ec)) {
3969 delete t;
3970 errln("FAIL: Couldn't set up test");
3971 return;
3972 }
3973
3974 UnicodeSet src; t->getSourceSet(src);
3975 UnicodeSet trg; t->getTargetSet(trg);
3976
3977 if (src == expSrc && trg == expTrg) {
3978 UnicodeString a, b;
3979 logln((UnicodeString)"Ok: " +
3980 r + " => source = " + src.toPattern(a, TRUE) +
3981 ", target = " + trg.toPattern(b, TRUE));
3982 } else {
3983 UnicodeString a, b, c, d;
3984 errln((UnicodeString)"FAIL: " +
3985 r + " => source = " + src.toPattern(a, TRUE) +
3986 ", expected " + expSrc.toPattern(b, TRUE) +
3987 "; target = " + trg.toPattern(c, TRUE) +
3988 ", expected " + expTrg.toPattern(d, TRUE));
3989 }
3990
3991 delete t;
3992}
3993
3994/**
4388f060 3995 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
b75a7d8f 3996 */
4388f060 3997void TransliteratorTest::TestPatternWhiteSpace() {
b75a7d8f
A
3998 // Rules
3999 const char* r = "a > \\u200E b;";
4000
4001 UErrorCode ec = U_ZERO_ERROR;
4002 UParseError pe;
4003 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
4004
4005 if (U_FAILURE(ec)) {
4006 errln("FAIL: Couldn't set up test");
4007 } else {
4008 expect(*t, "a", "b");
4009 }
4010 delete t;
4011
4012 // UnicodeSet
4013 ec = U_ZERO_ERROR;
4014 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4015
4016 if (U_FAILURE(ec)) {
4017 errln("FAIL: Couldn't set up test");
4018 } else {
4019 if (set.contains(0x200E)) {
4020 errln("FAIL: U+200E not being ignored by UnicodeSet");
4021 }
4022 }
4023}
4024//======================================================================
4025// this method is in TestUScript.java
4026//======================================================================
4027void TransliteratorTest::TestAllCodepoints(){
4028 UScriptCode code= USCRIPT_INVALID_CODE;
4029 char id[256]={'\0'};
4030 char abbr[256]={'\0'};
4031 char newId[256]={'\0'};
4032 char newAbbrId[256]={'\0'};
4033 char oldId[256]={'\0'};
4034 char oldAbbrId[256]={'\0'};
4035
4036 UErrorCode status =U_ZERO_ERROR;
4037 UParseError pe;
4038
4039 for(uint32_t i = 0; i<=0x10ffff; i++){
4040 code = uscript_getScript(i,&status);
4041 if(code == USCRIPT_INVALID_CODE){
4388f060 4042 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
b75a7d8f
A
4043 }
4044 const char* myId = uscript_getName(code);
4045 if(!myId) {
729e4ab9 4046 dataerrln("Valid script code returned NULL name. Check your data!");
b75a7d8f
A
4047 return;
4048 }
4049 uprv_strcpy(id,myId);
4050 uprv_strcpy(abbr,uscript_getShortName(code));
4051
4052 uprv_strcpy(newId,"[:");
4053 uprv_strcat(newId,id);
4054 uprv_strcat(newId,":];NFD");
4055
4056 uprv_strcpy(newAbbrId,"[:");
4057 uprv_strcat(newAbbrId,abbr);
4058 uprv_strcat(newAbbrId,":];NFD");
4059
4060 if(uprv_strcmp(newId,oldId)!=0){
4061 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4062 if(t==NULL || U_FAILURE(status)){
4388f060 4063 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
b75a7d8f
A
4064 }
4065 delete t;
4066 }
4067 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4068 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4069 if(t==NULL || U_FAILURE(status)){
4388f060 4070 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
b75a7d8f
A
4071 }
4072 delete t;
4073 }
4074 uprv_strcpy(oldId,newId);
4075 uprv_strcpy(oldAbbrId, newAbbrId);
4076
4077 }
4078
4079}
4080
/**
 * Create a transliterator from the ID string `id` and report an error when
 * its dynamic class ID differs from the static class ID of class `cls`.
 * A creation failure is reported through dataerrln().  `id` is pasted into a
 * printf-style "%s" argument, so it must be a C string.
 */
#define TEST_TRANSLIT_ID(id, cls) UPRV_BLOCK_MACRO_BEGIN { \
    UErrorCode ec = U_ZERO_ERROR; \
    Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
    if (U_SUCCESS(ec)) { \
        /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
        if (cls::getStaticClassID() != t->getDynamicClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
    } else { \
        dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
    } \
    delete t; \
} UPRV_BLOCK_MACRO_END
b75a7d8f 4094
/**
 * Create a transliterator from the rule string `rule` and report an error
 * when its dynamic class ID differs from the static class ID of class `cls`.
 * `rule` is concatenated with a string literal in the failure message, so it
 * must itself be a string literal.
 */
#define TEST_TRANSLIT_RULE(rule, cls) UPRV_BLOCK_MACRO_BEGIN { \
    UParseError pe; \
    UErrorCode ec = U_ZERO_ERROR; \
    Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
    if (U_SUCCESS(ec)) { \
        /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
        if (cls ::getStaticClassID() != t->getDynamicClassID()) { \
            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
        } \
    } else { \
        errln("FAIL: Couldn't create " rule); \
    } \
    delete t; \
} UPRV_BLOCK_MACRO_END
b75a7d8f
A
4109
// Class-ID boilerplate check: one transliterator is created per listed
// implementation class (by ID, or from rules for RuleBasedTransliterator),
// and the TEST_TRANSLIT_* macros report an error when the object's dynamic
// class ID differs from that class's static class ID.
4110void TransliteratorTest::TestBoilerplate() {
4111 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4112 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4113 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4114 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4115 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4116 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4117 TEST_TRANSLIT_ID("Null", NullTransliterator);
4118 TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4119 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4120 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4121 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4122 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4123 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4124}
4125
4126void TransliteratorTest::TestAlternateSyntax() {
4127 // U+2206 == &
4128 // U+2190 == <
4129 // U+2192 == >
4130 // U+2194 == <>
4131 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4132 "abc",
4133 "xbz");
4134 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4135 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
46f4442e 4136 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
b75a7d8f
A
4137}
4138
73c04bcf
A
// Rule sets shared by TestBeginEnd and TestBeginEndToRules.  Entries are
// referenced by index (from BEGIN_END_TEST_CASES and directly as
// BEGIN_END_RULES[17]).  Rule sets that use ::BEGIN/::END are commented out
// and replaced by "" placeholders so that the remaining indexes stay stable.
4139static const char* BEGIN_END_RULES[] = {
4140 // [0]
4141 "abc > xy;"
4142 "aba > z;",
4143
4144 // [1]
4145/*
4146 "::BEGIN;"
4147 "abc > xy;"
4148 "::END;"
4149 "::BEGIN;"
4150 "aba > z;"
4151 "::END;",
4152*/
4153 "", // test case commented out below, this is here to keep from messing up the indexes
4154
4155 // [2]
4156/*
4157 "abc > xy;"
4158 "::BEGIN;"
4159 "aba > z;"
4160 "::END;",
4161*/
4162 "", // test case commented out below, this is here to keep from messing up the indexes
4163
4164 // [3]
4165/*
4166 "::BEGIN;"
4167 "abc > xy;"
4168 "::END;"
4169 "aba > z;",
4170*/
4171 "", // test case commented out below, this is here to keep from messing up the indexes
4172
4173 // [4]
4174 "abc > xy;"
4175 "::Null;"
4176 "aba > z;",
4177
4178 // [5]
4179 "::Upper;"
4180 "ABC > xy;"
4181 "AB > x;"
4182 "C > z;"
4183 "::Upper;"
4184 "XYZ > p;"
4185 "XY > q;"
4186 "Z > r;"
4187 "::Upper;",
4188
4189 // [6]
4190 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4191 "$delim = [\\-$ws];"
4192 "$ws $delim* > ' ';"
4193 "'-' $delim* > '-';",
4194
4195 // [7]
4196 "::Null;"
4197 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4198 "$delim = [\\-$ws];"
4199 "$ws $delim* > ' ';"
4200 "'-' $delim* > '-';",
4201
4202 // [8]
4203 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4204 "$delim = [\\-$ws];"
4205 "$ws $delim* > ' ';"
4206 "'-' $delim* > '-';"
4207 "::Null;",
4208
4209 // [9]
4210 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4211 "$delim = [\\-$ws];"
4212 "::Null;"
4213 "$ws $delim* > ' ';"
4214 "'-' $delim* > '-';",
4215
4216 // [10]
4217/*
4218 "::BEGIN;"
4219 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4220 "$delim = [\\-$ws];"
4221 "::END;"
4222 "$ws $delim* > ' ';"
4223 "'-' $delim* > '-';",
4224*/
4225 "", // test case commented out below, this is here to keep from messing up the indexes
4226
4227 // [11]
4228/*
4229 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4230 "$delim = [\\-$ws];"
4231 "::BEGIN;"
4232 "$ws $delim* > ' ';"
4233 "'-' $delim* > '-';"
4234 "::END;",
4235*/
4236 "", // test case commented out below, this is here to keep from messing up the indexes
4237
4238 // [12]
4239/*
4240 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4241 "$delim = [\\-$ws];"
4242 "$ab = [ab];"
4243 "::BEGIN;"
4244 "$ws $delim* > ' ';"
4245 "'-' $delim* > '-';"
4246 "::END;"
4247 "::BEGIN;"
4248 "$ab { ' ' } $ab > '-';"
4249 "c { ' ' > ;"
4250 "::END;"
4251 "::BEGIN;"
4252 "'a-a' > a\\%|a;"
4253 "::END;",
4254*/
4255 "", // test case commented out below, this is here to keep from messing up the indexes
4256
4257 // [13]
4258 "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4259 "$delim = [\\-$ws];"
4260 "$ab = [ab];"
4261 "::Null;"
4262 "$ws $delim* > ' ';"
4263 "'-' $delim* > '-';"
4264 "::Null;"
4265 "$ab { ' ' } $ab > '-';"
4266 "c { ' ' > ;"
4267 "::Null;"
4268 "'a-a' > a\\%|a;",
4269
4270 // [14]
4271/*
4272 "::[abc];"
4273 "::BEGIN;"
4274 "abc > xy;"
4275 "::END;"
4276 "::BEGIN;"
4277 "aba > yz;"
4278 "::END;"
4279 "::Upper;",
4280*/
4281 "", // test case commented out below, this is here to keep from messing up the indexes
4282
4283 // [15]
4284 "::[abc];"
4285 "abc > xy;"
4286 "::Null;"
4287 "aba > yz;"
4288 "::Upper;",
4289
4290 // [16]
4291/*
4292 "::[abc];"
4293 "::BEGIN;"
4294 "abc <> xy;"
4295 "::END;"
4296 "::BEGIN;"
4297 "aba <> yz;"
4298 "::END;"
4299 "::Upper(Lower);"
4300 "::([XYZ]);"
4301*/
4302 "", // test case commented out below, this is here to keep from messing up the indexes
4303
4304 // [17]
4305 "::[abc];"
4306 "abc <> xy;"
4307 "::Null;"
4308 "aba <> yz;"
4309 "::Upper(Lower);"
4310 "::([XYZ]);"
4311};
73c04bcf
A
4312
4313/*
4314(This entire test is commented out below and will need some heavy revision when we re-add
4315the ::BEGIN/::END stuff)
4316static const char* BOGUS_BEGIN_END_RULES[] = {
4317 // [7]
4318 "::BEGIN;"
4319 "abc > xy;"
4320 "::BEGIN;"
4321 "aba > z;"
4322 "::END;"
4323 "::END;",
4324
4325 // [8]
4326 "abc > xy;"
4327 " aba > z;"
4328 "::END;",
4329
4330 // [9]
4331 "::BEGIN;"
4332 "::Upper;"
4333 "::END;"
4334};
2ca993e8 4335static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
73c04bcf
A
4336*/
4337
// Flat table of test cases consumed three entries at a time by TestBeginEnd
// and TestBeginEndToRules: rule set, input text, expected output.  Rows for
// the ::BEGIN/::END rule sets are commented out together with those rules.
4338static const char* BEGIN_END_TEST_CASES[] = {
4339 // rules input expected output
4340 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
4341// BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
4342// BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
4343// BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
4344 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
4345 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
4346
4347 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
4348 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
4349 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
4350 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
4351// BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
4352// BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
4353// BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
4354// BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
4355// BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
4356 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
4357 BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
4358 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
4359
4360// BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4361 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4362// BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4363 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4364};
// Number of entries (not rows) in BEGIN_END_TEST_CASES.
2ca993e8 4365static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
73c04bcf
A
4366
4367void TransliteratorTest::TestBeginEnd() {
4368 // run through the list of test cases above
4369 int32_t i = 0;
4370 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4371 expect((UnicodeString)"Test case #" + (i / 3),
46f4442e
A
4372 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4373 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4374 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
73c04bcf
A
4375 }
4376
4377 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4378 UParseError parseError;
4379 UErrorCode status = U_ZERO_ERROR;
4380 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4381 UTRANS_REVERSE, parseError, status);
4382 if (reversed == 0 || U_FAILURE(status)) {
4383 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4384 } else {
4385 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4386 }
4387 delete reversed;
4388
4389 // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4390 // that all of them cause errors
4391/*
4392(commented out until we have the real ::BEGIN/::END stuff in place
4393 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4394 UParseError parseError;
4395 UErrorCode status = U_ZERO_ERROR;
4396 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4397 UTRANS_FORWARD, parseError, status);
4398 if (!U_FAILURE(status)) {
4399 delete t;
4400 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4401 }
4402 }
4403*/
4404}
4405
4406void TransliteratorTest::TestBeginEndToRules() {
4407 // run through the same list of test cases we used above, but this time, instead of just
4408 // instantiating a Transliterator from the rules and running the test against it, we instantiate
4409 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4410 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4411 // to (i.e., does the same thing as) the original rule set
4412 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4413 UParseError parseError;
4414 UErrorCode status = U_ZERO_ERROR;
46f4442e 4415 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
73c04bcf
A
4416 UTRANS_FORWARD, parseError, status);
4417 if (U_FAILURE(status)) {
4418 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4419 } else {
4420 UnicodeString rules;
4421 t->toRules(rules, TRUE);
4422 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4423 UTRANS_FORWARD, parseError, status);
4424 if (U_FAILURE(status)) {
4425 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4426 parseError, status);
4427 delete t;
4428 } else {
4429 expect(*t2,
46f4442e
A
4430 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4431 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
73c04bcf
A
4432 delete t;
4433 delete t2;
4434 }
4435 }
4436 }
4437
4438 // do the same thing for the reversible test case
4439 UParseError parseError;
4440 UErrorCode status = U_ZERO_ERROR;
4441 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4442 UTRANS_REVERSE, parseError, status);
4443 if (U_FAILURE(status)) {
4444 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4445 } else {
4446 UnicodeString rules;
4447 reversed->toRules(rules, FALSE);
4448 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4449 parseError, status);
4450 if (U_FAILURE(status)) {
4451 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4452 parseError, status);
4453 delete reversed;
4454 } else {
4455 expect(*reversed2,
4456 UnicodeString("xy XY XYZ yz YZ"),
4457 UnicodeString("xy abc xaba yz aba"));
4458 delete reversed;
4459 delete reversed2;
4460 }
4461 }
4462}
4463
4464void TransliteratorTest::TestRegisterAlias() {
4465 UnicodeString longID("Lower;[aeiou]Upper");
4466 UnicodeString shortID("Any-CapVowels");
4467 UnicodeString reallyShortID("CapVowels");
4468
4469 Transliterator::registerAlias(shortID, longID);
4470
4471 UErrorCode err = U_ZERO_ERROR;
4472 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4473 if (U_FAILURE(err)) {
4474 errln("Failed to instantiate transliterator with long ID");
4475 Transliterator::unregister(shortID);
4476 return;
4477 }
4478 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4479 if (U_FAILURE(err)) {
4480 errln("Failed to instantiate transliterator with short ID");
4481 delete t1;
4482 Transliterator::unregister(shortID);
4483 return;
4484 }
4485
4486 if (t1->getID() != longID)
4487 errln("Transliterator instantiated with long ID doesn't have long ID");
4488 if (t2->getID() != reallyShortID)
4489 errln("Transliterator instantiated with short ID doesn't have short ID");
4490
4491 UnicodeString rules1;
4492 UnicodeString rules2;
4493
4494 t1->toRules(rules1, TRUE);
4495 t2->toRules(rules2, TRUE);
4496 if (rules1 != rules2)
4497 errln("Alias transliterators aren't the same");
4498
4499 delete t1;
4500 delete t2;
4501 Transliterator::unregister(shortID);
4502
4503 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4504 if (U_SUCCESS(err)) {
4505 errln("Instantiation with short ID succeeded after short ID was unregistered");
4506 delete t1;
4507 }
4508
4509 // try the same thing again, but this time with something other than
4510 // an instance of CompoundTransliterator
4511 UnicodeString realID("Latin-Greek");
4512 UnicodeString fakeID("Latin-dlgkjdflkjdl");
4513 Transliterator::registerAlias(fakeID, realID);
4514
4515 err = U_ZERO_ERROR;
4516 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4517 if (U_FAILURE(err)) {
729e4ab9 4518 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
73c04bcf
A
4519 Transliterator::unregister(realID);
4520 return;
4521 }
4522 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4523 if (U_FAILURE(err)) {
4524 errln("Failed to instantiate transliterator with fake ID");
4525 delete t1;
4526 Transliterator::unregister(realID);
4527 return;
4528 }
4529
4530 t1->toRules(rules1, TRUE);
4531 t2->toRules(rules2, TRUE);
4532 if (rules1 != rules2)
4533 errln("Alias transliterators aren't the same");
4534
4535 delete t1;
4536 delete t2;
4537 Transliterator::unregister(fakeID);
4538}
4539
46f4442e
A
4540void TransliteratorTest::TestRuleStripping() {
4541 /*
4542#
4543\uE001>\u0C01; # SIGN
4544 */
4545 static const UChar rule[] = {
4546 0x0023,0x0020,0x000D,0x000A,
4547 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4548 };
4549 static const UChar expectedRule[] = {
4550 0xE001,0x003E,0x0C01,0x003B,0
4551 };
2ca993e8 4552 UChar result[UPRV_LENGTHOF(rule)];
46f4442e 4553 UErrorCode status = U_ZERO_ERROR;
2ca993e8 4554 int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
46f4442e
A
4555 if (len != u_strlen(expectedRule)) {
4556 errln("utrans_stripRules return len = %d", len);
4557 }
4558 if (u_strncmp(expectedRule, result, len) != 0) {
4559 errln("utrans_stripRules did not return expected string");
4560 }
4561}
4562
4563/**
4564 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4565 */
4566void TransliteratorTest::TestHalfwidthFullwidth(void) {
4567 UParseError parseError;
4568 UErrorCode status = U_ZERO_ERROR;
4569 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4570 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4571 if (hf == 0 || fh == 0) {
729e4ab9 4572 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
46f4442e
A
4573 delete hf;
4574 delete fh;
4575 return;
4576 }
4577
4578 // Array of 2n items
4579 // Each item is
4580 // "hf"|"fh"|"both",
4581 // <Halfwidth>,
4582 // <Fullwidth>
4583 const char* DATA[] = {
4584 "both",
4585 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4586 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4587 };
2ca993e8 4588 int32_t DATA_length = UPRV_LENGTHOF(DATA);
46f4442e
A
4589
4590 for (int32_t i=0; i<DATA_length; i+=3) {
4591 UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4592 UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4593 switch (*DATA[i]) {
4594 case 0x68: //'h': // Halfwidth-Fullwidth only
4595 expect(*hf, h, f);
4596 break;
4597 case 0x66: //'f': // Fullwidth-Halfwidth only
4598 expect(*fh, f, h);
4599 break;
4600 case 0x62: //'b': // both directions
4601 expect(*hf, h, f);
4602 expect(*fh, f, h);
4603 break;
4604 }
4605 }
4606 delete hf;
4607 delete fh;
4608}
4609
4610
4611 /**
4612 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4613 * TODO: confirm that the expected results are correct.
4614 * For now, test just confirms that C++ and Java give identical results.
4615 */
4616void TransliteratorTest::TestThai(void) {
729e4ab9 4617#if !UCONFIG_NO_BREAK_ITERATION
46f4442e
A
4618 UParseError parseError;
4619 UErrorCode status = U_ZERO_ERROR;
4620 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4621 if (tr == 0) {
729e4ab9 4622 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
46f4442e
A
4623 return;
4624 }
4625 if (U_FAILURE(status)) {
4626 errln("FAIL: createInstance failed with %s", u_errorName(status));
4627 return;
4628 }
4629 const char *thaiText =
4630 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4631 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4632 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4633 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4634 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4635 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4636 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4637 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4638 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4639 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4640 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4641 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4642 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4643 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4644 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4645 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4646 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4647 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4648 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4649 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4650 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4651 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4652 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4653 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4654 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4655 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4656 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4657 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4658 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4659 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4660
4661 const char *latinText =
4662 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4663 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4664 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4665 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4666 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4667 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4668 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4669 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4670 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4671 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4672 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4673 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4674 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4675 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4676 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4677 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4678 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4679 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4680
4681
4682 UnicodeString xlitText(thaiText);
4683 xlitText = xlitText.unescape();
4684 tr->transliterate(xlitText);
4685
4686 UnicodeString expectedText(latinText);
4687 expectedText = expectedText.unescape();
4688 expect(*tr, xlitText, expectedText);
4689
4690 delete tr;
729e4ab9 4691#endif
46f4442e
A
4692}
4693
340931cb
A
4694/**
4695 * Test for rdar://problem/61817095 (and maybe eventually other Hans-Hant errors)
4696 * Apple-only
4697 * ICU4C only
4698 */
4699void TransliteratorTest::TestHansHant(void) {
4700 UParseError parseError;
4701 UErrorCode status = U_ZERO_ERROR;
4702 Transliterator* tr = Transliterator::createInstance("Hans-Hant", UTRANS_FORWARD, parseError, status);
4703 if (U_FAILURE(status)) {
4704 errln("FAIL: createInstance failed with %s", u_errorName(status));
4705 return;
4706 }
4707
4708 const char* _sourceText = "\\u810f \\u5185\\u810f \\u810f\\u5668 \\u4e94\\u810f \\u5fc3\\u810f \\u809d\\u810f \\u813e\\u810f \\u80c3\\u810f \\u80be\\u810f \\u80f0\\u810f \\u810f\\u8151 \\u80ba\\u810f";
4709 const char* _expectedResult = "\\u9ad2 \\u5167\\u81df \\u81df\\u5668 \\u4e94\\u81df \\u5fc3\\u81df \\u809d\\u81df \\u813e\\u81df \\u80c3\\u81df \\u814e\\u81df \\u80f0\\u81df \\u81df\\u8151 \\u80ba\\u81df";
4710
4711 UnicodeString sourceText(_sourceText);
4712 UnicodeString expectedResult(_expectedResult);
4713 sourceText = sourceText.unescape();
4714 expectedResult = expectedResult.unescape();
4715
4716 expect(*tr, sourceText, expectedResult);
4717 delete tr;
4718}
4719
46f4442e 4720
b75a7d8f
A
4721//======================================================================
4722// Support methods
4723//======================================================================
4724void TransliteratorTest::expectT(const UnicodeString& id,
4725 const UnicodeString& source,
4726 const UnicodeString& expectedResult) {
4727 UErrorCode ec = U_ZERO_ERROR;
4728 UParseError pe;
4729 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4730 if (U_FAILURE(ec)) {
729e4ab9 4731 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
b75a7d8f
A
4732 delete t;
4733 return;
4734 }
4735 expect(*t, source, expectedResult);
4736 delete t;
4737}
4738
73c04bcf
A
4739void TransliteratorTest::reportParseError(const UnicodeString& message,
4740 const UParseError& parseError,
4741 const UErrorCode& status) {
729e4ab9 4742 dataerrln(message +
73c04bcf
A
4743 /*", parse error " + parseError.code +*/
4744 ", line " + parseError.line +
4745 ", offset " + parseError.offset +
4746 ", pre-context " + prettify(parseError.preContext, TRUE) +
4747 ", post-context " + prettify(parseError.postContext,TRUE) +
4748 ", Error: " + u_errorName(status));
4749}
4750
b75a7d8f
A
4751void TransliteratorTest::expect(const UnicodeString& rules,
4752 const UnicodeString& source,
4753 const UnicodeString& expectedResult,
4754 UTransPosition *pos) {
73c04bcf
A
4755 expect("<ID>", rules, source, expectedResult, pos);
4756}
4757
4758void TransliteratorTest::expect(const UnicodeString& id,
4759 const UnicodeString& rules,
4760 const UnicodeString& source,
4761 const UnicodeString& expectedResult,
4762 UTransPosition *pos) {
b75a7d8f 4763 UErrorCode status = U_ZERO_ERROR;
73c04bcf
A
4764 UParseError parseError;
4765 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
b75a7d8f 4766 if (U_FAILURE(status)) {
73c04bcf 4767 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
b75a7d8f
A
4768 } else {
4769 expect(*t, source, expectedResult, pos);
4770 }
4771 delete t;
4772}
4773
4774void TransliteratorTest::expect(const Transliterator& t,
4775 const UnicodeString& source,
4776 const UnicodeString& expectedResult,
4777 const Transliterator& reverseTransliterator) {
4778 expect(t, source, expectedResult);
4779 expect(reverseTransliterator, expectedResult, source);
4780}
4781
4782void TransliteratorTest::expect(const Transliterator& t,
4783 const UnicodeString& source,
4784 const UnicodeString& expectedResult,
4785 UTransPosition *pos) {
4786 if (pos == 0) {
4787 UnicodeString result(source);
4788 t.transliterate(result);
4789 expectAux(t.getID() + ":String", source, result, expectedResult);
4790 }
b75a7d8f
A
4791 UTransPosition index={0, 0, 0, 0};
4792 if (pos != 0) {
4793 index = *pos;
4794 }
4795
4796 UnicodeString rsource(source);
4797 if (pos == 0) {
4798 t.transliterate(rsource);
4799 } else {
4800 // Do it all at once -- below we do it incrementally
4801 t.finishTransliteration(rsource, *pos);
4802 }
4803 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4804
4805 // Test keyboard (incremental) transliteration -- this result
4806 // must be the same after we finalize (see below).
4807 UnicodeString log;
4808 rsource.remove();
4809 if (pos != 0) {
4810 rsource = source;
4811 formatInput(log, rsource, index);
4812 log.append(" -> ");
4813 UErrorCode status = U_ZERO_ERROR;
4814 t.transliterate(rsource, index, status);
4815 formatInput(log, rsource, index);
4816 } else {
4817 for (int32_t i=0; i<source.length(); ++i) {
4818 if (i != 0) {
4819 log.append(" + ");
4820 }
4821 log.append(source.charAt(i)).append(" -> ");
4822 UErrorCode status = U_ZERO_ERROR;
4823 t.transliterate(rsource, index, source.charAt(i), status);
4824 formatInput(log, rsource, index);
4825 }
4826 }
4827
4828 // As a final step in keyboard transliteration, we must call
4829 // transliterate to finish off any pending partial matches that
4830 // were waiting for more input.
4831 t.finishTransliteration(rsource, index);
4832 log.append(" => ").append(rsource);
4833
4834 expectAux(t.getID() + ":Keyboard", log,
4835 rsource == expectedResult,
4836 expectedResult);
4837}
4838
4839
4840/**
4841 * @param appendTo result is appended to this param.
4842 * @param input the string being transliterated
4843 * @param pos the index struct
4844 */
4845UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4846 const UnicodeString& input,
4847 const UTransPosition& pos) {
4848 // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4849 // the {} indicate the context start and limit, and the ||
4850 // indicate the start and limit.
4851 if (0 <= pos.contextStart &&
4852 pos.contextStart <= pos.start &&
4853 pos.start <= pos.limit &&
4854 pos.limit <= pos.contextLimit &&
4855 pos.contextLimit <= input.length()) {
4856
4857 UnicodeString a, b, c, d, e;
4858 input.extractBetween(0, pos.contextStart, a);
4859 input.extractBetween(pos.contextStart, pos.start, b);
4860 input.extractBetween(pos.start, pos.limit, c);
4861 input.extractBetween(pos.limit, pos.contextLimit, d);
4862 input.extractBetween(pos.contextLimit, input.length(), e);
4863 appendTo.append(a).append((UChar)123/*{*/).append(b).
4864 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4865 append((UChar)125/*}*/).append(e);
4866 } else {
4867 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4868 pos.contextStart + ", s=" + pos.start + ", l=" +
4869 pos.limit + ", cl=" + pos.contextLimit + "} on " +
4870 input);
4871 }
4872 return appendTo;
4873}
4874
4875void TransliteratorTest::expectAux(const UnicodeString& tag,
4876 const UnicodeString& source,
4877 const UnicodeString& result,
4878 const UnicodeString& expectedResult) {
4879 expectAux(tag, source + " -> " + result,
4880 result == expectedResult,
4881 expectedResult);
4882}
4883
4884void TransliteratorTest::expectAux(const UnicodeString& tag,
4885 const UnicodeString& summary, UBool pass,
4886 const UnicodeString& expectedResult) {
4887 if (pass) {
4888 logln(UnicodeString("(")+tag+") " + prettify(summary));
4889 } else {
729e4ab9 4890 dataerrln(UnicodeString("FAIL: (")+tag+") "
b75a7d8f
A
4891 + prettify(summary)
4892 + ", expected " + prettify(expectedResult));
4893 }
4894}
4895
4896#endif /* #if !UCONFIG_NO_TRANSLITERATION */