]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/jamotest.cpp
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / test / intltest / jamotest.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
6 ************************************************************************
7 * Date Name Description
8 * 02/28/2001 aliu Creation
9 * 03/01/2001 George port to HP/UX
10 ************************************************************************/
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_TRANSLITERATION
15
16 #include "jamotest.h"
17 #include "unicode/utypes.h"
18 #include "unicode/translit.h"
19 #include "cpdtrans.h"
20
21 // SEP is the disambiguation separator used by Latin-Jamo and Jamo-Latin
22 #define SEP "-"
23
24 JamoTest::JamoTest()
25 {
26 UParseError parseError;
27 UErrorCode status = U_ZERO_ERROR;
28 NAME_JAMO = Transliterator::createFromRules("Name-Jamo",
29 UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
30 UTRANS_FORWARD, parseError, status);
31
32 if (U_FAILURE(status)) {
33 delete NAME_JAMO;
34 NAME_JAMO = NULL;
35 }
36 status = U_ZERO_ERROR;
37 JAMO_NAME = Transliterator::createFromRules("Jamo-Name",
38 UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
39 UTRANS_REVERSE, parseError, status);
40 if (U_FAILURE(status)) {
41 delete JAMO_NAME;
42 JAMO_NAME = NULL;
43 }
44 }
45
46 JamoTest::~JamoTest()
47 {
48 delete NAME_JAMO;
49 delete JAMO_NAME;
50 }
51
52 void
53 JamoTest::runIndexedTest(int32_t index, UBool exec,
54 const char* &name, char* /*par*/) {
55 switch (index) {
56 TESTCASE(0,TestJamo);
57 TESTCASE(1,TestRealText);
58 TESTCASE(2,TestPiecemeal);
59 default: name = ""; break;
60 }
61 }
62
63 void
64 JamoTest::TestJamo() {
65 UParseError parseError;
66 UErrorCode status = U_ZERO_ERROR;
67 Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
68
69 if (latinJamo == 0 || U_FAILURE(status)) {
70 dataerrln("FAIL: createInstance() returned 0 - %s", u_errorName(status));
71 return;
72 }
73
74 Transliterator* jamoLatin = latinJamo->createInverse(status);
75
76 if (jamoLatin == 0) {
77 delete latinJamo;
78 errln("FAIL: createInverse() returned 0");
79 return;
80 }
81
82 static const char* CASE[] = {
83 // Column 1 is the latin text L1 to be fed to Latin-Jamo
84 // to yield output J.
85
86 // Column 2 is expected value of J. J is fed to
87 // Jamo-Latin to yield output L2.
88
89 // Column 3 is expected value of L2. If the expected
90 // value of L2 is L1, then L2 is NULL.
91
92 // add tests for the update to fix problems where it didn't follow the standard
93 // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html
94 "gach", "(Gi)(A)(Cf)", NULL,
95 "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL,
96 "choe", "(Ci)(OE)", NULL,
97 "wo", "(IEUNG)(WEO)", NULL,
98 "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil",
99 "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum",
100 "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum",
101 "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae",
102 "gaga", "(Gi)(A)(Gi)(A)", NULL,
103 "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL,
104 "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL,
105 "gakka", "(Gi)(A)(GGi)(A)", NULL,
106 "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL,
107 "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL,
108 "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL,
109
110 "bab", "(Bi)(A)(Bf)", NULL,
111 "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu",
112 "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba",
113 "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu",
114 "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga",
115 //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga",
116 "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL,
117 "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL,
118 "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf)
119 "gga", "(Gi)(EU)(Gi)(A)", "geuga",
120 "bsa", "(Bi)(EU)(Si)(A)", "beusa",
121 "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu",
122 "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL,
123 "la", "(R)(A)", NULL,
124 "bs", "(Bi)(EU)(Sf)", "beus",
125 "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga",
126
127 // 'r' in a final position is treated like 'l'
128 "karka", "(Ki)(A)(L)(Ki)(A)", "kalka",
129 };
130
131 enum { CASE_length = sizeof(CASE) / sizeof(CASE[0]) };
132
133 int32_t i;
134 for (i=0; i<CASE_length; i+=3) {
135 UnicodeString jamo = nameToJamo(CASE[i+1]);
136 if (CASE[i+2] == NULL) {
137 expect(*latinJamo, CASE[i], jamo, *jamoLatin);
138 } else {
139 // Handle case where round-trip is expected to fail
140 expect(*latinJamo, CASE[i], jamo);
141 expect(*jamoLatin, jamo, CASE[i+2]);
142 }
143 }
144
145 delete latinJamo;
146 delete jamoLatin;
147 }
148
149 /**
150 * Test various step-at-a-time transformation of hangul to jamo to
151 * latin and back.
152 */
153 void JamoTest::TestPiecemeal(void) {
154 UnicodeString hangul; hangul.append((UChar)0xBC0F);
155 UnicodeString jamo = nameToJamo("(Mi)(I)(Cf)");
156 UnicodeString latin("mic");
157 UnicodeString latin2("mich");
158
159 Transliterator *t = NULL;
160 UErrorCode status = U_ZERO_ERROR;
161
162 t = Transliterator::createInstance("NFD", UTRANS_FORWARD, status); // was Hangul-Jamo
163 if (U_FAILURE(status) || t == 0) {
164 dataerrln("FAIL: createInstance failed");
165 return;
166 }
167 expect(*t, hangul, jamo);
168 delete t;
169
170 t = Transliterator::createInstance("NFC", UTRANS_FORWARD, status); // was Jamo-Hangul
171 if (U_FAILURE(status) || t == 0) {
172 errln("FAIL: createInstance failed");
173 return;
174 }
175 expect(*t, jamo, hangul);
176 delete t;
177
178 t = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, status);
179 if (U_FAILURE(status) || t == 0) {
180 dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
181 return;
182 }
183 expect(*t, latin, jamo);
184 delete t;
185
186 t = Transliterator::createInstance("Jamo-Latin", UTRANS_FORWARD, status);
187 if (U_FAILURE(status) || t == 0) {
188 errln("FAIL: createInstance failed");
189 return;
190 }
191 expect(*t, jamo, latin2);
192 delete t;
193
194 t = Transliterator::createInstance("Hangul-Latin", UTRANS_FORWARD, status);
195 if (U_FAILURE(status) || t == 0) {
196 errln("FAIL: createInstance failed");
197 return;
198 }
199 expect(*t, hangul, latin2);
200 delete t;
201
202 t = Transliterator::createInstance("Latin-Hangul", UTRANS_FORWARD, status);
203 if (U_FAILURE(status) || t == 0) {
204 errln("FAIL: createInstance failed");
205 return;
206 }
207 expect(*t, latin, hangul);
208 delete t;
209
210 t = Transliterator::createInstance("Hangul-Latin; Latin-Jamo", UTRANS_FORWARD, status);
211 if (U_FAILURE(status) || t == 0) {
212 errln("FAIL: createInstance failed");
213 return;
214 }
215 expect(*t, hangul, jamo);
216 delete t;
217
218 t = Transliterator::createInstance("Jamo-Latin; Latin-Hangul", UTRANS_FORWARD, status);
219 if (U_FAILURE(status) || t == 0) {
220 errln("FAIL: createInstance failed");
221 return;
222 }
223 expect(*t, jamo, hangul);
224 delete t;
225
226 t = Transliterator::createInstance("Hangul-Latin; Latin-Hangul", UTRANS_FORWARD, status);
227 if (U_FAILURE(status) || t == 0) {
228 errln("FAIL: createInstance failed");
229 return;
230 }
231 expect(*t, hangul, hangul);
232 delete t;
233 }
234
235 void
236 JamoTest::TestRealText() {
237 // Test text taken from the Unicode web site
238 static const char* const WHAT_IS_UNICODE[] = {
239 "\\uc720\\ub2c8\\ucf54\\ub4dc\\uc5d0", "\\ub300\\ud574", "?",
240
241 "\\uc5b4\\ub5a4", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b4\\ub5a4",
242 "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b4\\ub5a4", "\\uc5b8\\uc5b4\\uc5d0\\ub3c4",
243 "\\uc0c1\\uad00\\uc5c6\\uc774", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\ubaa8\\ub4e0",
244 "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uace0\\uc720", "\\ubc88\\ud638\\ub97c",
245 "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.",
246
247 "\\uae30\\ubcf8\\uc801\\uc73c\\ub85c", "\\ucef4\\ud4e8\\ud130\\ub294",
248 "\\uc22b\\uc790\\ub9cc", "\\ucc98\\ub9ac\\ud569\\ub2c8\\ub2e4.", "\\uae00\\uc790\\ub098",
249 "\\ub2e4\\ub978", "\\ubb38\\uc790\\uc5d0\\ub3c4", "\\uc22b\\uc790\\ub97c",
250 "\\uc9c0\\uc815\\ud558\\uc5ec",
251 "\\uc800\\uc7a5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\uac00",
252 "\\uac1c\\ubc1c\\ub418\\uae30", "\\uc804\\uc5d0\\ub294", "\\uc774\\ub7ec\\ud55c",
253 "\\uc22b\\uc790\\ub97c", "\\uc9c0\\uc815\\ud558\\uae30", "\\uc704\\ud574", "\\uc218\\ubc31",
254 "\\uac00\\uc9c0\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654",
255 "\\uc2dc\\uc2a4\\ud15c\\uc744",
256 "\\uc0ac\\uc6a9\\ud588\\uc2b5\\ub2c8\\ub2e4.", "\\ub2e8\\uc77c", "\\uae30\\ud638\\ud654",
257 "\\ubc29\\ubc95\\uc73c\\ub85c\\ub294", "\\ubaa8\\ub4e0", "\\ubb38\\uc790\\ub97c",
258 "\\ud3ec\\ud568\\ud560", "\\uc218", "\\uc5c6\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc608\\ub97c",
259 "\\ub4e4\\uc5b4", "\\uc720\\ub7fd", "\\uc5f0\\ud569\\uc5d0\\uc11c\\ub9cc",
260 "\\ubcf4\\ub354\\ub77c\\ub3c4", "\\ubaa8\\ub4e0", "\\uac01", "\\ub098\\ub77c\\ubcc4",
261 "\\uc5b8\\uc5b4\\ub97c", "\\ucc98\\ub9ac\\ud558\\ub824\\uba74", "\\uc5ec\\ub7ec",
262 "\\uac1c\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774",
263 "\\ud544\\uc694\\ud569\\ub2c8\\ub2e4.", "\\uc601\\uc5b4\\uc640", "\\uac19\\uc740",
264 "\\ub2e8\\uc77c", "\\uc5b8\\uc5b4\\uc758", "\\uacbd\\uc6b0\\ub3c4",
265 "\\uacf5\\ud1b5\\uc801\\uc73c\\ub85c", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
266 "\\uae00\\uc790,", "\\ubb38\\uc7a5", "\\ubd80\\ud638", "\\ubc0f",
267 "\\ud14c\\ud06c\\ub2c8\\uceec", "\\uae30\\ud638\\uc5d0", "\\ub9de\\ub294", "\\ub2e8\\uc77c",
268 "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc744", "\\uac16\\uace0", "\\uc788\\uc9c0",
269 "\\ubabb\\ud558\\uc600\\uc2b5\\ub2c8\\ub2e4.",
270
271 "\\uc774\\ub7ec\\ud55c", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uc740",
272 "\\ub610\\ud55c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uacfc",
273 "\\ucda9\\ub3cc\\ud569\\ub2c8\\ub2e4.", "\\uc989", "\\ub450", "\\uac00\\uc9c0",
274 "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", "\\ub450", "\\uac1c\\uc758", "\\ub2e4\\ub978",
275 "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uac19\\uc740", "\\ubc88\\ud638\\ub97c",
276 "\\uc0ac\\uc6a9\\ud558\\uac70\\ub098", "\\uac19\\uc740", "\\ubb38\\uc790\\uc5d0",
277 "\\ub300\\ud574", "\\ub2e4\\ub978", "\\ubc88\\ud638\\ub97c", "\\uc0ac\\uc6a9\\ud560", "\\uc218",
278 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc8fc\\uc5b4\\uc9c4", "\\ubaa8\\ub4e0",
279 "\\ucef4\\ud4e8\\ud130(\\ud2b9\\ud788", "\\uc11c\\ubc84)\\ub294", "\\uc11c\\ub85c",
280 "\\ub2e4\\ub978", "\\uc5ec\\ub7ec", "\\uac00\\uc9c0", "\\uae30\\ud638\\ud654",
281 "\\ubc29\\ubc95\\uc744", "\\uc9c0\\uc6d0\\ud574\\uc57c",
282 "\\ud569\\ub2c8\\ub2e4.", "\\uadf8\\ub7ec\\ub098,", "\\ub370\\uc774\\ud130\\ub97c",
283 "\\uc11c\\ub85c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774\\ub098",
284 "\\ud50c\\ub7ab\\ud3fc", "\\uac04\\uc5d0", "\\uc804\\ub2ec\\ud560", "\\ub54c\\ub9c8\\ub2e4",
285 "\\uadf8", "\\ub370\\uc774\\ud130\\ub294", "\\ud56d\\uc0c1", "\\uc190\\uc0c1\\uc758",
286 "\\uc704\\ud5d8\\uc744", "\\uacaa\\uac8c", "\\ub429\\ub2c8\\ub2e4.",
287
288 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub85c", "\\ubaa8\\ub4e0", "\\uac83\\uc744",
289 "\\ud574\\uacb0\\ud560", "\\uc218", "\\uc788\\uc2b5\\ub2c8\\ub2e4!",
290 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\uc0ac\\uc6a9", "\\uc911\\uc778",
291 "\\ud50c\\ub7ab\\ud3fc,", "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b8\\uc5b4\\uc5d0",
292 "\\uad00\\uacc4\\uc5c6\\uc774", "\\ubb38\\uc790\\ub9c8\\ub2e4", "\\uace0\\uc720\\ud55c",
293 "\\uc22b\\uc790\\ub97c",
294 "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
295 "\\ud45c\\uc900\\uc740", // "Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, "
296 // "Sun, Sybase, Unisys "
297 "\\ubc0f", "\\uae30\\ud0c0", "\\uc5ec\\ub7ec",
298 "\\ud68c\\uc0ac\\uc640", "\\uac19\\uc740", "\\uc5c5\\uacc4",
299 "\\uc120\\ub450\\uc8fc\\uc790\\uc5d0", "\\uc758\\ud574",
300 "\\ucc44\\ud0dd\\ub418\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294",
301 // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML "
302 "\\ub4f1\\uacfc",
303 "\\uac19\\uc774", "\\ud604\\uc7ac", "\\ub110\\ub9ac", "\\uc0ac\\uc6a9\\ub418\\ub294",
304 "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud544\\uc694\\ud558\\uba70", "\\uc774\\ub294", // "ISO/IEC",
305 "10646\\uc744", "\\uad6c\\ud604\\ud558\\ub294", "\\uacf5\\uc2dd\\uc801\\uc778",
306 "\\ubc29\\ubc95\\uc785\\ub2c8\\ub2e4.", "\\uc774\\ub294", "\\ub9ce\\uc740", "\\uc6b4\\uc601",
307 "\\uccb4\\uc81c,", "\\uc694\\uc998", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
308 "\\ube0c\\ub77c\\uc6b0\\uc800", "\\ubc0f", "\\uae30\\ud0c0", "\\ub9ce\\uc740",
309 "\\uc81c\\ud488\\uc5d0\\uc11c",
310 "\\uc9c0\\uc6d0\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
311 "\\ud45c\\uc900\\uc758", "\\ubd80\\uc0c1\\uacfc", "\\uc774\\ub97c",
312 "\\uc9c0\\uc6d0\\ud558\\ub294", "\\ub3c4\\uad6c\\uc758", "\\uac00\\uc6a9\\uc131\\uc740",
313 "\\ucd5c\\uadfc", "\\uc804", "\\uc138\\uacc4\\uc5d0", "\\ubd88\\uace0", "\\uc788\\ub294",
314 "\\uae30\\uc220", "\\uacbd\\ud5a5\\uc5d0\\uc11c", "\\uac00\\uc7a5", "\\uc911\\uc694\\ud55c",
315 "\\ubd80\\ubd84\\uc744", "\\ucc28\\uc9c0\\ud558\\uace0", "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
316
317 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
318 // Replaced a hyphen with a space to make the test case work with CLDR1.5
319 //"\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8-\\uc11c\\ubc84", "\\ub610\\ub294",
320 "\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8 \\uc11c\\ubc84", "\\ub610\\ub294",
321 // Replaced a hyphen with a space.
322 //"\\ub2e4\\uc911-\\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
323 "\\ub2e4\\uc911 \\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
324 "\\uc6f9", "\\uc0ac\\uc774\\ud2b8\\uc5d0", "\\ud1b5\\ud569\\ud558\\uba74",
325 "\\ub808\\uac70\\uc2dc", "\\ubb38\\uc790", "\\uc138\\ud2b8", "\\uc0ac\\uc6a9\\uc5d0",
326 "\\uc788\\uc5b4\\uc11c", "\\uc0c1\\ub2f9\\ud55c", "\\ube44\\uc6a9", "\\uc808\\uac10",
327 "\\ud6a8\\uacfc\\uac00",
328 "\\ub098\\ud0c0\\ub0a9\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
329 "\\ud1b5\\ud574", "\\ub9ac\\uc5d4\\uc9c0\\ub2c8\\uc5b4\\ub9c1", "\\uc5c6\\uc774",
330 "\\ub2e4\\uc911", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b8\\uc5b4", "\\ubc0f", "\\uad6d\\uac00",
331 "\\uac04\\uc5d0", "\\ub2e8\\uc77c", "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4",
332 "\\ud50c\\ub7ab\\ud3fc", "\\ub610\\ub294", "\\ub2e8\\uc77c", "\\uc6f9",
333 "\\uc0ac\\uc774\\ud2b8\\ub97c", "\\ubaa9\\ud45c\\ub85c", "\\uc0bc\\uc744", "\\uc218",
334 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc774\\ub97c", "\\uc0ac\\uc6a9\\ud558\\uba74",
335 "\\ub370\\uc774\\ud130\\ub97c", "\\uc190\\uc0c1", "\\uc5c6\\uc774", "\\uc5ec\\ub7ec",
336 "\\uc2dc\\uc2a4\\ud15c\\uc744", "\\ud1b5\\ud574", "\\uc804\\uc1a1\\ud560", "\\uc218",
337 "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
338
339 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc5d0", "\\ub300\\ud574",
340 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc740",
341 "\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300",
342 "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc",
343 "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744",
344 "\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758",
345 "\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70",
346 "\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574",
347 "\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4",
348 "\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4",
349 "\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294",
350 "\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758",
351 "\\ubc94\\uc704\\ub97c",
352 "\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758",
353 "\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0",
354 "\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
355 "\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740",
356 "\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098",
357 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0",
358 "\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744",
359 "\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc",
360 "\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4",
361 "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
362
363 "\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,",
364 "\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5",
365 "\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0",
366 "\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c",
367 "\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624."
368 };
369
370 enum { WHAT_IS_UNICODE_length = sizeof(WHAT_IS_UNICODE) / sizeof(WHAT_IS_UNICODE[0]) };
371
372 UParseError parseError;
373 UErrorCode status = U_ZERO_ERROR;
374 Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
375 Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status);
376 if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) {
377 delete latinJamo;
378 delete jamoHangul;
379 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
380 return;
381 }
382 Transliterator* jamoLatin = latinJamo->createInverse(status);
383 Transliterator* hangulJamo = jamoHangul->createInverse(status);
384 if (jamoLatin == 0 || hangulJamo == 0) {
385 errln("FAIL: createInverse returned NULL");
386 delete latinJamo;
387 delete jamoLatin;
388 delete jamoHangul;
389 delete hangulJamo;
390 return;
391 }
392
393 Transliterator* tarray[4] =
394 { hangulJamo, jamoLatin, latinJamo, jamoHangul };
395 CompoundTransliterator rt(tarray, 4);
396
397 UnicodeString buf;
398 int32_t total = 0;
399 int32_t errors = 0;
400 int32_t i;
401 for (i=0; i < WHAT_IS_UNICODE_length; ++i) {
402 ++total;
403 UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV);
404 hangul = hangul.unescape(); // Parse backslash-u escapes
405 UnicodeString hangulX = hangul;
406 rt.transliterate(hangulX);
407 if (hangul != hangulX) {
408 ++errors;
409 UnicodeString jamo = hangul; hangulJamo->transliterate(jamo);
410 UnicodeString latin = jamo; jamoLatin->transliterate(latin);
411 UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2);
412 UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2);
413
414 buf.remove(0);
415 buf.append("FAIL: ");
416 if (hangul2 != hangulX) {
417 buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")");
418 }
419 // The Hangul-Jamo conversion is not usually the
420 // bug here, so we hide it from display.
421 // Uncomment lines to see the Hangul.
422 buf.append(//hangul + " => " +
423 jamoToName(jamo) + " => " +
424 latin + " => " + jamoToName(jamo2)
425 //+ " => " + hangul2
426 );
427 errln(prettify(buf));
428 }
429 }
430 if (errors != 0) {
431 errln((UnicodeString)"Test word failures: " + errors + " out of " + total);
432 } else {
433 logln((UnicodeString)"All " + total + " test words passed");
434 }
435
436 delete latinJamo;
437 delete jamoLatin;
438 delete jamoHangul;
439 delete hangulJamo;
440 }
441
442 // Override TransliteratorTest
443 void
444 JamoTest::expectAux(const UnicodeString& tag,
445 const UnicodeString& summary, UBool pass,
446 const UnicodeString& expectedResult) {
447 UnicodeString jsum = jamoToName(summary);
448 UnicodeString jexp = jamoToName(expectedResult);
449 TransliteratorTest::expectAux(tag, jsum, pass, jexp);
450 }
451
452 const char* JamoTest::JAMO_NAMES_RULES =
453 "'(Gi)' <> \\u1100;"
454 "'(GGi)' <> \\u1101;"
455 "'(Ni)' <> \\u1102;"
456 "'(Di)' <> \\u1103;"
457 "'(DD)' <> \\u1104;"
458 "'(R)' <> \\u1105;"
459 "'(Mi)' <> \\u1106;"
460 "'(Bi)' <> \\u1107;"
461 "'(BB)' <> \\u1108;"
462 "'(Si)' <> \\u1109;"
463 "'(SSi)' <> \\u110A;"
464 "'(IEUNG)' <> \\u110B;"
465 "'(Ji)' <> \\u110C;"
466 "'(JJ)' <> \\u110D;"
467 "'(Ci)' <> \\u110E;"
468 "'(Ki)' <> \\u110F;"
469 "'(Ti)' <> \\u1110;"
470 "'(Pi)' <> \\u1111;"
471 "'(Hi)' <> \\u1112;"
472
473 "'(A)' <> \\u1161;"
474 "'(AE)' <> \\u1162;"
475 "'(YA)' <> \\u1163;"
476 "'(YAE)' <> \\u1164;"
477 "'(EO)' <> \\u1165;"
478 "'(E)' <> \\u1166;"
479 "'(YEO)' <> \\u1167;"
480 "'(YE)' <> \\u1168;"
481 "'(O)' <> \\u1169;"
482 "'(WA)' <> \\u116A;"
483 "'(WAE)' <> \\u116B;"
484 "'(OE)' <> \\u116C;"
485 "'(YO)' <> \\u116D;"
486 "'(U)' <> \\u116E;"
487 "'(WEO)' <> \\u116F;"
488 "'(WE)' <> \\u1170;"
489 "'(WI)' <> \\u1171;"
490 "'(YU)' <> \\u1172;"
491 "'(EU)' <> \\u1173;"
492 "'(YI)' <> \\u1174;"
493 "'(I)' <> \\u1175;"
494
495 "'(Gf)' <> \\u11A8;"
496 "'(GGf)' <> \\u11A9;"
497 "'(GS)' <> \\u11AA;"
498 "'(Nf)' <> \\u11AB;"
499 "'(NJ)' <> \\u11AC;"
500 "'(NH)' <> \\u11AD;"
501 "'(Df)' <> \\u11AE;"
502 "'(L)' <> \\u11AF;"
503 "'(LG)' <> \\u11B0;"
504 "'(LM)' <> \\u11B1;"
505 "'(LB)' <> \\u11B2;"
506 "'(LS)' <> \\u11B3;"
507 "'(LT)' <> \\u11B4;"
508 "'(LP)' <> \\u11B5;"
509 "'(LH)' <> \\u11B6;"
510 "'(Mf)' <> \\u11B7;"
511 "'(Bf)' <> \\u11B8;"
512 "'(BS)' <> \\u11B9;"
513 "'(Sf)' <> \\u11BA;"
514 "'(SSf)' <> \\u11BB;"
515 "'(NG)' <> \\u11BC;"
516 "'(Jf)' <> \\u11BD;"
517 "'(Cf)' <> \\u11BE;"
518 "'(Kf)' <> \\u11BF;"
519 "'(Tf)' <> \\u11C0;"
520 "'(Pf)' <> \\u11C1;"
521 "'(Hf)' <> \\u11C2;";
522
523 /**
524 * Convert short names to actual jamo. E.g., "x(LG)y" returns
525 * "x\u11B0y". See JAMO_NAMES for table of names.
526 */
527 UnicodeString
528 JamoTest::nameToJamo(const UnicodeString& input) {
529 if (NAME_JAMO == 0) {
530 errln("Failed to create NAME_JAMO");
531 return input; /* failure! */
532 }
533 UnicodeString result(input);
534 NAME_JAMO->transliterate(result);
535 return result;
536 }
537
538 /**
539 * Convert jamo to short names. E.g., "x\u11B0y" returns
540 * "x(LG)y". See JAMO_NAMES for table of names.
541 */
542 UnicodeString
543 JamoTest::jamoToName(const UnicodeString& input) {
544 if (NAME_JAMO == 0) {
545 errln("Failed to create NAME_JAMO");
546 return input; /* failure! */
547 }
548 UnicodeString result(input);
549 JAMO_NAME->transliterate(result);
550 return result;
551 }
552
553 #endif /* #if !UCONFIG_NO_TRANSLITERATION */