]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
46f4442e | 3 | * Copyright (c) 1997-2008, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ******************************************************************** | |
6 | ************************************************************************ | |
7 | * Date Name Description | |
8 | * 02/28/2001 aliu Creation | |
9 | * 03/01/2001 George port to HP/UX | |
10 | ************************************************************************/ | |
11 | ||
12 | #include "unicode/utypes.h" | |
13 | ||
14 | #if !UCONFIG_NO_TRANSLITERATION | |
15 | ||
16 | #include "jamotest.h" | |
17 | #include "unicode/utypes.h" | |
18 | #include "unicode/translit.h" | |
b75a7d8f A |
19 | #include "cpdtrans.h" |
20 | ||
21 | // SEP is the disambiguation separator used by Latin-Jamo and Jamo-Latin | |
46f4442e | 22 | #define SEP "-" |
b75a7d8f A |
23 | |
24 | JamoTest::JamoTest() | |
25 | { | |
46f4442e | 26 | UParseError parseError; |
b75a7d8f | 27 | UErrorCode status = U_ZERO_ERROR; |
46f4442e A |
28 | NAME_JAMO = Transliterator::createFromRules("Name-Jamo", |
29 | UnicodeString(JAMO_NAMES_RULES, -1, US_INV), | |
30 | UTRANS_FORWARD, parseError, status); | |
b75a7d8f A |
31 | |
32 | if (U_FAILURE(status)) { | |
33 | delete NAME_JAMO; | |
34 | NAME_JAMO = NULL; | |
35 | } | |
36 | status = U_ZERO_ERROR; | |
46f4442e A |
37 | JAMO_NAME = Transliterator::createFromRules("Jamo-Name", |
38 | UnicodeString(JAMO_NAMES_RULES, -1, US_INV), | |
39 | UTRANS_REVERSE, parseError, status); | |
b75a7d8f A |
40 | if (U_FAILURE(status)) { |
41 | delete JAMO_NAME; | |
42 | JAMO_NAME = NULL; | |
43 | } | |
44 | } | |
45 | ||
46 | JamoTest::~JamoTest() | |
47 | { | |
48 | delete NAME_JAMO; | |
49 | delete JAMO_NAME; | |
50 | } | |
51 | ||
52 | void | |
53 | JamoTest::runIndexedTest(int32_t index, UBool exec, | |
54 | const char* &name, char* /*par*/) { | |
55 | switch (index) { | |
56 | TESTCASE(0,TestJamo); | |
57 | TESTCASE(1,TestRealText); | |
58 | TESTCASE(2,TestPiecemeal); | |
59 | default: name = ""; break; | |
60 | } | |
61 | } | |
62 | ||
63 | void | |
64 | JamoTest::TestJamo() { | |
65 | UParseError parseError; | |
66 | UErrorCode status = U_ZERO_ERROR; | |
67 | Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status); | |
68 | ||
69 | if (latinJamo == 0 || U_FAILURE(status)) { | |
70 | errln("FAIL: createInstance() returned 0"); | |
71 | return; | |
72 | } | |
73 | ||
74 | Transliterator* jamoLatin = latinJamo->createInverse(status); | |
75 | ||
76 | if (jamoLatin == 0) { | |
77 | delete latinJamo; | |
78 | errln("FAIL: createInverse() returned 0"); | |
79 | return; | |
80 | } | |
81 | ||
82 | static const char* CASE[] = { | |
83 | // Column 1 is the latin text L1 to be fed to Latin-Jamo | |
84 | // to yield output J. | |
85 | ||
86 | // Column 2 is expected value of J. J is fed to | |
87 | // Jamo-Latin to yield output L2. | |
88 | ||
89 | // Column 3 is expected value of L2. If the expected | |
90 | // value of L2 is L1, then L2 is NULL. | |
46f4442e A |
91 | |
92 | // add tests for the update to fix problems where it didn't follow the standard | |
93 | // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html | |
94 | "gach", "(Gi)(A)(Cf)", NULL, | |
95 | "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL, | |
96 | "choe", "(Ci)(OE)", NULL, | |
97 | "wo", "(IEUNG)(WEO)", NULL, | |
98 | "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil", | |
99 | "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum", | |
100 | "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum", | |
101 | "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae", | |
102 | "gaga", "(Gi)(A)(Gi)(A)", NULL, | |
103 | "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL, | |
104 | "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL, | |
105 | "gakka", "(Gi)(A)(GGi)(A)", NULL, | |
106 | "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL, | |
107 | "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL, | |
108 | "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL, | |
109 | ||
b75a7d8f | 110 | "bab", "(Bi)(A)(Bf)", NULL, |
46f4442e A |
111 | "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu", |
112 | "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba", | |
113 | "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu", | |
114 | "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga", | |
115 | //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga", | |
b75a7d8f A |
116 | "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL, |
117 | "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL, | |
118 | "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf) | |
46f4442e | 119 | "gga", "(Gi)(EU)(Gi)(A)", "geuga", |
b75a7d8f | 120 | "bsa", "(Bi)(EU)(Si)(A)", "beusa", |
46f4442e A |
121 | "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu", |
122 | "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL, | |
123 | "la", "(R)(A)", NULL, | |
b75a7d8f | 124 | "bs", "(Bi)(EU)(Sf)", "beus", |
46f4442e | 125 | "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga", |
b75a7d8f A |
126 | |
127 | // 'r' in a final position is treated like 'l' | |
128 | "karka", "(Ki)(A)(L)(Ki)(A)", "kalka", | |
129 | }; | |
130 | ||
131 | enum { CASE_length = sizeof(CASE) / sizeof(CASE[0]) }; | |
132 | ||
133 | int32_t i; | |
134 | for (i=0; i<CASE_length; i+=3) { | |
135 | UnicodeString jamo = nameToJamo(CASE[i+1]); | |
136 | if (CASE[i+2] == NULL) { | |
137 | expect(*latinJamo, CASE[i], jamo, *jamoLatin); | |
138 | } else { | |
139 | // Handle case where round-trip is expected to fail | |
140 | expect(*latinJamo, CASE[i], jamo); | |
141 | expect(*jamoLatin, jamo, CASE[i+2]); | |
142 | } | |
143 | } | |
144 | ||
145 | delete latinJamo; | |
146 | delete jamoLatin; | |
147 | } | |
148 | ||
149 | /** | |
150 | * Test various step-at-a-time transformation of hangul to jamo to | |
151 | * latin and back. | |
152 | */ | |
153 | void JamoTest::TestPiecemeal(void) { | |
154 | UnicodeString hangul; hangul.append((UChar)0xBC0F); | |
155 | UnicodeString jamo = nameToJamo("(Mi)(I)(Cf)"); | |
156 | UnicodeString latin("mic"); | |
46f4442e | 157 | UnicodeString latin2("mich"); |
b75a7d8f A |
158 | |
159 | Transliterator *t = NULL; | |
160 | UErrorCode status = U_ZERO_ERROR; | |
161 | ||
162 | t = Transliterator::createInstance("NFD", UTRANS_FORWARD, status); // was Hangul-Jamo | |
163 | if (U_FAILURE(status) || t == 0) { | |
164 | errln("FAIL: createInstance failed"); | |
165 | return; | |
166 | } | |
167 | expect(*t, hangul, jamo); | |
168 | delete t; | |
169 | ||
170 | t = Transliterator::createInstance("NFC", UTRANS_FORWARD, status); // was Jamo-Hangul | |
171 | if (U_FAILURE(status) || t == 0) { | |
172 | errln("FAIL: createInstance failed"); | |
173 | return; | |
174 | } | |
175 | expect(*t, jamo, hangul); | |
176 | delete t; | |
177 | ||
178 | t = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, status); | |
179 | if (U_FAILURE(status) || t == 0) { | |
180 | errln("FAIL: createInstance failed"); | |
181 | return; | |
182 | } | |
183 | expect(*t, latin, jamo); | |
184 | delete t; | |
185 | ||
186 | t = Transliterator::createInstance("Jamo-Latin", UTRANS_FORWARD, status); | |
187 | if (U_FAILURE(status) || t == 0) { | |
188 | errln("FAIL: createInstance failed"); | |
189 | return; | |
190 | } | |
46f4442e | 191 | expect(*t, jamo, latin2); |
b75a7d8f A |
192 | delete t; |
193 | ||
194 | t = Transliterator::createInstance("Hangul-Latin", UTRANS_FORWARD, status); | |
195 | if (U_FAILURE(status) || t == 0) { | |
196 | errln("FAIL: createInstance failed"); | |
197 | return; | |
198 | } | |
46f4442e | 199 | expect(*t, hangul, latin2); |
b75a7d8f A |
200 | delete t; |
201 | ||
202 | t = Transliterator::createInstance("Latin-Hangul", UTRANS_FORWARD, status); | |
203 | if (U_FAILURE(status) || t == 0) { | |
204 | errln("FAIL: createInstance failed"); | |
205 | return; | |
206 | } | |
207 | expect(*t, latin, hangul); | |
208 | delete t; | |
209 | ||
210 | t = Transliterator::createInstance("Hangul-Latin; Latin-Jamo", UTRANS_FORWARD, status); | |
211 | if (U_FAILURE(status) || t == 0) { | |
212 | errln("FAIL: createInstance failed"); | |
213 | return; | |
214 | } | |
215 | expect(*t, hangul, jamo); | |
216 | delete t; | |
217 | ||
218 | t = Transliterator::createInstance("Jamo-Latin; Latin-Hangul", UTRANS_FORWARD, status); | |
219 | if (U_FAILURE(status) || t == 0) { | |
220 | errln("FAIL: createInstance failed"); | |
221 | return; | |
222 | } | |
223 | expect(*t, jamo, hangul); | |
224 | delete t; | |
225 | ||
226 | t = Transliterator::createInstance("Hangul-Latin; Latin-Hangul", UTRANS_FORWARD, status); | |
227 | if (U_FAILURE(status) || t == 0) { | |
228 | errln("FAIL: createInstance failed"); | |
229 | return; | |
230 | } | |
231 | expect(*t, hangul, hangul); | |
232 | delete t; | |
233 | } | |
234 | ||
235 | void | |
236 | JamoTest::TestRealText() { | |
237 | // Test text taken from the Unicode web site | |
46f4442e | 238 | static const char* const WHAT_IS_UNICODE[] = { |
b75a7d8f A |
239 | "\\uc720\\ub2c8\\ucf54\\ub4dc\\uc5d0", "\\ub300\\ud574", "?", |
240 | ||
241 | "\\uc5b4\\ub5a4", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b4\\ub5a4", | |
242 | "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b4\\ub5a4", "\\uc5b8\\uc5b4\\uc5d0\\ub3c4", | |
243 | "\\uc0c1\\uad00\\uc5c6\\uc774", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\ubaa8\\ub4e0", | |
244 | "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uace0\\uc720", "\\ubc88\\ud638\\ub97c", | |
245 | "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", | |
246 | ||
247 | "\\uae30\\ubcf8\\uc801\\uc73c\\ub85c", "\\ucef4\\ud4e8\\ud130\\ub294", | |
248 | "\\uc22b\\uc790\\ub9cc", "\\ucc98\\ub9ac\\ud569\\ub2c8\\ub2e4.", "\\uae00\\uc790\\ub098", | |
249 | "\\ub2e4\\ub978", "\\ubb38\\uc790\\uc5d0\\ub3c4", "\\uc22b\\uc790\\ub97c", | |
250 | "\\uc9c0\\uc815\\ud558\\uc5ec", | |
251 | "\\uc800\\uc7a5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\uac00", | |
252 | "\\uac1c\\ubc1c\\ub418\\uae30", "\\uc804\\uc5d0\\ub294", "\\uc774\\ub7ec\\ud55c", | |
253 | "\\uc22b\\uc790\\ub97c", "\\uc9c0\\uc815\\ud558\\uae30", "\\uc704\\ud574", "\\uc218\\ubc31", | |
254 | "\\uac00\\uc9c0\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", | |
255 | "\\uc2dc\\uc2a4\\ud15c\\uc744", | |
256 | "\\uc0ac\\uc6a9\\ud588\\uc2b5\\ub2c8\\ub2e4.", "\\ub2e8\\uc77c", "\\uae30\\ud638\\ud654", | |
257 | "\\ubc29\\ubc95\\uc73c\\ub85c\\ub294", "\\ubaa8\\ub4e0", "\\ubb38\\uc790\\ub97c", | |
258 | "\\ud3ec\\ud568\\ud560", "\\uc218", "\\uc5c6\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc608\\ub97c", | |
259 | "\\ub4e4\\uc5b4", "\\uc720\\ub7fd", "\\uc5f0\\ud569\\uc5d0\\uc11c\\ub9cc", | |
260 | "\\ubcf4\\ub354\\ub77c\\ub3c4", "\\ubaa8\\ub4e0", "\\uac01", "\\ub098\\ub77c\\ubcc4", | |
261 | "\\uc5b8\\uc5b4\\ub97c", "\\ucc98\\ub9ac\\ud558\\ub824\\uba74", "\\uc5ec\\ub7ec", | |
262 | "\\uac1c\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", | |
263 | "\\ud544\\uc694\\ud569\\ub2c8\\ub2e4.", "\\uc601\\uc5b4\\uc640", "\\uac19\\uc740", | |
264 | "\\ub2e8\\uc77c", "\\uc5b8\\uc5b4\\uc758", "\\uacbd\\uc6b0\\ub3c4", | |
265 | "\\uacf5\\ud1b5\\uc801\\uc73c\\ub85c", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0", | |
266 | "\\uae00\\uc790,", "\\ubb38\\uc7a5", "\\ubd80\\ud638", "\\ubc0f", | |
267 | "\\ud14c\\ud06c\\ub2c8\\uceec", "\\uae30\\ud638\\uc5d0", "\\ub9de\\ub294", "\\ub2e8\\uc77c", | |
268 | "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc744", "\\uac16\\uace0", "\\uc788\\uc9c0", | |
269 | "\\ubabb\\ud558\\uc600\\uc2b5\\ub2c8\\ub2e4.", | |
270 | ||
271 | "\\uc774\\ub7ec\\ud55c", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uc740", | |
272 | "\\ub610\\ud55c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uacfc", | |
273 | "\\ucda9\\ub3cc\\ud569\\ub2c8\\ub2e4.", "\\uc989", "\\ub450", "\\uac00\\uc9c0", | |
274 | "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", "\\ub450", "\\uac1c\\uc758", "\\ub2e4\\ub978", | |
275 | "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uac19\\uc740", "\\ubc88\\ud638\\ub97c", | |
276 | "\\uc0ac\\uc6a9\\ud558\\uac70\\ub098", "\\uac19\\uc740", "\\ubb38\\uc790\\uc5d0", | |
277 | "\\ub300\\ud574", "\\ub2e4\\ub978", "\\ubc88\\ud638\\ub97c", "\\uc0ac\\uc6a9\\ud560", "\\uc218", | |
278 | "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc8fc\\uc5b4\\uc9c4", "\\ubaa8\\ub4e0", | |
279 | "\\ucef4\\ud4e8\\ud130(\\ud2b9\\ud788", "\\uc11c\\ubc84)\\ub294", "\\uc11c\\ub85c", | |
280 | "\\ub2e4\\ub978", "\\uc5ec\\ub7ec", "\\uac00\\uc9c0", "\\uae30\\ud638\\ud654", | |
281 | "\\ubc29\\ubc95\\uc744", "\\uc9c0\\uc6d0\\ud574\\uc57c", | |
282 | "\\ud569\\ub2c8\\ub2e4.", "\\uadf8\\ub7ec\\ub098,", "\\ub370\\uc774\\ud130\\ub97c", | |
283 | "\\uc11c\\ub85c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774\\ub098", | |
284 | "\\ud50c\\ub7ab\\ud3fc", "\\uac04\\uc5d0", "\\uc804\\ub2ec\\ud560", "\\ub54c\\ub9c8\\ub2e4", | |
285 | "\\uadf8", "\\ub370\\uc774\\ud130\\ub294", "\\ud56d\\uc0c1", "\\uc190\\uc0c1\\uc758", | |
286 | "\\uc704\\ud5d8\\uc744", "\\uacaa\\uac8c", "\\ub429\\ub2c8\\ub2e4.", | |
287 | ||
288 | "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub85c", "\\ubaa8\\ub4e0", "\\uac83\\uc744", | |
289 | "\\ud574\\uacb0\\ud560", "\\uc218", "\\uc788\\uc2b5\\ub2c8\\ub2e4!", | |
290 | "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\uc0ac\\uc6a9", "\\uc911\\uc778", | |
291 | "\\ud50c\\ub7ab\\ud3fc,", "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b8\\uc5b4\\uc5d0", | |
292 | "\\uad00\\uacc4\\uc5c6\\uc774", "\\ubb38\\uc790\\ub9c8\\ub2e4", "\\uace0\\uc720\\ud55c", | |
293 | "\\uc22b\\uc790\\ub97c", | |
294 | "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", | |
295 | "\\ud45c\\uc900\\uc740", // "Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, " | |
296 | // "Sun, Sybase, Unisys " | |
297 | "\\ubc0f", "\\uae30\\ud0c0", "\\uc5ec\\ub7ec", | |
298 | "\\ud68c\\uc0ac\\uc640", "\\uac19\\uc740", "\\uc5c5\\uacc4", | |
299 | "\\uc120\\ub450\\uc8fc\\uc790\\uc5d0", "\\uc758\\ud574", | |
300 | "\\ucc44\\ud0dd\\ub418\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", | |
301 | // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML " | |
302 | "\\ub4f1\\uacfc", | |
303 | "\\uac19\\uc774", "\\ud604\\uc7ac", "\\ub110\\ub9ac", "\\uc0ac\\uc6a9\\ub418\\ub294", | |
304 | "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud544\\uc694\\ud558\\uba70", "\\uc774\\ub294", // "ISO/IEC", | |
305 | "10646\\uc744", "\\uad6c\\ud604\\ud558\\ub294", "\\uacf5\\uc2dd\\uc801\\uc778", | |
306 | "\\ubc29\\ubc95\\uc785\\ub2c8\\ub2e4.", "\\uc774\\ub294", "\\ub9ce\\uc740", "\\uc6b4\\uc601", | |
307 | "\\uccb4\\uc81c,", "\\uc694\\uc998", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0", | |
308 | "\\ube0c\\ub77c\\uc6b0\\uc800", "\\ubc0f", "\\uae30\\ud0c0", "\\ub9ce\\uc740", | |
309 | "\\uc81c\\ud488\\uc5d0\\uc11c", | |
310 | "\\uc9c0\\uc6d0\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", | |
311 | "\\ud45c\\uc900\\uc758", "\\ubd80\\uc0c1\\uacfc", "\\uc774\\ub97c", | |
312 | "\\uc9c0\\uc6d0\\ud558\\ub294", "\\ub3c4\\uad6c\\uc758", "\\uac00\\uc6a9\\uc131\\uc740", | |
313 | "\\ucd5c\\uadfc", "\\uc804", "\\uc138\\uacc4\\uc5d0", "\\ubd88\\uace0", "\\uc788\\ub294", | |
314 | "\\uae30\\uc220", "\\uacbd\\ud5a5\\uc5d0\\uc11c", "\\uac00\\uc7a5", "\\uc911\\uc694\\ud55c", | |
315 | "\\ubd80\\ubd84\\uc744", "\\ucc28\\uc9c0\\ud558\\uace0", "\\uc788\\uc2b5\\ub2c8\\ub2e4.", | |
316 | ||
317 | "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c", | |
46f4442e A |
318 | // Replaced a hyphen with a space to make the test case work with CLDR1.5 |
319 | //"\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8-\\uc11c\\ubc84", "\\ub610\\ub294", | |
320 | "\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8 \\uc11c\\ubc84", "\\ub610\\ub294", | |
321 | // Replaced a hyphen with a space. | |
322 | //"\\ub2e4\\uc911-\\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc", | |
323 | "\\ub2e4\\uc911 \\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc", | |
b75a7d8f A |
324 | "\\uc6f9", "\\uc0ac\\uc774\\ud2b8\\uc5d0", "\\ud1b5\\ud569\\ud558\\uba74", |
325 | "\\ub808\\uac70\\uc2dc", "\\ubb38\\uc790", "\\uc138\\ud2b8", "\\uc0ac\\uc6a9\\uc5d0", | |
326 | "\\uc788\\uc5b4\\uc11c", "\\uc0c1\\ub2f9\\ud55c", "\\ube44\\uc6a9", "\\uc808\\uac10", | |
327 | "\\ud6a8\\uacfc\\uac00", | |
328 | "\\ub098\\ud0c0\\ub0a9\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c", | |
329 | "\\ud1b5\\ud574", "\\ub9ac\\uc5d4\\uc9c0\\ub2c8\\uc5b4\\ub9c1", "\\uc5c6\\uc774", | |
330 | "\\ub2e4\\uc911", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b8\\uc5b4", "\\ubc0f", "\\uad6d\\uac00", | |
331 | "\\uac04\\uc5d0", "\\ub2e8\\uc77c", "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", | |
332 | "\\ud50c\\ub7ab\\ud3fc", "\\ub610\\ub294", "\\ub2e8\\uc77c", "\\uc6f9", | |
333 | "\\uc0ac\\uc774\\ud2b8\\ub97c", "\\ubaa9\\ud45c\\ub85c", "\\uc0bc\\uc744", "\\uc218", | |
334 | "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc774\\ub97c", "\\uc0ac\\uc6a9\\ud558\\uba74", | |
335 | "\\ub370\\uc774\\ud130\\ub97c", "\\uc190\\uc0c1", "\\uc5c6\\uc774", "\\uc5ec\\ub7ec", | |
336 | "\\uc2dc\\uc2a4\\ud15c\\uc744", "\\ud1b5\\ud574", "\\uc804\\uc1a1\\ud560", "\\uc218", | |
337 | "\\uc788\\uc2b5\\ub2c8\\ub2e4.", | |
338 | ||
339 | "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc5d0", "\\ub300\\ud574", | |
340 | "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc740", | |
341 | "\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300", | |
342 | "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc", | |
343 | "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744", | |
344 | "\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758", | |
345 | "\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70", | |
346 | "\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574", | |
347 | "\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4", | |
348 | "\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4", | |
349 | "\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294", | |
350 | "\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758", | |
351 | "\\ubc94\\uc704\\ub97c", | |
352 | "\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758", | |
353 | "\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0", | |
354 | "\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", | |
355 | "\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740", | |
356 | "\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098", | |
357 | "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0", | |
358 | "\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744", | |
359 | "\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc", | |
360 | "\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4", | |
361 | "\\uc788\\uc2b5\\ub2c8\\ub2e4.", | |
362 | ||
363 | "\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,", | |
364 | "\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5", | |
365 | "\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0", | |
366 | "\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c", | |
367 | "\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624." | |
368 | }; | |
369 | ||
370 | enum { WHAT_IS_UNICODE_length = sizeof(WHAT_IS_UNICODE) / sizeof(WHAT_IS_UNICODE[0]) }; | |
371 | ||
372 | UParseError parseError; | |
373 | UErrorCode status = U_ZERO_ERROR; | |
374 | Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status); | |
375 | Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status); | |
376 | if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) { | |
377 | delete latinJamo; | |
378 | delete jamoHangul; | |
379 | errln("FAIL: createInstance returned NULL"); | |
380 | return; | |
381 | } | |
382 | Transliterator* jamoLatin = latinJamo->createInverse(status); | |
383 | Transliterator* hangulJamo = jamoHangul->createInverse(status); | |
384 | if (jamoLatin == 0 || hangulJamo == 0) { | |
385 | errln("FAIL: createInverse returned NULL"); | |
386 | delete latinJamo; | |
387 | delete jamoLatin; | |
388 | delete jamoHangul; | |
389 | delete hangulJamo; | |
390 | return; | |
391 | } | |
392 | ||
393 | Transliterator* tarray[4] = | |
394 | { hangulJamo, jamoLatin, latinJamo, jamoHangul }; | |
395 | CompoundTransliterator rt(tarray, 4); | |
396 | ||
397 | UnicodeString buf; | |
398 | int32_t total = 0; | |
399 | int32_t errors = 0; | |
400 | int32_t i; | |
401 | for (i=0; i < WHAT_IS_UNICODE_length; ++i) { | |
402 | ++total; | |
46f4442e | 403 | UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV); |
b75a7d8f A |
404 | hangul = hangul.unescape(); // Parse backslash-u escapes |
405 | UnicodeString hangulX = hangul; | |
406 | rt.transliterate(hangulX); | |
407 | if (hangul != hangulX) { | |
408 | ++errors; | |
409 | UnicodeString jamo = hangul; hangulJamo->transliterate(jamo); | |
410 | UnicodeString latin = jamo; jamoLatin->transliterate(latin); | |
411 | UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2); | |
412 | UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2); | |
413 | ||
414 | buf.remove(0); | |
415 | buf.append("FAIL: "); | |
416 | if (hangul2 != hangulX) { | |
417 | buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")"); | |
418 | } | |
419 | // The Hangul-Jamo conversion is not usually the | |
420 | // bug here, so we hide it from display. | |
421 | // Uncomment lines to see the Hangul. | |
422 | buf.append(//hangul + " => " + | |
423 | jamoToName(jamo) + " => " + | |
424 | latin + " => " + jamoToName(jamo2) | |
425 | //+ " => " + hangul2 | |
426 | ); | |
427 | errln(prettify(buf)); | |
428 | } | |
429 | } | |
430 | if (errors != 0) { | |
431 | errln((UnicodeString)"Test word failures: " + errors + " out of " + total); | |
432 | } else { | |
433 | logln((UnicodeString)"All " + total + " test words passed"); | |
434 | } | |
435 | ||
436 | delete latinJamo; | |
437 | delete jamoLatin; | |
438 | delete jamoHangul; | |
439 | delete hangulJamo; | |
440 | } | |
441 | ||
442 | // Override TransliteratorTest | |
443 | void | |
444 | JamoTest::expectAux(const UnicodeString& tag, | |
445 | const UnicodeString& summary, UBool pass, | |
446 | const UnicodeString& expectedResult) { | |
447 | UnicodeString jsum = jamoToName(summary); | |
448 | UnicodeString jexp = jamoToName(expectedResult); | |
449 | TransliteratorTest::expectAux(tag, jsum, pass, jexp); | |
450 | } | |
451 | ||
452 | const char* JamoTest::JAMO_NAMES_RULES = | |
453 | "'(Gi)' <> \\u1100;" | |
454 | "'(GGi)' <> \\u1101;" | |
455 | "'(Ni)' <> \\u1102;" | |
456 | "'(Di)' <> \\u1103;" | |
457 | "'(DD)' <> \\u1104;" | |
458 | "'(R)' <> \\u1105;" | |
459 | "'(Mi)' <> \\u1106;" | |
460 | "'(Bi)' <> \\u1107;" | |
461 | "'(BB)' <> \\u1108;" | |
462 | "'(Si)' <> \\u1109;" | |
463 | "'(SSi)' <> \\u110A;" | |
464 | "'(IEUNG)' <> \\u110B;" | |
465 | "'(Ji)' <> \\u110C;" | |
466 | "'(JJ)' <> \\u110D;" | |
467 | "'(Ci)' <> \\u110E;" | |
468 | "'(Ki)' <> \\u110F;" | |
469 | "'(Ti)' <> \\u1110;" | |
470 | "'(Pi)' <> \\u1111;" | |
471 | "'(Hi)' <> \\u1112;" | |
472 | ||
473 | "'(A)' <> \\u1161;" | |
474 | "'(AE)' <> \\u1162;" | |
475 | "'(YA)' <> \\u1163;" | |
476 | "'(YAE)' <> \\u1164;" | |
477 | "'(EO)' <> \\u1165;" | |
478 | "'(E)' <> \\u1166;" | |
479 | "'(YEO)' <> \\u1167;" | |
480 | "'(YE)' <> \\u1168;" | |
481 | "'(O)' <> \\u1169;" | |
482 | "'(WA)' <> \\u116A;" | |
483 | "'(WAE)' <> \\u116B;" | |
484 | "'(OE)' <> \\u116C;" | |
485 | "'(YO)' <> \\u116D;" | |
486 | "'(U)' <> \\u116E;" | |
487 | "'(WEO)' <> \\u116F;" | |
488 | "'(WE)' <> \\u1170;" | |
489 | "'(WI)' <> \\u1171;" | |
490 | "'(YU)' <> \\u1172;" | |
491 | "'(EU)' <> \\u1173;" | |
492 | "'(YI)' <> \\u1174;" | |
493 | "'(I)' <> \\u1175;" | |
494 | ||
495 | "'(Gf)' <> \\u11A8;" | |
496 | "'(GGf)' <> \\u11A9;" | |
497 | "'(GS)' <> \\u11AA;" | |
498 | "'(Nf)' <> \\u11AB;" | |
499 | "'(NJ)' <> \\u11AC;" | |
500 | "'(NH)' <> \\u11AD;" | |
501 | "'(Df)' <> \\u11AE;" | |
502 | "'(L)' <> \\u11AF;" | |
503 | "'(LG)' <> \\u11B0;" | |
504 | "'(LM)' <> \\u11B1;" | |
505 | "'(LB)' <> \\u11B2;" | |
506 | "'(LS)' <> \\u11B3;" | |
507 | "'(LT)' <> \\u11B4;" | |
508 | "'(LP)' <> \\u11B5;" | |
509 | "'(LH)' <> \\u11B6;" | |
510 | "'(Mf)' <> \\u11B7;" | |
511 | "'(Bf)' <> \\u11B8;" | |
512 | "'(BS)' <> \\u11B9;" | |
513 | "'(Sf)' <> \\u11BA;" | |
514 | "'(SSf)' <> \\u11BB;" | |
515 | "'(NG)' <> \\u11BC;" | |
516 | "'(Jf)' <> \\u11BD;" | |
517 | "'(Cf)' <> \\u11BE;" | |
518 | "'(Kf)' <> \\u11BF;" | |
519 | "'(Tf)' <> \\u11C0;" | |
520 | "'(Pf)' <> \\u11C1;" | |
521 | "'(Hf)' <> \\u11C2;"; | |
522 | ||
523 | /** | |
524 | * Convert short names to actual jamo. E.g., "x(LG)y" returns | |
525 | * "x\u11B0y". See JAMO_NAMES for table of names. | |
526 | */ | |
527 | UnicodeString | |
528 | JamoTest::nameToJamo(const UnicodeString& input) { | |
529 | if (NAME_JAMO == 0) { | |
530 | errln("Failed to create NAME_JAMO"); | |
531 | return input; /* failure! */ | |
532 | } | |
533 | UnicodeString result(input); | |
534 | NAME_JAMO->transliterate(result); | |
535 | return result; | |
536 | } | |
537 | ||
538 | /** | |
539 | * Convert jamo to short names. E.g., "x\u11B0y" returns | |
540 | * "x(LG)y". See JAMO_NAMES for table of names. | |
541 | */ | |
542 | UnicodeString | |
543 | JamoTest::jamoToName(const UnicodeString& input) { | |
544 | if (NAME_JAMO == 0) { | |
545 | errln("Failed to create NAME_JAMO"); | |
546 | return input; /* failure! */ | |
547 | } | |
548 | UnicodeString result(input); | |
549 | JAMO_NAME->transliterate(result); | |
550 | return result; | |
551 | } | |
552 | ||
553 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |