]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /******************************************************************** |
4 | * COPYRIGHT: | |
b331163b | 5 | * Copyright (c) 1997-2014, International Business Machines Corporation and |
b75a7d8f A |
6 | * others. All Rights Reserved. |
7 | ********************************************************************/ | |
8 | ||
9 | #include "unicode/utypes.h" | |
10 | ||
11 | #if !UCONFIG_NO_COLLATION | |
12 | ||
13 | #include "unicode/coll.h" | |
14 | #include "unicode/tblcoll.h" | |
15 | #include "unicode/unistr.h" | |
16 | #include "unicode/sortkey.h" | |
17 | #include "g7coll.h" | |
18 | #include "sfwdchit.h" | |
b331163b | 19 | #include "cmemory.h" |
b75a7d8f | 20 | |
46f4442e | 21 | static const UChar testCases[][G7CollationTest::MAX_TOKEN_LEN] = { |
b75a7d8f A |
22 | { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, |
23 | 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000}, /* 9 */ | |
24 | { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000}, /* 1 */ | |
25 | { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000}, /* 2 */ | |
26 | { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000}, /* 3 */ | |
27 | { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 4 */ | |
28 | { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 5 */ | |
29 | { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000}, /* 6 */ | |
30 | { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000}, /* 7 */ | |
31 | { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000}, /* 8 */ | |
32 | { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, | |
33 | 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 12 */ | |
34 | { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000}, /* 10 */ | |
35 | { 0x0050 /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000}, /* 11 */ | |
36 | { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, | |
37 | 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 13 */ | |
38 | { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, | |
39 | 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000}, /* 0 */ | |
40 | {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000}, /* 14 */ | |
41 | /* Additional tests */ | |
42 | { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 }, /* 15 */ | |
43 | { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 }, /* 16 */ | |
44 | { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 }, /* 17 */ | |
45 | { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 }, /* 18 */ | |
46 | { 0x003f /*'?'*/, 0x0000 }, /* 19 */ | |
47 | { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 }, /* 20 */ | |
48 | { 0x0023 /*'#'*/, 0x0000 }, /* 21 */ | |
49 | { 0x0026 /*'&'*/, 0x0000 }, /* 22 */ | |
50 | { 0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/, | |
51 | 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 24 */ | |
52 | { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/, | |
53 | 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 23 */ | |
54 | { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000}, /* 25 */ | |
55 | { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000}, /* 27 */ | |
56 | { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 28 */ | |
57 | { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 26 */ | |
58 | { 0x007a /*'z'*/, 0x0065 /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000} /* 29 */ | |
59 | }; | |
60 | ||
46f4442e | 61 | static const int32_t results[G7CollationTest::TESTLOCALES][G7CollationTest::TOTALTESTSET] = { |
b75a7d8f A |
62 | { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */ |
63 | { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */ | |
64 | { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */ | |
729e4ab9 | 65 | { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */ |
b75a7d8f A |
66 | { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */ |
67 | { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */ | |
68 | { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */ | |
69 | { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */ | |
70 | /* new table collation with rules "& Z < p, P" loop to FIXEDTESTSET */ | |
71 | { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, | |
72 | /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */ | |
73 | { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }, | |
74 | /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&' " loop to TOTALTESTSET */ | |
75 | { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 }, | |
76 | /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */ /* loop to TOTALTESTSET */ | |
77 | { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 } | |
78 | }; | |
79 | ||
374ca955 | 80 | G7CollationTest::~G7CollationTest() {} |
b75a7d8f A |
81 | |
82 | void G7CollationTest::TestG7Locales(/* char* par */) | |
83 | { | |
84 | int32_t i; | |
85 | const Locale locales[8] = { | |
86 | Locale("en", "US", ""), | |
87 | Locale("en", "GB", ""), | |
88 | Locale("en", "CA", ""), | |
89 | Locale("fr", "FR", ""), | |
90 | Locale("fr", "CA", ""), | |
91 | Locale("de", "DE", ""), | |
92 | Locale("it", "IT", ""), | |
93 | Locale("ja", "JP", "") | |
94 | }; | |
95 | ||
b331163b | 96 | for (i = 0; i < UPRV_LENGTHOF(locales); i++) |
b75a7d8f | 97 | { |
b75a7d8f A |
98 | UnicodeString dispName; |
99 | UErrorCode status = U_ZERO_ERROR; | |
b75a7d8f | 100 | |
b331163b A |
101 | const Locale &locale = locales[i]; |
102 | LocalPointer<Collator> myCollation(Collator::createInstance(locale, status)); | |
b75a7d8f | 103 | if(U_FAILURE(status)) { |
729e4ab9 | 104 | errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); |
b75a7d8f A |
105 | return; |
106 | } | |
107 | myCollation->setStrength(Collator::QUATERNARY); | |
108 | myCollation->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); | |
b331163b A |
109 | if (U_FAILURE(status)) { |
110 | errln("Locale %s creation failed - %s", locale.getName(), u_errorName(status)); | |
b75a7d8f A |
111 | continue; |
112 | } | |
113 | ||
b331163b A |
114 | const UnicodeString &rules = ((RuleBasedCollator*)myCollation.getAlias())->getRules(); |
115 | if (rules.isEmpty() && | |
116 | (locale == Locale::getCanadaFrench() || locale == Locale::getJapanese())) { | |
117 | dataerrln("%s Collator missing rule string", locale.getName()); | |
118 | if (logKnownIssue("10671", "TestG7Locales does not test ignore-punctuation")) { | |
119 | continue; | |
120 | } | |
121 | } else { | |
122 | status = U_ZERO_ERROR; | |
123 | RuleBasedCollator *tblColl1 = new RuleBasedCollator(rules, status); | |
124 | if (U_FAILURE(status)) { | |
125 | errln("Recreate %s collation failed - %s", locale.getName(), u_errorName(status)); | |
126 | continue; | |
127 | } | |
128 | myCollation.adoptInstead(tblColl1); | |
b75a7d8f A |
129 | } |
130 | ||
131 | UnicodeString msg; | |
132 | ||
133 | msg += "Locale "; | |
134 | msg += locales[i].getDisplayName(dispName); | |
135 | msg += "tests start :"; | |
136 | logln(msg); | |
137 | ||
138 | int32_t j, n; | |
139 | for (j = 0; j < FIXEDTESTSET; j++) | |
140 | { | |
141 | for (n = j+1; n < FIXEDTESTSET; n++) | |
142 | { | |
b331163b | 143 | doTest(myCollation.getAlias(), testCases[results[i][j]], testCases[results[i][n]], Collator::LESS); |
b75a7d8f A |
144 | } |
145 | } | |
b75a7d8f A |
146 | } |
147 | } | |
148 | ||
149 | void G7CollationTest::TestDemo1(/* char* par */) | |
150 | { | |
151 | logln("Demo Test 1 : Create a new table collation with rules \"& Z < p, P\""); | |
152 | UErrorCode status = U_ZERO_ERROR; | |
153 | Collator *col = Collator::createInstance("en_US", status); | |
154 | if(U_FAILURE(status)) { | |
155 | delete col; | |
729e4ab9 | 156 | errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); |
b75a7d8f A |
157 | return; |
158 | } | |
159 | const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules(); | |
160 | UnicodeString newRules(" & Z < p, P"); | |
161 | newRules.insert(0, baseRules); | |
162 | RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status); | |
163 | ||
164 | if (U_FAILURE(status)) | |
165 | { | |
166 | errln( "Demo Test 1 Table Collation object creation failed."); | |
167 | return; | |
168 | } | |
169 | ||
170 | int32_t j, n; | |
171 | for (j = 0; j < FIXEDTESTSET; j++) | |
172 | { | |
173 | for (n = j+1; n < FIXEDTESTSET; n++) | |
174 | { | |
175 | doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], Collator::LESS); | |
176 | } | |
177 | } | |
178 | ||
179 | delete myCollation; | |
180 | delete col; | |
181 | } | |
182 | ||
183 | void G7CollationTest::TestDemo2(/* char* par */) | |
184 | { | |
185 | logln("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\""); | |
186 | UErrorCode status = U_ZERO_ERROR; | |
187 | Collator *col = Collator::createInstance("en_US", status); | |
188 | if(U_FAILURE(status)) { | |
189 | delete col; | |
729e4ab9 | 190 | errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); |
b75a7d8f A |
191 | return; |
192 | } | |
193 | const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules(); | |
194 | UnicodeString newRules("& C < ch , cH, Ch, CH"); | |
195 | newRules.insert(0, baseRules); | |
196 | RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status); | |
197 | ||
198 | if (U_FAILURE(status)) | |
199 | { | |
200 | errln("Demo Test 2 Table Collation object creation failed."); | |
201 | return; | |
202 | } | |
203 | ||
204 | int32_t j, n; | |
205 | for (j = 0; j < TOTALTESTSET; j++) | |
206 | { | |
207 | for (n = j+1; n < TOTALTESTSET; n++) | |
208 | { | |
209 | doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], Collator::LESS); | |
210 | } | |
211 | } | |
212 | ||
213 | delete myCollation; | |
214 | delete col; | |
215 | } | |
216 | ||
217 | void G7CollationTest::TestDemo3(/* char* par */) | |
218 | { | |
219 | logln("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\""); | |
220 | UErrorCode status = U_ZERO_ERROR; | |
221 | Collator *col = Collator::createInstance("en_US", status); | |
222 | if(U_FAILURE(status)) { | |
729e4ab9 | 223 | errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); |
b75a7d8f A |
224 | delete col; |
225 | return; | |
226 | } | |
227 | const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules(); | |
228 | UnicodeString newRules = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'"; | |
229 | newRules.insert(0, baseRules); | |
230 | RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status); | |
231 | ||
232 | if (U_FAILURE(status)) | |
233 | { | |
234 | errln("Demo Test 3 Table Collation object creation failed."); | |
235 | return; | |
236 | } | |
237 | ||
238 | int32_t j, n; | |
239 | for (j = 0; j < TOTALTESTSET; j++) | |
240 | { | |
241 | for (n = j+1; n < TOTALTESTSET; n++) | |
242 | { | |
243 | doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], Collator::LESS); | |
244 | } | |
245 | } | |
246 | ||
247 | delete myCollation; | |
248 | delete col; | |
249 | } | |
250 | ||
251 | void G7CollationTest::TestDemo4(/* char* par */) | |
252 | { | |
253 | logln("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \""); | |
254 | UErrorCode status = U_ZERO_ERROR; | |
255 | Collator *col = Collator::createInstance("en_US", status); | |
256 | if(U_FAILURE(status)) { | |
257 | delete col; | |
729e4ab9 | 258 | errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); |
b75a7d8f A |
259 | return; |
260 | } | |
261 | ||
262 | const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules(); | |
263 | UnicodeString newRules = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' "; | |
264 | newRules.insert(0, baseRules); | |
265 | RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status); | |
266 | ||
267 | int32_t j, n; | |
268 | for (j = 0; j < TOTALTESTSET; j++) | |
269 | { | |
270 | for (n = j+1; n < TOTALTESTSET; n++) | |
271 | { | |
272 | doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], Collator::LESS); | |
273 | } | |
274 | } | |
275 | ||
276 | delete myCollation; | |
277 | delete col; | |
278 | } | |
279 | ||
280 | void G7CollationTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) | |
281 | { | |
282 | if (exec) logln("TestSuite G7CollationTest: "); | |
283 | switch (index) { | |
284 | case 0: name = "TestG7Locales"; if (exec) TestG7Locales(/* par */); break; | |
285 | case 1: name = "TestDemo1"; if (exec) TestDemo1(/* par */); break; | |
286 | case 2: name = "TestDemo2"; if (exec) TestDemo2(/* par */); break; | |
287 | case 3: name = "TestDemo3"; if (exec) TestDemo3(/* par */); break; | |
288 | case 4: name = "TestDemo4"; if (exec) TestDemo4(/* par */); break; | |
289 | default: name = ""; break; | |
290 | } | |
291 | } | |
292 | ||
293 | #endif /* #if !UCONFIG_NO_COLLATION */ |