]>
Commit | Line | Data |
---|---|---|
a9aaacca A |
1 | #include <TargetConditionals.h> |
2 | #include <errno.h> | |
3 | #include <limits.h> | |
4 | #include <locale.h> | |
5 | #include <stdlib.h> | |
6 | #include <xlocale.h> | |
7 | ||
8 | #include <darwintest.h> | |
9 | ||
10 | void __collate_lookup_l(const __darwin_wchar_t *, int *, int *, int *, | |
11 | locale_t); | |
12 | void __collate_lookup(const unsigned char *, int *, int *, int *); | |
13 | ||
14 | #define CHARS_WITHOUT_ENTRIES "\xdf" | |
15 | ||
16 | /* | |
17 | * in C or POSIX locales | |
18 | * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0 | |
19 | * __collate_lookup("a", ... ) -> len: 1 prim: (int)'a' sec: 0 | |
20 | * __collate_lookup("ab", ... ) -> len: 1 prim: (int)'a' sec: 0 | |
21 | * | |
22 | * in a Latin-1 locale (de_DE.ISO8859-1) | |
23 | * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0 | |
24 | * __collate_lookup("a", ... ) -> len: 1 prim: > 0 sec: > 0 | |
25 | * __collate_lookup("ab", ... ) -> len: 1 prim: > 0 sec: > 0 | |
26 | * # a character not in the table - lookup failure | |
27 | * __collate_lookup("\xdf", ... ) -> len: 0 prim: -1 sec: -1 | |
28 | * | |
29 | * in a UTF-8 locale (de_DE.UTF-8) | |
30 | * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0 | |
31 | * __collate_lookup("a", ... ) -> len: 1 prim: > 0 sec: > 0 | |
32 | * __collate_lookup("ab", ... ) -> len: 1 prim: > 0 sec: > 0 | |
33 | * # An invalid multi-byte sequence | |
34 | * __collate_lookup("\xe4", ... ) -> len: 1 prim: (int)'\xe4' sec: 0 | |
35 | * # valid multi-byte sequence | |
36 | * __collate_lookup("\xc3\xa4", ... ) -> len: 2 prim: > 0 sec: > 0 | |
37 | */ | |
38 | T_DECL(collate_lookup, "Test __collate_lookup() behavior") { | |
39 | unsigned char c; | |
40 | unsigned char str[16]; | |
41 | int len, prim, sec, prim2, sec2; | |
42 | char *result; | |
43 | ||
44 | /* ------------------------- C Locale ------------------------- */ | |
45 | /* In the C locale primary weights should equal the int value of the | |
46 | * character*/ | |
47 | result = setlocale(LC_ALL, "C"); | |
48 | T_ASSERT_NOTNULL(result, "changed to C locale"); | |
49 | ||
50 | __collate_lookup("", &len, &prim, &sec); | |
51 | T_ASSERT_EQ_INT(len, 0, "No characters read"); | |
52 | T_EXPECT_EQ_INT(prim, 0, "No primary weight"); | |
53 | T_EXPECT_EQ_INT(sec, 0, "No secondary weight"); | |
54 | ||
55 | str[1] = 'X'; | |
56 | str[2] = '\0'; | |
57 | for (c = 1; c < UCHAR_MAX; c++) { | |
58 | len = 1; | |
59 | str[0] = c; | |
60 | __collate_lookup(str, &len, &prim, &sec); | |
61 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
62 | T_EXPECT_EQ_INT(prim, (int)c, "Primary weight returned is the value of c"); | |
63 | T_EXPECT_EQ_INT(sec, 0, "Secondary weight returned is 0"); | |
64 | } | |
65 | ||
66 | #if TARGET_OS_OSX | |
67 | /* ------------------------- German Latin-1 Locale ----------------------- */ | |
68 | result = setlocale(LC_ALL, "de_DE.ISO8859-1"); | |
69 | T_ASSERT_NOTNULL(result, "changed to german Latin-1 locale"); | |
70 | ||
71 | __collate_lookup("", &len, &prim, &sec); | |
72 | T_ASSERT_EQ_INT(len, 0, "No characters read"); | |
73 | T_EXPECT_EQ_INT(prim, 0, "No primary weight"); | |
74 | T_EXPECT_EQ_INT(sec, 0, "No secondary weight"); | |
75 | ||
76 | str[1] = 'X'; | |
77 | str[2] = '\0'; | |
78 | for (c = 1; c < UCHAR_MAX; c++) { | |
79 | len = 1; | |
80 | str[0] = c; | |
81 | __collate_lookup(str, &len, &prim, &sec); | |
82 | T_ASSERT_EQ_INT(len, (c == '\0' ? 0 : 1), "Only read one character"); | |
83 | str[1] = '\0'; | |
84 | if (strstr(CHARS_WITHOUT_ENTRIES, str)) { | |
85 | T_EXPECT_EQ(prim, -1, "0x%x is not present in the table", c); | |
86 | T_EXPECT_EQ(sec, -1, "0x%x is not present in the table", c); | |
87 | } else { | |
88 | T_EXPECT_GT(prim, 0, "0x%x Has primary weight", c); | |
89 | T_EXPECT_GT(sec, 0, "0x%x Has secondary weight", c); | |
90 | } | |
91 | } | |
92 | ||
93 | str[0] = 'a'; | |
94 | __collate_lookup(str, &len, &prim, &sec); | |
95 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
96 | ||
97 | /* a with dieresis in Latin-1 locales */ | |
98 | str[0] = (unsigned char)'\xe4'; | |
99 | __collate_lookup(str, &len, &prim2, &sec2); | |
100 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
101 | T_EXPECT_EQ(prim, prim2, "Same primary weight"); | |
102 | T_EXPECT_LT(sec, sec2, "Different secondary weight"); | |
103 | ||
104 | /* ------------------------- German UTF-8 Locale ------------------------- */ | |
105 | result = setlocale(LC_ALL, "de_DE.UTF-8"); | |
106 | T_ASSERT_NOTNULL(result, "changed to german UTF-8 locale"); | |
107 | ||
108 | __collate_lookup("", &len, &prim, &sec); | |
109 | T_ASSERT_EQ_INT(len, 0, "No characters read"); | |
110 | T_EXPECT_EQ_INT(prim, 0, "No primary weight"); | |
111 | T_EXPECT_EQ_INT(sec, 0, "No secondary weight"); | |
112 | ||
113 | str[1] = 'X'; | |
114 | str[2] = '\0'; | |
115 | for (c = 1; c < UCHAR_MAX; c++) { | |
116 | len = 2; /* Tell it that this string is longer */ | |
117 | str[0] = c; | |
118 | __collate_lookup(str, &len, &prim, &sec); | |
119 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
120 | if (strstr(CHARS_WITHOUT_ENTRIES, (const char *)str)) { | |
121 | T_EXPECT_EQ(prim, -1, "0x%x is not present in the table", c); | |
122 | T_EXPECT_GT(sec, -1, "0x%x is not present in the table", c); | |
123 | } else { | |
124 | T_EXPECT_GT(prim, 0, "0x%x Has primary weight", c); | |
125 | /* weight will be 0 for sequences that result in mb failure */ | |
126 | if (c < 128) { | |
127 | /* So only test secondary weights for the ASCII characters */ | |
128 | T_EXPECT_GT(sec, 0, "0x%x Has secondary weight", c); | |
129 | } | |
130 | } | |
131 | } | |
132 | ||
133 | str[0] = 'a'; | |
134 | __collate_lookup(str, &len, &prim, &sec); | |
135 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
136 | ||
137 | /* a with dieresis in Latin-1 locales */ | |
138 | /* this character is invalid in a UTF-8 locale */ | |
139 | str[0] = (unsigned char)'\xe4'; | |
140 | errno = 0; | |
141 | __collate_lookup(str, &len, &prim2, &sec2); | |
142 | T_EXPECT_EQ_INT(errno, EILSEQ, "errno indicates invalid character"); | |
143 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
144 | T_EXPECT_EQ(prim2, (unsigned int)L'\xe4', | |
145 | "Invalid character - Primary weight equal to value (228)"); | |
146 | T_EXPECT_EQ(sec2, 0, "Invalid character - No secondary weight"); | |
147 | ||
148 | T_EXPECT_NE(prim, prim2, "Different primary weight"); | |
149 | T_EXPECT_NE(sec, sec2, "Different secondary weight"); | |
150 | ||
151 | /* Test Multibyte lookup */ | |
152 | str[0] = (unsigned char)'\xc3'; | |
153 | str[1] = (unsigned char)'\xa4'; | |
154 | str[2] = (unsigned char)'X'; | |
155 | str[3] = (unsigned char)'\0'; | |
156 | len = 3; | |
157 | __collate_lookup(str, &len, &prim2, &sec2); | |
158 | T_EXPECTFAIL_WITH_REASON( | |
159 | "__collate_lookup doesn't actually tell you how many bytes were used"); | |
160 | T_ASSERT_EQ_INT(len, 2, "Only read 2 characters"); | |
161 | T_EXPECT_EQ(prim, prim2, "Same primary weight"); | |
162 | T_EXPECT_LT(sec, sec2, "Different secondary weight"); | |
163 | #endif | |
164 | } | |
165 | ||
166 | /* | |
167 | * Tests for the __collate_lookup_l() which is used to lookup weights of wide | |
168 | * characters | |
169 | */ | |
170 | T_DECL(collate_lookup_l, "Test __collate_lookup_l() behavior") { | |
171 | wchar_t wc; | |
172 | wchar_t wcs[16]; | |
173 | char str[16] = {0}; | |
174 | int len, prim, sec, prim2, sec2; | |
175 | char *result; | |
176 | ||
177 | /* ------------------------- C Locale ------------------------- */ | |
178 | /* In the C locale primary weights should equal the int value of the | |
179 | * character*/ | |
180 | result = setlocale(LC_ALL, "C"); | |
181 | T_ASSERT_NOTNULL(result, "changed to C locale"); | |
182 | ||
183 | __collate_lookup_l(L"", &len, &prim, &sec, LC_GLOBAL_LOCALE); | |
184 | T_ASSERT_EQ_INT(len, 0, "No characters read"); | |
185 | T_EXPECT_EQ_INT(prim, 0, "No primary weight"); | |
186 | T_EXPECT_EQ_INT(sec, 0, "No secondary weight"); | |
187 | ||
188 | wcs[1] = L'X'; | |
189 | wcs[2] = L'\0'; | |
190 | for (wc = 1; wc < UCHAR_MAX; wc++) { | |
191 | len = 1; | |
192 | wcs[0] = wc; | |
193 | errno = 0; | |
194 | __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE); | |
195 | T_ASSERT_EQ_INT(errno, 0, "No error occurred"); | |
196 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
197 | T_EXPECT_EQ_INT(prim, (int)wc, | |
198 | "Primary weight returned is the value of wc"); | |
199 | T_EXPECT_EQ_INT(sec, 0, "Secondary weight returned is 0"); | |
200 | } | |
201 | ||
202 | #if TARGET_OS_OSX | |
203 | /* ------------------------- German Latin-1 Locale ------------------------- | |
204 | */ | |
205 | result = setlocale(LC_ALL, "de_DE.ISO8859-1"); | |
206 | T_ASSERT_NOTNULL(result, "changed to german Latin-1 locale"); | |
207 | ||
208 | wcs[1] = L'X'; | |
209 | wcs[2] = L'\0'; | |
210 | for (wc = 1; wc < UCHAR_MAX; wc++) { | |
211 | len = 1; | |
212 | wcs[0] = wc; | |
213 | str[0] = wc & 0xFF; | |
214 | __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE); | |
215 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
216 | if (strstr(CHARS_WITHOUT_ENTRIES, str)) { | |
217 | T_EXPECT_EQ(prim, -1, "0x%x is not present in the table", wc); | |
218 | T_EXPECT_EQ(sec, -1, "0x%x is not present in the table", wc); | |
219 | } else { | |
220 | T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wc); | |
221 | T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wc); | |
222 | } | |
223 | } | |
224 | ||
225 | wcs[0] = L'a'; | |
226 | __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE); | |
227 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
228 | ||
229 | /* a with dieresis in Latin-1 locales */ | |
230 | wcs[0] = L'\xe4'; | |
231 | __collate_lookup_l(wcs, &len, &prim2, &sec2, LC_GLOBAL_LOCALE); | |
232 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
233 | T_EXPECT_EQ(prim, prim2, "Same primary weight"); | |
234 | T_EXPECT_LT(sec, sec2, "Different secondary weight"); | |
235 | ||
236 | /* ------------------------- German UTF-8 Locale ------------------------- */ | |
237 | result = setlocale(LC_ALL, "de_DE.UTF-8"); | |
238 | T_ASSERT_NOTNULL(result, "changed to german UTF-8 locale"); | |
239 | ||
240 | __collate_lookup_l(L"", &len, &prim, &sec, LC_GLOBAL_LOCALE); | |
241 | T_ASSERT_EQ_INT(len, 0, "No characters read"); | |
242 | T_EXPECT_EQ_INT(prim, 0, "No primary weight"); | |
243 | T_EXPECT_EQ_INT(sec, 0, "No secondary weight"); | |
244 | ||
245 | for (wc = 1; wc < UCHAR_MAX; wc++) { | |
246 | len = 1; | |
247 | wcs[0] = wc; | |
248 | str[0] = wc & 0xFF; | |
249 | __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE); | |
250 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
251 | if (strstr(CHARS_WITHOUT_ENTRIES, str)) { | |
252 | T_EXPECT_EQ(prim, -1, "0x%x is not present in the table", wc); | |
253 | T_EXPECT_EQ(sec, -1, "0x%x is not present in the table", wc); | |
254 | } else { | |
255 | T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wc); | |
256 | T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wc); | |
257 | } | |
258 | } | |
259 | ||
260 | /* Test that a lookup of 'a' and '\xe4' returns the same primary weight */ | |
261 | wcs[0] = L'a'; | |
262 | wcs[1] = L'\0'; | |
263 | __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE); | |
264 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
265 | T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wc); | |
266 | T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wc); | |
267 | ||
268 | wcs[0] = L'\xe4'; | |
269 | wcs[1] = L'\0'; | |
270 | errno = 0; | |
271 | __collate_lookup_l(wcs, &len, &prim2, &sec2, LC_GLOBAL_LOCALE); | |
272 | T_EXPECT_EQ_INT(errno, 0, "errno was not set"); | |
273 | T_ASSERT_EQ_INT(len, 1, "Only read one character"); | |
274 | T_EXPECT_GT(prim2, 0, "Wide char 0x%x Has primary weight", wc); | |
275 | T_EXPECT_GT(sec2, 0, "Wide char 0x%x Has secondary weight", wc); | |
276 | ||
277 | T_EXPECT_EQ(prim, prim2, "Primary weight equal"); | |
278 | T_EXPECT_NE(sec, sec2, "Different secondary weight"); | |
279 | #endif | |
280 | } |