]> git.saurik.com Git - apple/libc.git/blame - tests/collate.c
Libc-1439.100.3.tar.gz
[apple/libc.git] / tests / collate.c
CommitLineData
a9aaacca
A
1#include <TargetConditionals.h>
2#include <errno.h>
3#include <limits.h>
4#include <locale.h>
5#include <stdlib.h>
6#include <xlocale.h>
7
8#include <darwintest.h>
9
10void __collate_lookup_l(const __darwin_wchar_t *, int *, int *, int *,
11 locale_t);
12void __collate_lookup(const unsigned char *, int *, int *, int *);
13
14#define CHARS_WITHOUT_ENTRIES "\xdf"
15
16/*
17 * in C or POSIX locales
18 * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0
19 * __collate_lookup("a", ... ) -> len: 1 prim: (int)'a' sec: 0
20 * __collate_lookup("ab", ... ) -> len: 1 prim: (int)'a' sec: 0
21 *
22 * in a Latin-1 locale (de_DE.ISO8859-1)
23 * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0
24 * __collate_lookup("a", ... ) -> len: 1 prim: > 0 sec: > 0
25 * __collate_lookup("ab", ... ) -> len: 1 prim: > 0 sec: > 0
26 * # a character not in the table - lookup failure
 * __collate_lookup("\xdf", ... ) -> len: 1 prim: -1 sec: -1
28 *
29 * in a UTF-8 locale (de_DE.UTF-8)
30 * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0
31 * __collate_lookup("a", ... ) -> len: 1 prim: > 0 sec: > 0
32 * __collate_lookup("ab", ... ) -> len: 1 prim: > 0 sec: > 0
33 * # An invalid multi-byte sequence
34 * __collate_lookup("\xe4", ... ) -> len: 1 prim: (int)'\xe4' sec: 0
35 * # valid multi-byte sequence
36 * __collate_lookup("\xc3\xa4", ... ) -> len: 2 prim: > 0 sec: > 0
37 */
38T_DECL(collate_lookup, "Test __collate_lookup() behavior") {
39 unsigned char c;
40 unsigned char str[16];
41 int len, prim, sec, prim2, sec2;
42 char *result;
43
44 /* ------------------------- C Locale ------------------------- */
45 /* In the C locale primary weights should equal the int value of the
46 * character*/
47 result = setlocale(LC_ALL, "C");
48 T_ASSERT_NOTNULL(result, "changed to C locale");
49
50 __collate_lookup("", &len, &prim, &sec);
51 T_ASSERT_EQ_INT(len, 0, "No characters read");
52 T_EXPECT_EQ_INT(prim, 0, "No primary weight");
53 T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
54
55 str[1] = 'X';
56 str[2] = '\0';
57 for (c = 1; c < UCHAR_MAX; c++) {
58 len = 1;
59 str[0] = c;
60 __collate_lookup(str, &len, &prim, &sec);
61 T_ASSERT_EQ_INT(len, 1, "Only read one character");
62 T_EXPECT_EQ_INT(prim, (int)c, "Primary weight returned is the value of c");
63 T_EXPECT_EQ_INT(sec, 0, "Secondary weight returned is 0");
64 }
65
66#if TARGET_OS_OSX
67 /* ------------------------- German Latin-1 Locale ----------------------- */
68 result = setlocale(LC_ALL, "de_DE.ISO8859-1");
69 T_ASSERT_NOTNULL(result, "changed to german Latin-1 locale");
70
71 __collate_lookup("", &len, &prim, &sec);
72 T_ASSERT_EQ_INT(len, 0, "No characters read");
73 T_EXPECT_EQ_INT(prim, 0, "No primary weight");
74 T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
75
76 str[1] = 'X';
77 str[2] = '\0';
78 for (c = 1; c < UCHAR_MAX; c++) {
79 len = 1;
80 str[0] = c;
81 __collate_lookup(str, &len, &prim, &sec);
82 T_ASSERT_EQ_INT(len, (c == '\0' ? 0 : 1), "Only read one character");
83 str[1] = '\0';
84 if (strstr(CHARS_WITHOUT_ENTRIES, str)) {
85 T_EXPECT_EQ(prim, -1, "0x%x is not present in the table", c);
86 T_EXPECT_EQ(sec, -1, "0x%x is not present in the table", c);
87 } else {
88 T_EXPECT_GT(prim, 0, "0x%x Has primary weight", c);
89 T_EXPECT_GT(sec, 0, "0x%x Has secondary weight", c);
90 }
91 }
92
93 str[0] = 'a';
94 __collate_lookup(str, &len, &prim, &sec);
95 T_ASSERT_EQ_INT(len, 1, "Only read one character");
96
97 /* a with dieresis in Latin-1 locales */
98 str[0] = (unsigned char)'\xe4';
99 __collate_lookup(str, &len, &prim2, &sec2);
100 T_ASSERT_EQ_INT(len, 1, "Only read one character");
101 T_EXPECT_EQ(prim, prim2, "Same primary weight");
102 T_EXPECT_LT(sec, sec2, "Different secondary weight");
103
104 /* ------------------------- German UTF-8 Locale ------------------------- */
105 result = setlocale(LC_ALL, "de_DE.UTF-8");
106 T_ASSERT_NOTNULL(result, "changed to german UTF-8 locale");
107
108 __collate_lookup("", &len, &prim, &sec);
109 T_ASSERT_EQ_INT(len, 0, "No characters read");
110 T_EXPECT_EQ_INT(prim, 0, "No primary weight");
111 T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
112
113 str[1] = 'X';
114 str[2] = '\0';
115 for (c = 1; c < UCHAR_MAX; c++) {
116 len = 2; /* Tell it that this string is longer */
117 str[0] = c;
118 __collate_lookup(str, &len, &prim, &sec);
119 T_ASSERT_EQ_INT(len, 1, "Only read one character");
120 if (strstr(CHARS_WITHOUT_ENTRIES, (const char *)str)) {
121 T_EXPECT_EQ(prim, -1, "0x%x is not present in the table", c);
122 T_EXPECT_GT(sec, -1, "0x%x is not present in the table", c);
123 } else {
124 T_EXPECT_GT(prim, 0, "0x%x Has primary weight", c);
125 /* weight will be 0 for sequences that result in mb failure */
126 if (c < 128) {
127 /* So only test secondary weights for the ASCII characters */
128 T_EXPECT_GT(sec, 0, "0x%x Has secondary weight", c);
129 }
130 }
131 }
132
133 str[0] = 'a';
134 __collate_lookup(str, &len, &prim, &sec);
135 T_ASSERT_EQ_INT(len, 1, "Only read one character");
136
137 /* a with dieresis in Latin-1 locales */
138 /* this character is invalid in a UTF-8 locale */
139 str[0] = (unsigned char)'\xe4';
140 errno = 0;
141 __collate_lookup(str, &len, &prim2, &sec2);
142 T_EXPECT_EQ_INT(errno, EILSEQ, "errno indicates invalid character");
143 T_ASSERT_EQ_INT(len, 1, "Only read one character");
144 T_EXPECT_EQ(prim2, (unsigned int)L'\xe4',
145 "Invalid character - Primary weight equal to value (228)");
146 T_EXPECT_EQ(sec2, 0, "Invalid character - No secondary weight");
147
148 T_EXPECT_NE(prim, prim2, "Different primary weight");
149 T_EXPECT_NE(sec, sec2, "Different secondary weight");
150
151 /* Test Multibyte lookup */
152 str[0] = (unsigned char)'\xc3';
153 str[1] = (unsigned char)'\xa4';
154 str[2] = (unsigned char)'X';
155 str[3] = (unsigned char)'\0';
156 len = 3;
157 __collate_lookup(str, &len, &prim2, &sec2);
158 T_EXPECTFAIL_WITH_REASON(
159 "__collate_lookup doesn't actually tell you how many bytes were used");
160 T_ASSERT_EQ_INT(len, 2, "Only read 2 characters");
161 T_EXPECT_EQ(prim, prim2, "Same primary weight");
162 T_EXPECT_LT(sec, sec2, "Different secondary weight");
163#endif
164}
165
166/*
 * Tests for __collate_lookup_l(), which is used to look up the weights of
 * wide characters
169 */
170T_DECL(collate_lookup_l, "Test __collate_lookup_l() behavior") {
171 wchar_t wc;
172 wchar_t wcs[16];
173 char str[16] = {0};
174 int len, prim, sec, prim2, sec2;
175 char *result;
176
177 /* ------------------------- C Locale ------------------------- */
178 /* In the C locale primary weights should equal the int value of the
179 * character*/
180 result = setlocale(LC_ALL, "C");
181 T_ASSERT_NOTNULL(result, "changed to C locale");
182
183 __collate_lookup_l(L"", &len, &prim, &sec, LC_GLOBAL_LOCALE);
184 T_ASSERT_EQ_INT(len, 0, "No characters read");
185 T_EXPECT_EQ_INT(prim, 0, "No primary weight");
186 T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
187
188 wcs[1] = L'X';
189 wcs[2] = L'\0';
190 for (wc = 1; wc < UCHAR_MAX; wc++) {
191 len = 1;
192 wcs[0] = wc;
193 errno = 0;
194 __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
195 T_ASSERT_EQ_INT(errno, 0, "No error occurred");
196 T_ASSERT_EQ_INT(len, 1, "Only read one character");
197 T_EXPECT_EQ_INT(prim, (int)wc,
198 "Primary weight returned is the value of wc");
199 T_EXPECT_EQ_INT(sec, 0, "Secondary weight returned is 0");
200 }
201
202#if TARGET_OS_OSX
203 /* ------------------------- German Latin-1 Locale -------------------------
204 */
205 result = setlocale(LC_ALL, "de_DE.ISO8859-1");
206 T_ASSERT_NOTNULL(result, "changed to german Latin-1 locale");
207
208 wcs[1] = L'X';
209 wcs[2] = L'\0';
210 for (wc = 1; wc < UCHAR_MAX; wc++) {
211 len = 1;
212 wcs[0] = wc;
213 str[0] = wc & 0xFF;
214 __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
215 T_ASSERT_EQ_INT(len, 1, "Only read one character");
216 if (strstr(CHARS_WITHOUT_ENTRIES, str)) {
217 T_EXPECT_EQ(prim, -1, "0x%x is not present in the table", wc);
218 T_EXPECT_EQ(sec, -1, "0x%x is not present in the table", wc);
219 } else {
220 T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wc);
221 T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wc);
222 }
223 }
224
225 wcs[0] = L'a';
226 __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
227 T_ASSERT_EQ_INT(len, 1, "Only read one character");
228
229 /* a with dieresis in Latin-1 locales */
230 wcs[0] = L'\xe4';
231 __collate_lookup_l(wcs, &len, &prim2, &sec2, LC_GLOBAL_LOCALE);
232 T_ASSERT_EQ_INT(len, 1, "Only read one character");
233 T_EXPECT_EQ(prim, prim2, "Same primary weight");
234 T_EXPECT_LT(sec, sec2, "Different secondary weight");
235
236 /* ------------------------- German UTF-8 Locale ------------------------- */
237 result = setlocale(LC_ALL, "de_DE.UTF-8");
238 T_ASSERT_NOTNULL(result, "changed to german UTF-8 locale");
239
240 __collate_lookup_l(L"", &len, &prim, &sec, LC_GLOBAL_LOCALE);
241 T_ASSERT_EQ_INT(len, 0, "No characters read");
242 T_EXPECT_EQ_INT(prim, 0, "No primary weight");
243 T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
244
245 for (wc = 1; wc < UCHAR_MAX; wc++) {
246 len = 1;
247 wcs[0] = wc;
248 str[0] = wc & 0xFF;
249 __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
250 T_ASSERT_EQ_INT(len, 1, "Only read one character");
251 if (strstr(CHARS_WITHOUT_ENTRIES, str)) {
252 T_EXPECT_EQ(prim, -1, "0x%x is not present in the table", wc);
253 T_EXPECT_EQ(sec, -1, "0x%x is not present in the table", wc);
254 } else {
255 T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wc);
256 T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wc);
257 }
258 }
259
260 /* Test that a lookup of 'a' and '\xe4' returns the same primary weight */
261 wcs[0] = L'a';
262 wcs[1] = L'\0';
263 __collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
264 T_ASSERT_EQ_INT(len, 1, "Only read one character");
265 T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wc);
266 T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wc);
267
268 wcs[0] = L'\xe4';
269 wcs[1] = L'\0';
270 errno = 0;
271 __collate_lookup_l(wcs, &len, &prim2, &sec2, LC_GLOBAL_LOCALE);
272 T_EXPECT_EQ_INT(errno, 0, "errno was not set");
273 T_ASSERT_EQ_INT(len, 1, "Only read one character");
274 T_EXPECT_GT(prim2, 0, "Wide char 0x%x Has primary weight", wc);
275 T_EXPECT_GT(sec2, 0, "Wide char 0x%x Has secondary weight", wc);
276
277 T_EXPECT_EQ(prim, prim2, "Primary weight equal");
278 T_EXPECT_NE(sec, sec2, "Different secondary weight");
279#endif
280}