]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/custrtrn.c
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / custrtrn.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2001-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File custrtrn.C
9 *
10 * Modification History:
11 * Name Description
12 * Ram String transformations test
13 *********************************************************************************
14 */
15 /****************************************************************************/
16
17
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/ures.h"
24 #include "ustr_imp.h"
25 #include "cintltst.h"
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "cwchar.h"
29
30 void addUCharTransformTest(TestNode** root);
31
32 static void Test_strToUTF32(void);
33 static void Test_strToUTF32_surrogates(void);
34 static void Test_strFromUTF32(void);
35 static void Test_strFromUTF32_surrogates(void);
36 static void Test_UChar_UTF8_API(void);
37 static void Test_FromUTF8(void);
38 static void Test_FromUTF8Lenient(void);
39 static void Test_UChar_WCHART_API(void);
40 static void Test_widestrs(void);
41 static void Test_WCHART_LongString(void);
42 static void Test_strToJavaModifiedUTF8(void);
43 static void Test_strFromJavaModifiedUTF8(void);
44 static void TestNullEmptySource(void);
45
46 void
47 addUCharTransformTest(TestNode** root)
48 {
49 addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
50 addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
51 addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
52 addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
53 addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
54 addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
55 addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
56 addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API");
57 addTest(root, &Test_widestrs, "custrtrn/Test_widestrs");
58 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
59 addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
60 #endif
61 addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8");
62 addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8");
63 addTest(root, &TestNullEmptySource, "custrtrn/TestNullEmptySource");
64 }
65
66 static const UChar32 src32[]={
67 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
68 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
69 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
70 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
71 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
72 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
73 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
74 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
75 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
76 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
77 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
78 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
79 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
80 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
81 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
82 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
83 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
84 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
85 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
86 /* test non-BMP code points */
87 0x0002A699,
88 0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB,
89 0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7,
90 0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1,
91 0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0,
92 0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5,
93
94 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
95 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
96 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
97 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
98 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
99 };
100
101 static const UChar src16[] = {
102 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
103 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
104 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
105 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
106 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
107 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
108 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
109 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
110 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
111 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
112 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
113 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
114 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
115 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
116 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
117 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
118 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
119 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
120 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
121
122 /* test non-BMP code points */
123 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
124 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
125 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
126 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
127 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
128 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
129 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
130 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
131 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
132 0xD869, 0xDED5,
133
134 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
135 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
136 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
137 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
138 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
139 };
140
141
142 static void Test_strToUTF32(void){
143 UErrorCode err = U_ZERO_ERROR;
144 UChar32 u32Target[400];
145 int32_t u32DestLen;
146 int i= 0;
147
148 /* first with length */
149 u32DestLen = -2;
150 u_strToUTF32(u32Target, 0, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
151 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
152 log_err("u_strToUTF32(preflight with length): "
153 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
154 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
155 return;
156 }
157 err = U_ZERO_ERROR;
158 u32DestLen = -2;
159 u_strToUTF32(u32Target, UPRV_LENGTHOF(src32)+1, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
160 if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
161 log_err("u_strToUTF32(with length): "
162 "length %ld != %ld and %s != U_ZERO_ERROR\n",
163 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
164 return;
165 }
166 /*for(i=0; i< u32DestLen; i++){
167 printf("0x%08X, ",uTarget[i]);
168 if(i%10==0){
169 printf("\n");
170 }
171 }*/
172 for(i=0; i< UPRV_LENGTHOF(src32); i++){
173 if(u32Target[i] != src32[i]){
174 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i);
175 }
176 }
177 if(u32Target[i] != 0){
178 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i);
179 }
180
181 /* now NUL-terminated */
182 u32DestLen = -2;
183 u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err);
184 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
185 log_err("u_strToUTF32(preflight with NUL-termination): "
186 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
187 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
188 return;
189 }
190 err = U_ZERO_ERROR;
191 u32DestLen = -2;
192 u_strToUTF32(u32Target, UPRV_LENGTHOF(src32), &u32DestLen, src16, -1,&err);
193 if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
194 log_err("u_strToUTF32(with NUL-termination): "
195 "length %ld != %ld and %s != U_ZERO_ERROR\n",
196 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
197 return;
198 }
199
200 for(i=0; i< UPRV_LENGTHOF(src32); i++){
201 if(u32Target[i] != src32[i]){
202 log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]);
203 }
204 }
205 }
206
207 /* test unpaired surrogates */
208 static void Test_strToUTF32_surrogates() {
209 UErrorCode err = U_ZERO_ERROR;
210 UChar32 u32Target[400];
211 int32_t len16, u32DestLen;
212 int32_t numSubstitutions;
213 int i;
214
215 static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
216 static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
217 static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 };
218 static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 };
219 len16 = UPRV_LENGTHOF(surr16);
220 for(i = 0; i < 4; ++i) {
221 err = U_ZERO_ERROR;
222 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
223 if(err != U_INVALID_CHAR_FOUND) {
224 log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
225 (long)i, u_errorName(err));
226 return;
227 }
228
229 err = U_ZERO_ERROR;
230 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
231 if(err != U_INVALID_CHAR_FOUND) {
232 log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
233 (long)i, u_errorName(err));
234 return;
235 }
236
237 err = U_ZERO_ERROR;
238 u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
239 if(err != U_INVALID_CHAR_FOUND) {
240 log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
241 (long)i, u_errorName(err));
242 return;
243 }
244
245 err = U_ZERO_ERROR;
246 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
247 if(err != U_INVALID_CHAR_FOUND) {
248 log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
249 (long)i, u_errorName(err));
250 return;
251 }
252 }
253
254 err = U_ZERO_ERROR;
255 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
256 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
257 log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
258 u_errorName(err));
259 return;
260 }
261
262 err = U_ZERO_ERROR;
263 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
264 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
265 log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
266 u_errorName(err));
267 return;
268 }
269
270 err = U_ZERO_ERROR;
271 u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
272 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
273 log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
274 u_errorName(err));
275 return;
276 }
277
278 err = U_ZERO_ERROR;
279 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
280 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
281 log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
282 u_errorName(err));
283 return;
284 }
285
286 /* with substitution character */
287 numSubstitutions = -1;
288 err = U_ZERO_ERROR;
289 u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
290 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
291 log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
292 u_errorName(err));
293 return;
294 }
295
296 err = U_ZERO_ERROR;
297 u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
298 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) {
299 log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
300 u_errorName(err));
301 return;
302 }
303
304 err = U_ZERO_ERROR;
305 u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
306 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
307 log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
308 u_errorName(err));
309 return;
310 }
311
312 err = U_ZERO_ERROR;
313 u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
314 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) {
315 log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
316 u_errorName(err));
317 return;
318 }
319 }
320
321 static void Test_strFromUTF32(void){
322 UErrorCode err = U_ZERO_ERROR;
323 UChar uTarget[400];
324 int32_t uDestLen;
325 int i= 0;
326
327 /* first with length */
328 uDestLen = -2;
329 u_strFromUTF32(uTarget,0,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
330 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
331 log_err("u_strFromUTF32(preflight with length): "
332 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
333 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
334 return;
335 }
336 err = U_ZERO_ERROR;
337 uDestLen = -2;
338 u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16)+1,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
339 if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
340 log_err("u_strFromUTF32(with length): "
341 "length %ld != %ld and %s != U_ZERO_ERROR\n",
342 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
343 return;
344 }
345 /*for(i=0; i< uDestLen; i++){
346 printf("0x%04X, ",uTarget[i]);
347 if(i%10==0){
348 printf("\n");
349 }
350 }*/
351
352 for(i=0; i< uDestLen; i++){
353 if(uTarget[i] != src16[i]){
354 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i);
355 }
356 }
357 if(uTarget[i] != 0){
358 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i);
359 }
360
361 /* now NUL-terminated */
362 uDestLen = -2;
363 u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err);
364 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
365 log_err("u_strFromUTF32(preflight with NUL-termination): "
366 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
367 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
368 return;
369 }
370 err = U_ZERO_ERROR;
371 uDestLen = -2;
372 u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16),&uDestLen,src32,-1,&err);
373 if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
374 log_err("u_strFromUTF32(with NUL-termination): "
375 "length %ld != %ld and %s != U_ZERO_ERROR\n",
376 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
377 return;
378 }
379
380 for(i=0; i< uDestLen; i++){
381 if(uTarget[i] != src16[i]){
382 log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]);
383 }
384 }
385 }
386
387 /* test surrogate code points */
388 static void Test_strFromUTF32_surrogates() {
389 UErrorCode err = U_ZERO_ERROR;
390 UChar uTarget[400];
391 int32_t len32, uDestLen;
392 int32_t numSubstitutions;
393 int i;
394
395 static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
396 static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
397 static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
398 static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45,
399 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
400 len32 = UPRV_LENGTHOF(surr32);
401 for(i = 0; i < 6; ++i) {
402 err = U_ZERO_ERROR;
403 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
404 if(err != U_INVALID_CHAR_FOUND) {
405 log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
406 (long)i, u_errorName(err));
407 return;
408 }
409
410 err = U_ZERO_ERROR;
411 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
412 if(err != U_INVALID_CHAR_FOUND) {
413 log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
414 (long)i, u_errorName(err));
415 return;
416 }
417
418 err = U_ZERO_ERROR;
419 u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
420 if(err != U_INVALID_CHAR_FOUND) {
421 log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
422 (long)i, u_errorName(err));
423 return;
424 }
425
426 err = U_ZERO_ERROR;
427 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
428 if(err != U_INVALID_CHAR_FOUND) {
429 log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
430 (long)i, u_errorName(err));
431 return;
432 }
433 }
434
435 err = U_ZERO_ERROR;
436 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err);
437 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
438 log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
439 u_errorName(err));
440 return;
441 }
442
443 err = U_ZERO_ERROR;
444 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err);
445 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
446 log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
447 u_errorName(err));
448 return;
449 }
450
451 err = U_ZERO_ERROR;
452 u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err);
453 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
454 log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
455 u_errorName(err));
456 return;
457 }
458
459 err = U_ZERO_ERROR;
460 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err);
461 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
462 log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
463 u_errorName(err));
464 return;
465 }
466
467 /* with substitution character */
468 numSubstitutions = -1;
469 err = U_ZERO_ERROR;
470 u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
471 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) {
472 log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
473 u_errorName(err));
474 return;
475 }
476
477 err = U_ZERO_ERROR;
478 u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
479 if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) {
480 log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
481 u_errorName(err));
482 return;
483 }
484
485 err = U_ZERO_ERROR;
486 u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
487 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) {
488 log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
489 u_errorName(err));
490 return;
491 }
492
493 err = U_ZERO_ERROR;
494 u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
495 if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) {
496 log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
497 u_errorName(err));
498 return;
499 }
500 }
501
502 static void Test_UChar_UTF8_API(void){
503
504 UErrorCode err = U_ZERO_ERROR;
505 UChar uTemp[1];
506 char u8Temp[1];
507 UChar* uTarget=uTemp;
508 const char* u8Src;
509 int32_t u8SrcLen = 0;
510 int32_t uTargetLength = 0;
511 int32_t uDestLen=0;
512 const UChar* uSrc = src16;
513 int32_t uSrcLen = sizeof(src16)/2;
514 char* u8Target = u8Temp;
515 int32_t u8TargetLength =0;
516 int32_t u8DestLen =0;
517 UBool failed = FALSE;
518 int i= 0;
519 int32_t numSubstitutions;
520
521 {
522 /* preflight */
523 u8Temp[0] = 0x12;
524 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
525 if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){
526 err = U_ZERO_ERROR;
527 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
528 u8TargetLength = u8DestLen;
529
530 u8Target[u8TargetLength] = (char)0xfe;
531 u8DestLen = -1;
532 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
533 if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){
534 log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err));
535 return;
536 }
537
538 }
539 else {
540 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
541 }
542 failed = FALSE;
543 /*for(i=0; i< u8DestLen; i++){
544 printf("0x%04X, ",u8Target[i]);
545 if(i%10==0){
546 printf("\n");
547 }
548 }*/
549 /*for(i=0; i< u8DestLen; i++){
550 if(u8Target[i] != src8[i]){
551 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
552 failed =TRUE;
553 }
554 }
555 if(failed){
556 log_err("u_strToUTF8() failed \n");
557 }*/
558 u8Src = u8Target;
559 u8SrcLen = u8DestLen;
560
561 /* preflight */
562 uTemp[0] = 0x1234;
563 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
564 if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){
565 err = U_ZERO_ERROR;
566 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
567 uTargetLength = uDestLen;
568
569 uTarget[uTargetLength] = 0xfff0;
570 uDestLen = -1;
571 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
572 }
573 else {
574 log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n");
575 }
576 /*for(i=0; i< uDestLen; i++){
577 printf("0x%04X, ",uTarget[i]);
578 if(i%10==0){
579 printf("\n");
580 }
581 }*/
582
583 if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) {
584 failed = TRUE;
585 }
586 for(i=0; i< uSrcLen; i++){
587 if(uTarget[i] != src16[i]){
588 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
589 failed =TRUE;
590 }
591 }
592 if(failed){
593 log_err("error: u_strFromUTF8(after preflighting) failed\n");
594 }
595
596 free(u8Target);
597 free(uTarget);
598 }
599 {
600 u8SrcLen = -1;
601 uTargetLength = 0;
602 uSrcLen =-1;
603 u8TargetLength=0;
604 failed = FALSE;
605 /* preflight */
606 u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
607 if(err == U_BUFFER_OVERFLOW_ERROR){
608 err = U_ZERO_ERROR;
609 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
610 u8TargetLength = u8DestLen;
611
612 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
613
614 }
615 else {
616 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
617 }
618 failed = FALSE;
619 /*for(i=0; i< u8DestLen; i++){
620 printf("0x%04X, ",u8Target[i]);
621 if(i%10==0){
622 printf("\n");
623 }
624 }*/
625 /*for(i=0; i< u8DestLen; i++){
626 if(u8Target[i] != src8[i]){
627 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
628 failed =TRUE;
629 }
630 }
631 if(failed){
632 log_err("u_strToUTF8() failed \n");
633 }*/
634 u8Src = u8Target;
635 u8SrcLen = u8DestLen;
636
637 /* preflight */
638 u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
639 if(err == U_BUFFER_OVERFLOW_ERROR){
640 err = U_ZERO_ERROR;
641 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
642 uTargetLength = uDestLen;
643
644 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
645 }
646 else {
647 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
648 }
649 /*for(i=0; i< uDestLen; i++){
650 printf("0x%04X, ",uTarget[i]);
651 if(i%10==0){
652 printf("\n");
653 }
654 }*/
655
656 for(i=0; i< uSrcLen; i++){
657 if(uTarget[i] != src16[i]){
658 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
659 failed =TRUE;
660 }
661 }
662 if(failed){
663 log_err("u_strToUTF8() failed \n");
664 }
665
666 free(u8Target);
667 free(uTarget);
668 }
669
670 /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */
671 {
672 static const UChar
673 withLead16[]={ 0x1800, 0xd89a, 0x0061 },
674 withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 },
675 withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */
676 withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */
677 static const uint8_t
678 withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 },
679 withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 },
680 withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */
681 withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */
682 UChar out16[10];
683 char out8[10];
684
685 if(
686 (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withLead16, UPRV_LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) ||
687 (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) ||
688 (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withLead8, UPRV_LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) ||
689 (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND)
690 ) {
691 log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n");
692 }
693
694 /* test error handling with substitution characters */
695
696 /* from UTF-8 with length */
697 err=U_ZERO_ERROR;
698 numSubstitutions=-1;
699 out16[0]=0x55aa;
700 uDestLen=0;
701 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
702 (const char *)withTrail8, uprv_strlen((const char *)withTrail8),
703 0x50005, &numSubstitutions,
704 &err);
705 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) ||
706 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) ||
707 numSubstitutions!=1) {
708 log_err("error: u_strFromUTF8WithSub(length) failed\n");
709 }
710
711 /* from UTF-8 with NUL termination */
712 err=U_ZERO_ERROR;
713 numSubstitutions=-1;
714 out16[0]=0x55aa;
715 uDestLen=0;
716 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
717 (const char *)withTrail8, -1,
718 0xfffd, &numSubstitutions,
719 &err);
720 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) ||
721 0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) ||
722 numSubstitutions!=1) {
723 log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n");
724 }
725
726 /* preflight from UTF-8 with NUL termination */
727 err=U_ZERO_ERROR;
728 numSubstitutions=-1;
729 out16[0]=0x55aa;
730 uDestLen=0;
731 u_strFromUTF8WithSub(out16, 1, &uDestLen,
732 (const char *)withTrail8, -1,
733 0x50005, &numSubstitutions,
734 &err);
735 if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=1) {
736 log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n");
737 }
738
739 /* to UTF-8 with length */
740 err=U_ZERO_ERROR;
741 numSubstitutions=-1;
742 out8[0]=(char)0xf5;
743 u8DestLen=0;
744 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
745 withTrail16, u_strlen(withTrail16),
746 0xfffd, &numSubstitutions,
747 &err);
748 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
749 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) ||
750 numSubstitutions!=1) {
751 log_err("error: u_strToUTF8WithSub(length) failed\n");
752 }
753
754 /* to UTF-8 with NUL termination */
755 err=U_ZERO_ERROR;
756 numSubstitutions=-1;
757 out8[0]=(char)0xf5;
758 u8DestLen=0;
759 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
760 withTrail16, -1,
761 0x1a, &numSubstitutions,
762 &err);
763 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) ||
764 0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) ||
765 numSubstitutions!=1) {
766 log_err("error: u_strToUTF8WithSub(NUL termination) failed\n");
767 }
768
769 /* preflight to UTF-8 with NUL termination */
770 err=U_ZERO_ERROR;
771 numSubstitutions=-1;
772 out8[0]=(char)0xf5;
773 u8DestLen=0;
774 u_strToUTF8WithSub(out8, 1, &u8DestLen,
775 withTrail16, -1,
776 0xfffd, &numSubstitutions,
777 &err);
778 if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
779 numSubstitutions!=1) {
780 log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n");
781 }
782
783 /* test that numSubstitutions==0 if there are no substitutions */
784
785 /* from UTF-8 with length (just first 3 bytes which are valid) */
786 err=U_ZERO_ERROR;
787 numSubstitutions=-1;
788 out16[0]=0x55aa;
789 uDestLen=0;
790 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
791 (const char *)withTrail8, 3,
792 0x50005, &numSubstitutions,
793 &err);
794 if(U_FAILURE(err) || uDestLen!=1 ||
795 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
796 numSubstitutions!=0) {
797 log_err("error: u_strFromUTF8WithSub(no subs) failed\n");
798 }
799
800 /* to UTF-8 with length (just first UChar which is valid) */
801 err=U_ZERO_ERROR;
802 numSubstitutions=-1;
803 out8[0]=(char)0xf5;
804 u8DestLen=0;
805 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
806 withTrail16, 1,
807 0xfffd, &numSubstitutions,
808 &err);
809 if(U_FAILURE(err) || u8DestLen!=3 ||
810 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
811 numSubstitutions!=0) {
812 log_err("error: u_strToUTF8WithSub(no subs) failed\n");
813 }
814
815 /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */
816
817 /* from UTF-8 with length (just first 3 bytes which are valid) */
818 err=U_ZERO_ERROR;
819 numSubstitutions=-1;
820 out16[0]=0x55aa;
821 uDestLen=0;
822 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
823 (const char *)withTrail8, 3,
824 U_SENTINEL, &numSubstitutions,
825 &err);
826 if(U_FAILURE(err) || uDestLen!=1 ||
827 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
828 numSubstitutions!=0) {
829 log_err("error: u_strFromUTF8WithSub(no subchar) failed\n");
830 }
831
832 /* to UTF-8 with length (just first UChar which is valid) */
833 err=U_ZERO_ERROR;
834 numSubstitutions=-1;
835 out8[0]=(char)0xf5;
836 u8DestLen=0;
837 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
838 withTrail16, 1,
839 U_SENTINEL, &numSubstitutions,
840 &err);
841 if(U_FAILURE(err) || u8DestLen!=3 ||
842 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
843 numSubstitutions!=0) {
844 log_err("error: u_strToUTF8WithSub(no subchar) failed\n");
845 }
846 }
847 {
848 /*
849 * Test with an illegal lead byte that would be followed by more than 3 trail bytes.
850 * See ticket #10371.
851 */
852 static const char src[1]={ (char)0xf8 };
853 UChar out16[10];
854 err=U_ZERO_ERROR;
855 u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, src, 1, &err);
856 if(err!=U_INVALID_CHAR_FOUND) {
857 log_err("error: u_strFromUTF8(5-byte lead byte) failed\n");
858 }
859 }
860 }
861
862 /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */
863 static UBool
864 equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) {
865 UChar c1, c2;
866
867 while(length>0) {
868 c1=*s++;
869 c2=*t++;
870 if(c1!=c2 && c2!=0xfffd) {
871 return FALSE;
872 }
873 --length;
874 }
875 return TRUE;
876 }
877
878 /* test u_strFromUTF8Lenient() */
879 static void
880 Test_FromUTF8(void) {
881 /*
882 * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)"
883 */
884 static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 };
885 UChar dest[64];
886 UChar *destPointer;
887 int32_t destLength;
888 UErrorCode errorCode;
889
890 /* 3 bytes input, one UChar output (U+095C) */
891 errorCode=U_ZERO_ERROR;
892 destLength=-99;
893 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode);
894 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
895 log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n",
896 (long)destLength, u_errorName(errorCode));
897 }
898
899 /* 4 bytes input, two UChars output (U+095C U+0000) */
900 errorCode=U_ZERO_ERROR;
901 destLength=-99;
902 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode);
903 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) {
904 log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n",
905 (long)destLength, u_errorName(errorCode));
906 }
907
908 /* NUL-terminated 3 bytes input, one UChar output (U+095C) */
909 errorCode=U_ZERO_ERROR;
910 destLength=-99;
911 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode);
912 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
913 log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n",
914 (long)destLength, u_errorName(errorCode));
915 }
916
917 /* 3 bytes input, one UChar output (U+095C), transform not just preflight */
918 errorCode=U_ZERO_ERROR;
919 dest[0]=dest[1]=99;
920 destLength=-99;
921 destPointer=u_strFromUTF8(dest, UPRV_LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode);
922 if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) {
923 log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n",
924 (long)destLength, u_errorName(errorCode));
925 }
926 }
927
928 /* test u_strFromUTF8Lenient() */
929 static void
930 Test_FromUTF8Lenient(void) {
931 /*
932 * Multiple input strings, each NUL-terminated.
933 * Terminate with a string starting with 0xff.
934 */
935 static const uint8_t bytes[]={
936 /* well-formed UTF-8 */
937 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80,
938 0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0,
939
940 /* various malformed sequences */
941 0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0,
942
943 /* truncated input */
944 0xc3, 0,
945 0xe0, 0,
946 0xe0, 0xa0, 0,
947 0xf0, 0,
948 0xf0, 0x90, 0,
949 0xf0, 0x90, 0x80, 0,
950
951 /* non-ASCII characters in the last few bytes */
952 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0,
953 0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0,
954
955 /* empty string */
956 0,
957
958 /* finish */
959 0xff, 0
960 };
961
962 /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */
963 static const UChar uchars[]={
964 0x61, 0xdf, 0x800, 0xd840, 0xdc00,
965 0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0,
966
967 0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0,
968
969 0xfffd, 0,
970 0xfffd, 0,
971 0xfffd, 0,
972 0xfffd, 0,
973 0xfffd, 0,
974 0xfffd, 0,
975
976 0x61, 0xdf, 0x800, 0,
977 0x61, 0x800, 0xdf, 0,
978
979 0,
980
981 0
982 };
983
984 UChar dest[64];
985 const char *pb;
986 const UChar *pu, *pDest;
987 int32_t srcLength, destLength0, destLength;
988 int number;
989 UErrorCode errorCode;
990
991 /* verify checking for some illegal arguments */
992 dest[0]=0x1234;
993 destLength=-1;
994 errorCode=U_ZERO_ERROR;
995 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
996 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
997 log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
998 }
999
1000 dest[0]=0x1234;
1001 destLength=-1;
1002 errorCode=U_ZERO_ERROR;
1003 pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
1004 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1005 log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
1006 }
1007
1008 dest[0]=0x1234;
1009 destLength=-1;
1010 errorCode=U_MEMORY_ALLOCATION_ERROR;
1011 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
1012 if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
1013 log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
1014 }
1015
1016 dest[0]=0x1234;
1017 destLength=-1;
1018 errorCode=U_MEMORY_ALLOCATION_ERROR;
1019 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, NULL);
1020 if(dest[0]!=0x1234) {
1021 log_err("u_strFromUTF8Lenient(pErrorCode=NULL) failed\n");
1022 }
1023
1024 /* test normal behavior */
1025 number=0; /* string number for log_err() */
1026
1027 for(pb=(const char *)bytes, pu=uchars;
1028 *pb!=(char)0xff;
1029 pb+=srcLength+1, pu+=destLength0+1, ++number
1030 ) {
1031 srcLength=uprv_strlen(pb);
1032 destLength0=u_strlen(pu);
1033
1034 /* preflighting with NUL-termination */
1035 dest[0]=0x1234;
1036 destLength=-1;
1037 errorCode=U_ZERO_ERROR;
1038 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
1039 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1040 pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
1041 ) {
1042 log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
1043 }
1044
1045 /* preflighting/some capacity with NUL-termination */
1046 if(srcLength>0) {
1047 dest[destLength0-1]=0x1234;
1048 destLength=-1;
1049 errorCode=U_ZERO_ERROR;
1050 pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
1051 if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1052 dest[destLength0-1]!=0x1234 || destLength!=destLength0
1053 ) {
1054 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
1055 }
1056 }
1057
1058 /* conversion with NUL-termination, much capacity */
1059 dest[0]=dest[destLength0]=0x1234;
1060 destLength=-1;
1061 errorCode=U_ZERO_ERROR;
1062 pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, -1, &errorCode);
1063 if (errorCode!=U_ZERO_ERROR ||
1064 pDest!=dest || dest[destLength0]!=0 ||
1065 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1066 ) {
1067 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
1068 }
1069
1070 /* conversion with NUL-termination, exact capacity */
1071 dest[0]=dest[destLength0]=0x1234;
1072 destLength=-1;
1073 errorCode=U_ZERO_ERROR;
1074 pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
1075 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1076 pDest!=dest || dest[destLength0]!=0x1234 ||
1077 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1078 ) {
1079 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
1080 }
1081
1082 /* preflighting with length */
1083 dest[0]=0x1234;
1084 destLength=-1;
1085 errorCode=U_ZERO_ERROR;
1086 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
1087 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1088 pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
1089 ) {
1090 log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
1091 }
1092
1093 /* preflighting/some capacity with length */
1094 if(srcLength>0) {
1095 dest[srcLength-1]=0x1234;
1096 destLength=-1;
1097 errorCode=U_ZERO_ERROR;
1098 pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
1099 if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1100 dest[srcLength-1]!=0x1234 || destLength!=srcLength
1101 ) {
1102 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
1103 }
1104 }
1105
1106 /* conversion with length, much capacity */
1107 dest[0]=dest[destLength0]=0x1234;
1108 destLength=-1;
1109 errorCode=U_ZERO_ERROR;
1110 pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
1111 if (errorCode!=U_ZERO_ERROR ||
1112 pDest!=dest || dest[destLength0]!=0 ||
1113 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1114 ) {
1115 log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
1116 }
1117
1118 /* conversion with length, srcLength capacity */
1119 dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
1120 destLength=-1;
1121 errorCode=U_ZERO_ERROR;
1122 pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
1123 if(srcLength==destLength0) {
1124 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1125 pDest!=dest || dest[destLength0]!=0x1234 ||
1126 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1127 ) {
1128 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
1129 }
1130 } else {
1131 if (errorCode!=U_ZERO_ERROR ||
1132 pDest!=dest || dest[destLength0]!=0 ||
1133 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1134 ) {
1135 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
1136 }
1137 }
1138 }
1139 }
1140
/*
 * ASCII-only UTF-16 test text: four lines of eight letters,
 * each followed by CR LF, and one terminating NUL.
 */
static const uint16_t src16j[] = {
    0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,  0x0d, 0x0a,
    0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52,  0x0d, 0x0a,
    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,  0x0d, 0x0a,
    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,  0x0d, 0x0a,
    0
};
/*
 * ASCII-only UTF-16 test text with embedded NULs:
 * eight groups of five code units, each group followed by U+0000.
 * (A former non-ASCII tail, U+00A8..U+00E9 U+0054 U+0000, is disabled: test only ASCII.)
 */
static const uint16_t src16WithNulls[] = {
    0x43, 0x44, 0x45, 0x46, 0x47,  0,
    0x48, 0x49, 0x4a, 0x0d, 0x0a,  0,
    0x4b, 0x4c, 0x4d, 0x4e, 0x4f,  0,
    0x50, 0x51, 0x52, 0x0d, 0x0a,  0,
    0x53, 0x54, 0x55, 0x56, 0x57,  0,
    0x58, 0x59, 0x5a, 0x0d, 0x0a,  0,
    0x53, 0x54, 0x55, 0x56, 0x57,  0,
    0x58, 0x59, 0x5a, 0x0d, 0x0a,  0
};
1171 static void Test_UChar_WCHART_API(void){
1172 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1173 UErrorCode err = U_ZERO_ERROR;
1174 const UChar* uSrc = src16j;
1175 int32_t uSrcLen = sizeof(src16j)/2;
1176 wchar_t* wDest = NULL;
1177 int32_t wDestLen = 0;
1178 int32_t reqLen= 0 ;
1179 UBool failed = FALSE;
1180 UChar* uDest = NULL;
1181 int32_t uDestLen = 0;
1182 int i =0;
1183 {
1184 /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */
1185 if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1186 log_err("u_strFromWCS() should return NULL with a bad argument\n");
1187 }
1188 if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1189 log_err("u_strToWCS() should return NULL with a bad argument\n");
1190 }
1191
1192 /* NULL source & destination. */
1193 err = U_ZERO_ERROR;
1194 u_strFromWCS(NULL,0,NULL,NULL,0,&err);
1195 if (err != U_STRING_NOT_TERMINATED_WARNING) {
1196 log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1197 }
1198 err = U_ZERO_ERROR;
1199 u_strToWCS(NULL,0,NULL,NULL,0,&err);
1200 if (err != U_STRING_NOT_TERMINATED_WARNING) {
1201 log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1202 }
1203 err = U_ZERO_ERROR;
1204
1205 /* pre-flight*/
1206 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1207
1208 if(err == U_BUFFER_OVERFLOW_ERROR){
1209 err=U_ZERO_ERROR;
1210 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1211 wDestLen = reqLen+1;
1212 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1213 }
1214
1215 /* pre-flight */
1216 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1217
1218
1219 if(err == U_BUFFER_OVERFLOW_ERROR){
1220 err =U_ZERO_ERROR;
1221 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1222 uDestLen = reqLen + 1;
1223 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1224 }else if(U_FAILURE(err)){
1225
1226 log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err));
1227 return;
1228 }
1229
1230 for(i=0; i< uSrcLen; i++){
1231 if(uDest[i] != src16j[i]){
1232 log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1233 failed =TRUE;
1234 }
1235 }
1236
1237 if(U_FAILURE(err)){
1238 failed = TRUE;
1239 }
1240 if(failed){
1241 log_err("u_strToWCS() failed \n");
1242 }
1243 free(wDest);
1244 free(uDest);
1245
1246
1247 /* test with embeded nulls */
1248 uSrc = src16WithNulls;
1249 uSrcLen = sizeof(src16WithNulls)/2;
1250 wDestLen =0;
1251 uDestLen =0;
1252 wDest = NULL;
1253 uDest = NULL;
1254 /* pre-flight*/
1255 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1256
1257 if(err == U_BUFFER_OVERFLOW_ERROR){
1258 err=U_ZERO_ERROR;
1259 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1260 wDestLen = reqLen+1;
1261 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1262 }
1263
1264 /* pre-flight */
1265 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1266
1267 if(err == U_BUFFER_OVERFLOW_ERROR){
1268 err =U_ZERO_ERROR;
1269 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1270 uDestLen = reqLen + 1;
1271 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1272 }
1273
1274 if(!U_FAILURE(err)) {
1275 for(i=0; i< uSrcLen; i++){
1276 if(uDest[i] != src16WithNulls[i]){
1277 log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i);
1278 failed =TRUE;
1279 }
1280 }
1281 }
1282
1283 if(U_FAILURE(err)){
1284 failed = TRUE;
1285 }
1286 if(failed){
1287 log_err("u_strToWCS() failed \n");
1288 }
1289 free(wDest);
1290 free(uDest);
1291
1292 }
1293
1294 {
1295
1296 uSrc = src16j;
1297 uSrcLen = sizeof(src16j)/2;
1298 wDestLen =0;
1299 uDestLen =0;
1300 wDest = NULL;
1301 uDest = NULL;
1302 wDestLen = 0;
1303 /* pre-flight*/
1304 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1305
1306 if(err == U_BUFFER_OVERFLOW_ERROR){
1307 err=U_ZERO_ERROR;
1308 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1309 wDestLen = reqLen+1;
1310 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1311 }
1312 uDestLen = 0;
1313 /* pre-flight */
1314 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1315
1316 if(err == U_BUFFER_OVERFLOW_ERROR){
1317 err =U_ZERO_ERROR;
1318 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1319 uDestLen = reqLen + 1;
1320 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1321 }
1322
1323
1324 if(!U_FAILURE(err)) {
1325 for(i=0; i< uSrcLen; i++){
1326 if(uDest[i] != src16j[i]){
1327 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1328 failed =TRUE;
1329 }
1330 }
1331 }
1332
1333 if(U_FAILURE(err)){
1334 failed = TRUE;
1335 }
1336 if(failed){
1337 log_err("u_strToWCS() failed \n");
1338 }
1339 free(wDest);
1340 free(uDest);
1341 }
1342
1343 /*
1344 * Test u_terminateWChars().
1345 * All u_terminateXYZ() use the same implementation macro;
1346 * we test this function to improve API coverage.
1347 */
1348 {
1349 wchar_t buffer[10];
1350
1351 err=U_ZERO_ERROR;
1352 buffer[3]=0x20ac;
1353 wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1354 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1355 log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n",
1356 u_errorName(err), wDestLen, buffer[3]);
1357 }
1358
1359 err=U_ZERO_ERROR;
1360 buffer[3]=0x20ac;
1361 wDestLen=u_terminateWChars(buffer, 3, 3, &err);
1362 if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) {
1363 log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n",
1364 u_errorName(err), wDestLen, buffer[3]);
1365 }
1366
1367 err=U_STRING_NOT_TERMINATED_WARNING;
1368 buffer[3]=0x20ac;
1369 wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1370 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1371 log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n",
1372 u_errorName(err), wDestLen, buffer[3]);
1373 }
1374
1375 err=U_ZERO_ERROR;
1376 buffer[3]=0x20ac;
1377 wDestLen=u_terminateWChars(buffer, 2, 3, &err);
1378 if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) {
1379 log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n",
1380 u_errorName(err), wDestLen, buffer[3]);
1381 }
1382 }
1383 #else
1384 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1385 #endif
1386 }
1387
1388 static void Test_widestrs()
1389 {
1390 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1391 wchar_t ws[100];
1392 UChar rts[100];
1393 int32_t wcap = UPRV_LENGTHOF(ws);
1394 int32_t wl;
1395 int32_t rtcap = UPRV_LENGTHOF(rts);
1396 int32_t rtl;
1397 wchar_t *wcs;
1398 UChar *cp;
1399 const char *errname;
1400 UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0};
1401 int32_t ul = UPRV_LENGTHOF(ustr) -1;
1402 char astr[100];
1403
1404 UErrorCode err;
1405
1406 err = U_ZERO_ERROR;
1407 wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err);
1408 if (U_FAILURE(err)) {
1409 errname = u_errorName(err);
1410 log_err("test_widestrs: u_strToWCS error: %s!\n",errname);
1411 }
1412 if(ul!=wl){
1413 log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl);
1414 }
1415 err = U_ZERO_ERROR;
1416 wl = (int32_t)uprv_wcslen(wcs);
1417 cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err);
1418 (void)cp; /* Suppress set but not used warning. */
1419 if (U_FAILURE(err)) {
1420 errname = u_errorName(err);
1421 fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname);
1422 }
1423 if(wl != rtl){
1424 log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl);
1425 }
1426 #else
1427 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1428 #endif
1429 }
1430
1431 static void
1432 Test_WCHART_LongString(){
1433 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1434 UErrorCode status = U_ZERO_ERROR;
1435 const char* testdatapath=loadTestData(&status);
1436 UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status);
1437 int32_t strLen =0;
1438 const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status);
1439 const UChar* uSrc = str;
1440 int32_t uSrcLen = strLen;
1441 int32_t wDestLen =0, reqLen=0, i=0;
1442 int32_t uDestLen =0;
1443 wchar_t* wDest = NULL;
1444 UChar* uDest = NULL;
1445 UBool failed = FALSE;
1446
1447 log_verbose("Loaded string of %d UChars\n", uSrcLen);
1448
1449 if(U_FAILURE(status)){
1450 log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status));
1451 return;
1452 }
1453
1454 /* pre-flight*/
1455 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1456
1457 if(status == U_BUFFER_OVERFLOW_ERROR){
1458 status=U_ZERO_ERROR;
1459 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1460 wDestLen = reqLen+1;
1461 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1462 log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t));
1463 }
1464
1465 {
1466 int j;
1467 for(j=0;j>=0&&j<reqLen;j++) {
1468 if(wDest[j]!=uSrc[j]) {
1469 log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j);
1470 break;
1471 }
1472 }
1473 }
1474
1475 uDestLen = 0;
1476 /* pre-flight */
1477 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1478 if(status == U_BUFFER_OVERFLOW_ERROR){
1479 status =U_ZERO_ERROR;
1480 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1481 u_memset(uDest,0xFFFF,reqLen+1);
1482 uDestLen = reqLen + 1;
1483 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1484 log_verbose("Back to %d UChars\n", reqLen);
1485 }
1486 #if defined(U_WCHAR_IS_UTF16)
1487 log_verbose("U_WCHAR_IS_UTF16\n");
1488 #elif defined(U_WCHAR_IS_UTF32)
1489 log_verbose("U_WCHAR_IS_UTF32\n");
1490 #else
1491 log_verbose("U_WCHAR_IS_idunno (not UTF)\n");
1492 #endif
1493
1494 if(reqLen!=uSrcLen) {
1495 log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen);
1496 }
1497
1498 for(i=0; i< uSrcLen; i++){
1499 if(uDest[i] != str[i]){
1500 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i);
1501 failed =TRUE;
1502 }
1503 }
1504
1505 if(U_FAILURE(status)){
1506 failed = TRUE;
1507 }
1508 if(failed){
1509 log_err("u_strToWCS() failed \n");
1510 }
1511 free(wDest);
1512 free(uDest);
1513 /* close the bundle */
1514 ures_close(theBundle);
1515 #else
1516 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1517 #endif
1518 }
1519
1520 static void Test_strToJavaModifiedUTF8() {
1521 static const UChar src[]={
1522 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1523 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1524 0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1525 0xdbff, 0xdfff,
1526 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f
1527 };
1528 static const uint8_t expected[]={
1529 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1530 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1531 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1532 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80,
1533 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1534 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f
1535 };
1536 static const UChar shortSrc[]={
1537 0xe01, 0xe1, 0x61
1538 };
1539 static const uint8_t shortExpected[]={
1540 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1541 };
1542 static const UChar asciiNul[]={
1543 0x61, 0x62, 0x63, 0
1544 };
1545 static const uint8_t asciiNulExpected[]={
1546 0x61, 0x62, 0x63
1547 };
1548 char dest[200];
1549 char *p;
1550 int32_t length, expectedTerminatedLength;
1551 UErrorCode errorCode;
1552
1553 expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")-
1554 (const char *)expected);
1555
1556 errorCode=U_ZERO_ERROR;
1557 length=-5;
1558 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1559 src, UPRV_LENGTHOF(src), &errorCode);
1560 if( U_FAILURE(errorCode) || p!=dest ||
1561 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1562 dest[length]!=0
1563 ) {
1564 log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode));
1565 }
1566 memset(dest, 0xff, sizeof(dest));
1567 errorCode=U_ZERO_ERROR;
1568 length=-5;
1569 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL,
1570 src, UPRV_LENGTHOF(src), &errorCode);
1571 if( U_FAILURE(errorCode) || p!=dest ||
1572 0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1573 dest[UPRV_LENGTHOF(expected)]!=0
1574 ) {
1575 log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1576 }
1577 memset(dest, 0xff, sizeof(dest));
1578 errorCode=U_ZERO_ERROR;
1579 length=-5;
1580 p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected), &length,
1581 src, UPRV_LENGTHOF(src), &errorCode);
1582 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1583 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1584 dest[length]!=(char)0xff
1585 ) {
1586 log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode));
1587 }
1588 memset(dest, 0xff, sizeof(dest));
1589 errorCode=U_ZERO_ERROR;
1590 length=-5;
1591 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode);
1592 if( U_FAILURE(errorCode) || p!=dest ||
1593 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1594 dest[length]!=0
1595 ) {
1596 log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1597 }
1598 memset(dest, 0xff, sizeof(dest));
1599 errorCode=U_ZERO_ERROR;
1600 length=-5;
1601 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode);
1602 if( U_FAILURE(errorCode) || p!=dest ||
1603 0!=memcmp(dest, expected, expectedTerminatedLength) ||
1604 dest[expectedTerminatedLength]!=0
1605 ) {
1606 log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1607 }
1608 memset(dest, 0xff, sizeof(dest));
1609 errorCode=U_ZERO_ERROR;
1610 length=-5;
1611 p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected)/2, &length,
1612 src, UPRV_LENGTHOF(src), &errorCode);
1613 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1614 length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=(char)0xff
1615 ) {
1616 log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode));
1617 }
1618 memset(dest, 0xff, sizeof(dest));
1619 errorCode=U_ZERO_ERROR;
1620 length=-5;
1621 p=u_strToJavaModifiedUTF8(NULL, 0, &length,
1622 src, UPRV_LENGTHOF(src), &errorCode);
1623 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1624 length!=UPRV_LENGTHOF(expected) || dest[0]!=(char)0xff
1625 ) {
1626 log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode));
1627 }
1628 memset(dest, 0xff, sizeof(dest));
1629 errorCode=U_ZERO_ERROR;
1630 length=-5;
1631 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1632 shortSrc, UPRV_LENGTHOF(shortSrc), &errorCode);
1633 if( U_FAILURE(errorCode) || p!=dest ||
1634 length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1635 dest[length]!=0
1636 ) {
1637 log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode));
1638 }
1639 memset(dest, 0xff, sizeof(dest));
1640 errorCode=U_ZERO_ERROR;
1641 length=-5;
1642 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1643 asciiNul, -1, &errorCode);
1644 if( U_FAILURE(errorCode) || p!=dest ||
1645 length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1646 dest[length]!=0
1647 ) {
1648 log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode));
1649 }
1650 memset(dest, 0xff, sizeof(dest));
1651 errorCode=U_ZERO_ERROR;
1652 length=-5;
1653 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1654 NULL, 0, &errorCode);
1655 if( U_FAILURE(errorCode) || p!=dest ||
1656 length!=0 || dest[0]!=0
1657 ) {
1658 log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode));
1659 }
1660
1661 /* illegal arguments */
1662 memset(dest, 0xff, sizeof(dest));
1663 errorCode=U_ZERO_ERROR;
1664 length=-5;
1665 p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length,
1666 src, UPRV_LENGTHOF(src), &errorCode);
1667 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1668 log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode));
1669 }
1670 memset(dest, 0xff, sizeof(dest));
1671 errorCode=U_ZERO_ERROR;
1672 length=-5;
1673 p=u_strToJavaModifiedUTF8(dest, -1, &length,
1674 src, UPRV_LENGTHOF(src), &errorCode);
1675 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1676 log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1677 }
1678 memset(dest, 0xff, sizeof(dest));
1679 errorCode=U_ZERO_ERROR;
1680 length=-5;
1681 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1682 NULL, UPRV_LENGTHOF(src), &errorCode);
1683 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1684 log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode));
1685 }
1686 memset(dest, 0xff, sizeof(dest));
1687 errorCode=U_ZERO_ERROR;
1688 length=-5;
1689 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1690 NULL, -1, &errorCode);
1691 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1692 log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1693 }
1694 }
1695
1696 static void Test_strFromJavaModifiedUTF8() {
1697 static const uint8_t src[]={
1698 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1699 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1700 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1701 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0,
1702 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1703 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80, /* invalid sequences */
1704 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1705 0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad, /* non-shortest forms are allowed */
1706 0xe0, 0xb8, 0x8e, 0x6f
1707 };
1708 static const UChar expected[]={
1709 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1710 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1711 0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1712 0xdbff, 0xdfff,
1713 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1714 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1715 0x6c, 0xed,
1716 0xe0e, 0x6f
1717 };
1718 static const uint8_t shortSrc[]={
1719 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1720 };
1721 static const UChar shortExpected[]={
1722 0xe01, 0xe1, 0x61
1723 };
1724 static const uint8_t asciiNul[]={
1725 0x61, 0x62, 0x63, 0
1726 };
1727 static const UChar asciiNulExpected[]={
1728 0x61, 0x62, 0x63
1729 };
1730 static const uint8_t invalid[]={
1731 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80
1732 };
1733 static const UChar invalidExpectedFFFD[]={
1734 0xfffd, 0xfffd, 0xfffd, 0xfffd
1735 };
1736 static const UChar invalidExpected50000[]={
1737 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00
1738 };
1739 UChar dest[200];
1740 UChar *p;
1741 int32_t length, expectedTerminatedLength;
1742 int32_t numSubstitutions;
1743 UErrorCode errorCode;
1744
1745 expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected);
1746
1747 errorCode=U_ZERO_ERROR;
1748 length=numSubstitutions=-5;
1749 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1750 (const char *)src, UPRV_LENGTHOF(src),
1751 0xfffd, &numSubstitutions, &errorCode);
1752 if( U_FAILURE(errorCode) || p!=dest ||
1753 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1754 dest[length]!=0 ||
1755 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1756 ) {
1757 log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode));
1758 }
1759 memset(dest, 0xff, sizeof(dest));
1760 errorCode=U_ZERO_ERROR;
1761 length=numSubstitutions=-5;
1762 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1763 (const char *)src, UPRV_LENGTHOF(src),
1764 0xfffd, &numSubstitutions, &errorCode);
1765 if( U_FAILURE(errorCode) || p!=dest ||
1766 0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1767 dest[UPRV_LENGTHOF(expected)]!=0 ||
1768 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1769 ) {
1770 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1771 }
1772 memset(dest, 0xff, sizeof(dest));
1773 errorCode=U_ZERO_ERROR;
1774 length=numSubstitutions=-5;
1775 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1776 (const char *)src, UPRV_LENGTHOF(src),
1777 0xfffd, NULL, &errorCode);
1778 if( U_FAILURE(errorCode) || p!=dest ||
1779 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1780 dest[length]!=0
1781 ) {
1782 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1783 }
1784 memset(dest, 0xff, sizeof(dest));
1785 errorCode=U_ZERO_ERROR;
1786 length=numSubstitutions=-5;
1787 p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected), &length,
1788 (const char *)src, UPRV_LENGTHOF(src),
1789 0xfffd, &numSubstitutions, &errorCode);
1790 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1791 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1792 dest[length]!=0xffff ||
1793 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1794 ) {
1795 log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode));
1796 }
1797 memset(dest, 0xff, sizeof(dest));
1798 errorCode=U_ZERO_ERROR;
1799 length=numSubstitutions=-5;
1800 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1801 (const char *)src, -1,
1802 0xfffd, &numSubstitutions, &errorCode);
1803 if( U_FAILURE(errorCode) || p!=dest ||
1804 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1805 dest[length]!=0 ||
1806 numSubstitutions!=0
1807 ) {
1808 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1809 }
1810 memset(dest, 0xff, sizeof(dest));
1811 errorCode=U_ZERO_ERROR;
1812 length=numSubstitutions=-5;
1813 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1814 (const char *)src, -1,
1815 0xfffd, &numSubstitutions, &errorCode);
1816 if( U_FAILURE(errorCode) || p!=dest ||
1817 0!=memcmp(dest, expected, expectedTerminatedLength) ||
1818 dest[expectedTerminatedLength]!=0 ||
1819 numSubstitutions!=0
1820 ) {
1821 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1822 }
1823 memset(dest, 0xff, sizeof(dest));
1824 errorCode=U_ZERO_ERROR;
1825 length=numSubstitutions=-5;
1826 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1827 (const char *)src, -1,
1828 0xfffd, NULL, &errorCode);
1829 if( U_FAILURE(errorCode) || p!=dest ||
1830 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1831 dest[length]!=0
1832 ) {
1833 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1834 }
1835 memset(dest, 0xff, sizeof(dest));
1836 errorCode=U_ZERO_ERROR;
1837 length=numSubstitutions=-5;
1838 p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected)/2, &length,
1839 (const char *)src, UPRV_LENGTHOF(src),
1840 0xfffd, &numSubstitutions, &errorCode);
1841 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1842 length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=0xffff
1843 ) {
1844 log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode));
1845 }
1846 memset(dest, 0xff, sizeof(dest));
1847 errorCode=U_ZERO_ERROR;
1848 length=numSubstitutions=-5;
1849 p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length,
1850 (const char *)src, UPRV_LENGTHOF(src),
1851 0xfffd, &numSubstitutions, &errorCode);
1852 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1853 length!=UPRV_LENGTHOF(expected) || dest[0]!=0xffff
1854 ) {
1855 log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode));
1856 }
1857 memset(dest, 0xff, sizeof(dest));
1858 errorCode=U_ZERO_ERROR;
1859 length=numSubstitutions=-5;
1860 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1861 (const char *)shortSrc, UPRV_LENGTHOF(shortSrc),
1862 0xfffd, &numSubstitutions, &errorCode);
1863 if( U_FAILURE(errorCode) || p!=dest ||
1864 length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1865 dest[length]!=0 ||
1866 numSubstitutions!=0
1867 ) {
1868 log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode));
1869 }
1870 memset(dest, 0xff, sizeof(dest));
1871 errorCode=U_ZERO_ERROR;
1872 length=numSubstitutions=-5;
1873 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1874 (const char *)asciiNul, -1,
1875 0xfffd, &numSubstitutions, &errorCode);
1876 if( U_FAILURE(errorCode) || p!=dest ||
1877 length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1878 dest[length]!=0 ||
1879 numSubstitutions!=0
1880 ) {
1881 log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode));
1882 }
1883 memset(dest, 0xff, sizeof(dest));
1884 errorCode=U_ZERO_ERROR;
1885 length=numSubstitutions=-5;
1886 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1887 NULL, 0, 0xfffd, &numSubstitutions, &errorCode);
1888 if( U_FAILURE(errorCode) || p!=dest ||
1889 length!=0 || dest[0]!=0 ||
1890 numSubstitutions!=0
1891 ) {
1892 log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode));
1893 }
1894 memset(dest, 0xff, sizeof(dest));
1895 errorCode=U_ZERO_ERROR;
1896 length=numSubstitutions=-5;
1897 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1898 (const char *)invalid, UPRV_LENGTHOF(invalid),
1899 0xfffd, &numSubstitutions, &errorCode);
1900 if( U_FAILURE(errorCode) || p!=dest ||
1901 length!=UPRV_LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) ||
1902 dest[length]!=0 ||
1903 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1904 ) {
1905 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode));
1906 }
1907 memset(dest, 0xff, sizeof(dest));
1908 errorCode=U_ZERO_ERROR;
1909 length=numSubstitutions=-5;
1910 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1911 (const char *)invalid, UPRV_LENGTHOF(invalid),
1912 0x50000, &numSubstitutions, &errorCode);
1913 if( U_FAILURE(errorCode) || p!=dest ||
1914 length!=UPRV_LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) ||
1915 dest[length]!=0 ||
1916 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD) /* not ...50000 */
1917 ) {
1918 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode));
1919 }
1920 memset(dest, 0xff, sizeof(dest));
1921 errorCode=U_ZERO_ERROR;
1922 length=numSubstitutions=-5;
1923 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1924 (const char *)invalid, UPRV_LENGTHOF(invalid),
1925 U_SENTINEL, &numSubstitutions, &errorCode);
1926 if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) {
1927 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode));
1928 }
1929 memset(dest, 0xff, sizeof(dest));
1930 errorCode=U_ZERO_ERROR;
1931 length=numSubstitutions=-5;
1932 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1933 (const char *)src, UPRV_LENGTHOF(src),
1934 U_SENTINEL, &numSubstitutions, &errorCode);
1935 if( errorCode!=U_INVALID_CHAR_FOUND ||
1936 length>=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)-1]!=0xffff ||
1937 numSubstitutions!=0
1938 ) {
1939 log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode));
1940 }
1941
1942 /* illegal arguments */
1943 memset(dest, 0xff, sizeof(dest));
1944 errorCode=U_ZERO_ERROR;
1945 length=numSubstitutions=-5;
1946 p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length,
1947 (const char *)src, UPRV_LENGTHOF(src),
1948 0xfffd, &numSubstitutions, &errorCode);
1949 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1950 log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode));
1951 }
1952 memset(dest, 0xff, sizeof(dest));
1953 errorCode=U_ZERO_ERROR;
1954 length=numSubstitutions=-5;
1955 p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length,
1956 (const char *)src, UPRV_LENGTHOF(src),
1957 0xfffd, &numSubstitutions, &errorCode);
1958 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1959 log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1960 }
1961 memset(dest, 0xff, sizeof(dest));
1962 errorCode=U_ZERO_ERROR;
1963 length=numSubstitutions=-5;
1964 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1965 NULL, UPRV_LENGTHOF(src),
1966 0xfffd, &numSubstitutions, &errorCode);
1967 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1968 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode));
1969 }
1970 memset(dest, 0xff, sizeof(dest));
1971 errorCode=U_ZERO_ERROR;
1972 length=numSubstitutions=-5;
1973 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1974 NULL, -1, 0xfffd, &numSubstitutions, &errorCode);
1975 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1976 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1977 }
1978 memset(dest, 0xff, sizeof(dest));
1979 errorCode=U_ZERO_ERROR;
1980 length=numSubstitutions=-5;
1981 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1982 (const char *)src, UPRV_LENGTHOF(src),
1983 0x110000, &numSubstitutions, &errorCode);
1984 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1985 log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode));
1986 }
1987 memset(dest, 0xff, sizeof(dest));
1988 errorCode=U_ZERO_ERROR;
1989 length=numSubstitutions=-5;
1990 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1991 (const char *)src, UPRV_LENGTHOF(src),
1992 0xdfff, &numSubstitutions, &errorCode);
1993 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1994 log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode));
1995 }
1996 }
1997
1998 /* test that string transformation functions permit NULL source pointer when source length==0 */
1999 static void TestNullEmptySource() {
2000 char dest8[4]={ 3, 3, 3, 3 };
2001 UChar dest16[4]={ 3, 3, 3, 3 };
2002 UChar32 dest32[4]={ 3, 3, 3, 3 };
2003 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2004 wchar_t destW[4]={ 3, 3, 3, 3 };
2005 #endif
2006
2007 int32_t length;
2008 UErrorCode errorCode;
2009
2010 /* u_strFromXyz() */
2011
2012 dest16[0]=3;
2013 length=3;
2014 errorCode=U_ZERO_ERROR;
2015 u_strFromUTF8(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2016 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2017 log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n");
2018 }
2019
2020 dest16[0]=3;
2021 length=3;
2022 errorCode=U_ZERO_ERROR;
2023 u_strFromUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2024 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2025 log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2026 }
2027
2028 dest16[0]=3;
2029 length=3;
2030 errorCode=U_ZERO_ERROR;
2031 u_strFromUTF8Lenient(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2032 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2033 log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n");
2034 }
2035
2036 dest16[0]=3;
2037 length=3;
2038 errorCode=U_ZERO_ERROR;
2039 u_strFromUTF32(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2040 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2041 log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n");
2042 }
2043
2044 dest16[0]=3;
2045 length=3;
2046 errorCode=U_ZERO_ERROR;
2047 u_strFromUTF32WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2048 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2049 log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2050 }
2051
2052 dest16[0]=3;
2053 length=3;
2054 errorCode=U_ZERO_ERROR;
2055 u_strFromJavaModifiedUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2056 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2057 log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2058 }
2059
2060 /* u_strToXyz() */
2061
2062 dest8[0]=3;
2063 length=3;
2064 errorCode=U_ZERO_ERROR;
2065 u_strToUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2066 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2067 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2068 }
2069
2070 dest8[0]=3;
2071 length=3;
2072 errorCode=U_ZERO_ERROR;
2073 u_strToUTF8WithSub(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2074 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2075 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2076 }
2077
2078 dest32[0]=3;
2079 length=3;
2080 errorCode=U_ZERO_ERROR;
2081 u_strToUTF32(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, &errorCode);
2082 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2083 log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n");
2084 }
2085
2086 dest32[0]=3;
2087 length=3;
2088 errorCode=U_ZERO_ERROR;
2089 u_strToUTF32WithSub(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2090 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2091 log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2092 }
2093
2094 dest8[0]=3;
2095 length=3;
2096 errorCode=U_ZERO_ERROR;
2097 u_strToJavaModifiedUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2098 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2099 log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n");
2100 }
2101
2102 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2103
2104 dest16[0]=3;
2105 length=3;
2106 errorCode=U_ZERO_ERROR;
2107 u_strFromWCS(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2108 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2109 log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n");
2110 }
2111
2112 destW[0]=3;
2113 length=3;
2114 errorCode=U_ZERO_ERROR;
2115 u_strToWCS(destW, UPRV_LENGTHOF(destW), &length, NULL, 0, &errorCode);
2116 if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) {
2117 log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n");
2118 }
2119
2120 #endif
2121 }