]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/custrtrn.c
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / custrtrn.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File custrtrn.C
11 *
12 * Modification History:
13 * Name Description
14 * Ram String transformations test
15 *********************************************************************************
16 */
17 /****************************************************************************/
18
19
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "unicode/ures.h"
26 #include "ustr_imp.h"
27 #include "cintltst.h"
28 #include "cmemory.h"
29 #include "cstring.h"
30 #include "cwchar.h"
31
/* Registration entry point (non-static); its definition follows these declarations. */
32 void addUCharTransformTest(TestNode** root);
33
/*
 * Forward declarations of the file-local test cases. Each of them is passed
 * to addTest() by addUCharTransformTest().
 */
34 static void Test_strToUTF32(void);
35 static void Test_strToUTF32_surrogates(void);
36 static void Test_strFromUTF32(void);
37 static void Test_strFromUTF32_surrogates(void);
38 static void Test_UChar_UTF8_API(void);
39 static void Test_FromUTF8(void);
40 static void Test_FromUTF8Lenient(void);
41 static void Test_UChar_WCHART_API(void);
42 static void Test_widestrs(void);
43 static void Test_WCHART_LongString(void);
44 static void Test_strToJavaModifiedUTF8(void);
45 static void Test_strFromJavaModifiedUTF8(void);
46 static void TestNullEmptySource(void);
46 static void TestNullEmptySource(void);
47
48 void
49 addUCharTransformTest(TestNode** root)
50 {
51 addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
52 addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
53 addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
54 addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
55 addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
56 addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
57 addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
58 addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API");
59 addTest(root, &Test_widestrs, "custrtrn/Test_widestrs");
60 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
61 addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
62 #endif
63 addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8");
64 addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8");
65 addTest(root, &TestNullEmptySource, "custrtrn/TestNullEmptySource");
66 }
67
/*
 * UTF-32 test fixture: BMP code points, then a run of non-BMP code points
 * (0x2A699 .. 0x2A6D5), then more BMP code points, ending with an explicit
 * 0 terminator. It is intended to hold the same text as src16 (the UTF-16
 * fixture); Test_strToUTF32() and Test_strFromUTF32() convert one into the
 * other and compare, so the two tables must be kept in sync.
 */
68 static const UChar32 src32[]={
69 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
70 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
71 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
72 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
73 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
74 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
75 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
76 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
77 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
78 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
79 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
80 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
81 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
82 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
83 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
84 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
85 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
86 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
87 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
88 /* test non-BMP code points */
89 0x0002A699,
90 0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB,
91 0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7,
92 0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1,
93 0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0,
94 0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5,
95
96 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
97 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
98 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
99 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
100 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
101 };
102
/*
 * UTF-16 test fixture, intended to hold the same text as src32: the BMP
 * rows are the same values, and the non-BMP section is written as
 * lead/trail surrogate pairs (0xD869 followed by 0xDE99 .. 0xDED5). It ends
 * with an explicit 0 terminator. Test_strToUTF32() uses it as conversion
 * input and Test_strFromUTF32() uses it as the expected output, so it must
 * be kept in sync with src32.
 */
103 static const UChar src16[] = {
104 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
105 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
106 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
107 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
108 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
109 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
110 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
111 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
112 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
113 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
114 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
115 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
116 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
117 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
118 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
119 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
120 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
121 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
122 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
123
124 /* test non-BMP code points */
125 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
126 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
127 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
128 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
129 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
130 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
131 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
132 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
133 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
134 0xD869, 0xDED5,
135
136 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
137 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
138 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
139 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
140 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
141 };
142
143
144 static void Test_strToUTF32(void){
145 UErrorCode err = U_ZERO_ERROR;
146 UChar32 u32Target[400];
147 int32_t u32DestLen;
148 int i= 0;
149
150 /* first with length */
151 u32DestLen = -2;
152 u_strToUTF32(u32Target, 0, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
153 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
154 log_err("u_strToUTF32(preflight with length): "
155 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
156 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
157 return;
158 }
159 err = U_ZERO_ERROR;
160 u32DestLen = -2;
161 u_strToUTF32(u32Target, UPRV_LENGTHOF(src32)+1, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
162 if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
163 log_err("u_strToUTF32(with length): "
164 "length %ld != %ld and %s != U_ZERO_ERROR\n",
165 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
166 return;
167 }
168 /*for(i=0; i< u32DestLen; i++){
169 printf("0x%08X, ",uTarget[i]);
170 if(i%10==0){
171 printf("\n");
172 }
173 }*/
174 for(i=0; i< UPRV_LENGTHOF(src32); i++){
175 if(u32Target[i] != src32[i]){
176 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i);
177 }
178 }
179 if(u32Target[i] != 0){
180 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i);
181 }
182
183 /* now NUL-terminated */
184 u32DestLen = -2;
185 u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err);
186 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
187 log_err("u_strToUTF32(preflight with NUL-termination): "
188 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
189 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
190 return;
191 }
192 err = U_ZERO_ERROR;
193 u32DestLen = -2;
194 u_strToUTF32(u32Target, UPRV_LENGTHOF(src32), &u32DestLen, src16, -1,&err);
195 if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
196 log_err("u_strToUTF32(with NUL-termination): "
197 "length %ld != %ld and %s != U_ZERO_ERROR\n",
198 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
199 return;
200 }
201
202 for(i=0; i< UPRV_LENGTHOF(src32); i++){
203 if(u32Target[i] != src32[i]){
204 log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]);
205 }
206 }
207 }
208
209 /* test unpaired surrogates */
210 static void Test_strToUTF32_surrogates() {
211 UErrorCode err = U_ZERO_ERROR;
212 UChar32 u32Target[400];
213 int32_t len16, u32DestLen;
214 int32_t numSubstitutions;
215 int i;
216
217 static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
218 static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
219 static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 };
220 static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 };
221 len16 = UPRV_LENGTHOF(surr16);
222 for(i = 0; i < 4; ++i) {
223 err = U_ZERO_ERROR;
224 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
225 if(err != U_INVALID_CHAR_FOUND) {
226 log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
227 (long)i, u_errorName(err));
228 return;
229 }
230
231 err = U_ZERO_ERROR;
232 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
233 if(err != U_INVALID_CHAR_FOUND) {
234 log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
235 (long)i, u_errorName(err));
236 return;
237 }
238
239 err = U_ZERO_ERROR;
240 u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
241 if(err != U_INVALID_CHAR_FOUND) {
242 log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
243 (long)i, u_errorName(err));
244 return;
245 }
246
247 err = U_ZERO_ERROR;
248 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
249 if(err != U_INVALID_CHAR_FOUND) {
250 log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
251 (long)i, u_errorName(err));
252 return;
253 }
254 }
255
256 err = U_ZERO_ERROR;
257 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
258 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
259 log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
260 u_errorName(err));
261 return;
262 }
263
264 err = U_ZERO_ERROR;
265 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
266 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
267 log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
268 u_errorName(err));
269 return;
270 }
271
272 err = U_ZERO_ERROR;
273 u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
274 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
275 log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
276 u_errorName(err));
277 return;
278 }
279
280 err = U_ZERO_ERROR;
281 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
282 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
283 log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
284 u_errorName(err));
285 return;
286 }
287
288 /* with substitution character */
289 numSubstitutions = -1;
290 err = U_ZERO_ERROR;
291 u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
292 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
293 log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
294 u_errorName(err));
295 return;
296 }
297
298 err = U_ZERO_ERROR;
299 u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
300 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) {
301 log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
302 u_errorName(err));
303 return;
304 }
305
306 err = U_ZERO_ERROR;
307 u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
308 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
309 log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
310 u_errorName(err));
311 return;
312 }
313
314 err = U_ZERO_ERROR;
315 u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
316 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) {
317 log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
318 u_errorName(err));
319 return;
320 }
321 }
322
323 static void Test_strFromUTF32(void){
324 UErrorCode err = U_ZERO_ERROR;
325 UChar uTarget[400];
326 int32_t uDestLen;
327 int i= 0;
328
329 /* first with length */
330 uDestLen = -2;
331 u_strFromUTF32(uTarget,0,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
332 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
333 log_err("u_strFromUTF32(preflight with length): "
334 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
335 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
336 return;
337 }
338 err = U_ZERO_ERROR;
339 uDestLen = -2;
340 u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16)+1,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
341 if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
342 log_err("u_strFromUTF32(with length): "
343 "length %ld != %ld and %s != U_ZERO_ERROR\n",
344 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
345 return;
346 }
347 /*for(i=0; i< uDestLen; i++){
348 printf("0x%04X, ",uTarget[i]);
349 if(i%10==0){
350 printf("\n");
351 }
352 }*/
353
354 for(i=0; i< uDestLen; i++){
355 if(uTarget[i] != src16[i]){
356 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i);
357 }
358 }
359 if(uTarget[i] != 0){
360 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i);
361 }
362
363 /* now NUL-terminated */
364 uDestLen = -2;
365 u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err);
366 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
367 log_err("u_strFromUTF32(preflight with NUL-termination): "
368 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
369 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
370 return;
371 }
372 err = U_ZERO_ERROR;
373 uDestLen = -2;
374 u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16),&uDestLen,src32,-1,&err);
375 if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
376 log_err("u_strFromUTF32(with NUL-termination): "
377 "length %ld != %ld and %s != U_ZERO_ERROR\n",
378 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
379 return;
380 }
381
382 for(i=0; i< uDestLen; i++){
383 if(uTarget[i] != src16[i]){
384 log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]);
385 }
386 }
387 }
388
389 /* test surrogate code points */
390 static void Test_strFromUTF32_surrogates() {
391 UErrorCode err = U_ZERO_ERROR;
392 UChar uTarget[400];
393 int32_t len32, uDestLen;
394 int32_t numSubstitutions;
395 int i;
396
397 static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
398 static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
399 static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
400 static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45,
401 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
402 len32 = UPRV_LENGTHOF(surr32);
403 for(i = 0; i < 6; ++i) {
404 err = U_ZERO_ERROR;
405 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
406 if(err != U_INVALID_CHAR_FOUND) {
407 log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
408 (long)i, u_errorName(err));
409 return;
410 }
411
412 err = U_ZERO_ERROR;
413 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
414 if(err != U_INVALID_CHAR_FOUND) {
415 log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
416 (long)i, u_errorName(err));
417 return;
418 }
419
420 err = U_ZERO_ERROR;
421 u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
422 if(err != U_INVALID_CHAR_FOUND) {
423 log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
424 (long)i, u_errorName(err));
425 return;
426 }
427
428 err = U_ZERO_ERROR;
429 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
430 if(err != U_INVALID_CHAR_FOUND) {
431 log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
432 (long)i, u_errorName(err));
433 return;
434 }
435 }
436
437 err = U_ZERO_ERROR;
438 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err);
439 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
440 log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
441 u_errorName(err));
442 return;
443 }
444
445 err = U_ZERO_ERROR;
446 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err);
447 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
448 log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
449 u_errorName(err));
450 return;
451 }
452
453 err = U_ZERO_ERROR;
454 u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err);
455 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
456 log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
457 u_errorName(err));
458 return;
459 }
460
461 err = U_ZERO_ERROR;
462 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err);
463 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
464 log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
465 u_errorName(err));
466 return;
467 }
468
469 /* with substitution character */
470 numSubstitutions = -1;
471 err = U_ZERO_ERROR;
472 u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
473 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) {
474 log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
475 u_errorName(err));
476 return;
477 }
478
479 err = U_ZERO_ERROR;
480 u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
481 if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) {
482 log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
483 u_errorName(err));
484 return;
485 }
486
487 err = U_ZERO_ERROR;
488 u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
489 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) {
490 log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
491 u_errorName(err));
492 return;
493 }
494
495 err = U_ZERO_ERROR;
496 u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
497 if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) {
498 log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
499 u_errorName(err));
500 return;
501 }
502 }
503
504 static void Test_UChar_UTF8_API(void){
505
506 UErrorCode err = U_ZERO_ERROR;
507 UChar uTemp[1];
508 char u8Temp[1];
509 UChar* uTarget=uTemp;
510 const char* u8Src;
511 int32_t u8SrcLen = 0;
512 int32_t uTargetLength = 0;
513 int32_t uDestLen=0;
514 const UChar* uSrc = src16;
515 int32_t uSrcLen = sizeof(src16)/2;
516 char* u8Target = u8Temp;
517 int32_t u8TargetLength =0;
518 int32_t u8DestLen =0;
519 UBool failed = FALSE;
520 int i= 0;
521 int32_t numSubstitutions;
522
523 {
524 /* preflight */
525 u8Temp[0] = 0x12;
526 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
527 if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){
528 err = U_ZERO_ERROR;
529 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
530 u8TargetLength = u8DestLen;
531
532 u8Target[u8TargetLength] = (char)0xfe;
533 u8DestLen = -1;
534 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
535 if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){
536 log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err));
537 return;
538 }
539
540 }
541 else {
542 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
543 }
544 failed = FALSE;
545 /*for(i=0; i< u8DestLen; i++){
546 printf("0x%04X, ",u8Target[i]);
547 if(i%10==0){
548 printf("\n");
549 }
550 }*/
551 /*for(i=0; i< u8DestLen; i++){
552 if(u8Target[i] != src8[i]){
553 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
554 failed =TRUE;
555 }
556 }
557 if(failed){
558 log_err("u_strToUTF8() failed \n");
559 }*/
560 u8Src = u8Target;
561 u8SrcLen = u8DestLen;
562
563 /* preflight */
564 uTemp[0] = 0x1234;
565 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
566 if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){
567 err = U_ZERO_ERROR;
568 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
569 uTargetLength = uDestLen;
570
571 uTarget[uTargetLength] = 0xfff0;
572 uDestLen = -1;
573 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
574 }
575 else {
576 log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n");
577 }
578 /*for(i=0; i< uDestLen; i++){
579 printf("0x%04X, ",uTarget[i]);
580 if(i%10==0){
581 printf("\n");
582 }
583 }*/
584
585 if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) {
586 failed = TRUE;
587 }
588 for(i=0; i< uSrcLen; i++){
589 if(uTarget[i] != src16[i]){
590 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
591 failed =TRUE;
592 }
593 }
594 if(failed){
595 log_err("error: u_strFromUTF8(after preflighting) failed\n");
596 }
597
598 free(u8Target);
599 free(uTarget);
600 }
601 {
602 u8SrcLen = -1;
603 uTargetLength = 0;
604 uSrcLen =-1;
605 u8TargetLength=0;
606 failed = FALSE;
607 /* preflight */
608 u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
609 if(err == U_BUFFER_OVERFLOW_ERROR){
610 err = U_ZERO_ERROR;
611 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
612 u8TargetLength = u8DestLen;
613
614 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
615
616 }
617 else {
618 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
619 }
620 failed = FALSE;
621 /*for(i=0; i< u8DestLen; i++){
622 printf("0x%04X, ",u8Target[i]);
623 if(i%10==0){
624 printf("\n");
625 }
626 }*/
627 /*for(i=0; i< u8DestLen; i++){
628 if(u8Target[i] != src8[i]){
629 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
630 failed =TRUE;
631 }
632 }
633 if(failed){
634 log_err("u_strToUTF8() failed \n");
635 }*/
636 u8Src = u8Target;
637 u8SrcLen = u8DestLen;
638
639 /* preflight */
640 u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
641 if(err == U_BUFFER_OVERFLOW_ERROR){
642 err = U_ZERO_ERROR;
643 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
644 uTargetLength = uDestLen;
645
646 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
647 }
648 else {
649 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
650 }
651 /*for(i=0; i< uDestLen; i++){
652 printf("0x%04X, ",uTarget[i]);
653 if(i%10==0){
654 printf("\n");
655 }
656 }*/
657
658 for(i=0; i< uSrcLen; i++){
659 if(uTarget[i] != src16[i]){
660 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
661 failed =TRUE;
662 }
663 }
664 if(failed){
665 log_err("u_strToUTF8() failed \n");
666 }
667
668 free(u8Target);
669 free(uTarget);
670 }
671
672 /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */
673 {
674 static const UChar
675 withLead16[]={ 0x1800, 0xd89a, 0x0061 },
676 withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 },
677 withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */
678 withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */
679 static const uint8_t
680 withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 },
681 withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 },
682 withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */
683 withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */
684 UChar out16[10];
685 char out8[10];
686
687 if(
688 (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withLead16, UPRV_LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) ||
689 (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) ||
690 (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withLead8, UPRV_LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) ||
691 (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND)
692 ) {
693 log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n");
694 }
695
696 /* test error handling with substitution characters */
697
698 /* from UTF-8 with length */
699 err=U_ZERO_ERROR;
700 numSubstitutions=-1;
701 out16[0]=0x55aa;
702 uDestLen=0;
703 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
704 (const char *)withTrail8, uprv_strlen((const char *)withTrail8),
705 0x50005, &numSubstitutions,
706 &err);
707 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) ||
708 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) ||
709 numSubstitutions!=1) {
710 log_err("error: u_strFromUTF8WithSub(length) failed\n");
711 }
712
713 /* from UTF-8 with NUL termination */
714 err=U_ZERO_ERROR;
715 numSubstitutions=-1;
716 out16[0]=0x55aa;
717 uDestLen=0;
718 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
719 (const char *)withTrail8, -1,
720 0xfffd, &numSubstitutions,
721 &err);
722 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) ||
723 0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) ||
724 numSubstitutions!=1) {
725 log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n");
726 }
727
728 /* preflight from UTF-8 with NUL termination */
729 err=U_ZERO_ERROR;
730 numSubstitutions=-1;
731 out16[0]=0x55aa;
732 uDestLen=0;
733 u_strFromUTF8WithSub(out16, 1, &uDestLen,
734 (const char *)withTrail8, -1,
735 0x50005, &numSubstitutions,
736 &err);
737 if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=1) {
738 log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n");
739 }
740
741 /* to UTF-8 with length */
742 err=U_ZERO_ERROR;
743 numSubstitutions=-1;
744 out8[0]=(char)0xf5;
745 u8DestLen=0;
746 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
747 withTrail16, u_strlen(withTrail16),
748 0xfffd, &numSubstitutions,
749 &err);
750 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
751 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) ||
752 numSubstitutions!=1) {
753 log_err("error: u_strToUTF8WithSub(length) failed\n");
754 }
755
756 /* to UTF-8 with NUL termination */
757 err=U_ZERO_ERROR;
758 numSubstitutions=-1;
759 out8[0]=(char)0xf5;
760 u8DestLen=0;
761 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
762 withTrail16, -1,
763 0x1a, &numSubstitutions,
764 &err);
765 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) ||
766 0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) ||
767 numSubstitutions!=1) {
768 log_err("error: u_strToUTF8WithSub(NUL termination) failed\n");
769 }
770
771 /* preflight to UTF-8 with NUL termination */
772 err=U_ZERO_ERROR;
773 numSubstitutions=-1;
774 out8[0]=(char)0xf5;
775 u8DestLen=0;
776 u_strToUTF8WithSub(out8, 1, &u8DestLen,
777 withTrail16, -1,
778 0xfffd, &numSubstitutions,
779 &err);
780 if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
781 numSubstitutions!=1) {
782 log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n");
783 }
784
785 /* test that numSubstitutions==0 if there are no substitutions */
786
787 /* from UTF-8 with length (just first 3 bytes which are valid) */
788 err=U_ZERO_ERROR;
789 numSubstitutions=-1;
790 out16[0]=0x55aa;
791 uDestLen=0;
792 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
793 (const char *)withTrail8, 3,
794 0x50005, &numSubstitutions,
795 &err);
796 if(U_FAILURE(err) || uDestLen!=1 ||
797 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
798 numSubstitutions!=0) {
799 log_err("error: u_strFromUTF8WithSub(no subs) failed\n");
800 }
801
802 /* to UTF-8 with length (just first UChar which is valid) */
803 err=U_ZERO_ERROR;
804 numSubstitutions=-1;
805 out8[0]=(char)0xf5;
806 u8DestLen=0;
807 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
808 withTrail16, 1,
809 0xfffd, &numSubstitutions,
810 &err);
811 if(U_FAILURE(err) || u8DestLen!=3 ||
812 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
813 numSubstitutions!=0) {
814 log_err("error: u_strToUTF8WithSub(no subs) failed\n");
815 }
816
817 /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */
818
819 /* from UTF-8 with length (just first 3 bytes which are valid) */
820 err=U_ZERO_ERROR;
821 numSubstitutions=-1;
822 out16[0]=0x55aa;
823 uDestLen=0;
824 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
825 (const char *)withTrail8, 3,
826 U_SENTINEL, &numSubstitutions,
827 &err);
828 if(U_FAILURE(err) || uDestLen!=1 ||
829 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
830 numSubstitutions!=0) {
831 log_err("error: u_strFromUTF8WithSub(no subchar) failed\n");
832 }
833
834 /* to UTF-8 with length (just first UChar which is valid) */
835 err=U_ZERO_ERROR;
836 numSubstitutions=-1;
837 out8[0]=(char)0xf5;
838 u8DestLen=0;
839 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
840 withTrail16, 1,
841 U_SENTINEL, &numSubstitutions,
842 &err);
843 if(U_FAILURE(err) || u8DestLen!=3 ||
844 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
845 numSubstitutions!=0) {
846 log_err("error: u_strToUTF8WithSub(no subchar) failed\n");
847 }
848 }
849 {
850 /*
851 * Test with an illegal lead byte that would be followed by more than 3 trail bytes.
852 * See ticket #10371.
853 */
854 static const char src[1]={ (char)0xf8 };
855 UChar out16[10];
856 err=U_ZERO_ERROR;
857 u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, src, 1, &err);
858 if(err!=U_INVALID_CHAR_FOUND) {
859 log_err("error: u_strFromUTF8(5-byte lead byte) failed\n");
860 }
861 }
862 }
863
864 /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */
865 static UBool
866 equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) {
867 UChar c1, c2;
868
869 while(length>0) {
870 c1=*s++;
871 c2=*t++;
872 if(c1!=c2 && c2!=0xfffd) {
873 return FALSE;
874 }
875 --length;
876 }
877 return TRUE;
878 }
879
880 /* test u_strFromUTF8Lenient() */
881 static void
882 Test_FromUTF8(void) {
883 /*
884 * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)"
885 */
886 static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 };
887 UChar dest[64];
888 UChar *destPointer;
889 int32_t destLength;
890 UErrorCode errorCode;
891
892 /* 3 bytes input, one UChar output (U+095C) */
893 errorCode=U_ZERO_ERROR;
894 destLength=-99;
895 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode);
896 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
897 log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n",
898 (long)destLength, u_errorName(errorCode));
899 }
900
901 /* 4 bytes input, two UChars output (U+095C U+0000) */
902 errorCode=U_ZERO_ERROR;
903 destLength=-99;
904 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode);
905 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) {
906 log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n",
907 (long)destLength, u_errorName(errorCode));
908 }
909
910 /* NUL-terminated 3 bytes input, one UChar output (U+095C) */
911 errorCode=U_ZERO_ERROR;
912 destLength=-99;
913 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode);
914 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
915 log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n",
916 (long)destLength, u_errorName(errorCode));
917 }
918
919 /* 3 bytes input, one UChar output (U+095C), transform not just preflight */
920 errorCode=U_ZERO_ERROR;
921 dest[0]=dest[1]=99;
922 destLength=-99;
923 destPointer=u_strFromUTF8(dest, UPRV_LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode);
924 if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) {
925 log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n",
926 (long)destLength, u_errorName(errorCode));
927 }
928 }
929
930 /* test u_strFromUTF8Lenient() */
931 static void
932 Test_FromUTF8Lenient(void) {
933 /*
934 * Multiple input strings, each NUL-terminated.
935 * Terminate with a string starting with 0xff.
936 */
937 static const uint8_t bytes[]={
938 /* well-formed UTF-8 */
939 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80,
940 0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0,
941
942 /* various malformed sequences */
943 0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0,
944
945 /* truncated input */
946 0xc3, 0,
947 0xe0, 0,
948 0xe0, 0xa0, 0,
949 0xf0, 0,
950 0xf0, 0x90, 0,
951 0xf0, 0x90, 0x80, 0,
952
953 /* non-ASCII characters in the last few bytes */
954 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0,
955 0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0,
956
957 /* empty string */
958 0,
959
960 /* finish */
961 0xff, 0
962 };
963
964 /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */
965 static const UChar uchars[]={
966 0x61, 0xdf, 0x800, 0xd840, 0xdc00,
967 0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0,
968
969 0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0,
970
971 0xfffd, 0,
972 0xfffd, 0,
973 0xfffd, 0,
974 0xfffd, 0,
975 0xfffd, 0,
976 0xfffd, 0,
977
978 0x61, 0xdf, 0x800, 0,
979 0x61, 0x800, 0xdf, 0,
980
981 0,
982
983 0
984 };
985
986 UChar dest[64];
987 const char *pb;
988 const UChar *pu, *pDest;
989 int32_t srcLength, destLength0, destLength;
990 int number;
991 UErrorCode errorCode;
992
993 /* verify checking for some illegal arguments */
994 dest[0]=0x1234;
995 destLength=-1;
996 errorCode=U_ZERO_ERROR;
997 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
998 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
999 log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
1000 }
1001
1002 dest[0]=0x1234;
1003 destLength=-1;
1004 errorCode=U_ZERO_ERROR;
1005 pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
1006 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1007 log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
1008 }
1009
1010 dest[0]=0x1234;
1011 destLength=-1;
1012 errorCode=U_MEMORY_ALLOCATION_ERROR;
1013 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
1014 if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
1015 log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
1016 }
1017
1018 dest[0]=0x1234;
1019 destLength=-1;
1020 errorCode=U_MEMORY_ALLOCATION_ERROR;
1021 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, NULL);
1022 if(dest[0]!=0x1234) {
1023 log_err("u_strFromUTF8Lenient(pErrorCode=NULL) failed\n");
1024 }
1025
1026 /* test normal behavior */
1027 number=0; /* string number for log_err() */
1028
1029 for(pb=(const char *)bytes, pu=uchars;
1030 *pb!=(char)0xff;
1031 pb+=srcLength+1, pu+=destLength0+1, ++number
1032 ) {
1033 srcLength=uprv_strlen(pb);
1034 destLength0=u_strlen(pu);
1035
1036 /* preflighting with NUL-termination */
1037 dest[0]=0x1234;
1038 destLength=-1;
1039 errorCode=U_ZERO_ERROR;
1040 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
1041 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1042 pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
1043 ) {
1044 log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
1045 }
1046
1047 /* preflighting/some capacity with NUL-termination */
1048 if(srcLength>0) {
1049 dest[destLength0-1]=0x1234;
1050 destLength=-1;
1051 errorCode=U_ZERO_ERROR;
1052 pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
1053 if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1054 dest[destLength0-1]!=0x1234 || destLength!=destLength0
1055 ) {
1056 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
1057 }
1058 }
1059
1060 /* conversion with NUL-termination, much capacity */
1061 dest[0]=dest[destLength0]=0x1234;
1062 destLength=-1;
1063 errorCode=U_ZERO_ERROR;
1064 pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, -1, &errorCode);
1065 if (errorCode!=U_ZERO_ERROR ||
1066 pDest!=dest || dest[destLength0]!=0 ||
1067 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1068 ) {
1069 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
1070 }
1071
1072 /* conversion with NUL-termination, exact capacity */
1073 dest[0]=dest[destLength0]=0x1234;
1074 destLength=-1;
1075 errorCode=U_ZERO_ERROR;
1076 pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
1077 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1078 pDest!=dest || dest[destLength0]!=0x1234 ||
1079 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1080 ) {
1081 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
1082 }
1083
1084 /* preflighting with length */
1085 dest[0]=0x1234;
1086 destLength=-1;
1087 errorCode=U_ZERO_ERROR;
1088 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
1089 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1090 pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
1091 ) {
1092 log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
1093 }
1094
1095 /* preflighting/some capacity with length */
1096 if(srcLength>0) {
1097 dest[srcLength-1]=0x1234;
1098 destLength=-1;
1099 errorCode=U_ZERO_ERROR;
1100 pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
1101 if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1102 dest[srcLength-1]!=0x1234 || destLength!=srcLength
1103 ) {
1104 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
1105 }
1106 }
1107
1108 /* conversion with length, much capacity */
1109 dest[0]=dest[destLength0]=0x1234;
1110 destLength=-1;
1111 errorCode=U_ZERO_ERROR;
1112 pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
1113 if (errorCode!=U_ZERO_ERROR ||
1114 pDest!=dest || dest[destLength0]!=0 ||
1115 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1116 ) {
1117 log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
1118 }
1119
1120 /* conversion with length, srcLength capacity */
1121 dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
1122 destLength=-1;
1123 errorCode=U_ZERO_ERROR;
1124 pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
1125 if(srcLength==destLength0) {
1126 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1127 pDest!=dest || dest[destLength0]!=0x1234 ||
1128 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1129 ) {
1130 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
1131 }
1132 } else {
1133 if (errorCode!=U_ZERO_ERROR ||
1134 pDest!=dest || dest[destLength0]!=0 ||
1135 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1136 ) {
1137 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
1138 }
1139 }
1140 }
1141 }
1142
/*
 * ASCII-only UTF-16 sample text for the wchar_t round-trip tests:
 * four lines of eight letters, each ended by CR LF, followed by one
 * terminating NUL (41 code units in total).
 */
static const uint16_t src16j[] = {
    /* "CDEFGHIJ" CR LF */
    0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,  0x0D, 0x0A,
    /* "KLMNOPQR" CR LF */
    0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52,  0x0D, 0x0A,
    /* "STUVWXYZ" CR LF */
    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,  0x0D, 0x0A,
    /* "STUVWXYZ" CR LF (repeated) */
    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,  0x0D, 0x0A,
    /* terminator */
    0x00
};
/*
 * ASCII-only UTF-16 sample text with embedded NULs for the wchar_t
 * round-trip tests: eight groups of five code units, each group followed
 * by a NUL (48 code units in total, the last one being NUL).
 * Only ASCII is tested here; an earlier block of U+00A8..U+00E9 values
 * was left out of the table on purpose.
 */
static const uint16_t src16WithNulls[] = {
    0x43, 0x44, 0x45, 0x46, 0x47,  0x00,    /* "CDEFG" NUL */
    0x48, 0x49, 0x4A, 0x0D, 0x0A,  0x00,    /* "HIJ" CR LF NUL */
    0x4B, 0x4C, 0x4D, 0x4E, 0x4F,  0x00,    /* "KLMNO" NUL */
    0x50, 0x51, 0x52, 0x0D, 0x0A,  0x00,    /* "PQR" CR LF NUL */
    0x53, 0x54, 0x55, 0x56, 0x57,  0x00,    /* "STUVW" NUL */
    0x58, 0x59, 0x5A, 0x0D, 0x0A,  0x00,    /* "XYZ" CR LF NUL */
    0x53, 0x54, 0x55, 0x56, 0x57,  0x00,    /* "STUVW" NUL */
    0x58, 0x59, 0x5A, 0x0D, 0x0A,  0x00     /* "XYZ" CR LF NUL */
};
1173 static void Test_UChar_WCHART_API(void){
1174 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1175 UErrorCode err = U_ZERO_ERROR;
1176 const UChar* uSrc = src16j;
1177 int32_t uSrcLen = sizeof(src16j)/2;
1178 wchar_t* wDest = NULL;
1179 int32_t wDestLen = 0;
1180 int32_t reqLen= 0 ;
1181 UBool failed = FALSE;
1182 UChar* uDest = NULL;
1183 int32_t uDestLen = 0;
1184 int i =0;
1185 {
1186 /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */
1187 if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1188 log_err("u_strFromWCS() should return NULL with a bad argument\n");
1189 }
1190 if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1191 log_err("u_strToWCS() should return NULL with a bad argument\n");
1192 }
1193
1194 /* NULL source & destination. */
1195 err = U_ZERO_ERROR;
1196 u_strFromWCS(NULL,0,NULL,NULL,0,&err);
1197 if (err != U_STRING_NOT_TERMINATED_WARNING) {
1198 log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1199 }
1200 err = U_ZERO_ERROR;
1201 u_strToWCS(NULL,0,NULL,NULL,0,&err);
1202 if (err != U_STRING_NOT_TERMINATED_WARNING) {
1203 log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1204 }
1205 err = U_ZERO_ERROR;
1206
1207 /* pre-flight*/
1208 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1209
1210 if(err == U_BUFFER_OVERFLOW_ERROR){
1211 err=U_ZERO_ERROR;
1212 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1213 wDestLen = reqLen+1;
1214 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1215 }
1216
1217 /* pre-flight */
1218 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1219
1220
1221 if(err == U_BUFFER_OVERFLOW_ERROR){
1222 err =U_ZERO_ERROR;
1223 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1224 uDestLen = reqLen + 1;
1225 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1226 }else if(U_FAILURE(err)){
1227
1228 log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err));
1229 return;
1230 }
1231
1232 for(i=0; i< uSrcLen; i++){
1233 if(uDest[i] != src16j[i]){
1234 log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1235 failed =TRUE;
1236 }
1237 }
1238
1239 if(U_FAILURE(err)){
1240 failed = TRUE;
1241 }
1242 if(failed){
1243 log_err("u_strToWCS() failed \n");
1244 }
1245 free(wDest);
1246 free(uDest);
1247
1248
1249 /* test with embeded nulls */
1250 uSrc = src16WithNulls;
1251 uSrcLen = sizeof(src16WithNulls)/2;
1252 wDestLen =0;
1253 uDestLen =0;
1254 wDest = NULL;
1255 uDest = NULL;
1256 /* pre-flight*/
1257 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1258
1259 if(err == U_BUFFER_OVERFLOW_ERROR){
1260 err=U_ZERO_ERROR;
1261 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1262 wDestLen = reqLen+1;
1263 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1264 }
1265
1266 /* pre-flight */
1267 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1268
1269 if(err == U_BUFFER_OVERFLOW_ERROR){
1270 err =U_ZERO_ERROR;
1271 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1272 uDestLen = reqLen + 1;
1273 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1274 }
1275
1276 if(!U_FAILURE(err)) {
1277 for(i=0; i< uSrcLen; i++){
1278 if(uDest[i] != src16WithNulls[i]){
1279 log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i);
1280 failed =TRUE;
1281 }
1282 }
1283 }
1284
1285 if(U_FAILURE(err)){
1286 failed = TRUE;
1287 }
1288 if(failed){
1289 log_err("u_strToWCS() failed \n");
1290 }
1291 free(wDest);
1292 free(uDest);
1293
1294 }
1295
1296 {
1297
1298 uSrc = src16j;
1299 uSrcLen = sizeof(src16j)/2;
1300 wDestLen =0;
1301 uDestLen =0;
1302 wDest = NULL;
1303 uDest = NULL;
1304 wDestLen = 0;
1305 /* pre-flight*/
1306 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1307
1308 if(err == U_BUFFER_OVERFLOW_ERROR){
1309 err=U_ZERO_ERROR;
1310 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1311 wDestLen = reqLen+1;
1312 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1313 }
1314 uDestLen = 0;
1315 /* pre-flight */
1316 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1317
1318 if(err == U_BUFFER_OVERFLOW_ERROR){
1319 err =U_ZERO_ERROR;
1320 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1321 uDestLen = reqLen + 1;
1322 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1323 }
1324
1325
1326 if(!U_FAILURE(err)) {
1327 for(i=0; i< uSrcLen; i++){
1328 if(uDest[i] != src16j[i]){
1329 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1330 failed =TRUE;
1331 }
1332 }
1333 }
1334
1335 if(U_FAILURE(err)){
1336 failed = TRUE;
1337 }
1338 if(failed){
1339 log_err("u_strToWCS() failed \n");
1340 }
1341 free(wDest);
1342 free(uDest);
1343 }
1344
1345 /*
1346 * Test u_terminateWChars().
1347 * All u_terminateXYZ() use the same implementation macro;
1348 * we test this function to improve API coverage.
1349 */
1350 {
1351 wchar_t buffer[10];
1352
1353 err=U_ZERO_ERROR;
1354 buffer[3]=0x20ac;
1355 wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1356 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1357 log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n",
1358 u_errorName(err), wDestLen, buffer[3]);
1359 }
1360
1361 err=U_ZERO_ERROR;
1362 buffer[3]=0x20ac;
1363 wDestLen=u_terminateWChars(buffer, 3, 3, &err);
1364 if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) {
1365 log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n",
1366 u_errorName(err), wDestLen, buffer[3]);
1367 }
1368
1369 err=U_STRING_NOT_TERMINATED_WARNING;
1370 buffer[3]=0x20ac;
1371 wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1372 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1373 log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n",
1374 u_errorName(err), wDestLen, buffer[3]);
1375 }
1376
1377 err=U_ZERO_ERROR;
1378 buffer[3]=0x20ac;
1379 wDestLen=u_terminateWChars(buffer, 2, 3, &err);
1380 if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) {
1381 log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n",
1382 u_errorName(err), wDestLen, buffer[3]);
1383 }
1384 }
1385 #else
1386 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1387 #endif
1388 }
1389
1390 static void Test_widestrs()
1391 {
1392 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1393 wchar_t ws[100];
1394 UChar rts[100];
1395 int32_t wcap = UPRV_LENGTHOF(ws);
1396 int32_t wl;
1397 int32_t rtcap = UPRV_LENGTHOF(rts);
1398 int32_t rtl;
1399 wchar_t *wcs;
1400 UChar *cp;
1401 const char *errname;
1402 UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0};
1403 int32_t ul = UPRV_LENGTHOF(ustr) -1;
1404 char astr[100];
1405
1406 UErrorCode err;
1407
1408 err = U_ZERO_ERROR;
1409 wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err);
1410 if (U_FAILURE(err)) {
1411 errname = u_errorName(err);
1412 log_err("test_widestrs: u_strToWCS error: %s!\n",errname);
1413 }
1414 if(ul!=wl){
1415 log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl);
1416 }
1417 err = U_ZERO_ERROR;
1418 wl = (int32_t)uprv_wcslen(wcs);
1419 cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err);
1420 (void)cp; /* Suppress set but not used warning. */
1421 if (U_FAILURE(err)) {
1422 errname = u_errorName(err);
1423 fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname);
1424 }
1425 if(wl != rtl){
1426 log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl);
1427 }
1428 #else
1429 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1430 #endif
1431 }
1432
1433 static void
1434 Test_WCHART_LongString(){
1435 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1436 UErrorCode status = U_ZERO_ERROR;
1437 const char* testdatapath=loadTestData(&status);
1438 UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status);
1439 int32_t strLen =0;
1440 const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status);
1441 const UChar* uSrc = str;
1442 int32_t uSrcLen = strLen;
1443 int32_t wDestLen =0, reqLen=0, i=0;
1444 int32_t uDestLen =0;
1445 wchar_t* wDest = NULL;
1446 UChar* uDest = NULL;
1447 UBool failed = FALSE;
1448
1449 log_verbose("Loaded string of %d UChars\n", uSrcLen);
1450
1451 if(U_FAILURE(status)){
1452 log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status));
1453 return;
1454 }
1455
1456 /* pre-flight*/
1457 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1458
1459 if(status == U_BUFFER_OVERFLOW_ERROR){
1460 status=U_ZERO_ERROR;
1461 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1462 wDestLen = reqLen+1;
1463 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1464 log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t));
1465 }
1466
1467 {
1468 int j;
1469 for(j=0;j>=0&&j<reqLen;j++) {
1470 if(wDest[j]!=uSrc[j]) {
1471 log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j);
1472 break;
1473 }
1474 }
1475 }
1476
1477 uDestLen = 0;
1478 /* pre-flight */
1479 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1480 if(status == U_BUFFER_OVERFLOW_ERROR){
1481 status =U_ZERO_ERROR;
1482 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1483 u_memset(uDest,0xFFFF,reqLen+1);
1484 uDestLen = reqLen + 1;
1485 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1486 log_verbose("Back to %d UChars\n", reqLen);
1487 }
1488 #if defined(U_WCHAR_IS_UTF16)
1489 log_verbose("U_WCHAR_IS_UTF16\n");
1490 #elif defined(U_WCHAR_IS_UTF32)
1491 log_verbose("U_WCHAR_IS_UTF32\n");
1492 #else
1493 log_verbose("U_WCHAR_IS_idunno (not UTF)\n");
1494 #endif
1495
1496 if(reqLen!=uSrcLen) {
1497 log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen);
1498 }
1499
1500 for(i=0; i< uSrcLen; i++){
1501 if(uDest[i] != str[i]){
1502 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i);
1503 failed =TRUE;
1504 }
1505 }
1506
1507 if(U_FAILURE(status)){
1508 failed = TRUE;
1509 }
1510 if(failed){
1511 log_err("u_strToWCS() failed \n");
1512 }
1513 free(wDest);
1514 free(uDest);
1515 /* close the bundle */
1516 ures_close(theBundle);
1517 #else
1518 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1519 #endif
1520 }
1521
1522 static void Test_strToJavaModifiedUTF8() {
1523 static const UChar src[]={
1524 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1525 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1526 0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1527 0xdbff, 0xdfff,
1528 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f
1529 };
1530 static const uint8_t expected[]={
1531 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1532 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1533 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1534 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80,
1535 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1536 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f
1537 };
1538 static const UChar shortSrc[]={
1539 0xe01, 0xe1, 0x61
1540 };
1541 static const uint8_t shortExpected[]={
1542 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1543 };
1544 static const UChar asciiNul[]={
1545 0x61, 0x62, 0x63, 0
1546 };
1547 static const uint8_t asciiNulExpected[]={
1548 0x61, 0x62, 0x63
1549 };
1550 char dest[200];
1551 char *p;
1552 int32_t length, expectedTerminatedLength;
1553 UErrorCode errorCode;
1554
1555 expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")-
1556 (const char *)expected);
1557
1558 errorCode=U_ZERO_ERROR;
1559 length=-5;
1560 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1561 src, UPRV_LENGTHOF(src), &errorCode);
1562 if( U_FAILURE(errorCode) || p!=dest ||
1563 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1564 dest[length]!=0
1565 ) {
1566 log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode));
1567 }
1568 memset(dest, 0xff, sizeof(dest));
1569 errorCode=U_ZERO_ERROR;
1570 length=-5;
1571 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL,
1572 src, UPRV_LENGTHOF(src), &errorCode);
1573 if( U_FAILURE(errorCode) || p!=dest ||
1574 0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1575 dest[UPRV_LENGTHOF(expected)]!=0
1576 ) {
1577 log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1578 }
1579 memset(dest, 0xff, sizeof(dest));
1580 errorCode=U_ZERO_ERROR;
1581 length=-5;
1582 p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected), &length,
1583 src, UPRV_LENGTHOF(src), &errorCode);
1584 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1585 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1586 dest[length]!=(char)0xff
1587 ) {
1588 log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode));
1589 }
1590 memset(dest, 0xff, sizeof(dest));
1591 errorCode=U_ZERO_ERROR;
1592 length=-5;
1593 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode);
1594 if( U_FAILURE(errorCode) || p!=dest ||
1595 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1596 dest[length]!=0
1597 ) {
1598 log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1599 }
1600 memset(dest, 0xff, sizeof(dest));
1601 errorCode=U_ZERO_ERROR;
1602 length=-5;
1603 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode);
1604 if( U_FAILURE(errorCode) || p!=dest ||
1605 0!=memcmp(dest, expected, expectedTerminatedLength) ||
1606 dest[expectedTerminatedLength]!=0
1607 ) {
1608 log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1609 }
1610 memset(dest, 0xff, sizeof(dest));
1611 errorCode=U_ZERO_ERROR;
1612 length=-5;
1613 p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected)/2, &length,
1614 src, UPRV_LENGTHOF(src), &errorCode);
1615 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1616 length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=(char)0xff
1617 ) {
1618 log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode));
1619 }
1620 memset(dest, 0xff, sizeof(dest));
1621 errorCode=U_ZERO_ERROR;
1622 length=-5;
1623 p=u_strToJavaModifiedUTF8(NULL, 0, &length,
1624 src, UPRV_LENGTHOF(src), &errorCode);
1625 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1626 length!=UPRV_LENGTHOF(expected) || dest[0]!=(char)0xff
1627 ) {
1628 log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode));
1629 }
1630 memset(dest, 0xff, sizeof(dest));
1631 errorCode=U_ZERO_ERROR;
1632 length=-5;
1633 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1634 shortSrc, UPRV_LENGTHOF(shortSrc), &errorCode);
1635 if( U_FAILURE(errorCode) || p!=dest ||
1636 length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1637 dest[length]!=0
1638 ) {
1639 log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode));
1640 }
1641 memset(dest, 0xff, sizeof(dest));
1642 errorCode=U_ZERO_ERROR;
1643 length=-5;
1644 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1645 asciiNul, -1, &errorCode);
1646 if( U_FAILURE(errorCode) || p!=dest ||
1647 length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1648 dest[length]!=0
1649 ) {
1650 log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode));
1651 }
1652 memset(dest, 0xff, sizeof(dest));
1653 errorCode=U_ZERO_ERROR;
1654 length=-5;
1655 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1656 NULL, 0, &errorCode);
1657 if( U_FAILURE(errorCode) || p!=dest ||
1658 length!=0 || dest[0]!=0
1659 ) {
1660 log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode));
1661 }
1662
1663 /* illegal arguments */
1664 memset(dest, 0xff, sizeof(dest));
1665 errorCode=U_ZERO_ERROR;
1666 length=-5;
1667 p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length,
1668 src, UPRV_LENGTHOF(src), &errorCode);
1669 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1670 log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode));
1671 }
1672 memset(dest, 0xff, sizeof(dest));
1673 errorCode=U_ZERO_ERROR;
1674 length=-5;
1675 p=u_strToJavaModifiedUTF8(dest, -1, &length,
1676 src, UPRV_LENGTHOF(src), &errorCode);
1677 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1678 log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1679 }
1680 memset(dest, 0xff, sizeof(dest));
1681 errorCode=U_ZERO_ERROR;
1682 length=-5;
1683 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1684 NULL, UPRV_LENGTHOF(src), &errorCode);
1685 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1686 log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode));
1687 }
1688 memset(dest, 0xff, sizeof(dest));
1689 errorCode=U_ZERO_ERROR;
1690 length=-5;
1691 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1692 NULL, -1, &errorCode);
1693 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1694 log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1695 }
1696 }
1697
1698 static void Test_strFromJavaModifiedUTF8() {
1699 static const uint8_t src[]={
1700 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1701 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1702 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1703 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0,
1704 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1705 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80, /* invalid sequences */
1706 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1707 0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad, /* non-shortest forms are allowed */
1708 0xe0, 0xb8, 0x8e, 0x6f
1709 };
1710 static const UChar expected[]={
1711 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1712 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1713 0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1714 0xdbff, 0xdfff,
1715 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1716 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1717 0x6c, 0xed,
1718 0xe0e, 0x6f
1719 };
1720 static const uint8_t shortSrc[]={
1721 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1722 };
1723 static const UChar shortExpected[]={
1724 0xe01, 0xe1, 0x61
1725 };
1726 static const uint8_t asciiNul[]={
1727 0x61, 0x62, 0x63, 0
1728 };
1729 static const UChar asciiNulExpected[]={
1730 0x61, 0x62, 0x63
1731 };
1732 static const uint8_t invalid[]={
1733 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80
1734 };
1735 static const UChar invalidExpectedFFFD[]={
1736 0xfffd, 0xfffd, 0xfffd, 0xfffd
1737 };
1738 static const UChar invalidExpected50000[]={
1739 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00
1740 };
1741 UChar dest[200];
1742 UChar *p;
1743 int32_t length, expectedTerminatedLength;
1744 int32_t numSubstitutions;
1745 UErrorCode errorCode;
1746
1747 expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected);
1748
1749 errorCode=U_ZERO_ERROR;
1750 length=numSubstitutions=-5;
1751 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1752 (const char *)src, UPRV_LENGTHOF(src),
1753 0xfffd, &numSubstitutions, &errorCode);
1754 if( U_FAILURE(errorCode) || p!=dest ||
1755 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1756 dest[length]!=0 ||
1757 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1758 ) {
1759 log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode));
1760 }
1761 memset(dest, 0xff, sizeof(dest));
1762 errorCode=U_ZERO_ERROR;
1763 length=numSubstitutions=-5;
1764 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1765 (const char *)src, UPRV_LENGTHOF(src),
1766 0xfffd, &numSubstitutions, &errorCode);
1767 if( U_FAILURE(errorCode) || p!=dest ||
1768 0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1769 dest[UPRV_LENGTHOF(expected)]!=0 ||
1770 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1771 ) {
1772 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1773 }
1774 memset(dest, 0xff, sizeof(dest));
1775 errorCode=U_ZERO_ERROR;
1776 length=numSubstitutions=-5;
1777 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1778 (const char *)src, UPRV_LENGTHOF(src),
1779 0xfffd, NULL, &errorCode);
1780 if( U_FAILURE(errorCode) || p!=dest ||
1781 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1782 dest[length]!=0
1783 ) {
1784 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1785 }
1786 memset(dest, 0xff, sizeof(dest));
1787 errorCode=U_ZERO_ERROR;
1788 length=numSubstitutions=-5;
1789 p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected), &length,
1790 (const char *)src, UPRV_LENGTHOF(src),
1791 0xfffd, &numSubstitutions, &errorCode);
1792 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1793 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1794 dest[length]!=0xffff ||
1795 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1796 ) {
1797 log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode));
1798 }
1799 memset(dest, 0xff, sizeof(dest));
1800 errorCode=U_ZERO_ERROR;
1801 length=numSubstitutions=-5;
1802 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1803 (const char *)src, -1,
1804 0xfffd, &numSubstitutions, &errorCode);
1805 if( U_FAILURE(errorCode) || p!=dest ||
1806 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1807 dest[length]!=0 ||
1808 numSubstitutions!=0
1809 ) {
1810 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1811 }
1812 memset(dest, 0xff, sizeof(dest));
1813 errorCode=U_ZERO_ERROR;
1814 length=numSubstitutions=-5;
1815 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1816 (const char *)src, -1,
1817 0xfffd, &numSubstitutions, &errorCode);
1818 if( U_FAILURE(errorCode) || p!=dest ||
1819 0!=memcmp(dest, expected, expectedTerminatedLength) ||
1820 dest[expectedTerminatedLength]!=0 ||
1821 numSubstitutions!=0
1822 ) {
1823 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1824 }
1825 memset(dest, 0xff, sizeof(dest));
1826 errorCode=U_ZERO_ERROR;
1827 length=numSubstitutions=-5;
1828 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1829 (const char *)src, -1,
1830 0xfffd, NULL, &errorCode);
1831 if( U_FAILURE(errorCode) || p!=dest ||
1832 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1833 dest[length]!=0
1834 ) {
1835 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1836 }
1837 memset(dest, 0xff, sizeof(dest));
1838 errorCode=U_ZERO_ERROR;
1839 length=numSubstitutions=-5;
1840 p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected)/2, &length,
1841 (const char *)src, UPRV_LENGTHOF(src),
1842 0xfffd, &numSubstitutions, &errorCode);
1843 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1844 length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=0xffff
1845 ) {
1846 log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode));
1847 }
1848 memset(dest, 0xff, sizeof(dest));
1849 errorCode=U_ZERO_ERROR;
1850 length=numSubstitutions=-5;
1851 p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length,
1852 (const char *)src, UPRV_LENGTHOF(src),
1853 0xfffd, &numSubstitutions, &errorCode);
1854 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1855 length!=UPRV_LENGTHOF(expected) || dest[0]!=0xffff
1856 ) {
1857 log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode));
1858 }
1859 memset(dest, 0xff, sizeof(dest));
1860 errorCode=U_ZERO_ERROR;
1861 length=numSubstitutions=-5;
1862 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1863 (const char *)shortSrc, UPRV_LENGTHOF(shortSrc),
1864 0xfffd, &numSubstitutions, &errorCode);
1865 if( U_FAILURE(errorCode) || p!=dest ||
1866 length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1867 dest[length]!=0 ||
1868 numSubstitutions!=0
1869 ) {
1870 log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode));
1871 }
1872 memset(dest, 0xff, sizeof(dest));
1873 errorCode=U_ZERO_ERROR;
1874 length=numSubstitutions=-5;
1875 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1876 (const char *)asciiNul, -1,
1877 0xfffd, &numSubstitutions, &errorCode);
1878 if( U_FAILURE(errorCode) || p!=dest ||
1879 length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1880 dest[length]!=0 ||
1881 numSubstitutions!=0
1882 ) {
1883 log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode));
1884 }
1885 memset(dest, 0xff, sizeof(dest));
1886 errorCode=U_ZERO_ERROR;
1887 length=numSubstitutions=-5;
1888 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1889 NULL, 0, 0xfffd, &numSubstitutions, &errorCode);
1890 if( U_FAILURE(errorCode) || p!=dest ||
1891 length!=0 || dest[0]!=0 ||
1892 numSubstitutions!=0
1893 ) {
1894 log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode));
1895 }
1896 memset(dest, 0xff, sizeof(dest));
1897 errorCode=U_ZERO_ERROR;
1898 length=numSubstitutions=-5;
1899 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1900 (const char *)invalid, UPRV_LENGTHOF(invalid),
1901 0xfffd, &numSubstitutions, &errorCode);
1902 if( U_FAILURE(errorCode) || p!=dest ||
1903 length!=UPRV_LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) ||
1904 dest[length]!=0 ||
1905 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1906 ) {
1907 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode));
1908 }
1909 memset(dest, 0xff, sizeof(dest));
1910 errorCode=U_ZERO_ERROR;
1911 length=numSubstitutions=-5;
1912 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1913 (const char *)invalid, UPRV_LENGTHOF(invalid),
1914 0x50000, &numSubstitutions, &errorCode);
1915 if( U_FAILURE(errorCode) || p!=dest ||
1916 length!=UPRV_LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) ||
1917 dest[length]!=0 ||
1918 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD) /* not ...50000 */
1919 ) {
1920 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode));
1921 }
1922 memset(dest, 0xff, sizeof(dest));
1923 errorCode=U_ZERO_ERROR;
1924 length=numSubstitutions=-5;
1925 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1926 (const char *)invalid, UPRV_LENGTHOF(invalid),
1927 U_SENTINEL, &numSubstitutions, &errorCode);
1928 if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) {
1929 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode));
1930 }
1931 memset(dest, 0xff, sizeof(dest));
1932 errorCode=U_ZERO_ERROR;
1933 length=numSubstitutions=-5;
1934 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1935 (const char *)src, UPRV_LENGTHOF(src),
1936 U_SENTINEL, &numSubstitutions, &errorCode);
1937 if( errorCode!=U_INVALID_CHAR_FOUND ||
1938 length>=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)-1]!=0xffff ||
1939 numSubstitutions!=0
1940 ) {
1941 log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode));
1942 }
1943
1944 /* illegal arguments */
1945 memset(dest, 0xff, sizeof(dest));
1946 errorCode=U_ZERO_ERROR;
1947 length=numSubstitutions=-5;
1948 p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length,
1949 (const char *)src, UPRV_LENGTHOF(src),
1950 0xfffd, &numSubstitutions, &errorCode);
1951 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1952 log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode));
1953 }
1954 memset(dest, 0xff, sizeof(dest));
1955 errorCode=U_ZERO_ERROR;
1956 length=numSubstitutions=-5;
1957 p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length,
1958 (const char *)src, UPRV_LENGTHOF(src),
1959 0xfffd, &numSubstitutions, &errorCode);
1960 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1961 log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1962 }
1963 memset(dest, 0xff, sizeof(dest));
1964 errorCode=U_ZERO_ERROR;
1965 length=numSubstitutions=-5;
1966 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1967 NULL, UPRV_LENGTHOF(src),
1968 0xfffd, &numSubstitutions, &errorCode);
1969 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1970 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode));
1971 }
1972 memset(dest, 0xff, sizeof(dest));
1973 errorCode=U_ZERO_ERROR;
1974 length=numSubstitutions=-5;
1975 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1976 NULL, -1, 0xfffd, &numSubstitutions, &errorCode);
1977 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1978 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1979 }
1980 memset(dest, 0xff, sizeof(dest));
1981 errorCode=U_ZERO_ERROR;
1982 length=numSubstitutions=-5;
1983 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1984 (const char *)src, UPRV_LENGTHOF(src),
1985 0x110000, &numSubstitutions, &errorCode);
1986 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1987 log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode));
1988 }
1989 memset(dest, 0xff, sizeof(dest));
1990 errorCode=U_ZERO_ERROR;
1991 length=numSubstitutions=-5;
1992 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1993 (const char *)src, UPRV_LENGTHOF(src),
1994 0xdfff, &numSubstitutions, &errorCode);
1995 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1996 log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode));
1997 }
1998 }
1999
2000 /* test that string transformation functions permit NULL source pointer when source length==0 */
2001 static void TestNullEmptySource() {
2002 char dest8[4]={ 3, 3, 3, 3 };
2003 UChar dest16[4]={ 3, 3, 3, 3 };
2004 UChar32 dest32[4]={ 3, 3, 3, 3 };
2005 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2006 wchar_t destW[4]={ 3, 3, 3, 3 };
2007 #endif
2008
2009 int32_t length;
2010 UErrorCode errorCode;
2011
2012 /* u_strFromXyz() */
2013
2014 dest16[0]=3;
2015 length=3;
2016 errorCode=U_ZERO_ERROR;
2017 u_strFromUTF8(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2018 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2019 log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n");
2020 }
2021
2022 dest16[0]=3;
2023 length=3;
2024 errorCode=U_ZERO_ERROR;
2025 u_strFromUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2026 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2027 log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2028 }
2029
2030 dest16[0]=3;
2031 length=3;
2032 errorCode=U_ZERO_ERROR;
2033 u_strFromUTF8Lenient(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2034 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2035 log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n");
2036 }
2037
2038 dest16[0]=3;
2039 length=3;
2040 errorCode=U_ZERO_ERROR;
2041 u_strFromUTF32(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2042 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2043 log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n");
2044 }
2045
2046 dest16[0]=3;
2047 length=3;
2048 errorCode=U_ZERO_ERROR;
2049 u_strFromUTF32WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2050 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2051 log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2052 }
2053
2054 dest16[0]=3;
2055 length=3;
2056 errorCode=U_ZERO_ERROR;
2057 u_strFromJavaModifiedUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2058 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2059 log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2060 }
2061
2062 /* u_strToXyz() */
2063
2064 dest8[0]=3;
2065 length=3;
2066 errorCode=U_ZERO_ERROR;
2067 u_strToUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2068 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2069 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2070 }
2071
2072 dest8[0]=3;
2073 length=3;
2074 errorCode=U_ZERO_ERROR;
2075 u_strToUTF8WithSub(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2076 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2077 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2078 }
2079
2080 dest32[0]=3;
2081 length=3;
2082 errorCode=U_ZERO_ERROR;
2083 u_strToUTF32(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, &errorCode);
2084 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2085 log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n");
2086 }
2087
2088 dest32[0]=3;
2089 length=3;
2090 errorCode=U_ZERO_ERROR;
2091 u_strToUTF32WithSub(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2092 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2093 log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2094 }
2095
2096 dest8[0]=3;
2097 length=3;
2098 errorCode=U_ZERO_ERROR;
2099 u_strToJavaModifiedUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2100 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2101 log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n");
2102 }
2103
2104 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2105
2106 dest16[0]=3;
2107 length=3;
2108 errorCode=U_ZERO_ERROR;
2109 u_strFromWCS(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2110 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2111 log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n");
2112 }
2113
2114 destW[0]=3;
2115 length=3;
2116 errorCode=U_ZERO_ERROR;
2117 u_strToWCS(destW, UPRV_LENGTHOF(destW), &length, NULL, 0, &errorCode);
2118 if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) {
2119 log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n");
2120 }
2121
2122 #endif
2123 }