]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
4388f060 | 3 | * Copyright (c) 2001-2011, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ********************************************************************/ | |
6 | /******************************************************************************** | |
7 | * | |
8 | * File custrtrn.C | |
9 | * | |
10 | * Modification History: | |
11 | * Name Description | |
12 | * Ram String transformations test | |
13 | ********************************************************************************* | |
14 | */ | |
15 | /****************************************************************************/ | |
16 | ||
17 | ||
18 | #include <stdlib.h> | |
19 | #include <stdio.h> | |
729e4ab9 | 20 | #include <string.h> |
b75a7d8f A |
21 | #include "unicode/utypes.h" |
22 | #include "unicode/ustring.h" | |
374ca955 | 23 | #include "unicode/ures.h" |
b75a7d8f A |
24 | #include "ustr_imp.h" |
25 | #include "cintltst.h" | |
73c04bcf A |
26 | #include "cmemory.h" |
27 | #include "cstring.h" | |
374ca955 | 28 | #include "cwchar.h" |
b75a7d8f A |
29 | |
30 | #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
31 | ||
32 | void addUCharTransformTest(TestNode** root); | |
33 | ||
729e4ab9 A |
34 | static void Test_strToUTF32(void); |
35 | static void Test_strToUTF32_surrogates(void); | |
36 | static void Test_strFromUTF32(void); | |
37 | static void Test_strFromUTF32_surrogates(void); | |
b75a7d8f | 38 | static void Test_UChar_UTF8_API(void); |
46f4442e | 39 | static void Test_FromUTF8(void); |
73c04bcf | 40 | static void Test_FromUTF8Lenient(void); |
b75a7d8f | 41 | static void Test_UChar_WCHART_API(void); |
374ca955 A |
42 | static void Test_widestrs(void); |
43 | static void Test_WCHART_LongString(void); | |
729e4ab9 A |
44 | static void Test_strToJavaModifiedUTF8(void); |
45 | static void Test_strFromJavaModifiedUTF8(void); | |
46 | static void TestNullEmptySource(void); | |
b75a7d8f A |
47 | |
48 | void | |
49 | addUCharTransformTest(TestNode** root) | |
50 | { | |
729e4ab9 A |
51 | addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32"); |
52 | addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates"); | |
53 | addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32"); | |
54 | addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates"); | |
b75a7d8f | 55 | addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API"); |
46f4442e | 56 | addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8"); |
73c04bcf | 57 | addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient"); |
b75a7d8f | 58 | addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API"); |
374ca955 | 59 | addTest(root, &Test_widestrs, "custrtrn/Test_widestrs"); |
729e4ab9 | 60 | #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION |
374ca955 | 61 | addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString"); |
729e4ab9 A |
62 | #endif |
63 | addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8"); | |
64 | addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8"); | |
65 | addTest(root, &TestNullEmptySource, "custrtrn/TestNullEmptySource"); | |
b75a7d8f A |
66 | } |
67 | ||
68 | static const UChar32 src32[]={ | |
69 | 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, | |
70 | 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, | |
71 | 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, | |
72 | 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, | |
73 | 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, | |
74 | 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, | |
75 | 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, | |
76 | 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, | |
77 | 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, | |
78 | 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, | |
79 | 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, | |
80 | 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, | |
81 | 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, | |
82 | 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, | |
83 | 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, | |
84 | 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, | |
85 | 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, | |
86 | 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, | |
87 | 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, | |
88 | /* test non-BMP code points */ | |
89 | 0x0002A699, | |
90 | 0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB, | |
91 | 0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7, | |
92 | 0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1, | |
93 | 0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0, | |
94 | 0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5, | |
95 | ||
96 | 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, | |
97 | 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, | |
98 | 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, | |
99 | 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, | |
100 | 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000 | |
101 | }; | |
102 | ||
103 | static const UChar src16[] = { | |
104 | 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, | |
105 | 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, | |
106 | 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, | |
107 | 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, | |
108 | 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, | |
109 | 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, | |
110 | 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, | |
111 | 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, | |
112 | 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, | |
113 | 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, | |
114 | 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, | |
115 | 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, | |
116 | 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, | |
117 | 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, | |
118 | 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, | |
119 | 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, | |
120 | 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, | |
121 | 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, | |
122 | 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, | |
123 | ||
124 | /* test non-BMP code points */ | |
125 | 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, | |
126 | 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, | |
127 | 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, | |
128 | 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, | |
129 | 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, | |
130 | 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, | |
131 | 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, | |
132 | 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, | |
133 | 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, | |
134 | 0xD869, 0xDED5, | |
135 | ||
136 | 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, | |
137 | 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, | |
138 | 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, | |
139 | 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, | |
140 | 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000 | |
141 | }; | |
142 | ||
143 | ||
729e4ab9 | 144 | static void Test_strToUTF32(void){ |
b75a7d8f | 145 | UErrorCode err = U_ZERO_ERROR; |
729e4ab9 A |
146 | UChar32 u32Target[400]; |
147 | int32_t u32DestLen; | |
b75a7d8f | 148 | int i= 0; |
729e4ab9 A |
149 | |
150 | /* first with length */ | |
151 | u32DestLen = -2; | |
152 | u_strToUTF32(u32Target, 0, &u32DestLen, src16, LENGTHOF(src16),&err); | |
153 | if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != LENGTHOF(src32)) { | |
154 | log_err("u_strToUTF32(preflight with length): " | |
155 | "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", | |
156 | (long)u32DestLen, (long)LENGTHOF(src32), u_errorName(err)); | |
157 | return; | |
158 | } | |
159 | err = U_ZERO_ERROR; | |
160 | u32DestLen = -2; | |
161 | u_strToUTF32(u32Target, LENGTHOF(src32)+1, &u32DestLen, src16, LENGTHOF(src16),&err); | |
162 | if(err != U_ZERO_ERROR || u32DestLen != LENGTHOF(src32)) { | |
163 | log_err("u_strToUTF32(with length): " | |
164 | "length %ld != %ld and %s != U_ZERO_ERROR\n", | |
165 | (long)u32DestLen, (long)LENGTHOF(src32), u_errorName(err)); | |
166 | return; | |
167 | } | |
168 | /*for(i=0; i< u32DestLen; i++){ | |
169 | printf("0x%08X, ",uTarget[i]); | |
170 | if(i%10==0){ | |
171 | printf("\n"); | |
b75a7d8f | 172 | } |
729e4ab9 A |
173 | }*/ |
174 | for(i=0; i< LENGTHOF(src32); i++){ | |
175 | if(u32Target[i] != src32[i]){ | |
176 | log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i); | |
b75a7d8f | 177 | } |
729e4ab9 A |
178 | } |
179 | if(u32Target[i] != 0){ | |
180 | log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i); | |
181 | } | |
182 | ||
183 | /* now NUL-terminated */ | |
184 | u32DestLen = -2; | |
185 | u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err); | |
186 | if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != LENGTHOF(src32)-1) { | |
187 | log_err("u_strToUTF32(preflight with NUL-termination): " | |
188 | "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", | |
189 | (long)u32DestLen, (long)LENGTHOF(src32)-1, u_errorName(err)); | |
190 | return; | |
191 | } | |
192 | err = U_ZERO_ERROR; | |
193 | u32DestLen = -2; | |
194 | u_strToUTF32(u32Target, LENGTHOF(src32), &u32DestLen, src16, -1,&err); | |
195 | if(err != U_ZERO_ERROR || u32DestLen != LENGTHOF(src32)-1) { | |
196 | log_err("u_strToUTF32(with NUL-termination): " | |
197 | "length %ld != %ld and %s != U_ZERO_ERROR\n", | |
198 | (long)u32DestLen, (long)LENGTHOF(src32)-1, u_errorName(err)); | |
199 | return; | |
200 | } | |
201 | ||
202 | for(i=0; i< LENGTHOF(src32); i++){ | |
203 | if(u32Target[i] != src32[i]){ | |
204 | log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]); | |
b75a7d8f | 205 | } |
729e4ab9 A |
206 | } |
207 | } | |
b75a7d8f | 208 | |
729e4ab9 A |
209 | /* test unpaired surrogates */ |
210 | static void Test_strToUTF32_surrogates() { | |
211 | UErrorCode err = U_ZERO_ERROR; | |
212 | UChar32 u32Target[400]; | |
213 | int32_t len16, u32DestLen; | |
214 | int32_t numSubstitutions; | |
215 | int i; | |
216 | ||
217 | static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; | |
218 | static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 }; | |
219 | static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 }; | |
220 | static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 }; | |
221 | len16 = LENGTHOF(surr16); | |
222 | for(i = 0; i < 4; ++i) { | |
223 | err = U_ZERO_ERROR; | |
224 | u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err); | |
225 | if(err != U_INVALID_CHAR_FOUND) { | |
226 | log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n", | |
227 | (long)i, u_errorName(err)); | |
228 | return; | |
229 | } | |
b75a7d8f | 230 | |
729e4ab9 A |
231 | err = U_ZERO_ERROR; |
232 | u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err); | |
233 | if(err != U_INVALID_CHAR_FOUND) { | |
234 | log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n", | |
235 | (long)i, u_errorName(err)); | |
236 | return; | |
b75a7d8f | 237 | } |
729e4ab9 A |
238 | |
239 | err = U_ZERO_ERROR; | |
240 | u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err); | |
241 | if(err != U_INVALID_CHAR_FOUND) { | |
242 | log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", | |
243 | (long)i, u_errorName(err)); | |
244 | return; | |
b75a7d8f | 245 | } |
729e4ab9 A |
246 | |
247 | err = U_ZERO_ERROR; | |
248 | u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err); | |
249 | if(err != U_INVALID_CHAR_FOUND) { | |
250 | log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", | |
251 | (long)i, u_errorName(err)); | |
252 | return; | |
b75a7d8f | 253 | } |
729e4ab9 | 254 | } |
b75a7d8f | 255 | |
729e4ab9 A |
256 | err = U_ZERO_ERROR; |
257 | u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err); | |
258 | if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) { | |
259 | log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", | |
260 | u_errorName(err)); | |
261 | return; | |
b75a7d8f | 262 | } |
b75a7d8f | 263 | |
729e4ab9 A |
264 | err = U_ZERO_ERROR; |
265 | u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err); | |
266 | if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) { | |
267 | log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n", | |
268 | u_errorName(err)); | |
269 | return; | |
270 | } | |
271 | ||
272 | err = U_ZERO_ERROR; | |
273 | u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err); | |
274 | if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) { | |
275 | log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", | |
276 | u_errorName(err)); | |
277 | return; | |
278 | } | |
279 | ||
280 | err = U_ZERO_ERROR; | |
281 | u_strToUTF32(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err); | |
282 | if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) { | |
283 | log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", | |
284 | u_errorName(err)); | |
285 | return; | |
286 | } | |
287 | ||
288 | /* with substitution character */ | |
289 | numSubstitutions = -1; | |
290 | err = U_ZERO_ERROR; | |
291 | u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err); | |
292 | if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) { | |
293 | log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", | |
294 | u_errorName(err)); | |
295 | return; | |
296 | } | |
297 | ||
298 | err = U_ZERO_ERROR; | |
299 | u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err); | |
300 | if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) { | |
301 | log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n", | |
302 | u_errorName(err)); | |
303 | return; | |
304 | } | |
305 | ||
306 | err = U_ZERO_ERROR; | |
307 | u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err); | |
308 | if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) { | |
309 | log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", | |
310 | u_errorName(err)); | |
311 | return; | |
312 | } | |
313 | ||
314 | err = U_ZERO_ERROR; | |
315 | u_strToUTF32WithSub(u32Target, LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err); | |
316 | if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) { | |
317 | log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", | |
318 | u_errorName(err)); | |
319 | return; | |
320 | } | |
321 | } | |
322 | ||
323 | static void Test_strFromUTF32(void){ | |
324 | UErrorCode err = U_ZERO_ERROR; | |
325 | UChar uTarget[400]; | |
326 | int32_t uDestLen; | |
327 | int i= 0; | |
328 | ||
329 | /* first with length */ | |
330 | uDestLen = -2; | |
331 | u_strFromUTF32(uTarget,0,&uDestLen,src32,LENGTHOF(src32),&err); | |
332 | if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != LENGTHOF(src16)) { | |
333 | log_err("u_strFromUTF32(preflight with length): " | |
334 | "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", | |
335 | (long)uDestLen, (long)LENGTHOF(src16), u_errorName(err)); | |
336 | return; | |
337 | } | |
338 | err = U_ZERO_ERROR; | |
339 | uDestLen = -2; | |
340 | u_strFromUTF32(uTarget, LENGTHOF(src16)+1,&uDestLen,src32,LENGTHOF(src32),&err); | |
341 | if(err != U_ZERO_ERROR || uDestLen != LENGTHOF(src16)) { | |
342 | log_err("u_strFromUTF32(with length): " | |
343 | "length %ld != %ld and %s != U_ZERO_ERROR\n", | |
344 | (long)uDestLen, (long)LENGTHOF(src16), u_errorName(err)); | |
345 | return; | |
346 | } | |
347 | /*for(i=0; i< uDestLen; i++){ | |
348 | printf("0x%04X, ",uTarget[i]); | |
349 | if(i%10==0){ | |
350 | printf("\n"); | |
b75a7d8f | 351 | } |
729e4ab9 A |
352 | }*/ |
353 | ||
354 | for(i=0; i< uDestLen; i++){ | |
355 | if(uTarget[i] != src16[i]){ | |
356 | log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i); | |
b75a7d8f | 357 | } |
729e4ab9 A |
358 | } |
359 | if(uTarget[i] != 0){ | |
360 | log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i); | |
361 | } | |
b75a7d8f | 362 | |
729e4ab9 A |
363 | /* now NUL-terminated */ |
364 | uDestLen = -2; | |
365 | u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err); | |
366 | if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != LENGTHOF(src16)-1) { | |
367 | log_err("u_strFromUTF32(preflight with NUL-termination): " | |
368 | "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n", | |
369 | (long)uDestLen, (long)LENGTHOF(src16)-1, u_errorName(err)); | |
370 | return; | |
371 | } | |
372 | err = U_ZERO_ERROR; | |
373 | uDestLen = -2; | |
374 | u_strFromUTF32(uTarget, LENGTHOF(src16),&uDestLen,src32,-1,&err); | |
375 | if(err != U_ZERO_ERROR || uDestLen != LENGTHOF(src16)-1) { | |
376 | log_err("u_strFromUTF32(with NUL-termination): " | |
377 | "length %ld != %ld and %s != U_ZERO_ERROR\n", | |
378 | (long)uDestLen, (long)LENGTHOF(src16)-1, u_errorName(err)); | |
379 | return; | |
380 | } | |
381 | ||
382 | for(i=0; i< uDestLen; i++){ | |
383 | if(uTarget[i] != src16[i]){ | |
384 | log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]); | |
b75a7d8f | 385 | } |
729e4ab9 A |
386 | } |
387 | } | |
b75a7d8f | 388 | |
729e4ab9 A |
389 | /* test surrogate code points */ |
390 | static void Test_strFromUTF32_surrogates() { | |
391 | UErrorCode err = U_ZERO_ERROR; | |
392 | UChar uTarget[400]; | |
393 | int32_t len32, uDestLen; | |
394 | int32_t numSubstitutions; | |
395 | int i; | |
396 | ||
397 | static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 }; | |
398 | static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; | |
399 | static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; | |
400 | static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45, | |
401 | 0x5a, 0xd900, 0xdc00, 0x7a, 0 }; | |
402 | len32 = LENGTHOF(surr32); | |
403 | for(i = 0; i < 6; ++i) { | |
404 | err = U_ZERO_ERROR; | |
405 | u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err); | |
406 | if(err != U_INVALID_CHAR_FOUND) { | |
407 | log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n", | |
408 | (long)i, u_errorName(err)); | |
409 | return; | |
410 | } | |
b75a7d8f | 411 | |
729e4ab9 A |
412 | err = U_ZERO_ERROR; |
413 | u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err); | |
414 | if(err != U_INVALID_CHAR_FOUND) { | |
415 | log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n", | |
416 | (long)i, u_errorName(err)); | |
417 | return; | |
b75a7d8f | 418 | } |
729e4ab9 A |
419 | |
420 | err = U_ZERO_ERROR; | |
421 | u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err); | |
422 | if(err != U_INVALID_CHAR_FOUND) { | |
423 | log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", | |
424 | (long)i, u_errorName(err)); | |
425 | return; | |
b75a7d8f | 426 | } |
729e4ab9 A |
427 | |
428 | err = U_ZERO_ERROR; | |
429 | u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err); | |
430 | if(err != U_INVALID_CHAR_FOUND) { | |
431 | log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n", | |
432 | (long)i, u_errorName(err)); | |
433 | return; | |
b75a7d8f | 434 | } |
729e4ab9 | 435 | } |
b75a7d8f | 436 | |
729e4ab9 A |
437 | err = U_ZERO_ERROR; |
438 | u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err); | |
439 | if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) { | |
440 | log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", | |
441 | u_errorName(err)); | |
442 | return; | |
b75a7d8f | 443 | } |
b75a7d8f | 444 | |
729e4ab9 A |
445 | err = U_ZERO_ERROR; |
446 | u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err); | |
447 | if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) { | |
448 | log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n", | |
449 | u_errorName(err)); | |
450 | return; | |
451 | } | |
452 | ||
453 | err = U_ZERO_ERROR; | |
454 | u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err); | |
455 | if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) { | |
456 | log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", | |
457 | u_errorName(err)); | |
458 | return; | |
459 | } | |
460 | ||
461 | err = U_ZERO_ERROR; | |
462 | u_strFromUTF32(uTarget, LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err); | |
463 | if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) { | |
464 | log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", | |
465 | u_errorName(err)); | |
466 | return; | |
467 | } | |
468 | ||
469 | /* with substitution character */ | |
470 | numSubstitutions = -1; | |
471 | err = U_ZERO_ERROR; | |
472 | u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err); | |
473 | if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) { | |
474 | log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", | |
475 | u_errorName(err)); | |
476 | return; | |
477 | } | |
478 | ||
479 | err = U_ZERO_ERROR; | |
480 | u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err); | |
481 | if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) { | |
482 | log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n", | |
483 | u_errorName(err)); | |
484 | return; | |
485 | } | |
486 | ||
487 | err = U_ZERO_ERROR; | |
488 | u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err); | |
489 | if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) { | |
490 | log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n", | |
491 | u_errorName(err)); | |
492 | return; | |
493 | } | |
494 | ||
495 | err = U_ZERO_ERROR; | |
496 | u_strFromUTF32WithSub(uTarget, LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err); | |
497 | if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) { | |
498 | log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n", | |
499 | u_errorName(err)); | |
500 | return; | |
501 | } | |
502 | } | |
b75a7d8f A |
503 | |
504 | static void Test_UChar_UTF8_API(void){ | |
505 | ||
506 | UErrorCode err = U_ZERO_ERROR; | |
507 | UChar uTemp[1]; | |
508 | char u8Temp[1]; | |
509 | UChar* uTarget=uTemp; | |
510 | const char* u8Src; | |
511 | int32_t u8SrcLen = 0; | |
512 | int32_t uTargetLength = 0; | |
513 | int32_t uDestLen=0; | |
514 | const UChar* uSrc = src16; | |
515 | int32_t uSrcLen = sizeof(src16)/2; | |
516 | char* u8Target = u8Temp; | |
517 | int32_t u8TargetLength =0; | |
518 | int32_t u8DestLen =0; | |
519 | UBool failed = FALSE; | |
520 | int i= 0; | |
73c04bcf A |
521 | int32_t numSubstitutions; |
522 | ||
b75a7d8f A |
523 | { |
524 | /* preflight */ | |
73c04bcf | 525 | u8Temp[0] = 0x12; |
b75a7d8f | 526 | u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); |
73c04bcf | 527 | if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){ |
b75a7d8f A |
528 | err = U_ZERO_ERROR; |
529 | u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1)); | |
530 | u8TargetLength = u8DestLen; | |
73c04bcf A |
531 | |
532 | u8Target[u8TargetLength] = (char)0xfe; | |
533 | u8DestLen = -1; | |
b75a7d8f | 534 | u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); |
73c04bcf | 535 | if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){ |
b75a7d8f A |
536 | log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err)); |
537 | return; | |
538 | } | |
539 | ||
540 | } | |
541 | else { | |
542 | log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); | |
543 | } | |
544 | failed = FALSE; | |
545 | /*for(i=0; i< u8DestLen; i++){ | |
546 | printf("0x%04X, ",u8Target[i]); | |
547 | if(i%10==0){ | |
548 | printf("\n"); | |
549 | } | |
550 | }*/ | |
551 | /*for(i=0; i< u8DestLen; i++){ | |
552 | if(u8Target[i] != src8[i]){ | |
729e4ab9 | 553 | log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]); |
b75a7d8f A |
554 | failed =TRUE; |
555 | } | |
556 | } | |
557 | if(failed){ | |
558 | log_err("u_strToUTF8() failed \n"); | |
559 | }*/ | |
560 | u8Src = u8Target; | |
561 | u8SrcLen = u8DestLen; | |
562 | ||
563 | /* preflight */ | |
73c04bcf | 564 | uTemp[0] = 0x1234; |
b75a7d8f | 565 | u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); |
73c04bcf | 566 | if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){ |
b75a7d8f A |
567 | err = U_ZERO_ERROR; |
568 | uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); | |
569 | uTargetLength = uDestLen; | |
570 | ||
73c04bcf A |
571 | uTarget[uTargetLength] = 0xfff0; |
572 | uDestLen = -1; | |
b75a7d8f A |
573 | u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); |
574 | } | |
575 | else { | |
73c04bcf | 576 | log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n"); |
b75a7d8f A |
577 | } |
578 | /*for(i=0; i< uDestLen; i++){ | |
579 | printf("0x%04X, ",uTarget[i]); | |
580 | if(i%10==0){ | |
581 | printf("\n"); | |
582 | } | |
583 | }*/ | |
73c04bcf A |
584 | |
585 | if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) { | |
586 | failed = TRUE; | |
587 | } | |
b75a7d8f A |
588 | for(i=0; i< uSrcLen; i++){ |
589 | if(uTarget[i] != src16[i]){ | |
590 | log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i); | |
591 | failed =TRUE; | |
592 | } | |
593 | } | |
594 | if(failed){ | |
73c04bcf | 595 | log_err("error: u_strFromUTF8(after preflighting) failed\n"); |
b75a7d8f A |
596 | } |
597 | ||
598 | free(u8Target); | |
599 | free(uTarget); | |
600 | } | |
601 | { | |
602 | u8SrcLen = -1; | |
603 | uTargetLength = 0; | |
604 | uSrcLen =-1; | |
605 | u8TargetLength=0; | |
606 | failed = FALSE; | |
607 | /* preflight */ | |
608 | u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); | |
609 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
610 | err = U_ZERO_ERROR; | |
611 | u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1)); | |
612 | u8TargetLength = u8DestLen; | |
613 | ||
614 | u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err); | |
615 | ||
616 | } | |
617 | else { | |
618 | log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); | |
619 | } | |
620 | failed = FALSE; | |
621 | /*for(i=0; i< u8DestLen; i++){ | |
622 | printf("0x%04X, ",u8Target[i]); | |
623 | if(i%10==0){ | |
624 | printf("\n"); | |
625 | } | |
626 | }*/ | |
627 | /*for(i=0; i< u8DestLen; i++){ | |
628 | if(u8Target[i] != src8[i]){ | |
729e4ab9 | 629 | log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]); |
b75a7d8f A |
630 | failed =TRUE; |
631 | } | |
632 | } | |
633 | if(failed){ | |
634 | log_err("u_strToUTF8() failed \n"); | |
635 | }*/ | |
636 | u8Src = u8Target; | |
637 | u8SrcLen = u8DestLen; | |
638 | ||
639 | /* preflight */ | |
640 | u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); | |
641 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
642 | err = U_ZERO_ERROR; | |
643 | uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1)); | |
644 | uTargetLength = uDestLen; | |
645 | ||
646 | u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err); | |
647 | } | |
648 | else { | |
649 | log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR"); | |
650 | } | |
651 | /*for(i=0; i< uDestLen; i++){ | |
652 | printf("0x%04X, ",uTarget[i]); | |
653 | if(i%10==0){ | |
654 | printf("\n"); | |
655 | } | |
656 | }*/ | |
657 | ||
658 | for(i=0; i< uSrcLen; i++){ | |
659 | if(uTarget[i] != src16[i]){ | |
660 | log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i); | |
661 | failed =TRUE; | |
662 | } | |
663 | } | |
664 | if(failed){ | |
665 | log_err("u_strToUTF8() failed \n"); | |
666 | } | |
667 | ||
668 | free(u8Target); | |
669 | free(uTarget); | |
670 | } | |
671 | ||
672 | /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */ | |
673 | { | |
674 | static const UChar | |
675 | withLead16[]={ 0x1800, 0xd89a, 0x0061 }, | |
73c04bcf A |
676 | withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 }, |
677 | withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */ | |
678 | withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */ | |
b75a7d8f A |
679 | static const uint8_t |
680 | withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 }, | |
73c04bcf A |
681 | withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 }, |
682 | withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */ | |
683 | withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */ | |
b75a7d8f A |
684 | UChar out16[10]; |
685 | char out8[10]; | |
686 | ||
687 | if( | |
688 | (err=U_ZERO_ERROR, u_strToUTF8(out8, LENGTHOF(out8), NULL, withLead16, LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) || | |
689 | (err=U_ZERO_ERROR, u_strToUTF8(out8, LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) || | |
690 | (err=U_ZERO_ERROR, u_strFromUTF8(out16, LENGTHOF(out16), NULL, (const char *)withLead8, LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) || | |
691 | (err=U_ZERO_ERROR, u_strFromUTF8(out16, LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND) | |
692 | ) { | |
693 | log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n"); | |
694 | } | |
73c04bcf A |
695 | |
696 | /* test error handling with substitution characters */ | |
697 | ||
698 | /* from UTF-8 with length */ | |
699 | err=U_ZERO_ERROR; | |
700 | numSubstitutions=-1; | |
701 | out16[0]=0x55aa; | |
702 | uDestLen=0; | |
703 | u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, | |
704 | (const char *)withTrail8, uprv_strlen((const char *)withTrail8), | |
705 | 0x50005, &numSubstitutions, | |
706 | &err); | |
707 | if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) || | |
708 | 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) || | |
709 | numSubstitutions!=1) { | |
710 | log_err("error: u_strFromUTF8WithSub(length) failed\n"); | |
711 | } | |
712 | ||
713 | /* from UTF-8 with NUL termination */ | |
714 | err=U_ZERO_ERROR; | |
715 | numSubstitutions=-1; | |
716 | out16[0]=0x55aa; | |
717 | uDestLen=0; | |
718 | u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, | |
719 | (const char *)withTrail8, -1, | |
720 | 0xfffd, &numSubstitutions, | |
721 | &err); | |
722 | if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) || | |
723 | 0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) || | |
724 | numSubstitutions!=1) { | |
725 | log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n"); | |
726 | } | |
727 | ||
728 | /* preflight from UTF-8 with NUL termination */ | |
729 | err=U_ZERO_ERROR; | |
730 | numSubstitutions=-1; | |
731 | out16[0]=0x55aa; | |
732 | uDestLen=0; | |
733 | u_strFromUTF8WithSub(out16, 1, &uDestLen, | |
734 | (const char *)withTrail8, -1, | |
735 | 0x50005, &numSubstitutions, | |
736 | &err); | |
737 | if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=1) { | |
738 | log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n"); | |
739 | } | |
740 | ||
741 | /* to UTF-8 with length */ | |
742 | err=U_ZERO_ERROR; | |
743 | numSubstitutions=-1; | |
744 | out8[0]=(char)0xf5; | |
745 | u8DestLen=0; | |
746 | u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, | |
747 | withTrail16, u_strlen(withTrail16), | |
748 | 0xfffd, &numSubstitutions, | |
749 | &err); | |
750 | if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) || | |
751 | 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) || | |
752 | numSubstitutions!=1) { | |
753 | log_err("error: u_strToUTF8WithSub(length) failed\n"); | |
754 | } | |
755 | ||
756 | /* to UTF-8 with NUL termination */ | |
757 | err=U_ZERO_ERROR; | |
758 | numSubstitutions=-1; | |
759 | out8[0]=(char)0xf5; | |
760 | u8DestLen=0; | |
761 | u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, | |
762 | withTrail16, -1, | |
763 | 0x1a, &numSubstitutions, | |
764 | &err); | |
765 | if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) || | |
766 | 0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) || | |
767 | numSubstitutions!=1) { | |
768 | log_err("error: u_strToUTF8WithSub(NUL termination) failed\n"); | |
769 | } | |
770 | ||
771 | /* preflight to UTF-8 with NUL termination */ | |
772 | err=U_ZERO_ERROR; | |
773 | numSubstitutions=-1; | |
774 | out8[0]=(char)0xf5; | |
775 | u8DestLen=0; | |
776 | u_strToUTF8WithSub(out8, 1, &u8DestLen, | |
777 | withTrail16, -1, | |
778 | 0xfffd, &numSubstitutions, | |
779 | &err); | |
780 | if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) || | |
781 | numSubstitutions!=1) { | |
782 | log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n"); | |
783 | } | |
784 | ||
785 | /* test that numSubstitutions==0 if there are no substitutions */ | |
786 | ||
787 | /* from UTF-8 with length (just first 3 bytes which are valid) */ | |
788 | err=U_ZERO_ERROR; | |
789 | numSubstitutions=-1; | |
790 | out16[0]=0x55aa; | |
791 | uDestLen=0; | |
792 | u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, | |
793 | (const char *)withTrail8, 3, | |
794 | 0x50005, &numSubstitutions, | |
795 | &err); | |
796 | if(U_FAILURE(err) || uDestLen!=1 || | |
797 | 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) || | |
798 | numSubstitutions!=0) { | |
799 | log_err("error: u_strFromUTF8WithSub(no subs) failed\n"); | |
800 | } | |
801 | ||
802 | /* to UTF-8 with length (just first UChar which is valid) */ | |
803 | err=U_ZERO_ERROR; | |
804 | numSubstitutions=-1; | |
805 | out8[0]=(char)0xf5; | |
806 | u8DestLen=0; | |
807 | u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, | |
808 | withTrail16, 1, | |
809 | 0xfffd, &numSubstitutions, | |
810 | &err); | |
811 | if(U_FAILURE(err) || u8DestLen!=3 || | |
812 | 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) || | |
813 | numSubstitutions!=0) { | |
814 | log_err("error: u_strToUTF8WithSub(no subs) failed\n"); | |
815 | } | |
816 | ||
817 | /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */ | |
818 | ||
819 | /* from UTF-8 with length (just first 3 bytes which are valid) */ | |
820 | err=U_ZERO_ERROR; | |
821 | numSubstitutions=-1; | |
822 | out16[0]=0x55aa; | |
823 | uDestLen=0; | |
824 | u_strFromUTF8WithSub(out16, LENGTHOF(out16), &uDestLen, | |
825 | (const char *)withTrail8, 3, | |
826 | U_SENTINEL, &numSubstitutions, | |
827 | &err); | |
828 | if(U_FAILURE(err) || uDestLen!=1 || | |
829 | 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) || | |
830 | numSubstitutions!=0) { | |
831 | log_err("error: u_strFromUTF8WithSub(no subchar) failed\n"); | |
832 | } | |
833 | ||
834 | /* to UTF-8 with length (just first UChar which is valid) */ | |
835 | err=U_ZERO_ERROR; | |
836 | numSubstitutions=-1; | |
837 | out8[0]=(char)0xf5; | |
838 | u8DestLen=0; | |
839 | u_strToUTF8WithSub(out8, LENGTHOF(out8), &u8DestLen, | |
840 | withTrail16, 1, | |
841 | U_SENTINEL, &numSubstitutions, | |
842 | &err); | |
843 | if(U_FAILURE(err) || u8DestLen!=3 || | |
844 | 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) || | |
845 | numSubstitutions!=0) { | |
846 | log_err("error: u_strToUTF8WithSub(no subchar) failed\n"); | |
847 | } | |
848 | } | |
849 | } | |
850 | ||
851 | /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */ | |
852 | static UBool | |
853 | equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) { | |
854 | UChar c1, c2; | |
855 | ||
856 | while(length>0) { | |
857 | c1=*s++; | |
858 | c2=*t++; | |
859 | if(c1!=c2 && c2!=0xfffd) { | |
860 | return FALSE; | |
861 | } | |
862 | --length; | |
b75a7d8f | 863 | } |
73c04bcf | 864 | return TRUE; |
b75a7d8f | 865 | } |
73c04bcf | 866 | |
46f4442e A |
867 | /* test u_strFromUTF8() */ | |
868 | static void | |
869 | Test_FromUTF8(void) { | |
870 | /* | |
871 | * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)" | |
872 | */ | |
873 | static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 }; | |
874 | UChar dest[64]; | |
875 | UChar *destPointer; | |
876 | int32_t destLength; | |
877 | UErrorCode errorCode; | |
878 | ||
879 | /* 3 bytes input, one UChar output (U+095C) */ | |
880 | errorCode=U_ZERO_ERROR; | |
881 | destLength=-99; | |
882 | destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode); | |
883 | if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) { | |
884 | log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n", | |
885 | (long)destLength, u_errorName(errorCode)); | |
886 | } | |
887 | ||
888 | /* 4 bytes input, two UChars output (U+095C U+0000) */ | |
889 | errorCode=U_ZERO_ERROR; | |
890 | destLength=-99; | |
891 | destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode); | |
892 | if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) { | |
893 | log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n", | |
894 | (long)destLength, u_errorName(errorCode)); | |
895 | } | |
896 | ||
897 | /* NUL-terminated 3 bytes input, one UChar output (U+095C) */ | |
898 | errorCode=U_ZERO_ERROR; | |
899 | destLength=-99; | |
900 | destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode); | |
901 | if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) { | |
902 | log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n", | |
903 | (long)destLength, u_errorName(errorCode)); | |
904 | } | |
905 | ||
906 | /* 3 bytes input, one UChar output (U+095C), transform not just preflight */ | |
907 | errorCode=U_ZERO_ERROR; | |
908 | dest[0]=dest[1]=99; | |
909 | destLength=-99; | |
910 | destPointer=u_strFromUTF8(dest, LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode); | |
911 | if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) { | |
912 | log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n", | |
913 | (long)destLength, u_errorName(errorCode)); | |
914 | } | |
915 | } | |
916 | ||
73c04bcf A |
917 | /* test u_strFromUTF8Lenient() */ |
918 | static void | |
919 | Test_FromUTF8Lenient(void) { | |
920 | /* | |
921 | * Multiple input strings, each NUL-terminated. | |
922 | * Terminate with a string starting with 0xff. | |
923 | */ | |
924 | static const uint8_t bytes[]={ | |
925 | /* well-formed UTF-8 */ | |
926 | 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80, | |
927 | 0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0, | |
928 | ||
929 | /* various malformed sequences */ | |
930 | 0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0, | |
931 | ||
932 | /* truncated input */ | |
933 | 0xc3, 0, | |
934 | 0xe0, 0, | |
935 | 0xe0, 0xa0, 0, | |
936 | 0xf0, 0, | |
937 | 0xf0, 0x90, 0, | |
938 | 0xf0, 0x90, 0x80, 0, | |
939 | ||
46f4442e A |
940 | /* non-ASCII characters in the last few bytes */ |
941 | 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0, | |
942 | 0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0, | |
943 | ||
73c04bcf A |
944 | /* empty string */ |
945 | 0, | |
946 | ||
947 | /* finish */ | |
948 | 0xff, 0 | |
949 | }; | |
950 | ||
951 | /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */ | |
952 | static const UChar uchars[]={ | |
953 | 0x61, 0xdf, 0x800, 0xd840, 0xdc00, | |
954 | 0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0, | |
955 | ||
956 | 0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0, | |
957 | ||
958 | 0xfffd, 0, | |
959 | 0xfffd, 0, | |
960 | 0xfffd, 0, | |
961 | 0xfffd, 0, | |
962 | 0xfffd, 0, | |
963 | 0xfffd, 0, | |
964 | ||
46f4442e A |
965 | 0x61, 0xdf, 0x800, 0, |
966 | 0x61, 0x800, 0xdf, 0, | |
967 | ||
73c04bcf A |
968 | 0, |
969 | ||
970 | 0 | |
971 | }; | |
972 | ||
973 | UChar dest[64]; | |
974 | const char *pb; | |
975 | const UChar *pu, *pDest; | |
976 | int32_t srcLength, destLength0, destLength; | |
977 | int number; | |
978 | UErrorCode errorCode; | |
979 | ||
980 | /* verify checking for some illegal arguments */ | |
981 | dest[0]=0x1234; | |
982 | destLength=-1; | |
983 | errorCode=U_ZERO_ERROR; | |
984 | pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode); | |
985 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) { | |
986 | log_err("u_strFromUTF8Lenient(src=NULL) failed\n"); | |
987 | } | |
988 | ||
989 | dest[0]=0x1234; | |
990 | destLength=-1; | |
991 | errorCode=U_ZERO_ERROR; | |
992 | pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode); | |
993 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { | |
994 | log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n"); | |
995 | } | |
996 | ||
997 | dest[0]=0x1234; | |
998 | destLength=-1; | |
999 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
1000 | pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode); | |
1001 | if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) { | |
1002 | log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n"); | |
1003 | } | |
1004 | ||
1005 | dest[0]=0x1234; | |
1006 | destLength=-1; | |
1007 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
1008 | pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, NULL); | |
1009 | if(dest[0]!=0x1234) { | |
1010 | log_err("u_strFromUTF8Lenient(pErrorCode=NULL) failed\n"); | |
1011 | } | |
1012 | ||
1013 | /* test normal behavior */ | |
1014 | number=0; /* string number for log_err() */ | |
1015 | ||
1016 | for(pb=(const char *)bytes, pu=uchars; | |
1017 | *pb!=(char)0xff; | |
1018 | pb+=srcLength+1, pu+=destLength0+1, ++number | |
1019 | ) { | |
1020 | srcLength=uprv_strlen(pb); | |
1021 | destLength0=u_strlen(pu); | |
1022 | ||
1023 | /* preflighting with NUL-termination */ | |
1024 | dest[0]=0x1234; | |
1025 | destLength=-1; | |
1026 | errorCode=U_ZERO_ERROR; | |
1027 | pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode); | |
1028 | if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) || | |
1029 | pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0 | |
1030 | ) { | |
1031 | log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number); | |
1032 | } | |
1033 | ||
1034 | /* preflighting/some capacity with NUL-termination */ | |
1035 | if(srcLength>0) { | |
1036 | dest[destLength0-1]=0x1234; | |
1037 | destLength=-1; | |
1038 | errorCode=U_ZERO_ERROR; | |
1039 | pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode); | |
1040 | if (errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
1041 | dest[destLength0-1]!=0x1234 || destLength!=destLength0 | |
1042 | ) { | |
1043 | log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number); | |
1044 | } | |
1045 | } | |
1046 | ||
1047 | /* conversion with NUL-termination, much capacity */ | |
1048 | dest[0]=dest[destLength0]=0x1234; | |
1049 | destLength=-1; | |
1050 | errorCode=U_ZERO_ERROR; | |
1051 | pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, -1, &errorCode); | |
1052 | if (errorCode!=U_ZERO_ERROR || | |
1053 | pDest!=dest || dest[destLength0]!=0 || | |
1054 | destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) | |
1055 | ) { | |
1056 | log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number); | |
1057 | } | |
1058 | ||
1059 | /* conversion with NUL-termination, exact capacity */ | |
1060 | dest[0]=dest[destLength0]=0x1234; | |
1061 | destLength=-1; | |
1062 | errorCode=U_ZERO_ERROR; | |
1063 | pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode); | |
1064 | if (errorCode!=U_STRING_NOT_TERMINATED_WARNING || | |
1065 | pDest!=dest || dest[destLength0]!=0x1234 || | |
1066 | destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) | |
1067 | ) { | |
1068 | log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number); | |
1069 | } | |
1070 | ||
1071 | /* preflighting with length */ | |
1072 | dest[0]=0x1234; | |
1073 | destLength=-1; | |
1074 | errorCode=U_ZERO_ERROR; | |
1075 | pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode); | |
1076 | if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) || | |
1077 | pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength | |
1078 | ) { | |
1079 | log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number); | |
1080 | } | |
1081 | ||
1082 | /* preflighting/some capacity with length */ | |
1083 | if(srcLength>0) { | |
1084 | dest[srcLength-1]=0x1234; | |
1085 | destLength=-1; | |
1086 | errorCode=U_ZERO_ERROR; | |
1087 | pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode); | |
1088 | if (errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
1089 | dest[srcLength-1]!=0x1234 || destLength!=srcLength | |
1090 | ) { | |
1091 | log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number); | |
1092 | } | |
1093 | } | |
1094 | ||
1095 | /* conversion with length, much capacity */ | |
1096 | dest[0]=dest[destLength0]=0x1234; | |
1097 | destLength=-1; | |
1098 | errorCode=U_ZERO_ERROR; | |
1099 | pDest=u_strFromUTF8Lenient(dest, LENGTHOF(dest), &destLength, pb, srcLength, &errorCode); | |
1100 | if (errorCode!=U_ZERO_ERROR || | |
1101 | pDest!=dest || dest[destLength0]!=0 || | |
1102 | destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) | |
1103 | ) { | |
1104 | log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number); | |
1105 | } | |
1106 | ||
1107 | /* conversion with length, srcLength capacity */ | |
1108 | dest[0]=dest[srcLength]=dest[destLength0]=0x1234; | |
1109 | destLength=-1; | |
1110 | errorCode=U_ZERO_ERROR; | |
1111 | pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode); | |
1112 | if(srcLength==destLength0) { | |
1113 | if (errorCode!=U_STRING_NOT_TERMINATED_WARNING || | |
1114 | pDest!=dest || dest[destLength0]!=0x1234 || | |
1115 | destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) | |
1116 | ) { | |
1117 | log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number); | |
1118 | } | |
1119 | } else { | |
1120 | if (errorCode!=U_ZERO_ERROR || | |
1121 | pDest!=dest || dest[destLength0]!=0 || | |
1122 | destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength) | |
1123 | ) { | |
1124 | log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number); | |
1125 | } | |
1126 | } | |
1127 | } | |
1128 | } | |
1129 | ||
b75a7d8f A |
/*
 * ASCII-only UTF-16 test text: four CR/LF-terminated lines followed by a
 * single terminating NUL (41 code units in total).
 */
static const uint16_t src16j[] = {
    /* "CDEFGHIJ" CR LF */
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A,
    0x000D, 0x000A,
    /* "KLMNOPQR" CR LF */
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052,
    0x000D, 0x000A,
    /* "STUVWXYZ" CR LF */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A,
    0x000D, 0x000A,
    /* "STUVWXYZ" CR LF (repeated) */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A,
    0x000D, 0x000A,
    /* terminator */
    0x0000
    /* Test only ASCII */
};
/*
 * ASCII-only UTF-16 test text with embedded NULs: eight segments of five
 * code units, each followed by 0x0000 (48 code units in total).
 */
static const uint16_t src16WithNulls[] = {
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0000,   /* "CDEFG"     NUL */
    0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 0x0000,   /* "HIJ" CR LF NUL */
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0000,   /* "KLMNO"     NUL */
    0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 0x0000,   /* "PQR" CR LF NUL */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000,   /* "STUVW"     NUL */
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000,   /* "XYZ" CR LF NUL */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000,   /* "STUVW"     NUL */
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000    /* "XYZ" CR LF NUL */
    /* test only ASCII */
    /* Disabled non-ASCII (Latin-1 range) data:
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD,
    0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1,
    0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB,
    0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5,
    0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9,
    0x0054, 0x0000 */
};
1160 | static void Test_UChar_WCHART_API(void){ | |
73c04bcf | 1161 | #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) |
b75a7d8f A |
1162 | UErrorCode err = U_ZERO_ERROR; |
1163 | const UChar* uSrc = src16j; | |
1164 | int32_t uSrcLen = sizeof(src16j)/2; | |
1165 | wchar_t* wDest = NULL; | |
1166 | int32_t wDestLen = 0; | |
1167 | int32_t reqLen= 0 ; | |
1168 | UBool failed = FALSE; | |
1169 | UChar* uDest = NULL; | |
1170 | int32_t uDestLen = 0; | |
1171 | int i =0; | |
1172 | { | |
73c04bcf A |
1173 | /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */ |
1174 | if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) { | |
1175 | log_err("u_strFromWCS() should return NULL with a bad argument\n"); | |
1176 | } | |
1177 | if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) { | |
1178 | log_err("u_strToWCS() should return NULL with a bad argument\n"); | |
1179 | } | |
1180 | ||
729e4ab9 | 1181 | /* NULL source & destination. */ |
73c04bcf A |
1182 | err = U_ZERO_ERROR; |
1183 | u_strFromWCS(NULL,0,NULL,NULL,0,&err); | |
729e4ab9 A |
1184 | if (err != U_STRING_NOT_TERMINATED_WARNING) { |
1185 | log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err)); | |
73c04bcf A |
1186 | } |
1187 | err = U_ZERO_ERROR; | |
1188 | u_strToWCS(NULL,0,NULL,NULL,0,&err); | |
729e4ab9 A |
1189 | if (err != U_STRING_NOT_TERMINATED_WARNING) { |
1190 | log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err)); | |
73c04bcf A |
1191 | } |
1192 | err = U_ZERO_ERROR; | |
1193 | ||
b75a7d8f A |
1194 | /* pre-flight*/ |
1195 | u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); | |
1196 | ||
1197 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
1198 | err=U_ZERO_ERROR; | |
1199 | wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); | |
1200 | wDestLen = reqLen+1; | |
1201 | u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); | |
1202 | } | |
1203 | ||
1204 | /* pre-flight */ | |
1205 | u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); | |
1206 | ||
1207 | ||
1208 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
1209 | err =U_ZERO_ERROR; | |
1210 | uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); | |
1211 | uDestLen = reqLen + 1; | |
1212 | u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); | |
1213 | }else if(U_FAILURE(err)){ | |
1214 | ||
1215 | log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err)); | |
1216 | return; | |
1217 | } | |
1218 | ||
1219 | for(i=0; i< uSrcLen; i++){ | |
1220 | if(uDest[i] != src16j[i]){ | |
1221 | log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i); | |
1222 | failed =TRUE; | |
1223 | } | |
1224 | } | |
1225 | ||
1226 | if(U_FAILURE(err)){ | |
1227 | failed = TRUE; | |
1228 | } | |
1229 | if(failed){ | |
1230 | log_err("u_strToWCS() failed \n"); | |
1231 | } | |
1232 | free(wDest); | |
1233 | free(uDest); | |
1234 | ||
1235 | ||
1236 | /* test with embeded nulls */ | |
1237 | uSrc = src16WithNulls; | |
1238 | uSrcLen = sizeof(src16WithNulls)/2; | |
1239 | wDestLen =0; | |
1240 | uDestLen =0; | |
1241 | wDest = NULL; | |
1242 | uDest = NULL; | |
1243 | /* pre-flight*/ | |
1244 | u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); | |
1245 | ||
1246 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
1247 | err=U_ZERO_ERROR; | |
1248 | wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); | |
1249 | wDestLen = reqLen+1; | |
1250 | u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err); | |
1251 | } | |
1252 | ||
1253 | /* pre-flight */ | |
1254 | u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); | |
1255 | ||
1256 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
1257 | err =U_ZERO_ERROR; | |
1258 | uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); | |
1259 | uDestLen = reqLen + 1; | |
1260 | u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err); | |
1261 | } | |
1262 | ||
729e4ab9 A |
1263 | if(!U_FAILURE(err)) { |
1264 | for(i=0; i< uSrcLen; i++){ | |
b75a7d8f A |
1265 | if(uDest[i] != src16WithNulls[i]){ |
1266 | log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i); | |
1267 | failed =TRUE; | |
1268 | } | |
729e4ab9 | 1269 | } |
b75a7d8f A |
1270 | } |
1271 | ||
1272 | if(U_FAILURE(err)){ | |
1273 | failed = TRUE; | |
1274 | } | |
1275 | if(failed){ | |
1276 | log_err("u_strToWCS() failed \n"); | |
1277 | } | |
1278 | free(wDest); | |
1279 | free(uDest); | |
1280 | ||
1281 | } | |
1282 | ||
1283 | { | |
1284 | ||
1285 | uSrc = src16j; | |
1286 | uSrcLen = sizeof(src16j)/2; | |
1287 | wDestLen =0; | |
1288 | uDestLen =0; | |
1289 | wDest = NULL; | |
1290 | uDest = NULL; | |
1291 | wDestLen = 0; | |
1292 | /* pre-flight*/ | |
1293 | u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err); | |
1294 | ||
1295 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
1296 | err=U_ZERO_ERROR; | |
1297 | wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); | |
1298 | wDestLen = reqLen+1; | |
1299 | u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err); | |
1300 | } | |
1301 | uDestLen = 0; | |
1302 | /* pre-flight */ | |
1303 | u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err); | |
1304 | ||
1305 | if(err == U_BUFFER_OVERFLOW_ERROR){ | |
1306 | err =U_ZERO_ERROR; | |
1307 | uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); | |
1308 | uDestLen = reqLen + 1; | |
1309 | u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err); | |
1310 | } | |
1311 | ||
1312 | ||
729e4ab9 A |
1313 | if(!U_FAILURE(err)) { |
1314 | for(i=0; i< uSrcLen; i++){ | |
b75a7d8f A |
1315 | if(uDest[i] != src16j[i]){ |
1316 | log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i); | |
1317 | failed =TRUE; | |
1318 | } | |
729e4ab9 | 1319 | } |
b75a7d8f A |
1320 | } |
1321 | ||
1322 | if(U_FAILURE(err)){ | |
1323 | failed = TRUE; | |
1324 | } | |
1325 | if(failed){ | |
1326 | log_err("u_strToWCS() failed \n"); | |
1327 | } | |
1328 | free(wDest); | |
1329 | free(uDest); | |
1330 | } | |
1331 | ||
1332 | /* | |
1333 | * Test u_terminateWChars(). | |
1334 | * All u_terminateXYZ() use the same implementation macro; | |
1335 | * we test this function to improve API coverage. | |
1336 | */ | |
1337 | { | |
1338 | wchar_t buffer[10]; | |
1339 | ||
1340 | err=U_ZERO_ERROR; | |
1341 | buffer[3]=0x20ac; | |
1342 | wDestLen=u_terminateWChars(buffer, LENGTHOF(buffer), 3, &err); | |
1343 | if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) { | |
1344 | log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n", | |
1345 | u_errorName(err), wDestLen, buffer[3]); | |
1346 | } | |
1347 | ||
1348 | err=U_ZERO_ERROR; | |
1349 | buffer[3]=0x20ac; | |
1350 | wDestLen=u_terminateWChars(buffer, 3, 3, &err); | |
1351 | if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) { | |
1352 | log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n", | |
1353 | u_errorName(err), wDestLen, buffer[3]); | |
1354 | } | |
1355 | ||
1356 | err=U_STRING_NOT_TERMINATED_WARNING; | |
1357 | buffer[3]=0x20ac; | |
1358 | wDestLen=u_terminateWChars(buffer, LENGTHOF(buffer), 3, &err); | |
1359 | if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) { | |
1360 | log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n", | |
1361 | u_errorName(err), wDestLen, buffer[3]); | |
1362 | } | |
1363 | ||
1364 | err=U_ZERO_ERROR; | |
1365 | buffer[3]=0x20ac; | |
1366 | wDestLen=u_terminateWChars(buffer, 2, 3, &err); | |
1367 | if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) { | |
1368 | log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n", | |
1369 | u_errorName(err), wDestLen, buffer[3]); | |
1370 | } | |
1371 | } | |
73c04bcf A |
1372 | #else |
1373 | log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); | |
1374 | #endif | |
b75a7d8f | 1375 | } |
374ca955 A |
1376 | |
1377 | static void Test_widestrs() | |
1378 | { | |
73c04bcf | 1379 | #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) |
374ca955 A |
1380 | wchar_t ws[100]; |
1381 | UChar rts[100]; | |
1382 | int32_t wcap = sizeof(ws) / sizeof(*ws); | |
1383 | int32_t wl; | |
1384 | int32_t rtcap = sizeof(rts) / sizeof(*rts); | |
1385 | int32_t rtl; | |
1386 | wchar_t *wcs; | |
1387 | UChar *cp; | |
1388 | const char *errname; | |
1389 | UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0}; | |
1390 | int32_t ul = sizeof(ustr)/sizeof(*ustr) -1; | |
1391 | char astr[100]; | |
1392 | ||
1393 | UErrorCode err; | |
1394 | ||
1395 | err = U_ZERO_ERROR; | |
1396 | wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err); | |
1397 | if (U_FAILURE(err)) { | |
1398 | errname = u_errorName(err); | |
1399 | log_err("test_widestrs: u_strToWCS error: %s!\n",errname); | |
1400 | } | |
1401 | if(ul!=wl){ | |
1402 | log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl); | |
1403 | } | |
1404 | err = U_ZERO_ERROR; | |
1405 | wl = (int32_t)uprv_wcslen(wcs); | |
1406 | cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err); | |
1407 | if (U_FAILURE(err)) { | |
1408 | errname = u_errorName(err); | |
1409 | fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname); | |
1410 | } | |
1411 | if(wl != rtl){ | |
1412 | log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl); | |
1413 | } | |
73c04bcf A |
1414 | #else |
1415 | log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); | |
1416 | #endif | |
374ca955 A |
1417 | } |
1418 | ||
1419 | static void | |
1420 | Test_WCHART_LongString(){ | |
73c04bcf | 1421 | #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) |
374ca955 A |
1422 | UErrorCode status = U_ZERO_ERROR; |
1423 | const char* testdatapath=loadTestData(&status); | |
1424 | UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status); | |
1425 | int32_t strLen =0; | |
1426 | const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status); | |
1427 | const UChar* uSrc = str; | |
1428 | int32_t uSrcLen = strLen; | |
1429 | int32_t wDestLen =0, reqLen=0, i=0; | |
1430 | int32_t uDestLen =0; | |
1431 | wchar_t* wDest = NULL; | |
1432 | UChar* uDest = NULL; | |
1433 | UBool failed = FALSE; | |
1434 | ||
4388f060 A |
1435 | log_verbose("Loaded string of %d UChars\n", uSrcLen); |
1436 | ||
374ca955 | 1437 | if(U_FAILURE(status)){ |
46f4442e | 1438 | log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status)); |
374ca955 A |
1439 | return; |
1440 | } | |
1441 | ||
1442 | /* pre-flight*/ | |
1443 | u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status); | |
1444 | ||
1445 | if(status == U_BUFFER_OVERFLOW_ERROR){ | |
1446 | status=U_ZERO_ERROR; | |
1447 | wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1)); | |
1448 | wDestLen = reqLen+1; | |
1449 | u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status); | |
4388f060 A |
1450 | log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t)); |
1451 | } | |
1452 | ||
1453 | { | |
1454 | int j; | |
1455 | for(j=0;j>=0&&j<reqLen;j++) { | |
1456 | if(wDest[j]!=uSrc[j]) { | |
1457 | log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j); | |
1458 | break; | |
1459 | } | |
1460 | } | |
374ca955 | 1461 | } |
4388f060 | 1462 | |
374ca955 A |
1463 | uDestLen = 0; |
1464 | /* pre-flight */ | |
1465 | u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status); | |
374ca955 A |
1466 | if(status == U_BUFFER_OVERFLOW_ERROR){ |
1467 | status =U_ZERO_ERROR; | |
1468 | uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1)); | |
4388f060 | 1469 | u_memset(uDest,0xFFFF,reqLen+1); |
374ca955 A |
1470 | uDestLen = reqLen + 1; |
1471 | u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status); | |
4388f060 | 1472 | log_verbose("Back to %d UChars\n", reqLen); |
374ca955 | 1473 | } |
4388f060 A |
1474 | #if defined(U_WCHAR_IS_UTF16) |
1475 | log_verbose("U_WCHAR_IS_UTF16\n"); | |
1476 | #elif defined(U_WCHAR_IS_UTF32) | |
1477 | log_verbose("U_WCHAR_IS_UTF32\n"); | |
1478 | #else | |
1479 | log_verbose("U_WCHAR_IS_idunno (not UTF)\n"); | |
1480 | #endif | |
374ca955 | 1481 | |
4388f060 A |
1482 | if(reqLen!=uSrcLen) { |
1483 | log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen); | |
1484 | } | |
374ca955 A |
1485 | |
1486 | for(i=0; i< uSrcLen; i++){ | |
1487 | if(uDest[i] != str[i]){ | |
4388f060 | 1488 | log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i); |
374ca955 A |
1489 | failed =TRUE; |
1490 | } | |
1491 | } | |
1492 | ||
1493 | if(U_FAILURE(status)){ | |
1494 | failed = TRUE; | |
1495 | } | |
1496 | if(failed){ | |
1497 | log_err("u_strToWCS() failed \n"); | |
1498 | } | |
1499 | free(wDest); | |
1500 | free(uDest); | |
1501 | /* close the bundle */ | |
1502 | ures_close(theBundle); | |
73c04bcf A |
1503 | #else |
1504 | log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32"); | |
1505 | #endif | |
374ca955 A |
1506 | } |
1507 | ||
729e4ab9 A |
1508 | static void Test_strToJavaModifiedUTF8() { |
1509 | static const UChar src[]={ | |
1510 | 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3, | |
1511 | 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003, | |
1512 | 0xd800, 0xdc00, 0xdc00, 0xd800, 0, | |
1513 | 0xdbff, 0xdfff, | |
1514 | 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f | |
1515 | }; | |
1516 | static const uint8_t expected[]={ | |
1517 | 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3, | |
1518 | 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83, | |
1519 | 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83, | |
1520 | 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80, | |
1521 | 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, | |
1522 | 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f | |
1523 | }; | |
1524 | static const UChar shortSrc[]={ | |
1525 | 0xe01, 0xe1, 0x61 | |
1526 | }; | |
1527 | static const uint8_t shortExpected[]={ | |
1528 | 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61 | |
1529 | }; | |
1530 | static const UChar asciiNul[]={ | |
1531 | 0x61, 0x62, 0x63, 0 | |
1532 | }; | |
1533 | static const uint8_t asciiNulExpected[]={ | |
1534 | 0x61, 0x62, 0x63 | |
1535 | }; | |
1536 | char dest[200]; | |
1537 | char *p; | |
1538 | int32_t length, expectedTerminatedLength; | |
1539 | UErrorCode errorCode; | |
1540 | ||
1541 | expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")- | |
1542 | (const char *)expected); | |
1543 | ||
1544 | errorCode=U_ZERO_ERROR; | |
1545 | length=-5; | |
1546 | p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, | |
1547 | src, LENGTHOF(src), &errorCode); | |
1548 | if( U_FAILURE(errorCode) || p!=dest || | |
1549 | length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || | |
1550 | dest[length]!=0 | |
1551 | ) { | |
1552 | log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode)); | |
1553 | } | |
1554 | memset(dest, 0xff, sizeof(dest)); | |
1555 | errorCode=U_ZERO_ERROR; | |
1556 | length=-5; | |
1557 | p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, | |
1558 | src, LENGTHOF(src), &errorCode); | |
1559 | if( U_FAILURE(errorCode) || p!=dest || | |
1560 | 0!=memcmp(dest, expected, LENGTHOF(expected)) || | |
1561 | dest[LENGTHOF(expected)]!=0 | |
1562 | ) { | |
1563 | log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode)); | |
1564 | } | |
1565 | memset(dest, 0xff, sizeof(dest)); | |
1566 | errorCode=U_ZERO_ERROR; | |
1567 | length=-5; | |
1568 | p=u_strToJavaModifiedUTF8(dest, LENGTHOF(expected), &length, | |
1569 | src, LENGTHOF(src), &errorCode); | |
1570 | if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest || | |
1571 | length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || | |
1572 | dest[length]!=(char)0xff | |
1573 | ) { | |
1574 | log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode)); | |
1575 | } | |
1576 | memset(dest, 0xff, sizeof(dest)); | |
1577 | errorCode=U_ZERO_ERROR; | |
1578 | length=-5; | |
1579 | p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode); | |
1580 | if( U_FAILURE(errorCode) || p!=dest || | |
1581 | length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || | |
1582 | dest[length]!=0 | |
1583 | ) { | |
1584 | log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode)); | |
1585 | } | |
1586 | memset(dest, 0xff, sizeof(dest)); | |
1587 | errorCode=U_ZERO_ERROR; | |
1588 | length=-5; | |
1589 | p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode); | |
1590 | if( U_FAILURE(errorCode) || p!=dest || | |
1591 | 0!=memcmp(dest, expected, expectedTerminatedLength) || | |
1592 | dest[expectedTerminatedLength]!=0 | |
1593 | ) { | |
1594 | log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode)); | |
1595 | } | |
1596 | memset(dest, 0xff, sizeof(dest)); | |
1597 | errorCode=U_ZERO_ERROR; | |
1598 | length=-5; | |
1599 | p=u_strToJavaModifiedUTF8(dest, LENGTHOF(expected)/2, &length, | |
1600 | src, LENGTHOF(src), &errorCode); | |
1601 | if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
1602 | length!=LENGTHOF(expected) || dest[LENGTHOF(expected)/2]!=(char)0xff | |
1603 | ) { | |
1604 | log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode)); | |
1605 | } | |
1606 | memset(dest, 0xff, sizeof(dest)); | |
1607 | errorCode=U_ZERO_ERROR; | |
1608 | length=-5; | |
1609 | p=u_strToJavaModifiedUTF8(NULL, 0, &length, | |
1610 | src, LENGTHOF(src), &errorCode); | |
1611 | if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
1612 | length!=LENGTHOF(expected) || dest[0]!=(char)0xff | |
1613 | ) { | |
1614 | log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode)); | |
1615 | } | |
1616 | memset(dest, 0xff, sizeof(dest)); | |
1617 | errorCode=U_ZERO_ERROR; | |
1618 | length=-5; | |
1619 | p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, | |
1620 | shortSrc, LENGTHOF(shortSrc), &errorCode); | |
1621 | if( U_FAILURE(errorCode) || p!=dest || | |
1622 | length!=LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) || | |
1623 | dest[length]!=0 | |
1624 | ) { | |
1625 | log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode)); | |
1626 | } | |
1627 | memset(dest, 0xff, sizeof(dest)); | |
1628 | errorCode=U_ZERO_ERROR; | |
1629 | length=-5; | |
1630 | p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, | |
1631 | asciiNul, -1, &errorCode); | |
1632 | if( U_FAILURE(errorCode) || p!=dest || | |
1633 | length!=LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) || | |
1634 | dest[length]!=0 | |
1635 | ) { | |
1636 | log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode)); | |
1637 | } | |
1638 | memset(dest, 0xff, sizeof(dest)); | |
1639 | errorCode=U_ZERO_ERROR; | |
1640 | length=-5; | |
1641 | p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, | |
1642 | NULL, 0, &errorCode); | |
1643 | if( U_FAILURE(errorCode) || p!=dest || | |
1644 | length!=0 || dest[0]!=0 | |
1645 | ) { | |
1646 | log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode)); | |
1647 | } | |
1648 | ||
1649 | /* illegal arguments */ | |
1650 | memset(dest, 0xff, sizeof(dest)); | |
1651 | errorCode=U_ZERO_ERROR; | |
1652 | length=-5; | |
1653 | p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length, | |
1654 | src, LENGTHOF(src), &errorCode); | |
1655 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { | |
1656 | log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode)); | |
1657 | } | |
1658 | memset(dest, 0xff, sizeof(dest)); | |
1659 | errorCode=U_ZERO_ERROR; | |
1660 | length=-5; | |
1661 | p=u_strToJavaModifiedUTF8(dest, -1, &length, | |
1662 | src, LENGTHOF(src), &errorCode); | |
1663 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { | |
1664 | log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode)); | |
1665 | } | |
1666 | memset(dest, 0xff, sizeof(dest)); | |
1667 | errorCode=U_ZERO_ERROR; | |
1668 | length=-5; | |
1669 | p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length, | |
1670 | NULL, LENGTHOF(src), &errorCode); | |
1671 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { | |
1672 | log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode)); | |
1673 | } | |
1674 | memset(dest, 0xff, sizeof(dest)); | |
1675 | errorCode=U_ZERO_ERROR; | |
1676 | length=-5; | |
1677 | p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length, | |
1678 | NULL, -1, &errorCode); | |
1679 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) { | |
1680 | log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode)); | |
1681 | } | |
1682 | } | |
1683 | ||
1684 | static void Test_strFromJavaModifiedUTF8() { | |
1685 | static const uint8_t src[]={ | |
1686 | 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3, | |
1687 | 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83, | |
1688 | 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83, | |
1689 | 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0, | |
1690 | 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, | |
1691 | 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80, /* invalid sequences */ | |
1692 | 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, | |
1693 | 0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad, /* non-shortest forms are allowed */ | |
1694 | 0xe0, 0xb8, 0x8e, 0x6f | |
1695 | }; | |
1696 | static const UChar expected[]={ | |
1697 | 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3, | |
1698 | 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003, | |
1699 | 0xd800, 0xdc00, 0xdc00, 0xd800, 0, | |
1700 | 0xdbff, 0xdfff, | |
1701 | 0xfffd, 0xfffd, 0xfffd, 0xfffd, | |
1702 | 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, | |
1703 | 0x6c, 0xed, | |
1704 | 0xe0e, 0x6f | |
1705 | }; | |
1706 | static const uint8_t shortSrc[]={ | |
1707 | 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61 | |
1708 | }; | |
1709 | static const UChar shortExpected[]={ | |
1710 | 0xe01, 0xe1, 0x61 | |
1711 | }; | |
1712 | static const uint8_t asciiNul[]={ | |
1713 | 0x61, 0x62, 0x63, 0 | |
1714 | }; | |
1715 | static const UChar asciiNulExpected[]={ | |
1716 | 0x61, 0x62, 0x63 | |
1717 | }; | |
1718 | static const uint8_t invalid[]={ | |
1719 | 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80 | |
1720 | }; | |
1721 | static const UChar invalidExpectedFFFD[]={ | |
1722 | 0xfffd, 0xfffd, 0xfffd, 0xfffd | |
1723 | }; | |
1724 | static const UChar invalidExpected50000[]={ | |
1725 | 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00 | |
1726 | }; | |
1727 | UChar dest[200]; | |
1728 | UChar *p; | |
1729 | int32_t length, expectedTerminatedLength; | |
1730 | int32_t numSubstitutions; | |
1731 | UErrorCode errorCode; | |
1732 | ||
1733 | expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected); | |
1734 | ||
1735 | errorCode=U_ZERO_ERROR; | |
1736 | length=numSubstitutions=-5; | |
1737 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1738 | (const char *)src, LENGTHOF(src), | |
1739 | 0xfffd, &numSubstitutions, &errorCode); | |
1740 | if( U_FAILURE(errorCode) || p!=dest || | |
1741 | length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || | |
1742 | dest[length]!=0 || | |
1743 | numSubstitutions!=LENGTHOF(invalidExpectedFFFD) | |
1744 | ) { | |
1745 | log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode)); | |
1746 | } | |
1747 | memset(dest, 0xff, sizeof(dest)); | |
1748 | errorCode=U_ZERO_ERROR; | |
1749 | length=numSubstitutions=-5; | |
1750 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL, | |
1751 | (const char *)src, LENGTHOF(src), | |
1752 | 0xfffd, &numSubstitutions, &errorCode); | |
1753 | if( U_FAILURE(errorCode) || p!=dest || | |
1754 | 0!=memcmp(dest, expected, LENGTHOF(expected)) || | |
1755 | dest[LENGTHOF(expected)]!=0 || | |
1756 | numSubstitutions!=LENGTHOF(invalidExpectedFFFD) | |
1757 | ) { | |
1758 | log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode)); | |
1759 | } | |
1760 | memset(dest, 0xff, sizeof(dest)); | |
1761 | errorCode=U_ZERO_ERROR; | |
1762 | length=numSubstitutions=-5; | |
1763 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1764 | (const char *)src, LENGTHOF(src), | |
1765 | 0xfffd, NULL, &errorCode); | |
1766 | if( U_FAILURE(errorCode) || p!=dest || | |
1767 | length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || | |
1768 | dest[length]!=0 | |
1769 | ) { | |
1770 | log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode)); | |
1771 | } | |
1772 | memset(dest, 0xff, sizeof(dest)); | |
1773 | errorCode=U_ZERO_ERROR; | |
1774 | length=numSubstitutions=-5; | |
1775 | p=u_strFromJavaModifiedUTF8WithSub(dest, LENGTHOF(expected), &length, | |
1776 | (const char *)src, LENGTHOF(src), | |
1777 | 0xfffd, &numSubstitutions, &errorCode); | |
1778 | if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest || | |
1779 | length!=LENGTHOF(expected) || 0!=memcmp(dest, expected, length) || | |
1780 | dest[length]!=0xffff || | |
1781 | numSubstitutions!=LENGTHOF(invalidExpectedFFFD) | |
1782 | ) { | |
1783 | log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode)); | |
1784 | } | |
1785 | memset(dest, 0xff, sizeof(dest)); | |
1786 | errorCode=U_ZERO_ERROR; | |
1787 | length=numSubstitutions=-5; | |
1788 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1789 | (const char *)src, -1, | |
1790 | 0xfffd, &numSubstitutions, &errorCode); | |
1791 | if( U_FAILURE(errorCode) || p!=dest || | |
1792 | length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || | |
1793 | dest[length]!=0 || | |
1794 | numSubstitutions!=0 | |
1795 | ) { | |
1796 | log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode)); | |
1797 | } | |
1798 | memset(dest, 0xff, sizeof(dest)); | |
1799 | errorCode=U_ZERO_ERROR; | |
1800 | length=numSubstitutions=-5; | |
1801 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL, | |
1802 | (const char *)src, -1, | |
1803 | 0xfffd, &numSubstitutions, &errorCode); | |
1804 | if( U_FAILURE(errorCode) || p!=dest || | |
1805 | 0!=memcmp(dest, expected, expectedTerminatedLength) || | |
1806 | dest[expectedTerminatedLength]!=0 || | |
1807 | numSubstitutions!=0 | |
1808 | ) { | |
1809 | log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode)); | |
1810 | } | |
1811 | memset(dest, 0xff, sizeof(dest)); | |
1812 | errorCode=U_ZERO_ERROR; | |
1813 | length=numSubstitutions=-5; | |
1814 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1815 | (const char *)src, -1, | |
1816 | 0xfffd, NULL, &errorCode); | |
1817 | if( U_FAILURE(errorCode) || p!=dest || | |
1818 | length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) || | |
1819 | dest[length]!=0 | |
1820 | ) { | |
1821 | log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode)); | |
1822 | } | |
1823 | memset(dest, 0xff, sizeof(dest)); | |
1824 | errorCode=U_ZERO_ERROR; | |
1825 | length=numSubstitutions=-5; | |
1826 | p=u_strFromJavaModifiedUTF8WithSub(dest, LENGTHOF(expected)/2, &length, | |
1827 | (const char *)src, LENGTHOF(src), | |
1828 | 0xfffd, &numSubstitutions, &errorCode); | |
1829 | if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
1830 | length!=LENGTHOF(expected) || dest[LENGTHOF(expected)/2]!=0xffff | |
1831 | ) { | |
1832 | log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode)); | |
1833 | } | |
1834 | memset(dest, 0xff, sizeof(dest)); | |
1835 | errorCode=U_ZERO_ERROR; | |
1836 | length=numSubstitutions=-5; | |
1837 | p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length, | |
1838 | (const char *)src, LENGTHOF(src), | |
1839 | 0xfffd, &numSubstitutions, &errorCode); | |
1840 | if( errorCode!=U_BUFFER_OVERFLOW_ERROR || | |
1841 | length!=LENGTHOF(expected) || dest[0]!=0xffff | |
1842 | ) { | |
1843 | log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode)); | |
1844 | } | |
1845 | memset(dest, 0xff, sizeof(dest)); | |
1846 | errorCode=U_ZERO_ERROR; | |
1847 | length=numSubstitutions=-5; | |
1848 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1849 | (const char *)shortSrc, LENGTHOF(shortSrc), | |
1850 | 0xfffd, &numSubstitutions, &errorCode); | |
1851 | if( U_FAILURE(errorCode) || p!=dest || | |
1852 | length!=LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) || | |
1853 | dest[length]!=0 || | |
1854 | numSubstitutions!=0 | |
1855 | ) { | |
1856 | log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode)); | |
1857 | } | |
1858 | memset(dest, 0xff, sizeof(dest)); | |
1859 | errorCode=U_ZERO_ERROR; | |
1860 | length=numSubstitutions=-5; | |
1861 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1862 | (const char *)asciiNul, -1, | |
1863 | 0xfffd, &numSubstitutions, &errorCode); | |
1864 | if( U_FAILURE(errorCode) || p!=dest || | |
1865 | length!=LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) || | |
1866 | dest[length]!=0 || | |
1867 | numSubstitutions!=0 | |
1868 | ) { | |
1869 | log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode)); | |
1870 | } | |
1871 | memset(dest, 0xff, sizeof(dest)); | |
1872 | errorCode=U_ZERO_ERROR; | |
1873 | length=numSubstitutions=-5; | |
1874 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1875 | NULL, 0, 0xfffd, &numSubstitutions, &errorCode); | |
1876 | if( U_FAILURE(errorCode) || p!=dest || | |
1877 | length!=0 || dest[0]!=0 || | |
1878 | numSubstitutions!=0 | |
1879 | ) { | |
1880 | log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode)); | |
1881 | } | |
1882 | memset(dest, 0xff, sizeof(dest)); | |
1883 | errorCode=U_ZERO_ERROR; | |
1884 | length=numSubstitutions=-5; | |
1885 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1886 | (const char *)invalid, LENGTHOF(invalid), | |
1887 | 0xfffd, &numSubstitutions, &errorCode); | |
1888 | if( U_FAILURE(errorCode) || p!=dest || | |
1889 | length!=LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) || | |
1890 | dest[length]!=0 || | |
1891 | numSubstitutions!=LENGTHOF(invalidExpectedFFFD) | |
1892 | ) { | |
1893 | log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode)); | |
1894 | } | |
1895 | memset(dest, 0xff, sizeof(dest)); | |
1896 | errorCode=U_ZERO_ERROR; | |
1897 | length=numSubstitutions=-5; | |
1898 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1899 | (const char *)invalid, LENGTHOF(invalid), | |
1900 | 0x50000, &numSubstitutions, &errorCode); | |
1901 | if( U_FAILURE(errorCode) || p!=dest || | |
1902 | length!=LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) || | |
1903 | dest[length]!=0 || | |
1904 | numSubstitutions!=LENGTHOF(invalidExpectedFFFD) /* not ...50000 */ | |
1905 | ) { | |
1906 | log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode)); | |
1907 | } | |
1908 | memset(dest, 0xff, sizeof(dest)); | |
1909 | errorCode=U_ZERO_ERROR; | |
1910 | length=numSubstitutions=-5; | |
1911 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1912 | (const char *)invalid, LENGTHOF(invalid), | |
1913 | U_SENTINEL, &numSubstitutions, &errorCode); | |
1914 | if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) { | |
1915 | log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode)); | |
1916 | } | |
1917 | memset(dest, 0xff, sizeof(dest)); | |
1918 | errorCode=U_ZERO_ERROR; | |
1919 | length=numSubstitutions=-5; | |
1920 | p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length, | |
1921 | (const char *)src, LENGTHOF(src), | |
1922 | U_SENTINEL, &numSubstitutions, &errorCode); | |
1923 | if( errorCode!=U_INVALID_CHAR_FOUND || | |
1924 | length>=LENGTHOF(expected) || dest[LENGTHOF(expected)-1]!=0xffff || | |
1925 | numSubstitutions!=0 | |
1926 | ) { | |
1927 | log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode)); | |
1928 | } | |
1929 | ||
1930 | /* illegal arguments */ | |
1931 | memset(dest, 0xff, sizeof(dest)); | |
1932 | errorCode=U_ZERO_ERROR; | |
1933 | length=numSubstitutions=-5; | |
1934 | p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length, | |
1935 | (const char *)src, LENGTHOF(src), | |
1936 | 0xfffd, &numSubstitutions, &errorCode); | |
1937 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { | |
1938 | log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode)); | |
1939 | } | |
1940 | memset(dest, 0xff, sizeof(dest)); | |
1941 | errorCode=U_ZERO_ERROR; | |
1942 | length=numSubstitutions=-5; | |
1943 | p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length, | |
1944 | (const char *)src, LENGTHOF(src), | |
1945 | 0xfffd, &numSubstitutions, &errorCode); | |
1946 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { | |
1947 | log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode)); | |
1948 | } | |
1949 | memset(dest, 0xff, sizeof(dest)); | |
1950 | errorCode=U_ZERO_ERROR; | |
1951 | length=numSubstitutions=-5; | |
1952 | p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, | |
1953 | NULL, LENGTHOF(src), | |
1954 | 0xfffd, &numSubstitutions, &errorCode); | |
1955 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { | |
1956 | log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode)); | |
1957 | } | |
1958 | memset(dest, 0xff, sizeof(dest)); | |
1959 | errorCode=U_ZERO_ERROR; | |
1960 | length=numSubstitutions=-5; | |
1961 | p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, | |
1962 | NULL, -1, 0xfffd, &numSubstitutions, &errorCode); | |
1963 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { | |
1964 | log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode)); | |
1965 | } | |
1966 | memset(dest, 0xff, sizeof(dest)); | |
1967 | errorCode=U_ZERO_ERROR; | |
1968 | length=numSubstitutions=-5; | |
1969 | p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, | |
1970 | (const char *)src, LENGTHOF(src), | |
1971 | 0x110000, &numSubstitutions, &errorCode); | |
1972 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { | |
1973 | log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode)); | |
1974 | } | |
1975 | memset(dest, 0xff, sizeof(dest)); | |
1976 | errorCode=U_ZERO_ERROR; | |
1977 | length=numSubstitutions=-5; | |
1978 | p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length, | |
1979 | (const char *)src, LENGTHOF(src), | |
1980 | 0xdfff, &numSubstitutions, &errorCode); | |
1981 | if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) { | |
1982 | log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode)); | |
1983 | } | |
1984 | } | |
1985 | ||
1986 | /* test that string transformation functions permit NULL source pointer when source length==0 */ | |
1987 | static void TestNullEmptySource() { | |
1988 | char dest8[4]={ 3, 3, 3, 3 }; | |
1989 | UChar dest16[4]={ 3, 3, 3, 3 }; | |
1990 | UChar32 dest32[4]={ 3, 3, 3, 3 }; | |
1991 | #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) | |
1992 | wchar_t destW[4]={ 3, 3, 3, 3 }; | |
1993 | #endif | |
1994 | ||
1995 | int32_t length; | |
1996 | UErrorCode errorCode; | |
1997 | ||
1998 | /* u_strFromXyz() */ | |
1999 | ||
2000 | dest16[0]=3; | |
2001 | length=3; | |
2002 | errorCode=U_ZERO_ERROR; | |
2003 | u_strFromUTF8(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); | |
2004 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { | |
2005 | log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n"); | |
2006 | } | |
2007 | ||
2008 | dest16[0]=3; | |
2009 | length=3; | |
2010 | errorCode=U_ZERO_ERROR; | |
2011 | u_strFromUTF8WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); | |
2012 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { | |
2013 | log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n"); | |
2014 | } | |
2015 | ||
2016 | dest16[0]=3; | |
2017 | length=3; | |
2018 | errorCode=U_ZERO_ERROR; | |
2019 | u_strFromUTF8Lenient(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); | |
2020 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { | |
2021 | log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n"); | |
2022 | } | |
2023 | ||
2024 | dest16[0]=3; | |
2025 | length=3; | |
2026 | errorCode=U_ZERO_ERROR; | |
2027 | u_strFromUTF32(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); | |
2028 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { | |
2029 | log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n"); | |
2030 | } | |
2031 | ||
2032 | dest16[0]=3; | |
2033 | length=3; | |
2034 | errorCode=U_ZERO_ERROR; | |
2035 | u_strFromUTF32WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); | |
2036 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { | |
2037 | log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n"); | |
2038 | } | |
2039 | ||
2040 | dest16[0]=3; | |
2041 | length=3; | |
2042 | errorCode=U_ZERO_ERROR; | |
2043 | u_strFromJavaModifiedUTF8WithSub(dest16, LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode); | |
2044 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { | |
2045 | log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n"); | |
2046 | } | |
2047 | ||
2048 | /* u_strToXyz() */ | |
2049 | ||
2050 | dest8[0]=3; | |
2051 | length=3; | |
2052 | errorCode=U_ZERO_ERROR; | |
2053 | u_strToUTF8(dest8, LENGTHOF(dest8), &length, NULL, 0, &errorCode); | |
2054 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { | |
2055 | log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n"); | |
2056 | } | |
2057 | ||
2058 | dest8[0]=3; | |
2059 | length=3; | |
2060 | errorCode=U_ZERO_ERROR; | |
2061 | u_strToUTF8WithSub(dest8, LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode); | |
2062 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { | |
2063 | log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n"); | |
2064 | } | |
2065 | ||
2066 | dest32[0]=3; | |
2067 | length=3; | |
2068 | errorCode=U_ZERO_ERROR; | |
2069 | u_strToUTF32(dest32, LENGTHOF(dest32), &length, NULL, 0, &errorCode); | |
2070 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) { | |
2071 | log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n"); | |
2072 | } | |
2073 | ||
2074 | dest32[0]=3; | |
2075 | length=3; | |
2076 | errorCode=U_ZERO_ERROR; | |
2077 | u_strToUTF32WithSub(dest32, LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode); | |
2078 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) { | |
2079 | log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n"); | |
2080 | } | |
2081 | ||
2082 | dest8[0]=3; | |
2083 | length=3; | |
2084 | errorCode=U_ZERO_ERROR; | |
2085 | u_strToJavaModifiedUTF8(dest8, LENGTHOF(dest8), &length, NULL, 0, &errorCode); | |
2086 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) { | |
2087 | log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n"); | |
2088 | } | |
2089 | ||
2090 | #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) | |
2091 | ||
2092 | dest16[0]=3; | |
2093 | length=3; | |
2094 | errorCode=U_ZERO_ERROR; | |
2095 | u_strFromWCS(dest16, LENGTHOF(dest16), &length, NULL, 0, &errorCode); | |
2096 | if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) { | |
2097 | log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n"); | |
2098 | } | |
2099 | ||
2100 | destW[0]=3; | |
2101 | length=3; | |
2102 | errorCode=U_ZERO_ERROR; | |
2103 | u_strToWCS(destW, LENGTHOF(destW), &length, NULL, 0, &errorCode); | |
2104 | if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) { | |
2105 | log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n"); | |
2106 | } | |
2107 | ||
2108 | #endif | |
2109 | } |