]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/utf8tst.c
ICU-6.2.14.tar.gz
[apple/icu.git] / icuSources / test / cintltst / utf8tst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1998-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*
7 * File test.c
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 07/24/2000 Madhu Creation
13 *******************************************************************************
14 */
15
16 #include "unicode/utypes.h"
17 #include "unicode/utf8.h"
18 #include "cmemory.h"
19 #include "cintltst.h"
20
/*
 * Number of elements of a true array (not a pointer).
 * The result is cast to int32_t so that comparisons against the signed
 * counters used in this file are not silently performed as unsigned.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
22
23 static void printUChars(const uint8_t *uchars, int16_t len);
24
25 static void TestCodeUnitValues(void);
26 static void TestCharLength(void);
27 static void TestGetChar(void);
28 static void TestNextPrevChar(void);
29 static void TestFwdBack(void);
30 static void TestSetChar(void);
31 static void TestAppendChar(void);
32 static void TestAppend(void);
33
34 void addUTF8Test(TestNode** root);
35
36 void
37 addUTF8Test(TestNode** root)
38 {
39 addTest(root, &TestCodeUnitValues, "utf8tst/TestCodeUnitValues");
40 addTest(root, &TestCharLength, "utf8tst/TestCharLength" );
41 addTest(root, &TestGetChar, "utf8tst/TestGetChar" );
42 addTest(root, &TestNextPrevChar, "utf8tst/TestNextPrevChar" );
43 addTest(root, &TestFwdBack, "utf8tst/TestFwdBack" );
44 addTest(root, &TestSetChar, "utf8tst/TestSetChar" );
45 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar" );
46 addTest(root, &TestAppend, "utf8tst/TestAppend" );
47 }
48
49 static void TestCodeUnitValues()
50 {
51 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};
52
53 int16_t i;
54 for(i=0; i<sizeof(codeunit)/sizeof(codeunit[0]); i++){
55 uint8_t c=codeunit[i];
56 log_verbose("Testing code unit value of %x\n", c);
57 if(i<4){
58 if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){
59 log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
60 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
61 }
62 } else if(i< 8){
63 if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){
64 log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
65 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
66 }
67 } else if(i< 12){
68 if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || !U8_IS_TRAIL(c) || U8_IS_SINGLE(c) || U8_IS_LEAD(c)){
69 log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
70 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
71 }
72 }
73 }
74 }
75
76 static void TestCharLength()
77 {
78 static const uint32_t codepoint[]={
79 1, 0x0061,
80 1, 0x007f,
81 2, 0x016f,
82 2, 0x07ff,
83 3, 0x0865,
84 3, 0x20ac,
85 4, 0x20402,
86 4, 0x23456,
87 4, 0x24506,
88 4, 0x20402,
89 4, 0x10402,
90 3, 0xd7ff,
91 3, 0xe000,
92
93 };
94
95 int16_t i;
96 UBool multiple;
97 for(i=0; i<sizeof(codepoint)/sizeof(codepoint[0]); i=(int16_t)(i+2)){
98 UChar32 c=codepoint[i+1];
99 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){
100 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));
101 }else{
102 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c) );
103 }
104 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
105 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){
106 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
107 }
108 }
109 }
110
111 static void TestGetChar()
112 {
113 static const uint8_t input[]={
114 /* code unit,*/
115 0x61,
116 0x7f,
117 0xe4,
118 0xba,
119 0x8c,
120 0xF0,
121 0x90,
122 0x90,
123 0x81,
124 0xc0,
125 0x65,
126 0x31,
127 0x9a,
128 0xc9
129 };
130 static const UChar32 result[]={
131 /*codepoint-unsafe, codepoint-safe(not strict) codepoint-safe(strict)*/
132 0x61, 0x61, 0x61,
133 0x7f, 0x7f, 0x7f,
134 0x4e8c, 0x4e8c, 0x4e8c,
135 0x4e8c, 0x4e8c, 0x4e8c ,
136 0x4e8c, 0x4e8c, 0x4e8c,
137 0x10401, 0x10401, 0x10401 ,
138 0x10401, 0x10401, 0x10401 ,
139 0x10401, 0x10401, 0x10401 ,
140 0x10401, 0x10401, 0x10401,
141 0x25, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
142 0x65, 0x65, 0x65,
143 0x31, 0x31, 0x31,
144 0x31, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
145 0x240, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1
146 };
147 uint16_t i=0;
148 UChar32 c;
149 uint32_t offset=0;
150
151 for(offset=0; offset<sizeof(input); offset++) {
152 if (offset < sizeof(input) - 1) {
153 UTF8_GET_CHAR_UNSAFE(input, offset, c);
154 if(c != result[i]){
155 log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
156
157 }
158
159 U8_GET_UNSAFE(input, offset, c);
160 if(c != result[i]){
161 log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
162
163 }
164 }
165
166 U8_GET(input, 0, offset, sizeof(input), c);
167 if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){
168 log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
169 }
170
171 UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE);
172 if(c != result[i+1]){
173 log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
174 }
175
176 UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE);
177 if(c != result[i+2]){
178 log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
179 }
180
181 i=(uint16_t)(i+3);
182 }
183 }
184
185 static void TestNextPrevChar(){
186 static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00};
187 static const UChar32 result[]={
188 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/
189 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000,
190 0x10401, 0x10401, 0x10401, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
191 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841410, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
192 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xa1050, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
193 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
194 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x61, 0x61, 0x61,
195 0x80, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xc2, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
196 0xfd, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x77e, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
197 0xbe, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xfd, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
198 0xa1, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
199 0x61, 0x61, 0x61, 0xc0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
200 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x10401, 0x10401, 0x10401,
201 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF_ERROR_VALUE, UTF_ERROR_VALUE,
202 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
203 0x0840, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
204 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061
205 };
206 static const int32_t movedOffset[]={
207 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/
208 1, 1, 1, 15, 15, 15,
209 5, 5, 5, 14, 14 , 14,
210 3, 3, 3, 9, 13, 13,
211 4, 4, 4, 9, 12, 12,
212 5, 5, 5, 9, 11, 11,
213 7, 7, 7, 10, 10, 10,
214 7, 7, 7, 9, 9, 9,
215 8, 9, 9, 7, 7, 7,
216 9, 9, 9, 7, 7, 7,
217 11, 10, 10, 5, 5, 5,
218 11, 11, 11, 5, 5, 5,
219 12, 12, 12, 1, 1, 1,
220 13, 13, 13, 1, 1, 1,
221 14, 14, 14, 1, 1, 1,
222 14, 15, 15, 1, 1, 1,
223 14, 16, 16, 0, 0, 0,
224
225
226 };
227
228
229 UChar32 c=0x0000;
230 uint32_t i=0;
231 uint32_t offset=0;
232 int32_t setOffset=0;
233 for(offset=0; offset<sizeof(input); offset++){
234 if (offset < sizeof(input) - 2) { /* Can't have it go off the end of the array based on input */
235 setOffset=offset;
236 UTF8_NEXT_CHAR_UNSAFE(input, setOffset, c);
237 if(setOffset != movedOffset[i]){
238 log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
239 offset, movedOffset[i], setOffset);
240 }
241 if(c != result[i]){
242 log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
243 }
244
245 setOffset=offset;
246 U8_NEXT_UNSAFE(input, setOffset, c);
247 if(setOffset != movedOffset[i]){
248 log_err("ERROR: U8_NEXT_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
249 offset, movedOffset[i], setOffset);
250 }
251 if(c != result[i]){
252 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
253 }
254 }
255
256 setOffset=offset;
257 UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE);
258 if(setOffset != movedOffset[i+1]){
259 log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
260 offset, movedOffset[i+1], setOffset);
261 }
262 if(c != result[i+1]){
263 log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
264 }
265
266 setOffset=offset;
267 U8_NEXT(input, setOffset, sizeof(input), c);
268 if(setOffset != movedOffset[i+1]){
269 log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
270 offset, movedOffset[i+1], setOffset);
271 }
272 if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){
273 log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
274 }
275
276 setOffset=offset;
277 UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE);
278 if(setOffset != movedOffset[i+1]){
279 log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
280 offset, movedOffset[i+2], setOffset);
281 }
282 if(c != result[i+2]){
283 log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
284 }
285
286 i=i+6;
287 }
288
289 i=0;
290 for(offset=sizeof(input); offset > 0; --offset){
291 setOffset=offset;
292 UTF8_PREV_CHAR_UNSAFE(input, setOffset, c);
293 if(setOffset != movedOffset[i+3]){
294 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
295 offset, movedOffset[i+3], setOffset);
296 }
297 if(c != result[i+3]){
298 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
299 }
300
301 setOffset=offset;
302 UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
303 if(setOffset != movedOffset[i+4]){
304 log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
305 offset, movedOffset[i+4], setOffset);
306 }
307 if(c != result[i+4]){
308 log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
309 }
310
311 setOffset=offset;
312 U8_PREV(input, 0, setOffset, c);
313 if(setOffset != movedOffset[i+4]){
314 log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
315 offset, movedOffset[i+4], setOffset);
316 }
317 if(UTF_IS_ERROR(result[i+4]) ? c >= 0 : c != result[i+4]){
318 log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
319 }
320
321 setOffset=offset;
322 UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, TRUE);
323 if(setOffset != movedOffset[i+5]){
324 log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
325 offset, movedOffset[i+5], setOffset);
326 }
327 if(c != result[i+5]){
328 log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
329 }
330
331 i=i+6;
332 }
333
334 {
335 /* test non-characters */
336 static const uint8_t nonChars[]={
337 0xef, 0xb7, 0x90, /* U+fdd0 */
338 0xef, 0xbf, 0xbf, /* U+feff */
339 0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */
340 0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */
341 0xf4, 0x8f, 0xbf, 0xbe /* U+10fffe */
342 };
343
344 UChar32 ch;
345 int32_t idx;
346
347 for(idx=0; idx<(int32_t)sizeof(nonChars);) {
348 U8_NEXT(nonChars, idx, sizeof(nonChars), ch);
349 if(!U_IS_UNICODE_NONCHAR(ch)) {
350 log_err("U8_NEXT(before %d) failed to read a non-character\n", idx);
351 }
352 }
353 for(idx=(int32_t)sizeof(nonChars); idx>0;) {
354 U8_PREV(nonChars, 0, idx, ch);
355 if(!U_IS_UNICODE_NONCHAR(ch)) {
356 log_err("U8_PREV(at %d) failed to read a non-character\n", idx);
357 }
358 }
359 }
360 }
361
/*
 * Exercises the macros that move an offset forward or backward by one or by
 * N code points (FWD_1, BACK_1, FWD_N, BACK_N), in their UTF8_..._UNSAFE /
 * UTF8_..._SAFE and U8_..._UNSAFE / U8_... spellings, and compares every
 * resulting offset with the expected-offset tables below.
 */
static void TestFwdBack(){
    static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};
    static const uint16_t fwd_unsafe[] ={1, 5, 6, 7, 9, 10, 11, 13, 14, 15, 16, 20, };
    static const uint16_t fwd_safe[]   ={1, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
    static const uint16_t back_unsafe[]={17, 16, 12, 11, 9, 7, 6, 5, 1, 0};
    static const uint16_t back_safe[]  ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0};

    static const uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1, 5};
    static const uint16_t fwd_N_unsafe[] ={0, 1, 6, 10, 11, 14, 15};
    static const uint16_t fwd_N_safe[]   ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */
    static const uint16_t back_N_unsafe[]={18, 17, 12, 7, 6, 1, 0};
    static const uint16_t back_N_safe[]  ={18, 17, 15, 12, 11, 9, 7, 0};


    uint32_t offunsafe=0, offsafe=0;

    uint32_t i=0;

    /* ---- forward by one ---- */
    offunsafe=0;
    i=0;
    while(offunsafe < sizeof(input)){
        UTF8_FWD_1_UNSAFE(input, offunsafe);
        if(offunsafe != fwd_unsafe[i]){
            log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], (int)offunsafe);
        }
        i++;
    }

    /* restart from 0: without the reset this loop would never execute */
    offunsafe=0;
    i=0;
    while(offunsafe < sizeof(input)){
        U8_FWD_1_UNSAFE(input, offunsafe);
        if(offunsafe != fwd_unsafe[i]){
            log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], (int)offunsafe);
        }
        i++;
    }

    offsafe=0;
    i=0;
    while(offsafe < sizeof(input)){
        UTF8_FWD_1_SAFE(input, offsafe, sizeof(input));
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], (int)offsafe);
        }
        i++;
    }

    /* restart from 0: without the reset this loop would never execute */
    offsafe=0;
    i=0;
    while(offsafe < sizeof(input)){
        U8_FWD_1(input, offsafe, sizeof(input));
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], (int)offsafe);
        }
        i++;
    }

    /* ---- backward by one ---- */
    offunsafe=sizeof(input);
    i=0;
    while(offunsafe > 0){
        UTF8_BACK_1_UNSAFE(input, offunsafe);
        if(offunsafe != back_unsafe[i]){
            log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], (int)offunsafe);
        }
        i++;
    }

    offunsafe=sizeof(input);
    i=0;
    while(offunsafe > 0){
        U8_BACK_1_UNSAFE(input, offunsafe);
        if(offunsafe != back_unsafe[i]){
            log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], (int)offunsafe);
        }
        i++;
    }

    /* the safe checks report back_safe[], the table they are compared with */
    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
        UTF8_BACK_1_SAFE(input, 0, offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_safe[i], (int)offsafe);
        }
        i++;
    }

    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
        U8_BACK_1(input, 0, offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i], (int)offsafe);
        }
        i++;
    }

    /* ---- forward by N; the unsafe variants skip the last two Nvalue entries ---- */
    offunsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        UTF8_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != fwd_N_unsafe[i]){
            log_err("ERROR: Forward_N_unsafe offset=%d expected:%d, Got:%d\n", (int)i, fwd_N_unsafe[i], (int)offunsafe);
        }
    }

    offunsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        U8_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != fwd_N_unsafe[i]){
            log_err("ERROR: U8_FWD_N_UNSAFE offset=%d expected:%d, Got:%d\n", (int)i, fwd_N_unsafe[i], (int)offunsafe);
        }
    }

    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", (int)i, fwd_N_safe[i], (int)offsafe);
        }

    }

    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", (int)i, fwd_N_safe[i], (int)offsafe);
        }

    }

    /* ---- backward by N ---- */
    offunsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        UTF8_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != back_N_unsafe[i]){
            log_err("ERROR: backward_N_unsafe offset=%d expected:%d, Got:%d\n", (int)i, back_N_unsafe[i], (int)offunsafe);
        }
    }

    offunsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        U8_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != back_N_unsafe[i]){
            log_err("ERROR: U8_BACK_N_UNSAFE offset=%d expected:%d, Got:%d\n", (int)i, back_N_unsafe[i], (int)offunsafe);
        }
    }

    offsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", (int)i, back_N_safe[i], (long)offsafe);
        }
    }

    offsafe=sizeof(input);
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        U8_BACK_N(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", (int)i, back_N_safe[i], (long)offsafe);
        }
    }
}
520
/*
 * For every byte offset of a fixed input, applies the "set to code point
 * start" and "set to code point limit" macros (unsafe and safe, UTF8_ and
 * U8_ spellings) and compares the adjusted offset with the matching
 * expected-offset table, which has one entry per input offset.
 */
static void TestSetChar(){
    static const uint8_t input[]
        = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };
    static const int16_t start_unsafe[]
        = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13 };
    static const int16_t start_safe[]
        = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
    static const int16_t limit_unsafe[]
        = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10, 10, 15 };
    static const int16_t limit_safe[]
        = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };

    int32_t offset=0, setOffset=0;
    for(offset=0; offset<(int32_t)sizeof(input); offset++){
        setOffset=offset;
        UTF8_SET_CHAR_START_UNSAFE(input, setOffset);
        if(setOffset != start_unsafe[offset]){
            log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                    (long)offset, (long)start_unsafe[offset], (long)setOffset);
        }

        setOffset=offset;
        U8_SET_CP_START_UNSAFE(input, setOffset);
        if(setOffset != start_unsafe[offset]){
            log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                    (long)offset, (long)start_unsafe[offset], (long)setOffset);
        }

        setOffset=offset;
        UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);
        if(setOffset != start_safe[offset]){
            log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                    (long)offset, (long)start_safe[offset], (long)setOffset);
        }

        setOffset=offset;
        U8_SET_CP_START(input, 0, setOffset);
        if(setOffset != start_safe[offset]){
            log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n",
                    (long)offset, (long)start_safe[offset], (long)setOffset);
        }

        /*
         * The unsafe limit macros are skipped at offset 0 so that they cannot
         * leave the array. NOTE(review): presumably they step backwards first
         * and would read before input[0] -- confirm against utf8.h.
         */
        if (offset != 0) {
            setOffset=offset;
            UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
            if(setOffset != limit_unsafe[offset]){
                log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                        (long)offset, (long)limit_unsafe[offset], (long)setOffset);
            }

            setOffset=offset;
            U8_SET_CP_LIMIT_UNSAFE(input, setOffset);
            if(setOffset != limit_unsafe[offset]){
                log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                        (long)offset, (long)limit_unsafe[offset], (long)setOffset);
            }
        }

        setOffset=offset;
        UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input));
        if(setOffset != limit_safe[offset]){
            log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                    (long)offset, (long)limit_safe[offset], (long)setOffset);
        }

        setOffset=offset;
        U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input));
        if(setOffset != limit_safe[offset]){
            log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n",
                    (long)offset, (long)limit_safe[offset], (long)setOffset);
        }
    }
}
589
/*
 * Writes single code points into a copy of a fixed 11-byte string with
 * UTF8_APPEND_CHAR_UNSAFE and UTF8_APPEND_CHAR_SAFE, then checks the
 * resulting offset (movedOffset[]) and the first 11 bytes of the buffer
 * (result[][]).
 *
 * test[] holds (append position, code point) pairs. The first half of the
 * pairs is run through the unsafe macro, the second half through the safe
 * macro; movedOffset[] and result[][] have one entry per pair in that order.
 */
static void TestAppendChar(){
    static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
    static const uint32_t test[]={
        /*append-position(unsafe), CHAR to be appended */
        0, 0x10401,
        2, 0x0028,
        2, 0x007f,
        3, 0xd801,
        1, 0x20402,
        8, 0x10401,
        5, 0xc0,
        5, 0xc1,
        5, 0xfd,
        6, 0x80,
        6, 0x81,
        6, 0xbf,
        7, 0xfe,

        /*append-position(safe), CHAR to be appended */
        0, 0x10401,
        2, 0x0028,
        3, 0x7f,
        3, 0xd801, /* illegal for UTF-8 starting with Unicode 3.2 */
        1, 0x20402,
        9, 0x10401,
        5, 0xc0,
        5, 0xc1,
        5, 0xfd,
        6, 0x80,
        6, 0x81,
        6, 0xbf,
        7, 0xfe,

    };
    static const uint16_t movedOffset[]={
        /*offset-moved-to(unsafe)*/
        4, /*for append-pos: 0 , CHAR 0x10401*/
        3,
        3,
        6,
        5,
        12,
        7,
        7,
        7,
        8,
        8,
        8,
        9,

        /*offset-moved-to(safe)*/
        4, /*for append-pos: 0, CHAR 0x10401*/
        3,
        4,
        6,
        5,
        11,
        7,
        7,
        7,
        8,
        8,
        8,
        9,

    };

    static const uint8_t result[][11]={
        /*unsafe*/
        {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
        /*safe*/
        {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /*gets UTF8_ERROR_VALUE_2 which takes 2 bytes 0xc2, 0x9f*/

        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},

    };
    /* number of (position, code point) pairs, and how many of them are unsafe cases */
    const uint16_t caseCount=(uint16_t)(sizeof(test)/sizeof(test[0])/2);
    const uint16_t unsafeCount=(uint16_t)(caseCount/2);
    const uint16_t size=sizeof(s)/sizeof(s[0]);
    uint16_t i, count;
    uint8_t str[12];    /* one byte larger than s: the unsafe case at position 8 writes 4 bytes */
    uint32_t offset;

    for(count=0; count<caseCount; count++){
        i=(uint16_t)(2*count);

        /* every case starts from a fresh copy of s */
        uprv_memcpy(str, s, size);
        offset=test[i];
        if(count<unsafeCount){
            UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);
            if(offset != movedOffset[count]){
                log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
                        count, movedOffset[count], (int)offset);

            }
            if(uprv_memcmp(str, result[count], size) !=0){
                log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", count);
                printUChars(result[count], size);
                log_err("\nGot: ");
                printUChars(str, size);
                log_err("\n");
            }
        }else{
            UTF8_APPEND_CHAR_SAFE(str, offset, size, test[i+1]);
            if(offset != movedOffset[count]){
                log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
                        count, movedOffset[count], (int)offset);

            }
            if(uprv_memcmp(str, result[count], size) !=0){
                log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", count);
                printUChars(result[count], size);
                log_err("\nGot: ");
                printUChars(str, size);
                log_err("\n");
            }
        }
    }
}
758
759 static void TestAppend() {
760 static const UChar32 codePoints[]={
761 0x61, 0xdf, 0x901, 0x3040,
762 0xac00, 0xd800, 0xdbff, 0xdcde,
763 0xdffd, 0xe000, 0xffff, 0x10000,
764 0x12345, 0xe0021, 0x10ffff, 0x110000,
765 0x234567, 0x7fffffff, -1, -1000,
766 0, 0x400
767 };
768 static const uint8_t expectUnsafe[]={
769 0x61, 0xc3, 0x9f, 0xe0, 0xa4, 0x81, 0xe3, 0x81, 0x80,
770 0xea, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xed, 0xaf, 0xbf, 0xed, 0xb3, 0x9e,
771 0xed, 0xbf, 0xbd, 0xee, 0x80, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80,
772 0xf0, 0x92, 0x8d, 0x85, 0xf3, 0xa0, 0x80, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, /* not 0x110000 */
773 /* none from this line */
774 0, 0xd0, 0x80
775 }, expectSafe[]={
776 0x61, 0xc3, 0x9f, 0xe0, 0xa4, 0x81, 0xe3, 0x81, 0x80,
777 0xea, 0xb0, 0x80, /* no surrogates */
778 /* no surrogates */ 0xee, 0x80, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80,
779 0xf0, 0x92, 0x8d, 0x85, 0xf3, 0xa0, 0x80, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, /* not 0x110000 */
780 /* none from this line */
781 0, 0xd0, 0x80
782 };
783
784 uint8_t buffer[100];
785 UChar32 c;
786 int32_t i, length;
787 UBool isError, expectIsError, wrongIsError;
788
789 length=0;
790 for(i=0; i<LENGTHOF(codePoints); ++i) {
791 c=codePoints[i];
792 if(c<0 || 0x10ffff<c) {
793 continue; /* skip non-code points for U8_APPEND_UNSAFE */
794 }
795
796 U8_APPEND_UNSAFE(buffer, length, c);
797 }
798 if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) {
799 log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
800 }
801
802 length=0;
803 wrongIsError=FALSE;
804 for(i=0; i<LENGTHOF(codePoints); ++i) {
805 c=codePoints[i];
806 expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
807 isError=FALSE;
808
809 U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
810 wrongIsError|= isError!=expectIsError;
811 }
812 if(wrongIsError) {
813 log_err("U8_APPEND did not set isError correctly\n");
814 }
815 if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) {
816 log_err("U8_APPEND did not generate the expected output\n");
817 }
818 }
819
/*
 * Logs len bytes starting at uchars through log_err, each formatted as
 * "0xNN " with two hex digits. Nothing is logged when len is not positive.
 */
static void printUChars(const uint8_t *uchars, int16_t len){
    const uint8_t *cursor=uchars;
    int16_t remaining;

    for(remaining=len; remaining>0; --remaining){
        log_err("0x%02x ", *cursor);
        ++cursor;
    }
}
825 }