]>
git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/utf8tst.c
1 /********************************************************************
3 * Copyright (c) 1998-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
9 * Modification History:
11 * Date Name Description
12 * 07/24/2000 Madhu Creation
13 *******************************************************************************
16 #include "unicode/utypes.h"
17 #include "unicode/utf8.h"
/* LENGTHOF(array): number of elements, computed as the total size of the array divided by the size of its first element. */
21 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
/* Helper that logs len bytes starting at uchars; its definition is the last function in this file. */
23 static void printUChars(const uint8_t *uchars
, int16_t len
);
/* Forward declarations of the file-local test cases that addUTF8Test registers. */
25 static void TestCodeUnitValues(void);
26 static void TestCharLength(void);
27 static void TestGetChar(void);
28 static void TestNextPrevChar(void);
29 static void TestFwdBack(void);
30 static void TestSetChar(void);
31 static void TestAppendChar(void);
32 static void TestAppend(void);
/* Registration entry point; the only function in this excerpt not declared static. */
34 void addUTF8Test(TestNode
** root
);
/*
 * Registers the UTF-8 test cases of this file on the test tree passed in
 * through root: one addTest() call per test function, each under a
 * "utf8tst/<TestName>" path.
 */
37 addUTF8Test(TestNode
** root
)
39 addTest(root
, &TestCodeUnitValues
, "utf8tst/TestCodeUnitValues");
40 addTest(root
, &TestCharLength
, "utf8tst/TestCharLength" );
41 addTest(root
, &TestGetChar
, "utf8tst/TestGetChar" );
42 addTest(root
, &TestNextPrevChar
, "utf8tst/TestNextPrevChar" );
43 addTest(root
, &TestFwdBack
, "utf8tst/TestFwdBack" );
44 addTest(root
, &TestSetChar
, "utf8tst/TestSetChar" );
45 addTest(root
, &TestAppendChar
, "utf8tst/TestAppendChar" );
46 addTest(root
, &TestAppend
, "utf8tst/TestAppend" );
/*
 * Classifies each byte of codeunit[] with the UTF8_IS_SINGLE / UTF8_IS_LEAD /
 * UTF8_IS_TRAIL macros and their U8_ counterparts, and logs an error when a
 * byte is not reported as exactly the expected one of the three classes.
 * NOTE(review): the conditions that choose which of the three checks applies
 * to a given table index are not visible in this excerpt; the table looks like
 * four single bytes, then four lead bytes, then four trail bytes -- confirm.
 */
49 static void TestCodeUnitValues()
51 static const uint8_t codeunit
[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};
54 for(i
=0; i
<sizeof(codeunit
)/sizeof(codeunit
[0]); i
++){
55 uint8_t c
=codeunit
[i
];
56 log_verbose("Testing code unit value of %x\n", c
);
/* Expect "single": must be single and neither lead nor trail, for both macro families. */
58 if(!UTF8_IS_SINGLE(c
) || UTF8_IS_LEAD(c
) || UTF8_IS_TRAIL(c
) || !U8_IS_SINGLE(c
) || U8_IS_LEAD(c
) || U8_IS_TRAIL(c
)){
59 log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
60 c
, UTF8_IS_SINGLE(c
) ? 'y' : 'n', UTF8_IS_LEAD(c
) ? 'y' : 'n', UTF8_IS_TRAIL(c
) ? 'y' : 'n');
/* Expect "lead": must be lead and neither single nor trail. */
63 if(!UTF8_IS_LEAD(c
) || UTF8_IS_SINGLE(c
) || UTF8_IS_TRAIL(c
) || !U8_IS_LEAD(c
) || U8_IS_SINGLE(c
) || U8_IS_TRAIL(c
)){
64 log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
65 c
, UTF8_IS_SINGLE(c
) ? 'y' : 'n', UTF8_IS_LEAD(c
) ? 'y' : 'n', UTF8_IS_TRAIL(c
) ? 'y' : 'n');
/* Expect "trail": must be trail and neither single nor lead. */
68 if(!UTF8_IS_TRAIL(c
) || UTF8_IS_SINGLE(c
) || UTF8_IS_LEAD(c
) || !U8_IS_TRAIL(c
) || U8_IS_SINGLE(c
) || U8_IS_LEAD(c
)){
69 log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
70 c
, UTF8_IS_SINGLE(c
) ? 'y' : 'n', UTF8_IS_LEAD(c
) ? 'y' : 'n', UTF8_IS_TRAIL(c
) ? 'y' : 'n');
/*
 * Walks codepoint[] in pairs -- codepoint[i] is the expected number of UTF-8
 * code units and codepoint[i+1] is the code point -- and checks that
 * UTF8_CHAR_LENGTH and U8_LENGTH both report that length, and that
 * UTF8_NEED_MULTIPLE_UCHAR is false exactly when the expected length is 1.
 * NOTE(review): the contents of the table are not visible in this excerpt.
 */
76 static void TestCharLength()
78 static const uint32_t codepoint
[]={
/* Step by two: each iteration consumes one (length, code point) pair. */
97 for(i
=0; i
<sizeof(codepoint
)/sizeof(codepoint
[0]); i
=(int16_t)(i
+2)){
98 UChar32 c
=codepoint
[i
+1];
/* The failure message below reports only the UTF8_CHAR_LENGTH result, even when U8_LENGTH is the one that differed. */
99 if(UTF8_CHAR_LENGTH(c
) != (uint16_t)codepoint
[i
] || U8_LENGTH(c
) != (uint16_t)codepoint
[i
]){
100 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c
, codepoint
[i
], UTF8_CHAR_LENGTH(c
));
102 log_verbose("The no: of code units for %lx is %d\n",c
, UTF8_CHAR_LENGTH(c
) );
/* A code point needs multiple code units unless its expected length is exactly 1. */
104 multiple
=(UBool
)(codepoint
[i
] == 1 ? FALSE
: TRUE
);
105 if(UTF8_NEED_MULTIPLE_UCHAR(c
) != multiple
){
106 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c
);
/*
 * Reads a code point at every offset of input[] with the random-access "get"
 * macros and compares it with result[], which holds three expected values per
 * offset: unsafe, safe (not strict) and safe (strict).
 * The unsafe macros are only exercised while offset < sizeof(input) - 1.
 * NOTE(review): the contents of input[] and the stepping of i are not visible
 * in this excerpt.
 */
111 static void TestGetChar()
113 static const uint8_t input
[]={
130 static const UChar32 result
[]={
131 /*codepoint-unsafe, codepoint-safe(not strict) codepoint-safe(strict)*/
134 0x4e8c, 0x4e8c, 0x4e8c,
135 0x4e8c, 0x4e8c, 0x4e8c ,
136 0x4e8c, 0x4e8c, 0x4e8c,
137 0x10401, 0x10401, 0x10401 ,
138 0x10401, 0x10401, 0x10401 ,
139 0x10401, 0x10401, 0x10401 ,
140 0x10401, 0x10401, 0x10401,
141 0x25, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
144 0x31, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
145 0x240, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
151 for(offset
=0; offset
<sizeof(input
); offset
++) {
/* Unsafe variants are skipped for the last byte of input[]. */
152 if (offset
< sizeof(input
) - 1) {
153 UTF8_GET_CHAR_UNSAFE(input
, offset
, c
);
155 log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
], c
);
159 U8_GET_UNSAFE(input
, offset
, c
);
161 log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
], c
);
166 U8_GET(input
, 0, offset
, sizeof(input
), c
);
/* Where the table holds an error value, U8_GET is only required to produce a negative c; otherwise c must match the table. */
167 if(UTF_IS_ERROR(result
[i
+1]) ? c
>= 0 : c
!= result
[i
+1]){
/* The message names U8_GET so that a failure here can be told apart from the UTF8_GET_CHAR_SAFE check that follows. */
168 log_err("ERROR: U8_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+1], c
);
171 UTF8_GET_CHAR_SAFE(input
, 0, offset
, sizeof(input
), c
, FALSE
);
172 if(c
!= result
[i
+1]){
173 log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+1], c
);
176 UTF8_GET_CHAR_SAFE(input
, 0, offset
, sizeof(input
), c
, TRUE
);
177 if(c
!= result
[i
+2]){
178 log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+2], c
);
/*
 * Iterates over input[] forward with the "next" macros and backward with the
 * "prev" macros. For every starting offset it checks both the decoded code
 * point (against result[]) and the offset the macro moved to (against
 * movedOffset[]). Both tables hold six columns per offset:
 *   next_unsafe, next_safe_ns, next_safe_s, prev_unsafe, prev_safe_ns, prev_safe_s
 * so columns i..i+2 belong to the forward checks and i+3..i+5 to the backward
 * checks. The final part checks that U8_NEXT and U8_PREV return Unicode
 * non-characters from nonChars[].
 * NOTE(review): most rows of movedOffset[] and the stepping of i are not
 * visible in this excerpt.
 */
185 static void TestNextPrevChar(){
186 static const uint8_t input
[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00};
187 static const UChar32 result
[]={
188 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/
189 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000,
190 0x10401, 0x10401, 0x10401, 0xf0, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
191 0x90, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0x2841410, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
192 0x90, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0xa1050, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
193 0x81, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0x2841, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
194 0x00, UTF8_ERROR_VALUE_2
, UTF8_ERROR_VALUE_2
, 0x61, 0x61, 0x61,
195 0x80, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0xc2, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
196 0xfd, UTF8_ERROR_VALUE_2
, UTF8_ERROR_VALUE_2
, 0x77e, UTF8_ERROR_VALUE_2
, UTF8_ERROR_VALUE_2
,
197 0xbe, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0xfd, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
198 0xa1, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0x00, UTF8_ERROR_VALUE_2
, UTF8_ERROR_VALUE_2
,
199 0x61, 0x61, 0x61, 0xc0, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
200 0x81, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0x10401, 0x10401, 0x10401,
201 0x90, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0x410, UTF_ERROR_VALUE
, UTF_ERROR_VALUE
,
202 0x90, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0x410, UTF8_ERROR_VALUE_2
, UTF8_ERROR_VALUE_2
,
203 0x0840, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
, 0xf0, UTF8_ERROR_VALUE_1
, UTF8_ERROR_VALUE_1
,
204 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061
206 static const int32_t movedOffset
[]={
207 /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/
209 5, 5, 5, 14, 14 , 14,
/* Forward pass: one set of checks per starting offset. */
233 for(offset
=0; offset
<sizeof(input
); offset
++){
234 if (offset
< sizeof(input
) - 2) { /* Can't have it go off the end of the array based on input */
236 UTF8_NEXT_CHAR_UNSAFE(input
, setOffset
, c
);
237 if(setOffset
!= movedOffset
[i
]){
238 log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
239 offset
, movedOffset
[i
], setOffset
);
242 log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
], c
);
246 U8_NEXT_UNSAFE(input
, setOffset
, c
);
247 if(setOffset
!= movedOffset
[i
]){
248 log_err("ERROR: U8_NEXT_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
249 offset
, movedOffset
[i
], setOffset
);
252 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
], c
);
257 UTF8_NEXT_CHAR_SAFE(input
, setOffset
, sizeof(input
), c
, FALSE
);
258 if(setOffset
!= movedOffset
[i
+1]){
259 log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
260 offset
, movedOffset
[i
+1], setOffset
);
262 if(c
!= result
[i
+1]){
263 log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+1], c
);
267 U8_NEXT(input
, setOffset
, sizeof(input
), c
);
268 if(setOffset
!= movedOffset
[i
+1]){
269 log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
270 offset
, movedOffset
[i
+1], setOffset
);
/* Where the table holds an error value, U8_NEXT is only required to produce a negative c. */
272 if(UTF_IS_ERROR(result
[i
+1]) ? c
>= 0 : c
!= result
[i
+1]){
273 log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+1], c
);
277 UTF8_NEXT_CHAR_SAFE(input
, setOffset
, sizeof(input
), c
, TRUE
);
/* Strict mode is checked against the next_safe_s column (i+2), the same entry the failure message reports. */
278 if(setOffset
!= movedOffset
[i
+2]){
279 log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
280 offset
, movedOffset
[i
+2], setOffset
);
282 if(c
!= result
[i
+2]){
283 log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+2], c
);
/* Backward pass: start from the end of input[] and use the prev_* columns (i+3..i+5). */
290 for(offset
=sizeof(input
); offset
> 0; --offset
){
292 UTF8_PREV_CHAR_UNSAFE(input
, setOffset
, c
);
293 if(setOffset
!= movedOffset
[i
+3]){
294 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
295 offset
, movedOffset
[i
+3], setOffset
);
297 if(c
!= result
[i
+3]){
298 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+3], c
);
302 UTF8_PREV_CHAR_SAFE(input
, 0, setOffset
, c
, FALSE
);
303 if(setOffset
!= movedOffset
[i
+4]){
304 log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
305 offset
, movedOffset
[i
+4], setOffset
);
307 if(c
!= result
[i
+4]){
308 log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+4], c
);
312 U8_PREV(input
, 0, setOffset
, c
);
313 if(setOffset
!= movedOffset
[i
+4]){
314 log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
315 offset
, movedOffset
[i
+4], setOffset
);
/* Where the table holds an error value, U8_PREV is only required to produce a negative c. */
317 if(UTF_IS_ERROR(result
[i
+4]) ? c
>= 0 : c
!= result
[i
+4]){
318 log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+4], c
);
322 UTF8_PREV_CHAR_SAFE(input
, 0, setOffset
, c
, TRUE
);
323 if(setOffset
!= movedOffset
[i
+5]){
324 log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
325 offset
, movedOffset
[i
+5], setOffset
);
327 if(c
!= result
[i
+5]){
328 log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset
, result
[i
+5], c
);
335 /* test non-characters */
336 static const uint8_t nonChars
[]={
337 0xef, 0xb7, 0x90, /* U+fdd0 */
338 0xef, 0xbf, 0xbf, /* U+ffff */
339 0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */
340 0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */
341 0xf4, 0x8f, 0xbf, 0xbe /* U+10fffe */
/* The loop header has no increment; idx is passed to U8_NEXT along with the buffer length. */
347 for(idx
=0; idx
<(int32_t)sizeof(nonChars
);) {
348 U8_NEXT(nonChars
, idx
, sizeof(nonChars
), ch
);
349 if(!U_IS_UNICODE_NONCHAR(ch
)) {
350 log_err("U8_NEXT(before %d) failed to read a non-character\n", idx
);
/* Same check backward: idx starts at the end and is passed to U8_PREV. */
353 for(idx
=(int32_t)sizeof(nonChars
); idx
>0;) {
354 U8_PREV(nonChars
, 0, idx
, ch
);
355 if(!U_IS_UNICODE_NONCHAR(ch
)) {
356 log_err("U8_PREV(at %d) failed to read a non-character\n", idx
);
/*
 * Checks the offset-only iteration macros over input[]:
 *   - forward/backward by one code point (FWD_1 / BACK_1), unsafe and safe,
 *     against fwd_unsafe[], fwd_safe[], back_unsafe[] and back_safe[];
 *   - forward/backward by Nvalue[i] code points (FWD_N / BACK_N), unsafe and
 *     safe, against fwd_N_unsafe[], fwd_N_safe[], back_N_unsafe[] and
 *     back_N_safe[].
 * Each check is run for the older UTF8_* macro and its U8_* counterpart.
 * The unsafe *_N loops stop two entries before the end of Nvalue[].
 * NOTE(review): the resets of the offsets and the stepping of i between the
 * individual loops are not visible in this excerpt.
 */
362 static void TestFwdBack(){
363 static const uint8_t input
[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};
364 static const uint16_t fwd_unsafe
[] ={1, 5, 6, 7, 9, 10, 11, 13, 14, 15, 16, 20, };
365 static const uint16_t fwd_safe
[] ={1, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
366 static const uint16_t back_unsafe
[]={17, 16, 12, 11, 9, 7, 6, 5, 1, 0};
367 static const uint16_t back_safe
[] ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0};
369 static const uint16_t Nvalue
[]= {0, 1, 2, 3, 1, 2, 1, 5};
370 static const uint16_t fwd_N_unsafe
[] ={0, 1, 6, 10, 11, 14, 15};
371 static const uint16_t fwd_N_safe
[] ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */
372 static const uint16_t back_N_unsafe
[]={18, 17, 12, 7, 6, 1, 0};
373 static const uint16_t back_N_safe
[] ={18, 17, 15, 12, 11, 9, 7, 0};
376 uint32_t offunsafe
=0, offsafe
=0;
/* Forward by one, unsafe. */
379 while(offunsafe
< sizeof(input
)){
380 UTF8_FWD_1_UNSAFE(input
, offunsafe
);
381 if(offunsafe
!= fwd_unsafe
[i
]){
382 log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe
[i
], offunsafe
);
388 while(offunsafe
< sizeof(input
)){
389 U8_FWD_1_UNSAFE(input
, offunsafe
);
390 if(offunsafe
!= fwd_unsafe
[i
]){
391 log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe
[i
], offunsafe
);
/* Forward by one, safe (bounded by sizeof(input)). */
397 while(offsafe
< sizeof(input
)){
398 UTF8_FWD_1_SAFE(input
, offsafe
, sizeof(input
));
399 if(offsafe
!= fwd_safe
[i
]){
400 log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe
[i
], offsafe
);
406 while(offsafe
< sizeof(input
)){
407 U8_FWD_1(input
, offsafe
, sizeof(input
));
408 if(offsafe
!= fwd_safe
[i
]){
409 log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe
[i
], offsafe
);
/* Backward by one, unsafe, starting from the end of input[]. */
414 offunsafe
=sizeof(input
);
416 while(offunsafe
> 0){
417 UTF8_BACK_1_UNSAFE(input
, offunsafe
);
418 if(offunsafe
!= back_unsafe
[i
]){
419 log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe
[i
], offunsafe
);
424 offunsafe
=sizeof(input
);
426 while(offunsafe
> 0){
427 U8_BACK_1_UNSAFE(input
, offunsafe
);
428 if(offunsafe
!= back_unsafe
[i
]){
429 log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe
[i
], offunsafe
);
/* Backward by one, safe. */
435 offsafe
=sizeof(input
);
437 UTF8_BACK_1_SAFE(input
, 0, offsafe
);
438 if(offsafe
!= back_safe
[i
]){
/* Report the value actually compared against: back_safe[i], not back_unsafe[i]. */
439 log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_safe
[i
], offsafe
);
445 offsafe
=sizeof(input
);
447 U8_BACK_1(input
, 0, offsafe
);
448 if(offsafe
!= back_safe
[i
]){
/* Same correction as above: the expected value comes from back_safe[]. */
449 log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe
[i
], offsafe
);
/* Forward by Nvalue[i], unsafe: the last two Nvalue entries are not used. */
455 for(i
=0; i
<sizeof(Nvalue
)/sizeof(Nvalue
[0])-2; i
++){
456 UTF8_FWD_N_UNSAFE(input
, offunsafe
, Nvalue
[i
]);
457 if(offunsafe
!= fwd_N_unsafe
[i
]){
458 log_err("ERROR: Forward_N_unsafe offset=%d expected:%d, Got:%d\n", i
, fwd_N_unsafe
[i
], offunsafe
);
463 for(i
=0; i
<sizeof(Nvalue
)/sizeof(Nvalue
[0])-2; i
++){
464 U8_FWD_N_UNSAFE(input
, offunsafe
, Nvalue
[i
]);
465 if(offunsafe
!= fwd_N_unsafe
[i
]){
466 log_err("ERROR: U8_FWD_N_UNSAFE offset=%d expected:%d, Got:%d\n", i
, fwd_N_unsafe
[i
], offunsafe
);
/* Forward by Nvalue[i], safe: all Nvalue entries are used. */
471 for(i
=0; i
<sizeof(Nvalue
)/sizeof(Nvalue
[0]); i
++){
472 UTF8_FWD_N_SAFE(input
, offsafe
, sizeof(input
), Nvalue
[i
]);
473 if(offsafe
!= fwd_N_safe
[i
]){
474 log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i
, fwd_N_safe
[i
], offsafe
);
480 for(i
=0; i
<sizeof(Nvalue
)/sizeof(Nvalue
[0]); i
++){
481 U8_FWD_N(input
, offsafe
, sizeof(input
), Nvalue
[i
]);
482 if(offsafe
!= fwd_N_safe
[i
]){
483 log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i
, fwd_N_safe
[i
], offsafe
);
/* Backward by Nvalue[i], unsafe, starting from the end of input[]. */
488 offunsafe
=sizeof(input
);
489 for(i
=0; i
<sizeof(Nvalue
)/sizeof(Nvalue
[0])-2; i
++){
490 UTF8_BACK_N_UNSAFE(input
, offunsafe
, Nvalue
[i
]);
491 if(offunsafe
!= back_N_unsafe
[i
]){
492 log_err("ERROR: backward_N_unsafe offset=%d expected:%d, Got:%d\n", i
, back_N_unsafe
[i
], offunsafe
);
496 offunsafe
=sizeof(input
);
497 for(i
=0; i
<sizeof(Nvalue
)/sizeof(Nvalue
[0])-2; i
++){
498 U8_BACK_N_UNSAFE(input
, offunsafe
, Nvalue
[i
]);
499 if(offunsafe
!= back_N_unsafe
[i
]){
500 log_err("ERROR: U8_BACK_N_UNSAFE offset=%d expected:%d, Got:%d\n", i
, back_N_unsafe
[i
], offunsafe
);
/* Backward by Nvalue[i], safe. */
504 offsafe
=sizeof(input
);
505 for(i
=0; i
<sizeof(Nvalue
)/sizeof(Nvalue
[0]); i
++){
506 UTF8_BACK_N_SAFE(input
, 0, offsafe
, Nvalue
[i
]);
507 if(offsafe
!= back_N_safe
[i
]){
508 log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i
, back_N_safe
[i
], offsafe
);
512 offsafe
=sizeof(input
);
513 for(i
=0; i
<sizeof(Nvalue
)/sizeof(Nvalue
[0]); i
++){
514 U8_BACK_N(input
, 0, offsafe
, Nvalue
[i
]);
515 if(offsafe
!= back_N_safe
[i
]){
516 log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i
, back_N_safe
[i
], offsafe
);
/*
 * For every offset in input[], applies the "set to code point start" and
 * "set to code point limit" macros (unsafe and safe, UTF8_* and U8_* forms)
 * to setOffset and compares the adjusted value with the matching table:
 * start_unsafe[], start_safe[], limit_unsafe[] and limit_safe[].
 * The unsafe limit macros are skipped when offset is 0.
 * NOTE(review): how setOffset is reset before each macro and how i advances
 * is not visible in this excerpt.
 */
521 static void TestSetChar(){
522 static const uint8_t input
[]
523 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };
524 static const int16_t start_unsafe
[]
525 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13 };
526 static const int16_t start_safe
[]
527 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
528 static const int16_t limit_unsafe
[]
529 = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10, 10, 15 };
530 static const int16_t limit_safe
[]
531 = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
534 int32_t offset
=0, setOffset
=0;
535 for(offset
=0; offset
<(int32_t)sizeof(input
); offset
++){
/* Start-of-code-point adjustment, unsafe forms. */
537 UTF8_SET_CHAR_START_UNSAFE(input
, setOffset
);
538 if(setOffset
!= start_unsafe
[i
]){
539 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset
, start_unsafe
[i
], setOffset
);
543 U8_SET_CP_START_UNSAFE(input
, setOffset
);
544 if(setOffset
!= start_unsafe
[i
]){
545 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset
, start_unsafe
[i
], setOffset
);
/* Start-of-code-point adjustment, safe forms (lower bound 0). */
549 UTF8_SET_CHAR_START_SAFE(input
, 0, setOffset
);
550 if(setOffset
!= start_safe
[i
]){
551 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset
, start_safe
[i
], setOffset
);
555 U8_SET_CP_START(input
, 0, setOffset
);
556 if(setOffset
!= start_safe
[i
]){
557 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset
, start_safe
[i
], setOffset
);
/* Limit adjustment, unsafe forms: only run for non-zero offsets. */
560 if (offset
!= 0) { /* Can't have it go off the end of the array */
562 UTF8_SET_CHAR_LIMIT_UNSAFE(input
, setOffset
);
563 if(setOffset
!= limit_unsafe
[i
]){
564 log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset
, limit_unsafe
[i
], setOffset
);
568 U8_SET_CP_LIMIT_UNSAFE(input
, setOffset
);
569 if(setOffset
!= limit_unsafe
[i
]){
570 log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset
, limit_unsafe
[i
], setOffset
);
/* Limit adjustment, safe forms (bounded by 0 and sizeof(input)). */
575 UTF8_SET_CHAR_LIMIT_SAFE(input
,0, setOffset
, sizeof(input
));
576 if(setOffset
!= limit_safe
[i
]){
577 log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset
, limit_safe
[i
], setOffset
);
581 U8_SET_CP_LIMIT(input
,0, setOffset
, sizeof(input
));
582 if(setOffset
!= limit_safe
[i
]){
583 log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset
, limit_safe
[i
], setOffset
);
/*
 * Table-driven test of UTF8_APPEND_CHAR_UNSAFE and UTF8_APPEND_CHAR_SAFE.
 * test[] is read in pairs; the second element of each pair, test[i+1], is the
 * character appended into a fresh copy of s. After each append the resulting
 * offset is compared with movedOffset[count] and the buffer with
 * result[count]; on a buffer mismatch both the expected and the actual bytes
 * are logged through printUChars.
 * NOTE(review): most of test[] and movedOffset[], and the code that sets
 * offset and advances count, are not visible in this excerpt.
 */
590 static void TestAppendChar(){
591 static const uint8_t s
[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
592 static const uint32_t test
[]={
593 /*append-position(unsafe), CHAR to be appended */
608 /*append-position(safe), CHAR to be appended */
612 3, 0xd801, /* illegal for UTF-8 starting with Unicode 3.2 */
624 static const uint16_t movedOffset
[]={
625 /*offset-moved-to(unsafe)*/
626 4, /*for append-pos: 0 , CHAR 0x10401*/
640 /*offset-moved-to(safe)*/
641 4, /*for append-pos: 0, CHAR 0x10401*/
657 static const uint8_t result
[][11]={
659 {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
660 {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
661 {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
662 {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00},
663 {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
664 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90},
666 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
667 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
668 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},
670 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
671 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
672 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},
674 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
676 {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
677 {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
678 {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
679 {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00},
680 {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
681 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /* gets UTF8_ERROR_VALUE_2, which takes the 2 bytes 0xc2, 0x9f */
683 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
684 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
685 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},
687 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
688 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
689 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},
691 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
/* size is the element count of s; it is the length used for every copy, compare and print below. */
698 uint16_t size
=sizeof(s
)/sizeof(s
[0]);
/* Step by two: each iteration consumes one pair from test[]. */
699 for(i
=0; i
<sizeof(test
)/sizeof(test
[0]); i
=(uint16_t)(i
+2)){
/* Start every case from a pristine copy of s. */
700 uprv_memcpy(str
, s
, size
);
703 UTF8_APPEND_CHAR_UNSAFE(str
, offset
, test
[i
+1]);
704 if(offset
!= movedOffset
[count
]){
705 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
706 count
, movedOffset
[count
], offset
);
709 if(uprv_memcmp(str
, result
[count
], size
) !=0){
710 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", count
);
711 printUChars(result
[count
], size
);
713 printUChars(str
, size
);
/* Safe variant: the buffer capacity (size) is passed to the macro as well. */
717 UTF8_APPEND_CHAR_SAFE(str
, offset
, size
, test
[i
+1]);
718 if(offset
!= movedOffset
[count
]){
719 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
720 count
, movedOffset
[count
], offset
);
723 if(uprv_memcmp(str
, result
[count
], size
) !=0){
724 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", count
);
725 printUChars(result
[count
], size
);
727 printUChars(str
, size
);
730 /*call the API instead of MACRO
731 uprv_memcpy(str, s, size);
734 if((uint32_t)(c)<=0x7f) {
735 (str)[(offset)++]=(uint8_t)(c);
737 (offset)=utf8_appendCharSafeBody(str, (int32_t)(offset), (int32_t)(size), c);
739 if(offset != movedOffset[count]){
740 log_err("ERROR: utf8_appendCharSafeBody() failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
741 count, movedOffset[count], offset);
744 if(uprv_memcmp(str, result[count], size) !=0){
745 log_err("ERROR: utf8_appendCharSafeBody() failed for count=%d. \nExpected:", count);
746 printUChars(result[count], size);
748 printUChars(str, size);
/*
 * Appends the values of codePoints[] to a buffer in two passes and compares
 * the produced bytes with a fixed expectation:
 *   1. U8_APPEND_UNSAFE, skipping values below 0 or above 0x10ffff; the
 *      output must equal expectUnsafe[].
 *   2. U8_APPEND with an isError flag; an error is expected for values below
 *      0, above 0x10ffff, or surrogates, and the output must equal
 *      expectSafe.
 * NOTE(review): the declarations of buffer, length and expectSafe, and the
 * per-iteration setup of c and isError, are not visible in this excerpt.
 */
759 static void TestAppend() {
760 static const UChar32 codePoints
[]={
761 0x61, 0xdf, 0x901, 0x3040,
762 0xac00, 0xd800, 0xdbff, 0xdcde,
763 0xdffd, 0xe000, 0xffff, 0x10000,
764 0x12345, 0xe0021, 0x10ffff, 0x110000,
765 0x234567, 0x7fffffff, -1, -1000,
768 static const uint8_t expectUnsafe
[]={
769 0x61, 0xc3, 0x9f, 0xe0, 0xa4, 0x81, 0xe3, 0x81, 0x80,
770 0xea, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xed, 0xaf, 0xbf, 0xed, 0xb3, 0x9e,
771 0xed, 0xbf, 0xbd, 0xee, 0x80, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80,
772 0xf0, 0x92, 0x8d, 0x85, 0xf3, 0xa0, 0x80, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, /* not 0x110000 */
773 /* none from this line */
776 0x61, 0xc3, 0x9f, 0xe0, 0xa4, 0x81, 0xe3, 0x81, 0x80,
777 0xea, 0xb0, 0x80, /* no surrogates */
778 /* no surrogates */ 0xee, 0x80, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80,
779 0xf0, 0x92, 0x8d, 0x85, 0xf3, 0xa0, 0x80, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, /* not 0x110000 */
780 /* none from this line */
787 UBool isError
, expectIsError
, wrongIsError
;
/* Pass 1: unsafe append of every value that lies in 0..0x10ffff. */
790 for(i
=0; i
<LENGTHOF(codePoints
); ++i
) {
792 if(c
<0 || 0x10ffff<c
) {
793 continue; /* skip non-code points for U8_APPEND_UNSAFE */
796 U8_APPEND_UNSAFE(buffer
, length
, c
);
798 if(length
!=LENGTHOF(expectUnsafe
) || 0!=memcmp(buffer
, expectUnsafe
, length
)) {
799 log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
/* Pass 2: checked append; wrongIsError accumulates any mismatch between the reported and the expected error flag. */
804 for(i
=0; i
<LENGTHOF(codePoints
); ++i
) {
806 expectIsError
= c
<0 || 0x10ffff<c
|| U_IS_SURROGATE(c
);
809 U8_APPEND(buffer
, length
, LENGTHOF(buffer
), c
, isError
);
810 wrongIsError
|= isError
!=expectIsError
;
813 log_err("U8_APPEND did not set isError correctly\n");
815 if(length
!=LENGTHOF(expectSafe
) || 0!=memcmp(buffer
, expectSafe
, length
)) {
816 log_err("U8_APPEND did not generate the expected output\n");
/*
 * Logs len bytes starting at uchars, each formatted as "0x%02x " and emitted
 * through log_err (so the dump goes to the error log).
 * NOTE(review): the declaration of i is not visible in this excerpt.
 */
820 static void printUChars(const uint8_t *uchars
, int16_t len
){
822 for(i
=0; i
<len
; i
++){
823 log_err("0x%02x ", *(uchars
+i
));