]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/utf8tst.c
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / utf8tst.c
CommitLineData
b75a7d8f 1/********************************************************************
51004dcb 2 * COPYRIGHT:
b331163b 3 * Copyright (c) 1998-2014, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*
51004dcb 7* File utf8tst.c
b75a7d8f
A
8*
9* Modification History:
10*
11* Date Name Description
51004dcb 12* 07/24/2000 Madhu Creation
b75a7d8f
A
13*******************************************************************************
14*/
15
16#include "unicode/utypes.h"
17#include "unicode/utf8.h"
18#include "cmemory.h"
19#include "cintltst.h"
20
73c04bcf
A
21/* lenient UTF-8 ------------------------------------------------------------ */
22
23/*
24 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate
25 * code points with their "natural" encoding.
26 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of
27 * single surrogates.
28 *
29 * This is not conformant with UTF-8.
30 *
31 * Supplementary code points may be encoded as pairs of 3-byte sequences, but
32 * the macros below do not attempt to assemble such pairs.
33 */
34
/*
 * L8_NEXT(s, i, length, c): lenient forward read.
 *
 * Reads the byte at s[i] into c and post-increments i.
 * - A byte below 0x80 is returned as is.
 * - A lead byte (U8_IS_LEAD) is handed to utf8_nextCharSafeBody() together
 *   with &i and length; the last argument is -2 (presumably the "lenient"
 *   mode described in the comment above -- confirm against utf8.h).
 * - Any other byte >= 0x80 yields U_SENTINEL.
 *
 * s and c are evaluated more than once; i must be a modifiable lvalue.
 * All macro arguments are parenthesized where they are used.
 */
#define L8_NEXT(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        if(U8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody((const uint8_t *)(s), &(i), (int32_t)(length), (c), -2); \
        } else { \
            (c)=U_SENTINEL; \
        } \
    } \
}
45
/*
 * L8_PREV(s, start, i, c): lenient backward read.
 *
 * Pre-decrements i and reads the byte at s[i] into c.
 * - A byte below 0x80 is returned as is.
 * - A byte in 0x80..0xbf is handed to utf8_prevCharSafeBody() together with
 *   start and &i; the last argument is -2 (presumably the "lenient" mode
 *   described in the comment above -- confirm against utf8.h).
 * - Any byte above 0xbf yields U_SENTINEL.
 *
 * s and c are evaluated more than once; i must be a modifiable lvalue.
 * All macro arguments are parenthesized where they are used.
 */
#define L8_PREV(s, start, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody((const uint8_t *)(s), (start), &(i), (c), -2); \
        } else { \
            (c)=U_SENTINEL; \
        } \
    } \
}
56
57/* -------------------------------------------------------------------------- */
58
b75a7d8f
A
/*
 * Forward declarations: the byte-dump helper printUChars() and the test
 * functions that addUTF8Test() registers with the test tree.
 */
59static void printUChars(const uint8_t *uchars, int16_t len);
60
61static void TestCodeUnitValues(void);
62static void TestCharLength(void);
63static void TestGetChar(void);
64static void TestNextPrevChar(void);
51004dcb
A
65static void TestNulTerminated(void);
66static void TestNextPrevNonCharacters(void);
67static void TestNextPrevCharUnsafe(void);
b75a7d8f 68static void TestFwdBack(void);
51004dcb 69static void TestFwdBackUnsafe(void);
b75a7d8f 70static void TestSetChar(void);
51004dcb 71static void TestSetCharUnsafe(void);
b75a7d8f
A
72static void TestAppendChar(void);
73static void TestAppend(void);
73c04bcf 74static void TestSurrogates(void);
b75a7d8f
A
75
76void addUTF8Test(TestNode** root);
77
78void
79addUTF8Test(TestNode** root)
80{
51004dcb
A
81 addTest(root, &TestCodeUnitValues, "utf8tst/TestCodeUnitValues");
82 addTest(root, &TestCharLength, "utf8tst/TestCharLength");
83 addTest(root, &TestGetChar, "utf8tst/TestGetChar");
84 addTest(root, &TestNextPrevChar, "utf8tst/TestNextPrevChar");
85 addTest(root, &TestNulTerminated, "utf8tst/TestNulTerminated");
86 addTest(root, &TestNextPrevNonCharacters, "utf8tst/TestNextPrevNonCharacters");
87 addTest(root, &TestNextPrevCharUnsafe, "utf8tst/TestNextPrevCharUnsafe");
88 addTest(root, &TestFwdBack, "utf8tst/TestFwdBack");
89 addTest(root, &TestFwdBackUnsafe, "utf8tst/TestFwdBackUnsafe");
90 addTest(root, &TestSetChar, "utf8tst/TestSetChar");
91 addTest(root, &TestSetCharUnsafe, "utf8tst/TestSetCharUnsafe");
92 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar");
93 addTest(root, &TestAppend, "utf8tst/TestAppend");
94 addTest(root, &TestSurrogates, "utf8tst/TestSurrogates");
b75a7d8f
A
95}
96
97static void TestCodeUnitValues()
98{
99 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};
51004dcb 100
b75a7d8f 101 int16_t i;
b331163b 102 for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
b75a7d8f
A
103 uint8_t c=codeunit[i];
104 log_verbose("Testing code unit value of %x\n", c);
105 if(i<4){
106 if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){
107 log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
108 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
109 }
110 } else if(i< 8){
111 if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){
112 log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
113 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
114 }
115 } else if(i< 12){
116 if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || !U8_IS_TRAIL(c) || U8_IS_SINGLE(c) || U8_IS_LEAD(c)){
117 log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
118 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
119 }
120 }
121 }
122}
123
124static void TestCharLength()
125{
126 static const uint32_t codepoint[]={
127 1, 0x0061,
128 1, 0x007f,
129 2, 0x016f,
130 2, 0x07ff,
131 3, 0x0865,
132 3, 0x20ac,
133 4, 0x20402,
134 4, 0x23456,
135 4, 0x24506,
136 4, 0x20402,
137 4, 0x10402,
138 3, 0xd7ff,
139 3, 0xe000,
51004dcb 140
b75a7d8f 141 };
51004dcb 142
b75a7d8f
A
143 int16_t i;
144 UBool multiple;
b331163b 145 for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
b75a7d8f
A
146 UChar32 c=codepoint[i+1];
147 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){
148 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));
149 }else{
51004dcb 150 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c));
b75a7d8f
A
151 }
152 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
153 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){
154 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
155 }
156 }
157}
158
159static void TestGetChar()
160{
161 static const uint8_t input[]={
162 /* code unit,*/
163 0x61,
164 0x7f,
165 0xe4,
51004dcb 166 0xba,
b75a7d8f 167 0x8c,
51004dcb
A
168 0xF0,
169 0x90,
170 0x90,
b75a7d8f
A
171 0x81,
172 0xc0,
173 0x65,
174 0x31,
175 0x9a,
176 0xc9
177 };
178 static const UChar32 result[]={
51004dcb
A
179 /* codepoint-unsafe, codepoint-safe(not strict) codepoint-safe(strict) */
180 0x61, 0x61, 0x61,
181 0x7f, 0x7f, 0x7f,
b75a7d8f
A
182 0x4e8c, 0x4e8c, 0x4e8c,
183 0x4e8c, 0x4e8c, 0x4e8c ,
184 0x4e8c, 0x4e8c, 0x4e8c,
185 0x10401, 0x10401, 0x10401 ,
186 0x10401, 0x10401, 0x10401 ,
187 0x10401, 0x10401, 0x10401 ,
188 0x10401, 0x10401, 0x10401,
189 0x25, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
51004dcb
A
190 0x65, 0x65, 0x65,
191 0x31, 0x31, 0x31,
b75a7d8f
A
192 0x31, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
193 0x240, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1
194 };
195 uint16_t i=0;
51004dcb 196 UChar32 c, expected;
b75a7d8f
A
197 uint32_t offset=0;
198
199 for(offset=0; offset<sizeof(input); offset++) {
200 if (offset < sizeof(input) - 1) {
201 UTF8_GET_CHAR_UNSAFE(input, offset, c);
202 if(c != result[i]){
203 log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
51004dcb 204
b75a7d8f
A
205 }
206
207 U8_GET_UNSAFE(input, offset, c);
208 if(c != result[i]){
209 log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
51004dcb 210
b75a7d8f
A
211 }
212 }
213
51004dcb
A
214 UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE);
215 expected=result[i+1];
216 if(c != expected){
217 log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
218 }
219
b75a7d8f 220 U8_GET(input, 0, offset, sizeof(input), c);
51004dcb
A
221 if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
222 if(c != expected){
223 log_err("ERROR: U8_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
b75a7d8f
A
224 }
225
51004dcb
A
226 U8_GET_OR_FFFD(input, 0, offset, sizeof(input), c);
227 if(expected<0) { expected=0xfffd; }
228 if(c != expected){
229 log_err("ERROR: U8_GET_OR_FFFD failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
b75a7d8f
A
230 }
231
232 UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE);
233 if(c != result[i+2]){
234 log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
235 }
51004dcb
A
236
237 i=(uint16_t)(i+3);
b75a7d8f
A
238 }
239}
240
51004dcb 241static void TestNextPrevChar() {
b75a7d8f
A
242 static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00};
243 static const UChar32 result[]={
51004dcb 244 /* next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s */
b75a7d8f
A
245 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000,
246 0x10401, 0x10401, 0x10401, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
247 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841410, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
248 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xa1050, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
249 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
250 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x61, 0x61, 0x61,
251 0x80, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xc2, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
252 0xfd, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x77e, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
253 0xbe, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xfd, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
254 0xa1, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
255 0x61, 0x61, 0x61, 0xc0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
256 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x10401, 0x10401, 0x10401,
257 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF_ERROR_VALUE, UTF_ERROR_VALUE,
258 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
259 0x0840, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
260 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061
261 };
262 static const int32_t movedOffset[]={
51004dcb 263 /* next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s */
b75a7d8f 264 1, 1, 1, 15, 15, 15,
51004dcb
A
265 5, 5, 5, 14, 14 , 14,
266 3, 3, 3, 9, 13, 13,
b75a7d8f 267 4, 4, 4, 9, 12, 12,
51004dcb
A
268 5, 5, 5, 9, 11, 11,
269 7, 7, 7, 10, 10, 10,
270 7, 7, 7, 9, 9, 9,
271 8, 9, 9, 7, 7, 7,
272 9, 9, 9, 7, 7, 7,
273 11, 10, 10, 5, 5, 5,
274 11, 11, 11, 5, 5, 5,
275 12, 12, 12, 1, 1, 1,
276 13, 13, 13, 1, 1, 1,
277 14, 14, 14, 1, 1, 1,
278 14, 15, 15, 1, 1, 1,
279 14, 16, 16, 0, 0, 0,
b75a7d8f 280 };
51004dcb 281 /* TODO: remove unused columns for next_unsafe & prev_unsafe, and adjust the test code */
b75a7d8f 282
51004dcb 283 UChar32 c, expected;
b75a7d8f
A
284 uint32_t i=0;
285 uint32_t offset=0;
286 int32_t setOffset=0;
287 for(offset=0; offset<sizeof(input); offset++){
b75a7d8f
A
288 setOffset=offset;
289 UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE);
290 if(setOffset != movedOffset[i+1]){
291 log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
292 offset, movedOffset[i+1], setOffset);
293 }
51004dcb
A
294 expected=result[i+1];
295 if(c != expected){
296 log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
297 }
b75a7d8f
A
298
299 setOffset=offset;
300 U8_NEXT(input, setOffset, sizeof(input), c);
301 if(setOffset != movedOffset[i+1]){
302 log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
303 offset, movedOffset[i+1], setOffset);
304 }
51004dcb
A
305 if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
306 if(c != expected){
307 log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
308 }
309
310 setOffset=offset;
311 U8_NEXT_OR_FFFD(input, setOffset, sizeof(input), c);
312 if(setOffset != movedOffset[i+1]){
313 log_err("ERROR: U8_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
314 offset, movedOffset[i+1], setOffset);
315 }
316 if(expected<0) { expected=0xfffd; }
317 if(c != expected){
318 log_err("ERROR: U8_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
319 }
b75a7d8f
A
320
321 setOffset=offset;
322 UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE);
323 if(setOffset != movedOffset[i+1]){
324 log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
325 offset, movedOffset[i+2], setOffset);
326 }
327 if(c != result[i+2]){
328 log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
329 }
330
331 i=i+6;
332 }
333
334 i=0;
335 for(offset=sizeof(input); offset > 0; --offset){
b75a7d8f
A
336 setOffset=offset;
337 UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
338 if(setOffset != movedOffset[i+4]){
339 log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
340 offset, movedOffset[i+4], setOffset);
341 }
51004dcb
A
342 expected=result[i+4];
343 if(c != expected){
344 log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
345 }
b75a7d8f
A
346
347 setOffset=offset;
348 U8_PREV(input, 0, setOffset, c);
349 if(setOffset != movedOffset[i+4]){
350 log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
351 offset, movedOffset[i+4], setOffset);
352 }
51004dcb
A
353 if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
354 if(c != expected){
355 log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
356 }
357
358 setOffset=offset;
359 U8_PREV_OR_FFFD(input, 0, setOffset, c);
360 if(setOffset != movedOffset[i+4]){
361 log_err("ERROR: U8_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
362 offset, movedOffset[i+4], setOffset);
363 }
364 if(expected<0) { expected=0xfffd; }
365 if(c != expected){
366 log_err("ERROR: U8_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
367 }
b75a7d8f
A
368
369 setOffset=offset;
370 UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, TRUE);
371 if(setOffset != movedOffset[i+5]){
372 log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
373 offset, movedOffset[i+5], setOffset);
51004dcb 374 }
b75a7d8f
A
375 if(c != result[i+5]){
376 log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
377 }
378
379 i=i+6;
380 }
51004dcb 381}
b75a7d8f 382
51004dcb
A
383/* keep this in sync with utf16tst.c's TestNulTerminated() */
/*
 * Exercises the U8_ macros with -1 passed as the length argument (going by
 * the test name, this is the NUL-terminated-string mode).
 *
 * First loop: walks input[] with U8_NEXT until a 0 code point is read,
 * comparing each value with result[]; also checks that U8_NEXT_OR_FFFD and
 * U8_FWD_1 stop at the same index as U8_NEXT, and that U8_FWD_N from index 0
 * by the number of code points read so far reaches the expected index.
 *
 * Second loop: for every code point read by U8_NEXT, checks at each byte
 * index j inside it that U8_GET / U8_GET_OR_FFFD return the same value and
 * that U8_SET_CP_LIMIT(j+1) lands where U8_NEXT stopped.
 */
384static void TestNulTerminated() {
385 static const uint8_t input[]={
386 /* 0 */ 0x61,
387 /* 1 */ 0xf0, 0x90, 0x90, 0x81,
388 /* 5 */ 0xc0, 0x80,
389 /* 7 */ 0xdf, 0x80,
390 /* 9 */ 0xc2,
391 /* 10 */ 0x62,
392 /* 11 */ 0xfd, 0xbe,
393 /* 13 */ 0xe0, 0xa0, 0x80,
394 /* 16 */ 0xe2, 0x82, 0xac,
395 /* 19 */ 0xf0, 0x90, 0x90,
396 /* 22 */ 0x00
397 /* 23 */
398 };
/* expected U8_NEXT results, one per call, in reading order */
399 static const UChar32 result[]={
400 0x61,
401 0x10401,
402 U_SENTINEL,
403 0x7c0,
404 U_SENTINEL,
405 0x62,
406 U_SENTINEL,
407 0x800,
408 0x20ac,
409 U_SENTINEL,
410 0
411 };
412
413 UChar32 c, c2, expected;
414 int32_t i0, i=0, j, k, expectedIndex;
415 int32_t cpIndex=0;
416 do {
417 i0=i;
418 U8_NEXT(input, i, -1, c);
419 expected=result[cpIndex];
420 if(c!=expected) {
421 log_err("U8_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected);
422 }
423 j=i0;
424 U8_NEXT_OR_FFFD(input, j, -1, c);
/* a negative expectation (U_SENTINEL) becomes U+FFFD for the _OR_FFFD variant */
425 if(expected<0) { expected=0xfffd; }
426 if(c!=expected) {
427 log_err("U8_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected);
428 }
429 if(j!=i) {
430 log_err("U8_NEXT_OR_FFFD() moved to index %d but U8_NEXT() moved to %d\n", j, i);
b75a7d8f 431 }
51004dcb
A
432 j=i0;
433 U8_FWD_1(input, j, -1);
434 if(j!=i) {
435 log_err("U8_FWD_1() moved to index %d but U8_NEXT() moved to %d\n", j, i);
436 }
437 ++cpIndex;
438 /*
439 * Move by this many code points from the start.
440 * U8_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
441 */
442 expectedIndex= (c==0) ? i-1 : i;
443 k=0;
444 U8_FWD_N(input, k, -1, cpIndex);
445 if(k!=expectedIndex) {
446 log_err("U8_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex);
447 }
448 } while(c!=0);
449
450 i=0;
451 do {
/* [i0, i) is the byte range consumed by this U8_NEXT call; j visits every index in it */
452 j=i0=i;
453 U8_NEXT(input, i, -1, c);
454 do {
455 U8_GET(input, 0, j, -1, c2);
456 if(c2!=c) {
457 log_err("U8_NEXT(from %d)=U+%04x != U+%04x=U8_GET(at %d)\n", i0, c, c2, j);
458 }
459 U8_GET_OR_FFFD(input, 0, j, -1, c2);
460 expected= (c>=0) ? c : 0xfffd;
461 if(c2!=expected) {
462 log_err("U8_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U8_GET_OR_FFFD(at %d)\n", i0, expected, c2, j);
463 }
464 /* U8_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
465 k=j+1;
466 U8_SET_CP_LIMIT(input, 0, k, -1);
467 if(k!=i) {
468 log_err("U8_NEXT() moved to %d but U8_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k);
b75a7d8f 469 }
51004dcb
A
470 } while(++j<i);
471 } while(c!=0);
472}
473
/*
 * Checks that U8_NEXT and U8_PREV return Unicode noncharacters as regular
 * code points: every value read from nonChars[], forward and backward, must
 * satisfy U_IS_UNICODE_NONCHAR.
 */
474static void TestNextPrevNonCharacters() {
475 /* test non-characters */
476 static const uint8_t nonChars[]={
477 0xef, 0xb7, 0x90, /* U+fdd0 */
478 0xef, 0xbf, 0xbf, /* U+ffff */
479 0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */
480 0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */
481 0xf4, 0x8f, 0xbf, 0xbe /* U+10fffe */
482 };
483
484 UChar32 ch;
485 int32_t idx;
486
/* forward: U8_NEXT advances idx itself, so the for statement has no increment */
487 for(idx=0; idx<(int32_t)sizeof(nonChars);) {
488 U8_NEXT(nonChars, idx, sizeof(nonChars), ch);
489 if(!U_IS_UNICODE_NONCHAR(ch)) {
490 log_err("U8_NEXT(before %d) failed to read a non-character\n", idx);
491 }
492 }
/* backward: U8_PREV moves idx back itself */
493 for(idx=(int32_t)sizeof(nonChars); idx>0;) {
494 U8_PREV(nonChars, 0, idx, ch);
495 if(!U_IS_UNICODE_NONCHAR(ch)) {
496 log_err("U8_PREV(at %d) failed to read a non-character\n", idx);
b75a7d8f
A
497 }
498 }
499}
500
51004dcb
A
501static void TestNextPrevCharUnsafe() {
502 /*
503 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries.
504 * The behavior of _UNSAFE macros for ill-formed strings is undefined.
505 */
506 static const uint8_t input[]={
507 0x61,
508 0xf0, 0x90, 0x90, 0x81,
509 0xc0, 0x80, /* non-shortest form */
510 0xe2, 0x82, 0xac,
511 0xc2, 0xa1,
512 0xf4, 0x8f, 0xbf, 0xbf,
513 0x00
514 };
515 static const UChar32 codePoints[]={
516 0x61,
517 0x10401,
518 0,
519 0x20ac,
520 0xa1,
521 0x10ffff,
522 0
523 };
524
525 UChar32 c;
526 int32_t i;
527 uint32_t offset;
528 for(i=0, offset=0; offset<sizeof(input); ++i) {
529 UTF8_NEXT_CHAR_UNSAFE(input, offset, c);
530 if(c != codePoints[i]){
531 log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
532 offset, codePoints[i], c);
533 }
534 }
535 for(i=0, offset=0; offset<sizeof(input); ++i) {
536 U8_NEXT_UNSAFE(input, offset, c);
537 if(c != codePoints[i]){
538 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
539 offset, codePoints[i], c);
540 }
541 }
542
b331163b 543 for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
51004dcb
A
544 UTF8_PREV_CHAR_UNSAFE(input, offset, c);
545 if(c != codePoints[i]){
546 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
547 offset, codePoints[i], c);
548 }
549 }
b331163b 550 for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
51004dcb
A
551 U8_PREV_UNSAFE(input, offset, c);
552 if(c != codePoints[i]){
553 log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
554 offset, codePoints[i], c);
555 }
556 }
557}
558
/*
 * Exercises the single-step and N-step forward/backward macros in their
 * bounds-checked forms (UTF8_FWD_1_SAFE / U8_FWD_1, UTF8_BACK_1_SAFE /
 * U8_BACK_1, UTF8_FWD_N_SAFE / U8_FWD_N, UTF8_BACK_N_SAFE / U8_BACK_N) on a
 * byte string containing both complete sequences and stray bytes, comparing
 * every resulting offset with the tables below.
 *
 * fwd_safe[] / back_safe[] list the offsets reached by repeated single
 * steps; Nvalue[] lists successive step counts whose cumulative results are
 * in fwd_N_safe[] / back_N_safe[].
 */
static void TestFwdBack() {
    static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};
    static const uint16_t fwd_safe[]   ={1, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
    static const uint16_t back_safe[]  ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0};

    static const uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1, 5};
    static const uint16_t fwd_N_safe[]   ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */
    static const uint16_t back_N_safe[]  ={18, 17, 15, 12, 11, 9, 7, 0};

    uint32_t offsafe=0;
    uint32_t i=0;

    while(offsafe < sizeof(input)){
        UTF8_FWD_1_SAFE(input, offsafe, sizeof(input));
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
        }
        i++;
    }

    /*
     * Restart from the beginning: without this reset the offset is still at
     * the end of the string and the U8_FWD_1 loop would never run.
     */
    i=0;
    offsafe=0;
    while(offsafe < sizeof(input)){
        U8_FWD_1(input, offsafe, sizeof(input));
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
        }
        i++;
    }

    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
        UTF8_BACK_1_SAFE(input, 0, offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_safe[i], offsafe);
        }
        i++;
    }

    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
        U8_BACK_1(input, 0, offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i], offsafe);
        }
        i++;
    }

    /* N-step moves are cumulative: the offset is not reset between table entries */
    offsafe=0;
    for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
        UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
        }
    }

    offsafe=0;
    for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
        U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
        }
    }

    offsafe=sizeof(input);
    for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
        UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            /* offsafe is 32 bits wide: print it with %d like the other messages */
            log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%d\n", i, back_N_safe[i], offsafe);
        }
    }

    offsafe=sizeof(input);
    for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
        U8_BACK_N(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%d\n", i, back_N_safe[i], offsafe);
        }
    }
}
642
51004dcb
A
/*
 * Exercises the unsafe single-step and N-step forward/backward macros
 * (UTF8_* and U8_* spellings) on input[] and checks that every move lands
 * on the code point boundary listed in boundaries[].
 */
static void TestFwdBackUnsafe() {
    /*
     * Use a (mostly) well-formed UTF-8 string and test at code point boundaries.
     * The behavior of _UNSAFE macros for ill-formed strings is undefined.
     */
    static const uint8_t input[]={
        0x61,
        0xf0, 0x90, 0x90, 0x81,
        0xc0, 0x80,  /* non-shortest form */
        0xe2, 0x82, 0xac,
        0xc2, 0xa1,
        0xf4, 0x8f, 0xbf, 0xbf,
        0x00
    };
    static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 };

    int32_t offset;
    int32_t n;

    /* single steps forward: the n-th step must reach boundaries[n] */
    n=1;
    offset=0;
    while(offset<UPRV_LENGTHOF(input)) {
        UTF8_FWD_1_UNSAFE(input, offset);
        if(offset != boundaries[n]) {
            log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[n], offset);
        }
        ++n;
    }
    n=1;
    offset=0;
    while(offset<UPRV_LENGTHOF(input)) {
        U8_FWD_1_UNSAFE(input, offset);
        if(offset != boundaries[n]) {
            log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[n], offset);
        }
        ++n;
    }

    /* single steps backward, starting from the last-but-one boundary */
    n=UPRV_LENGTHOF(boundaries)-2;
    offset=UPRV_LENGTHOF(input);
    while(offset>0) {
        UTF8_BACK_1_UNSAFE(input, offset);
        if(offset != boundaries[n]) {
            log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[n], offset);
        }
        --n;
    }
    n=UPRV_LENGTHOF(boundaries)-2;
    offset=UPRV_LENGTHOF(input);
    while(offset>0) {
        U8_BACK_1_UNSAFE(input, offset);
        if(offset != boundaries[n]) {
            log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[n], offset);
        }
        --n;
    }

    /* n code points forward from the start must reach boundaries[n] */
    for(n=0; n<UPRV_LENGTHOF(boundaries); ++n) {
        offset=0;
        UTF8_FWD_N_UNSAFE(input, offset, n);
        if(offset != boundaries[n]) {
            log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[n], offset);
        }
    }
    for(n=0; n<UPRV_LENGTHOF(boundaries); ++n) {
        offset=0;
        U8_FWD_N_UNSAFE(input, offset, n);
        if(offset != boundaries[n]) {
            log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[n], offset);
        }
    }

    /* n code points backward from the end must reach the n-th boundary counted from the end */
    for(n=0; n<UPRV_LENGTHOF(boundaries); ++n) {
        const int32_t fromEnd=UPRV_LENGTHOF(boundaries)-1-n;
        offset=UPRV_LENGTHOF(input);
        UTF8_BACK_N_UNSAFE(input, offset, n);
        if(offset != boundaries[fromEnd]) {
            log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[fromEnd], offset);
        }
    }
    for(n=0; n<UPRV_LENGTHOF(boundaries); ++n) {
        const int32_t fromEnd=UPRV_LENGTHOF(boundaries)-1-n;
        offset=UPRV_LENGTHOF(input);
        U8_BACK_N_UNSAFE(input, offset, n);
        if(offset != boundaries[fromEnd]) {
            log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[fromEnd], offset);
        }
    }
}
719
/*
 * For every offset 0..length of input[], checks where the bounds-checked
 * "set to code point start" macros (UTF8_SET_CHAR_START_SAFE,
 * U8_SET_CP_START) and "set to code point limit" macros
 * (UTF8_SET_CHAR_LIMIT_SAFE, U8_SET_CP_LIMIT) move the offset.
 * The START macros are skipped for offset==length.
 */
static void TestSetChar() {
    static const uint8_t input[]
        = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };
    /* expected results, indexed by the starting offset */
    static const int16_t start_safe[]
        = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
    static const int16_t limit_safe[]
        = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };

    int32_t offset=0;

    while(offset<=UPRV_LENGTHOF(input)) {
        int32_t moved;

        if(offset<UPRV_LENGTHOF(input)) {
            moved=offset;
            UTF8_SET_CHAR_START_SAFE(input, 0, moved);
            if(moved != start_safe[offset]) {
                log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[offset], moved);
            }

            moved=offset;
            U8_SET_CP_START(input, 0, moved);
            if(moved != start_safe[offset]) {
                log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[offset], moved);
            }
        }

        moved=offset;
        UTF8_SET_CHAR_LIMIT_SAFE(input,0, moved, sizeof(input));
        if(moved != limit_safe[offset]) {
            log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[offset], moved);
        }

        moved=offset;
        U8_SET_CP_LIMIT(input,0, moved, sizeof(input));
        if(moved != limit_safe[offset]) {
            log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[offset], moved);
        }

        ++offset;
    }
}
b75a7d8f 760
51004dcb
A
/*
 * For every offset 0..length of input[], checks where the unsafe
 * "set to code point start" macros (UTF8_SET_CHAR_START_UNSAFE,
 * U8_SET_CP_START_UNSAFE) and "set to code point limit" macros
 * (UTF8_SET_CHAR_LIMIT_UNSAFE, U8_SET_CP_LIMIT_UNSAFE) move the offset.
 * The START macros are skipped for offset==length, the LIMIT macros for
 * offset==0.
 */
static void TestSetCharUnsafe() {
    static const uint8_t input[]
        = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x80, 0x80, 0x00 };
    /* expected results, indexed by the starting offset */
    static const int16_t start_unsafe[]
        = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9, 12, 12, 12, 15 };
    static const int16_t limit_unsafe[]
        = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10, 10, 15, 15, 15, 16 };

    int32_t offset=0;

    while(offset<=UPRV_LENGTHOF(input)) {
        int32_t moved;

        if(offset<UPRV_LENGTHOF(input)) {
            moved=offset;
            UTF8_SET_CHAR_START_UNSAFE(input, moved);
            if(moved != start_unsafe[offset]) {
                log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[offset], moved);
            }

            moved=offset;
            U8_SET_CP_START_UNSAFE(input, moved);
            if(moved != start_unsafe[offset]) {
                log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[offset], moved);
            }
        }

        if(offset != 0) { /* Can't have it go off the end of the array */
            moved=offset;
            UTF8_SET_CHAR_LIMIT_UNSAFE(input, moved);
            if(moved != limit_unsafe[offset]) {
                log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[offset], moved);
            }

            moved=offset;
            U8_SET_CP_LIMIT_UNSAFE(input, moved);
            if(moved != limit_unsafe[offset]) {
                log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[offset], moved);
            }
        }

        ++offset;
    }
}
803
804static void TestAppendChar(){
805 static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
806 static const uint32_t test[]={
51004dcb 807 /* append-position(unsafe), CHAR to be appended */
b75a7d8f
A
808 0, 0x10401,
809 2, 0x0028,
51004dcb 810 2, 0x007f,
b75a7d8f
A
811 3, 0xd801,
812 1, 0x20402,
813 8, 0x10401,
814 5, 0xc0,
815 5, 0xc1,
816 5, 0xfd,
817 6, 0x80,
818 6, 0x81,
819 6, 0xbf,
820 7, 0xfe,
821
51004dcb 822 /* append-position(safe), CHAR to be appended */
b75a7d8f 823 0, 0x10401,
51004dcb 824 2, 0x0028,
b75a7d8f
A
825 3, 0x7f,
826 3, 0xd801, /* illegal for UTF-8 starting with Unicode 3.2 */
827 1, 0x20402,
828 9, 0x10401,
829 5, 0xc0,
830 5, 0xc1,
831 5, 0xfd,
832 6, 0x80,
833 6, 0x81,
834 6, 0xbf,
835 7, 0xfe,
51004dcb 836
b75a7d8f
A
837 };
838 static const uint16_t movedOffset[]={
51004dcb 839 /* offset-moved-to(unsafe) */
b75a7d8f 840 4, /*for append-pos: 0 , CHAR 0x10401*/
51004dcb 841 3,
b75a7d8f
A
842 3,
843 6,
844 5,
845 12,
846 7,
51004dcb 847 7,
b75a7d8f
A
848 7,
849 8,
850 8,
851 8,
852 9,
853
51004dcb 854 /* offset-moved-to(safe) */
b75a7d8f
A
855 4, /*for append-pos: 0, CHAR 0x10401*/
856 3,
857 4,
858 6,
859 5,
860 11,
861 7,
51004dcb 862 7,
b75a7d8f
A
863 7,
864 8,
865 8,
866 8,
867 9,
51004dcb 868
b75a7d8f 869 };
51004dcb 870
b75a7d8f
A
871 static const uint8_t result[][11]={
872 /*unsafe*/
51004dcb
A
873 {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
874 {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
875 {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
876 {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00},
877 {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
b75a7d8f 878 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90},
51004dcb 879
b75a7d8f
A
880 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
881 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
882 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},
51004dcb 883
b75a7d8f
A
884 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
885 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
886 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},
887
888 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
889 /*safe*/
51004dcb
A
890 {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
891 {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
b75a7d8f 892 {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
51004dcb
A
893 {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00},
894 {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
b75a7d8f 895 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /*gets UTF8_ERROR_VALUE_2 which takes 2 bytes 0xc0, 0x9f*/
51004dcb 896
b75a7d8f
A
897 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
898 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
899 {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},
51004dcb 900
b75a7d8f
A
901 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
902 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
903 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},
51004dcb 904
b75a7d8f 905 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
51004dcb 906
b75a7d8f
A
907 };
908 uint16_t i, count=0;
909 uint8_t str[12];
910 uint32_t offset;
911/* UChar32 c=0;*/
b331163b
A
912 uint16_t size=UPRV_LENGTHOF(s);
913 for(i=0; i<UPRV_LENGTHOF(test); i=(uint16_t)(i+2)){
b75a7d8f 914 uprv_memcpy(str, s, size);
51004dcb 915 offset=test[i];
b75a7d8f
A
916 if(count<13){
917 UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);
918 if(offset != movedOffset[count]){
51004dcb 919 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
b75a7d8f 920 count, movedOffset[count], offset);
51004dcb 921
b75a7d8f
A
922 }
923 if(uprv_memcmp(str, result[count], size) !=0){
924 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", count);
925 printUChars(result[count], size);
926 log_err("\nGot: ");
927 printUChars(str, size);
928 log_err("\n");
929 }
930 }else{
931 UTF8_APPEND_CHAR_SAFE(str, offset, size, test[i+1]);
932 if(offset != movedOffset[count]){
51004dcb 933 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
b75a7d8f 934 count, movedOffset[count], offset);
51004dcb 935
b75a7d8f
A
936 }
937 if(uprv_memcmp(str, result[count], size) !=0){
938 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", count);
939 printUChars(result[count], size);
940 log_err("\nGot: ");
941 printUChars(str, size);
942 log_err("\n");
943 }
944 /*call the API instead of MACRO
945 uprv_memcpy(str, s, size);
51004dcb 946 offset=test[i];
b75a7d8f 947 c=test[i+1];
51004dcb
A
948 if((uint32_t)(c)<=0x7f) {
949 (str)[(offset)++]=(uint8_t)(c);
950 } else {
951 (offset)=utf8_appendCharSafeBody(str, (int32_t)(offset), (int32_t)(size), c);
b75a7d8f
A
952 }
953 if(offset != movedOffset[count]){
51004dcb 954 log_err("ERROR: utf8_appendCharSafeBody() failed to move the offset correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",
b75a7d8f 955 count, movedOffset[count], offset);
51004dcb 956
b75a7d8f
A
957 }
958 if(uprv_memcmp(str, result[count], size) !=0){
959 log_err("ERROR: utf8_appendCharSafeBody() failed for count=%d. \nExpected:", count);
960 printUChars(result[count], size);
961 printf("\nGot: ");
962 printUChars(str, size);
963 printf("\n");
964 }
965 */
966 }
967 count++;
51004dcb
A
968 }
969
b75a7d8f
A
970
971}
972
973static void TestAppend() {
974 static const UChar32 codePoints[]={
975 0x61, 0xdf, 0x901, 0x3040,
976 0xac00, 0xd800, 0xdbff, 0xdcde,
977 0xdffd, 0xe000, 0xffff, 0x10000,
978 0x12345, 0xe0021, 0x10ffff, 0x110000,
979 0x234567, 0x7fffffff, -1, -1000,
980 0, 0x400
981 };
982 static const uint8_t expectUnsafe[]={
983 0x61, 0xc3, 0x9f, 0xe0, 0xa4, 0x81, 0xe3, 0x81, 0x80,
984 0xea, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xed, 0xaf, 0xbf, 0xed, 0xb3, 0x9e,
985 0xed, 0xbf, 0xbd, 0xee, 0x80, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80,
986 0xf0, 0x92, 0x8d, 0x85, 0xf3, 0xa0, 0x80, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, /* not 0x110000 */
987 /* none from this line */
988 0, 0xd0, 0x80
989 }, expectSafe[]={
990 0x61, 0xc3, 0x9f, 0xe0, 0xa4, 0x81, 0xe3, 0x81, 0x80,
991 0xea, 0xb0, 0x80, /* no surrogates */
992 /* no surrogates */ 0xee, 0x80, 0x80, 0xef, 0xbf, 0xbf, 0xf0, 0x90, 0x80, 0x80,
993 0xf0, 0x92, 0x8d, 0x85, 0xf3, 0xa0, 0x80, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, /* not 0x110000 */
994 /* none from this line */
995 0, 0xd0, 0x80
996 };
997
998 uint8_t buffer[100];
999 UChar32 c;
1000 int32_t i, length;
1001 UBool isError, expectIsError, wrongIsError;
1002
1003 length=0;
b331163b 1004 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
b75a7d8f
A
1005 c=codePoints[i];
1006 if(c<0 || 0x10ffff<c) {
1007 continue; /* skip non-code points for U8_APPEND_UNSAFE */
1008 }
1009
1010 U8_APPEND_UNSAFE(buffer, length, c);
1011 }
b331163b 1012 if(length!=UPRV_LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) {
b75a7d8f
A
1013 log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
1014 }
1015
1016 length=0;
1017 wrongIsError=FALSE;
b331163b 1018 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
b75a7d8f
A
1019 c=codePoints[i];
1020 expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
1021 isError=FALSE;
1022
b331163b 1023 U8_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError);
b75a7d8f
A
1024 wrongIsError|= isError!=expectIsError;
1025 }
1026 if(wrongIsError) {
1027 log_err("U8_APPEND did not set isError correctly\n");
1028 }
b331163b 1029 if(length!=UPRV_LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) {
b75a7d8f
A
1030 log_err("U8_APPEND did not generate the expected output\n");
1031 }
1032}
1033
73c04bcf
A
1034static void
1035TestSurrogates() {
1036 static const uint8_t b[]={
1037 0xc3, 0x9f, /* 00DF */
1038 0xed, 0x9f, 0xbf, /* D7FF */
1039 0xed, 0xa0, 0x81, /* D801 */
1040 0xed, 0xbf, 0xbe, /* DFFE */
1041 0xee, 0x80, 0x80, /* E000 */
1042 0xf0, 0x97, 0xbf, 0xbe /* 17FFE */
1043 };
1044 static const UChar32 cp[]={
1045 0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe
1046 };
1047
1048 UChar32 cu, cs, cl;
1049 int32_t i, j, k, iu, is, il, length;
1050
1051 k=0; /* index into cp[] */
b331163b 1052 length=UPRV_LENGTHOF(b);
73c04bcf
A
1053 for(i=0; i<length;) {
1054 j=i;
1055 U8_NEXT_UNSAFE(b, j, cu);
1056 iu=j;
1057
1058 j=i;
1059 U8_NEXT(b, j, length, cs);
1060 is=j;
1061
1062 j=i;
1063 L8_NEXT(b, j, length, cl);
1064 il=j;
1065
1066 if(cu!=cp[k]) {
1067 log_err("U8_NEXT_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
1068 }
1069
1070 /* U8_NEXT() returns <0 for surrogate code points */
1071 if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
1072 log_err("U8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
1073 }
1074
1075 /* L8_NEXT() returns surrogate code points like U8_NEXT_UNSAFE() */
1076 if(cl!=cu) {
1077 log_err("L8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
1078 }
1079
1080 if(is!=iu || il!=iu) {
1081 log_err("U8_NEXT(b[%ld]) or L8_NEXT(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
1082 }
1083
1084 ++k; /* next code point */
1085 i=iu; /* advance by one UTF-8 sequence */
1086 }
1087
1088 while(i>0) {
1089 --k; /* previous code point */
1090
1091 j=i;
1092 U8_PREV_UNSAFE(b, j, cu);
1093 iu=j;
1094
1095 j=i;
1096 U8_PREV(b, 0, j, cs);
1097 is=j;
1098
1099 j=i;
1100 L8_PREV(b, 0, j, cl);
1101 il=j;
1102
1103 if(cu!=cp[k]) {
1104 log_err("U8_PREV_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
1105 }
1106
1107 /* U8_PREV() returns <0 for surrogate code points */
1108 if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
1109 log_err("U8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
1110 }
1111
1112 /* L8_PREV() returns surrogate code points like U8_PREV_UNSAFE() */
1113 if(cl!=cu) {
1114 log_err("L8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
1115 }
1116
1117 if(is!=iu || il !=iu) {
1118 log_err("U8_PREV(b[%ld]) or L8_PREV(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
1119 }
1120
1121 i=iu; /* go back by one UTF-8 sequence */
1122 }
1123}
1124
b75a7d8f
A
/*
 * Writes the first len bytes of uchars to the test error log, each formatted
 * as "0xNN " (two lowercase hex digits followed by a space).
 * Nothing is written when len is zero or negative.
 */
static void printUChars(const uint8_t *uchars, int16_t len){
    int16_t pos;

    for(pos=0; pos<len; ++pos){
        log_err("0x%02x ", uchars[pos]);
    }
}