icuSources/test/cintltst/utf8tst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1998-2006, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /*
   7 * File test.c
   8 *
   9 * Modification History:
  10 *
  11 *   Date          Name        Description
  12 *   07/24/2000    Madhu       Creation
  13 *******************************************************************************
  14 */
  15
  16 #include "unicode/utypes.h"
  17 #include "unicode/utf8.h"
  18 #include "cmemory.h"
  19 #include "cintltst.h"
  20
  21 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
  22
  23 /* lenient UTF-8 ------------------------------------------------------------ */
  24
  25 /*
  26  * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate
  27  * code points with their "natural" encoding.
  28  * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of
  29  * single surrogates.
  30  *
  31  * This is not conformant with UTF-8.
  32  *
  33  * Supplementary code points may be encoded as pairs of 3-byte sequences, but
  34  * the macros below do not attempt to assemble such pairs.
  35  */
  36
  37 #define L8_NEXT(s, i, length, c) { \
  38     (c)=(uint8_t)(s)[(i)++]; \
  39     if((c)>=0x80) { \
  40         if(U8_IS_LEAD(c)) { \
  41             (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -2); \
  42         } else { \
  43             (c)=U_SENTINEL; \
  44         } \
  45     } \
  46 }
  47
  48 #define L8_PREV(s, start, i, c) { \
  49     (c)=(uint8_t)(s)[--(i)]; \
  50     if((c)>=0x80) { \
  51         if((c)<=0xbf) { \
  52             (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -2); \
  53         } else { \
  54             (c)=U_SENTINEL; \
  55         } \
  56     } \
  57 }
  58
  59 /* -------------------------------------------------------------------------- */
  60
/* Helper taking a byte buffer and a length; its definition lies outside this part of the file. */
static void printUChars(const uint8_t *uchars, int16_t len);

/* The individual test functions; addUTF8Test() registers each of them with addTest(). */
static void TestCodeUnitValues(void);
static void TestCharLength(void);
static void TestGetChar(void);
static void TestNextPrevChar(void);
static void TestFwdBack(void);
static void TestSetChar(void);
static void TestAppendChar(void);
static void TestAppend(void);
static void TestSurrogates(void);

/* Non-static registration function for the tests above; prototyped here ahead of its definition. */
void addUTF8Test(TestNode** root);
  74
  75 void
  76 addUTF8Test(TestNode** root)
  77 {
  78   addTest(root, &TestCodeUnitValues,    "utf8tst/TestCodeUnitValues");
  79   addTest(root, &TestCharLength,        "utf8tst/TestCharLength"    );
  80   addTest(root, &TestGetChar,           "utf8tst/TestGetChar"       );
  81   addTest(root, &TestNextPrevChar,      "utf8tst/TestNextPrevChar"  );
  82   addTest(root, &TestFwdBack,           "utf8tst/TestFwdBack"       );
  83   addTest(root, &TestSetChar,           "utf8tst/TestSetChar"       );
  84   addTest(root, &TestAppendChar,        "utf8tst/TestAppendChar"    );
  85   addTest(root, &TestAppend,            "utf8tst/TestAppend"        );
  86   addTest(root, &TestSurrogates,        "utf8tst/TestSurrogates"    );
  87 }
  88
  89 static void TestCodeUnitValues()
  90 {
  91     static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};
  92
  93     int16_t i;
  94     for(i=0; i<sizeof(codeunit)/sizeof(codeunit[0]); i++){
  95         uint8_t c=codeunit[i];
  96         log_verbose("Testing code unit value of %x\n", c);
  97         if(i<4){
  98             if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){
  99                 log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
 100                     c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
 101             }
 102         } else if(i< 8){
 103             if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){
 104                 log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
 105                     c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
 106             }
 107         } else if(i< 12){
 108             if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || !U8_IS_TRAIL(c) || U8_IS_SINGLE(c) || U8_IS_LEAD(c)){
 109                 log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
 110                     c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
 111             }
 112         }
 113     }
 114 }
 115
 116 static void TestCharLength()
 117 {
 118     static const uint32_t codepoint[]={
 119         1, 0x0061,
 120         1, 0x007f,
 121         2, 0x016f,
 122         2, 0x07ff,
 123         3, 0x0865,
 124         3, 0x20ac,
 125         4, 0x20402,
 126         4, 0x23456,
 127         4, 0x24506,
 128         4, 0x20402,
 129         4, 0x10402,
 130         3, 0xd7ff,
 131         3, 0xe000,
 132
 133     };
 134
 135     int16_t i;
 136     UBool multiple;
 137     for(i=0; i<sizeof(codepoint)/sizeof(codepoint[0]); i=(int16_t)(i+2)){
 138         UChar32 c=codepoint[i+1];
 139         if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){
 140               log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));
 141         }else{
 142               log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c) );
 143         }
 144         multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
 145         if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){
 146               log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
 147         }
 148     }
 149 }
 150
 151 static void TestGetChar()
 152 {
 153     static const uint8_t input[]={
 154     /*  code unit,*/
 155         0x61,
 156         0x7f,
 157         0xe4,
 158         0xba,
 159         0x8c,
 160         0xF0,
 161         0x90,
 162         0x90,
 163         0x81,
 164         0xc0,
 165         0x65,
 166         0x31,
 167         0x9a,
 168         0xc9
 169     };
 170     static const UChar32 result[]={
 171      /*codepoint-unsafe,  codepoint-safe(not strict)  codepoint-safe(strict)*/
 172         0x61,             0x61,                       0x61,
 173         0x7f,             0x7f,                       0x7f,
 174         0x4e8c,           0x4e8c,                     0x4e8c,
 175         0x4e8c,           0x4e8c,                     0x4e8c ,
 176         0x4e8c,           0x4e8c,                     0x4e8c,
 177         0x10401,          0x10401,                    0x10401 ,
 178         0x10401,          0x10401,                    0x10401 ,
 179         0x10401,          0x10401,                    0x10401 ,
 180         0x10401,          0x10401,                    0x10401,
 181         0x25,             UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1,
 182         0x65,             0x65,                       0x65,
 183         0x31,             0x31,                       0x31,
 184         0x31,             UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1,
 185         0x240,            UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1
 186     };
 187     uint16_t i=0;
 188     UChar32 c;
 189     uint32_t offset=0;
 190
 191     for(offset=0; offset<sizeof(input); offset++) {
 192         if (offset < sizeof(input) - 1) {
 193             UTF8_GET_CHAR_UNSAFE(input, offset, c);
 194             if(c != result[i]){
 195                 log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
 196
 197             }
 198
 199             U8_GET_UNSAFE(input, offset, c);
 200             if(c != result[i]){
 201                 log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
 202
 203             }
 204         }
 205
 206         U8_GET(input, 0, offset, sizeof(input), c);
 207         if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){
 208             log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
 209         }
 210
 211         UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE);
 212         if(c != result[i+1]){
 213             log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
 214         }
 215
 216         UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE);
 217         if(c != result[i+2]){
 218             log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
 219         }
 220
 221          i=(uint16_t)(i+3);
 222     }
 223 }
 224
 225 static void TestNextPrevChar(){
 226     static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00};
 227     static const UChar32 result[]={
 228     /*next_unsafe    next_safe_ns        next_safe_s          prev_unsafe   prev_safe_ns         prev_safe_s*/
 229         0x0061,        0x0061,             0x0061,              0x0000,       0x0000,             0x0000,
 230         0x10401,       0x10401,            0x10401,             0xf0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
 231         0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x2841410,    UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
 232         0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xa1050,      UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
 233         0x81,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x2841,       UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
 234         0x00,          UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,  0x61,         0x61,               0x61,
 235         0x80,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xc2,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
 236         0xfd,          UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,  0x77e,        UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
 237         0xbe,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xfd,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
 238         0xa1,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x00,         UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
 239         0x61,          0x61,               0x61,                0xc0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
 240         0x81,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x10401,      0x10401,            0x10401,
 241         0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x410,        UTF_ERROR_VALUE,    UTF_ERROR_VALUE,
 242         0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x410,        UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
 243         0x0840,        UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xf0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
 244         0x0000,        0x0000,             0x0000,              0x0061,       0x0061,             0x0061
 245     };
 246     static const int32_t movedOffset[]={
 247    /*next_unsafe    next_safe_ns  next_safe_s       prev_unsafe   prev_safe_ns     prev_safe_s*/
 248         1,            1,           1,                15,           15,               15,
 249         5,            5,           5,                14,           14 ,              14,
 250         3,            3,           3,                9,            13,               13,
 251         4,            4,           4,                9,            12,               12,
 252         5,            5,           5,                9,            11,               11,
 253         7,            7,           7,                10,           10,               10,
 254         7,            7,           7,                9,            9,                9,
 255         8,            9,           9,                7,            7,                7,
 256         9,            9,           9,                7,            7,                7,
 257         11,           10,          10,               5,            5,                5,
 258         11,           11,          11,               5,            5,                5,
 259         12,           12,          12,               1,            1,                1,
 260         13,           13,          13,               1,            1,                1,
 261         14,           14,          14,               1,            1,                1,
 262         14,           15,          15,               1,            1,                1,
 263         14,           16,          16,               0,            0,                0,
 264
 265
 266     };
 267
 268
 269     UChar32 c=0x0000;
 270     uint32_t i=0;
 271     uint32_t offset=0;
 272     int32_t setOffset=0;
 273     for(offset=0; offset<sizeof(input); offset++){
 274          if (offset < sizeof(input) - 2) { /* Can't have it go off the end of the array based on input */
 275              setOffset=offset;
 276              UTF8_NEXT_CHAR_UNSAFE(input, setOffset, c);
 277              if(setOffset != movedOffset[i]){
 278                  log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 279                      offset, movedOffset[i], setOffset);
 280              }
 281              if(c != result[i]){
 282                  log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
 283              }
 284
 285              setOffset=offset;
 286              U8_NEXT_UNSAFE(input, setOffset, c);
 287              if(setOffset != movedOffset[i]){
 288                  log_err("ERROR: U8_NEXT_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 289                      offset, movedOffset[i], setOffset);
 290              }
 291              if(c != result[i]){
 292                  log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
 293              }
 294          }
 295
 296          setOffset=offset;
 297          UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE);
 298          if(setOffset != movedOffset[i+1]){
 299              log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 300                  offset, movedOffset[i+1], setOffset);
 301          }
 302          if(c != result[i+1]){
 303              log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
 304          }
 305
 306          setOffset=offset;
 307          U8_NEXT(input, setOffset, sizeof(input), c);
 308          if(setOffset != movedOffset[i+1]){
 309              log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 310                  offset, movedOffset[i+1], setOffset);
 311          }
 312          if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){
 313              log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
 314          }
 315
 316          setOffset=offset;
 317          UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE);
 318          if(setOffset != movedOffset[i+1]){
 319              log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 320                  offset, movedOffset[i+2], setOffset);
 321          }
 322          if(c != result[i+2]){
 323              log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
 324          }
 325
 326          i=i+6;
 327     }
 328
 329     i=0;
 330     for(offset=sizeof(input); offset > 0; --offset){
 331          setOffset=offset;
 332          UTF8_PREV_CHAR_UNSAFE(input, setOffset, c);
 333          if(setOffset != movedOffset[i+3]){
 334              log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 335                  offset, movedOffset[i+3], setOffset);
 336          }
 337          if(c != result[i+3]){
 338              log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
 339          }
 340
 341          setOffset=offset;
 342          UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
 343          if(setOffset != movedOffset[i+4]){
 344              log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 345                  offset, movedOffset[i+4], setOffset);
 346          }
 347          if(c != result[i+4]){
 348              log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
 349          }
 350
 351          setOffset=offset;
 352          U8_PREV(input, 0, setOffset, c);
 353          if(setOffset != movedOffset[i+4]){
 354              log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 355                  offset, movedOffset[i+4], setOffset);
 356          }
 357          if(UTF_IS_ERROR(result[i+4]) ? c >= 0 : c != result[i+4]){
 358              log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
 359          }
 360
 361          setOffset=offset;
 362          UTF8_PREV_CHAR_SAFE(input, 0,  setOffset, c, TRUE);
 363          if(setOffset != movedOffset[i+5]){
 364              log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
 365                  offset, movedOffset[i+5], setOffset);
 366          }
 367          if(c != result[i+5]){
 368              log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
 369          }
 370
 371          i=i+6;
 372     }
 373
 374     {
 375         /* test non-characters */
 376         static const uint8_t nonChars[]={
 377             0xef, 0xb7, 0x90,       /* U+fdd0 */
 378             0xef, 0xbf, 0xbf,       /* U+feff */
 379             0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */
 380             0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */
 381             0xf4, 0x8f, 0xbf, 0xbe  /* U+10fffe */
 382         };
 383
 384         UChar32 ch;
 385         int32_t idx;
 386
 387         for(idx=0; idx<(int32_t)sizeof(nonChars);) {
 388             U8_NEXT(nonChars, idx, sizeof(nonChars), ch);
 389             if(!U_IS_UNICODE_NONCHAR(ch)) {
 390                 log_err("U8_NEXT(before %d) failed to read a non-character\n", idx);
 391             }
 392         }
 393         for(idx=(int32_t)sizeof(nonChars); idx>0;) {
 394             U8_PREV(nonChars, 0, idx, ch);
 395             if(!U_IS_UNICODE_NONCHAR(ch)) {
 396                 log_err("U8_PREV(at %d) failed to read a non-character\n", idx);
 397             }
 398         }
 399     }
 400 }
 401
 402 static void TestFwdBack(){
 403     static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};
 404     static const uint16_t fwd_unsafe[] ={1, 5, 6, 7,  9, 10, 11, 13, 14, 15, 16,  20, };
 405     static const uint16_t fwd_safe[]   ={1, 5, 6, 7, 9, 10, 11,  12, 13, 14, 15, 16, 17, 18};
 406     static const uint16_t back_unsafe[]={17, 16, 12, 11, 9, 7, 6, 5, 1, 0};
 407     static const uint16_t back_safe[]  ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0};
 408
 409     static const uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1, 5};
 410     static const uint16_t fwd_N_unsafe[] ={0, 1, 6, 10, 11, 14, 15};
 411     static const uint16_t fwd_N_safe[]   ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */
 412     static const uint16_t back_N_unsafe[]={18, 17, 12, 7, 6, 1, 0};
 413     static const uint16_t back_N_safe[]  ={18, 17, 15, 12, 11, 9, 7, 0};
 414
 415
 416     uint32_t offunsafe=0, offsafe=0;
 417
 418     uint32_t i=0;
 419     while(offunsafe < sizeof(input)){
 420         UTF8_FWD_1_UNSAFE(input, offunsafe);
 421         if(offunsafe != fwd_unsafe[i]){
 422             log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
 423         }
 424         i++;
 425     }
 426
 427     i=0;
 428     while(offunsafe < sizeof(input)){
 429         U8_FWD_1_UNSAFE(input, offunsafe);
 430         if(offunsafe != fwd_unsafe[i]){
 431             log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
 432         }
 433         i++;
 434     }
 435
 436     i=0;
 437     while(offsafe < sizeof(input)){
 438         UTF8_FWD_1_SAFE(input, offsafe, sizeof(input));
 439         if(offsafe != fwd_safe[i]){
 440             log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
 441         }
 442         i++;
 443     }
 444
 445     i=0;
 446     while(offsafe < sizeof(input)){
 447         U8_FWD_1(input, offsafe, sizeof(input));
 448         if(offsafe != fwd_safe[i]){
 449             log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
 450         }
 451         i++;
 452     }
 453
 454     offunsafe=sizeof(input);
 455     i=0;
 456     while(offunsafe > 0){
 457         UTF8_BACK_1_UNSAFE(input, offunsafe);
 458         if(offunsafe != back_unsafe[i]){
 459             log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
 460         }
 461         i++;
 462     }
 463
 464     offunsafe=sizeof(input);
 465     i=0;
 466     while(offunsafe > 0){
 467         U8_BACK_1_UNSAFE(input, offunsafe);
 468         if(offunsafe != back_unsafe[i]){
 469             log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
 470         }
 471         i++;
 472     }
 473
 474     i=0;
 475     offsafe=sizeof(input);
 476     while(offsafe > 0){
 477         UTF8_BACK_1_SAFE(input, 0,  offsafe);
 478         if(offsafe != back_safe[i]){
 479             log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
 480         }
 481         i++;
 482     }
 483
 484     i=0;
 485     offsafe=sizeof(input);
 486     while(offsafe > 0){
 487         U8_BACK_1(input, 0,  offsafe);
 488         if(offsafe != back_safe[i]){
 489             log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
 490         }
 491         i++;
 492     }
 493
 494     offunsafe=0;
 495     for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
 496         UTF8_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
 497         if(offunsafe != fwd_N_unsafe[i]){
 498             log_err("ERROR: Forward_N_unsafe offset=%d expected:%d, Got:%d\n", i, fwd_N_unsafe[i], offunsafe);
 499         }
 500     }
 501
 502     offunsafe=0;
 503     for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
 504         U8_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
 505         if(offunsafe != fwd_N_unsafe[i]){
 506             log_err("ERROR: U8_FWD_N_UNSAFE offset=%d expected:%d, Got:%d\n", i, fwd_N_unsafe[i], offunsafe);
 507         }
 508     }
 509
 510     offsafe=0;
 511     for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
 512         UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);
 513         if(offsafe != fwd_N_safe[i]){
 514             log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
 515         }
 516
 517     }
 518
 519     offsafe=0;
 520     for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
 521         U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);
 522         if(offsafe != fwd_N_safe[i]){
 523             log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
 524         }
 525
 526     }
 527
 528     offunsafe=sizeof(input);
 529     for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
 530         UTF8_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
 531         if(offunsafe != back_N_unsafe[i]){
 532             log_err("ERROR: backward_N_unsafe offset=%d expected:%d, Got:%d\n", i, back_N_unsafe[i], offunsafe);
 533         }
 534     }
 535
 536     offunsafe=sizeof(input);
 537     for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
 538         U8_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
 539         if(offunsafe != back_N_unsafe[i]){
 540             log_err("ERROR: U8_BACK_N_UNSAFE offset=%d expected:%d, Got:%d\n", i, back_N_unsafe[i], offunsafe);
 541         }
 542     }
 543
 544     offsafe=sizeof(input);
 545     for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
 546         UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
 547         if(offsafe != back_N_safe[i]){
 548             log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
 549         }
 550     }
 551
 552     offsafe=sizeof(input);
 553     for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
 554         U8_BACK_N(input, 0, offsafe, Nvalue[i]);
 555         if(offsafe != back_N_safe[i]){
 556             log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
 557         }
 558     }
 559 }
 560
 561 static void TestSetChar(){
 562     static const uint8_t input[]
 563         = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };
 564     static const int16_t start_unsafe[]
 565         = {0,    1,    1,    1,    4,    5,    6,    7,    8,    9,    9,    9,    12,   13 };
 566     static const int16_t start_safe[]
 567         = {0,    1,    1,    1,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13 };
 568     static const int16_t limit_unsafe[]
 569         = {0,    1,    4,    4,    4,    5,    6,    7,    9,    9,    10,   10,   10,   15 };
 570     static const int16_t limit_safe[]
 571         = {0,    1,    4,    4,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13 };
 572
 573     uint32_t i=0;
 574     int32_t offset=0, setOffset=0;
 575     for(offset=0; offset<(int32_t)sizeof(input); offset++){
 576          setOffset=offset;
 577          UTF8_SET_CHAR_START_UNSAFE(input, setOffset);
 578          if(setOffset != start_unsafe[i]){
 579              log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);
 580          }
 581
 582          setOffset=offset;
 583          U8_SET_CP_START_UNSAFE(input, setOffset);
 584          if(setOffset != start_unsafe[i]){
 585              log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);
 586          }
 587
 588          setOffset=offset;
 589          UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);
 590          if(setOffset != start_safe[i]){
 591              log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
 592          }
 593
 594          setOffset=offset;
 595          U8_SET_CP_START(input, 0, setOffset);
 596          if(setOffset != start_safe[i]){
 597              log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
 598          }
 599
 600          if (offset != 0) { /* Can't have it go off the end of the array */
 601              setOffset=offset;
 602              UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
 603              if(setOffset != limit_unsafe[i]){
 604                  log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset);
 605              }
 606
 607              setOffset=offset;
 608              U8_SET_CP_LIMIT_UNSAFE(input, setOffset);
 609              if(setOffset != limit_unsafe[i]){
 610                  log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset);
 611              }
 612          }
 613
 614          setOffset=offset;
 615          UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input));
 616          if(setOffset != limit_safe[i]){
 617              log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
 618          }
 619
 620          setOffset=offset;
 621          U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input));
 622          if(setOffset != limit_safe[i]){
 623              log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
 624          }
 625
 626          i++;
 627     }
 628 }
 629
 630 static void TestAppendChar(){
 631     static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
 632     static const uint32_t test[]={
 633      /*append-position(unsafe),  CHAR to be appended  */
 634         0,                        0x10401,
 635         2,                        0x0028,
 636         2,                        0x007f,
 637         3,                        0xd801,
 638         1,                        0x20402,
 639         8,                        0x10401,
 640         5,                        0xc0,
 641         5,                        0xc1,
 642         5,                        0xfd,
 643         6,                        0x80,
 644         6,                        0x81,
 645         6,                        0xbf,
 646         7,                        0xfe,
 647
 648     /*append-position(safe),     CHAR to be appended */
 649         0,                        0x10401,
 650         2,                        0x0028,
 651         3,                        0x7f,
 652         3,                        0xd801,   /* illegal for UTF-8 starting with Unicode 3.2 */
 653         1,                        0x20402,
 654         9,                        0x10401,
 655         5,                        0xc0,
 656         5,                        0xc1,
 657         5,                        0xfd,
 658         6,                        0x80,
 659         6,                        0x81,
 660         6,                        0xbf,
 661         7,                        0xfe,
 662
 663     };
 664     static const uint16_t movedOffset[]={
 665         /*offset-moved-to(unsafe)*/
 666           4,              /*for append-pos: 0 , CHAR 0x10401*/
 667           3,
 668           3,
 669           6,
 670           5,
 671           12,
 672           7,
 673           7,
 674           7,
 675           8,
 676           8,
 677           8,
 678           9,
 679
 680           /*offset-moved-to(safe)*/
 681           4,              /*for append-pos: 0, CHAR  0x10401*/
 682           3,
 683           4,
 684           6,
 685           5,
 686           11,
 687           7,
 688           7,
 689           7,
 690           8,
 691           8,
 692           8,
 693           9,
 694
 695     };
 696
 697     static const uint8_t result[][11]={
 698         /*unsafe*/
 699         {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
 700         {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
 701         {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
 702         {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00},
 703         {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
 704         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90},
 705
 706         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
 707         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
 708         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},
 709
 710         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
 711         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
 712         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},
 713
 714         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
 715         /*safe*/
 716         {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
 717         {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
 718         {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
 719         {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00},
 720         {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
 721         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /*gets UTF8_ERROR_VALUE_2 which takes 2 bytes 0xc0, 0x9f*/
 722
 723         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
 724         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
 725         {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},
 726
 727         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
 728         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
 729         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},
 730
 731         {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
 732
 733     };
 734     uint16_t i, count=0;
 735     uint8_t str[12];
 736     uint32_t offset;
 737 /*    UChar32 c=0;*/
 738     uint16_t size=sizeof(s)/sizeof(s[0]);
 739     for(i=0; i<sizeof(test)/sizeof(test[0]); i=(uint16_t)(i+2)){
 740         uprv_memcpy(str, s, size);
 741         offset=test[i];
 742         if(count<13){
 743             UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);
 744             if(offset != movedOffset[count]){
 745                 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
 746                     count, movedOffset[count], offset);
 747
 748             }
 749             if(uprv_memcmp(str, result[count], size) !=0){
 750                 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", count);
 751                 printUChars(result[count], size);
 752                 log_err("\nGot:      ");
 753                 printUChars(str, size);
 754                 log_err("\n");
 755             }
 756         }else{
 757             UTF8_APPEND_CHAR_SAFE(str, offset, size, test[i+1]);
 758             if(offset != movedOffset[count]){
 759                 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
 760                     count, movedOffset[count], offset);
 761
 762             }
 763             if(uprv_memcmp(str, result[count], size) !=0){
 764                 log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", count);
 765                 printUChars(result[count], size);
 766                 log_err("\nGot:     ");
 767                 printUChars(str, size);
 768                 log_err("\n");
 769             }
 770             /*call the API instead of MACRO
 771             uprv_memcpy(str, s, size);
 772             offset=test[i];
 773             c=test[i+1];
 774             if((uint32_t)(c)<=0x7f) {
 775                   (str)[(offset)++]=(uint8_t)(c);
 776             } else {
 777                  (offset)=utf8_appendCharSafeBody(str, (int32_t)(offset), (int32_t)(size), c);
 778             }
 779             if(offset != movedOffset[count]){
 780                 log_err("ERROR: utf8_appendCharSafeBody() failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
 781                     count, movedOffset[count], offset);
 782
 783             }
 784             if(uprv_memcmp(str, result[count], size) !=0){
 785                 log_err("ERROR: utf8_appendCharSafeBody() failed for count=%d. \nExpected:", count);
 786                 printUChars(result[count], size);
 787                 printf("\nGot:     ");
 788                 printUChars(str, size);
 789                 printf("\n");
 790             }
 791             */
 792         }
 793         count++;
 794     }
 795
 796
 797 }
 798
 799 static void TestAppend() {
 800     static const UChar32 codePoints[]={
 801         0x61, 0xdf, 0x901, 0x3040,
 802         0xac00, 0xd800, 0xdbff, 0xdcde,
 803         0xdffd, 0xe000, 0xffff, 0x10000,
 804         0x12345, 0xe0021, 0x10ffff, 0x110000,
 805         0x234567, 0x7fffffff, -1, -1000,
 806         0, 0x400
 807     };
 808     static const uint8_t expectUnsafe[]={
 809         0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
 810         0xea, 0xb0, 0x80,  0xed, 0xa0, 0x80,  0xed, 0xaf, 0xbf,  0xed, 0xb3, 0x9e,
 811         0xed, 0xbf, 0xbd,  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
 812         0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
 813         /* none from this line */
 814         0,  0xd0, 0x80
 815     }, expectSafe[]={
 816         0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
 817         0xea, 0xb0, 0x80,  /* no surrogates */
 818         /* no surrogates */  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
 819         0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
 820         /* none from this line */
 821         0,  0xd0, 0x80
 822     };
 823
 824     uint8_t buffer[100];
 825     UChar32 c;
 826     int32_t i, length;
 827     UBool isError, expectIsError, wrongIsError;
 828
 829     length=0;
 830     for(i=0; i<LENGTHOF(codePoints); ++i) {
 831         c=codePoints[i];
 832         if(c<0 || 0x10ffff<c) {
 833             continue; /* skip non-code points for U8_APPEND_UNSAFE */
 834         }
 835
 836         U8_APPEND_UNSAFE(buffer, length, c);
 837     }
 838     if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) {
 839         log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
 840     }
 841
 842     length=0;
 843     wrongIsError=FALSE;
 844     for(i=0; i<LENGTHOF(codePoints); ++i) {
 845         c=codePoints[i];
 846         expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
 847         isError=FALSE;
 848
 849         U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
 850         wrongIsError|= isError!=expectIsError;
 851     }
 852     if(wrongIsError) {
 853         log_err("U8_APPEND did not set isError correctly\n");
 854     }
 855     if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) {
 856         log_err("U8_APPEND did not generate the expected output\n");
 857     }
 858 }
 859
 860 static void
 861 TestSurrogates() {
 862     static const uint8_t b[]={
 863         0xc3, 0x9f,             /*  00DF */
 864         0xed, 0x9f, 0xbf,       /*  D7FF */
 865         0xed, 0xa0, 0x81,       /*  D801 */
 866         0xed, 0xbf, 0xbe,       /*  DFFE */
 867         0xee, 0x80, 0x80,       /*  E000 */
 868         0xf0, 0x97, 0xbf, 0xbe  /* 17FFE */
 869     };
 870     static const UChar32 cp[]={
 871         0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe
 872     };
 873
 874     UChar32 cu, cs, cl;
 875     int32_t i, j, k, iu, is, il, length;
 876
 877     k=0; /* index into cp[] */
 878     length=LENGTHOF(b);
 879     for(i=0; i<length;) {
 880         j=i;
 881         U8_NEXT_UNSAFE(b, j, cu);
 882         iu=j;
 883
 884         j=i;
 885         U8_NEXT(b, j, length, cs);
 886         is=j;
 887
 888         j=i;
 889         L8_NEXT(b, j, length, cl);
 890         il=j;
 891
 892         if(cu!=cp[k]) {
 893             log_err("U8_NEXT_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
 894         }
 895
 896         /* U8_NEXT() returns <0 for surrogate code points */
 897         if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
 898             log_err("U8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
 899         }
 900
 901         /* L8_NEXT() returns surrogate code points like U8_NEXT_UNSAFE() */
 902         if(cl!=cu) {
 903             log_err("L8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
 904         }
 905
 906         if(is!=iu || il!=iu) {
 907             log_err("U8_NEXT(b[%ld]) or L8_NEXT(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
 908         }
 909
 910         ++k;    /* next code point */
 911         i=iu;   /* advance by one UTF-8 sequence */
 912     }
 913
 914     while(i>0) {
 915         --k; /* previous code point */
 916
 917         j=i;
 918         U8_PREV_UNSAFE(b, j, cu);
 919         iu=j;
 920
 921         j=i;
 922         U8_PREV(b, 0, j, cs);
 923         is=j;
 924
 925         j=i;
 926         L8_PREV(b, 0, j, cl);
 927         il=j;
 928
 929         if(cu!=cp[k]) {
 930             log_err("U8_PREV_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
 931         }
 932
 933         /* U8_PREV() returns <0 for surrogate code points */
 934         if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
 935             log_err("U8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
 936         }
 937
 938         /* L8_PREV() returns surrogate code points like U8_PREV_UNSAFE() */
 939         if(cl!=cu) {
 940             log_err("L8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
 941         }
 942
 943         if(is!=iu || il !=iu) {
 944             log_err("U8_PREV(b[%ld]) or L8_PREV(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
 945         }
 946
 947         i=iu;   /* go back by one UTF-8 sequence */
 948     }
 949 }
 950
 951 static void printUChars(const uint8_t *uchars, int16_t len){
 952     int16_t i=0;
 953     for(i=0; i<len; i++){
 954         log_err("0x%02x ", *(uchars+i));
 955     }
 956 }