icuSources/test/intltest/utxttest.cpp

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 2005-2014, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /************************************************************************
   7 *   Tests for the UText and UTextIterator text abstraction classes
   8 *
   9 ************************************************************************/
  10
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdlib.h>
  14 #include "unicode/utypes.h"
  15 #include "unicode/utext.h"
  16 #include "unicode/utf8.h"
  17 #include "unicode/ustring.h"
  18 #include "unicode/uchriter.h"
  19 #include "utxttest.h"
  20
  21 static UBool  gFailed = FALSE;
  22 static int    gTestNum = 0;
  23
  24 // Forward decl
  25 UText *openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status);
  26
  27 #define TEST_ASSERT(x) \
  28 { if ((x)==FALSE) {errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__);\
  29                      gFailed = TRUE;\
  30    }}
  31
  32
  33 #define TEST_SUCCESS(status) \
  34 { if (U_FAILURE(status)) {errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
  35        gTestNum, __FILE__, __LINE__, u_errorName(status)); \
  36        gFailed = TRUE;\
  37    }}
  38
  39 UTextTest::UTextTest() {
  40 }
  41
  42 UTextTest::~UTextTest() {
  43 }
  44
  45
  46 void
  47 UTextTest::runIndexedTest(int32_t index, UBool exec,
  48                           const char* &name, char* /*par*/) {
  49     switch (index) {
  50         case 0: name = "TextTest";
  51             if (exec) TextTest();    break;
  52         case 1: name = "ErrorTest";
  53             if (exec) ErrorTest();   break;
  54         case 2: name = "FreezeTest";
  55             if (exec) FreezeTest();  break;
  56         case 3: name = "Ticket5560";
  57             if (exec) Ticket5560();  break;
  58         case 4: name = "Ticket6847";
  59             if (exec) Ticket6847();  break;
  60         case 5: name = "Ticket10562";
  61             if (exec) Ticket10562();  break;
  62         case 6: name = "Ticket10983";
  63             if (exec) Ticket10983();  break;
  64         default: name = "";          break;
  65     }
  66 }
  67
  68 //
  69 // Quick and dirty random number generator.
  70 //   (don't use library so that results are portable.
  71 static uint32_t m_seed = 1;
  72 static uint32_t m_rand()
  73 {
  74     m_seed = m_seed * 1103515245 + 12345;
  75     return (uint32_t)(m_seed/65536) % 32768;
  76 }
  77
  78
  79 //
  80 //   TextTest()
  81 //
  82 //       Top Level function for UText testing.
  83 //       Specifies the strings to be tested, with the acutal testing itself
  84 //       being carried out in another function, TestString().
  85 //
  86 void  UTextTest::TextTest() {
  87     int32_t i, j;
  88
  89     TestString("abcd\\U00010001xyz");
  90     TestString("");
  91
  92     // Supplementary chars at start or end
  93     TestString("\\U00010001");
  94     TestString("abc\\U00010001");
  95     TestString("\\U00010001abc");
  96
  97     // Test simple strings of lengths 1 to 60, looking for glitches at buffer boundaries
  98     UnicodeString s;
  99     for (i=1; i<60; i++) {
 100         s.truncate(0);
 101         for (j=0; j<i; j++) {
 102             if (j+0x30 == 0x5c) {
 103                 // backslash.  Needs to be escaped
 104                 s.append((UChar)0x5c);
 105             }
 106             s.append(UChar(j+0x30));
 107         }
 108         TestString(s);
 109     }
 110
 111    // Test strings with odd-aligned supplementary chars,
 112    //    looking for glitches at buffer boundaries
 113     for (i=1; i<60; i++) {
 114         s.truncate(0);
 115         s.append((UChar)0x41);
 116         for (j=0; j<i; j++) {
 117             s.append(UChar32(j+0x11000));
 118         }
 119         TestString(s);
 120     }
 121
 122     // String of chars of randomly varying size in utf-8 representation.
 123     //   Exercise the mapping, and the varying sized buffer.
 124     //
 125     s.truncate(0);
 126     UChar32  c1 = 0;
 127     UChar32  c2 = 0x100;
 128     UChar32  c3 = 0xa000;
 129     UChar32  c4 = 0x11000;
 130     for (i=0; i<1000; i++) {
 131         int len8 = m_rand()%4 + 1;
 132         switch (len8) {
 133             case 1:
 134                 c1 = (c1+1)%0x80;
 135                 // don't put 0 into string (0 terminated strings for some tests)
 136                 // don't put '\', will cause unescape() to fail.
 137                 if (c1==0x5c || c1==0) {
 138                     c1++;
 139                 }
 140                 s.append(c1);
 141                 break;
 142             case 2:
 143                 s.append(c2++);
 144                 break;
 145             case 3:
 146                 s.append(c3++);
 147                 break;
 148             case 4:
 149                 s.append(c4++);
 150                 break;
 151         }
 152     }
 153     TestString(s);
 154 }
 155
 156
 157 //
 158 //  TestString()     Run a suite of UText tests on a string.
 159 //                   The test string is unescaped before use.
 160 //
 161 void UTextTest::TestString(const UnicodeString &s) {
 162     int32_t       i;
 163     int32_t       j;
 164     UChar32       c;
 165     int32_t       cpCount = 0;
 166     UErrorCode    status  = U_ZERO_ERROR;
 167     UText        *ut      = NULL;
 168     int32_t       saLen;
 169
 170     UnicodeString sa = s.unescape();
 171     saLen = sa.length();
 172
 173     //
 174     // Build up a mapping between code points and UTF-16 code unit indexes.
 175     //
 176     m *cpMap = new m[sa.length() + 1];
 177     j = 0;
 178     for (i=0; i<sa.length(); i=sa.moveIndex32(i, 1)) {
 179         c = sa.char32At(i);
 180         cpMap[j].nativeIdx = i;
 181         cpMap[j].cp = c;
 182         j++;
 183         cpCount++;
 184     }
 185     cpMap[j].nativeIdx = i;   // position following the last char in utf-16 string.
 186
 187
 188     // UChar * test, null terminated
 189     status = U_ZERO_ERROR;
 190     UChar *buf = new UChar[saLen+1];
 191     sa.extract(buf, saLen+1, status);
 192     TEST_SUCCESS(status);
 193     ut = utext_openUChars(NULL, buf, -1, &status);
 194     TEST_SUCCESS(status);
 195     TestAccess(sa, ut, cpCount, cpMap);
 196     utext_close(ut);
 197     delete [] buf;
 198
 199     // UChar * test, with length
 200     status = U_ZERO_ERROR;
 201     buf = new UChar[saLen+1];
 202     sa.extract(buf, saLen+1, status);
 203     TEST_SUCCESS(status);
 204     ut = utext_openUChars(NULL, buf, saLen, &status);
 205     TEST_SUCCESS(status);
 206     TestAccess(sa, ut, cpCount, cpMap);
 207     utext_close(ut);
 208     delete [] buf;
 209
 210
 211     // UnicodeString test
 212     status = U_ZERO_ERROR;
 213     ut = utext_openUnicodeString(NULL, &sa, &status);
 214     TEST_SUCCESS(status);
 215     TestAccess(sa, ut, cpCount, cpMap);
 216     TestCMR(sa, ut, cpCount, cpMap, cpMap);
 217     utext_close(ut);
 218
 219
 220     // Const UnicodeString test
 221     status = U_ZERO_ERROR;
 222     ut = utext_openConstUnicodeString(NULL, &sa, &status);
 223     TEST_SUCCESS(status);
 224     TestAccess(sa, ut, cpCount, cpMap);
 225     utext_close(ut);
 226
 227
 228     // Replaceable test.  (UnicodeString inherits Replaceable)
 229     status = U_ZERO_ERROR;
 230     ut = utext_openReplaceable(NULL, &sa, &status);
 231     TEST_SUCCESS(status);
 232     TestAccess(sa, ut, cpCount, cpMap);
 233     TestCMR(sa, ut, cpCount, cpMap, cpMap);
 234     utext_close(ut);
 235
 236     // Character Iterator Tests
 237     status = U_ZERO_ERROR;
 238     const UChar *cbuf = sa.getBuffer();
 239     CharacterIterator *ci = new UCharCharacterIterator(cbuf, saLen, status);
 240     TEST_SUCCESS(status);
 241     ut = utext_openCharacterIterator(NULL, ci, &status);
 242     TEST_SUCCESS(status);
 243     TestAccess(sa, ut, cpCount, cpMap);
 244     utext_close(ut);
 245     delete ci;
 246
 247
 248     // Fragmented UnicodeString  (Chunk size of one)
 249     //
 250     status = U_ZERO_ERROR;
 251     ut = openFragmentedUnicodeString(NULL, &sa, &status);
 252     TEST_SUCCESS(status);
 253     TestAccess(sa, ut, cpCount, cpMap);
 254     utext_close(ut);
 255
 256     //
 257     // UTF-8 test
 258     //
 259
 260     // Convert the test string from UnicodeString to (char *) in utf-8 format
 261     int32_t u8Len = sa.extract(0, sa.length(), NULL, 0, "utf-8");
 262     char *u8String = new char[u8Len + 1];
 263     sa.extract(0, sa.length(), u8String, u8Len+1, "utf-8");
 264
 265     // Build up the map of code point indices in the utf-8 string
 266     m * u8Map = new m[sa.length() + 1];
 267     i = 0;   // native utf-8 index
 268     for (j=0; j<cpCount ; j++) {  // code point number
 269         u8Map[j].nativeIdx = i;
 270         U8_NEXT(u8String, i, u8Len, c)
 271         u8Map[j].cp = c;
 272     }
 273     u8Map[cpCount].nativeIdx = u8Len;   // position following the last char in utf-8 string.
 274
 275     // Do the test itself
 276     status = U_ZERO_ERROR;
 277     ut = utext_openUTF8(NULL, u8String, -1, &status);
 278     TEST_SUCCESS(status);
 279     TestAccess(sa, ut, cpCount, u8Map);
 280     utext_close(ut);
 281
 282
 283
 284     delete []cpMap;
 285     delete []u8Map;
 286     delete []u8String;
 287 }
 288
 289 //  TestCMR   test Copy, Move and Replace operations.
 290 //              us         UnicodeString containing the test text.
 291 //              ut         UText containing the same test text.
 292 //              cpCount    number of code points in the test text.
 293 //              nativeMap  Mapping from code points to native indexes for the UText.
 294 //              u16Map     Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
 295 //
 296 //     This function runs a whole series of opertions on each incoming UText.
 297 //     The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
 298 //
 299 void UTextTest::TestCMR(const UnicodeString &us, UText *ut, int cpCount, m *nativeMap, m *u16Map) {
 300     TEST_ASSERT(utext_isWritable(ut) == TRUE);
 301
 302     int  srcLengthType;       // Loop variables for selecting the postion and length
 303     int  srcPosType;          //   of the block to operate on within the source text.
 304     int  destPosType;
 305
 306     int  srcIndex  = 0;       // Code Point indexes of the block to operate on for
 307     int  srcLength = 0;       //   a specific test.
 308
 309     int  destIndex = 0;       // Code point index of the destination for a copy/move test.
 310
 311     int32_t  nativeStart = 0; // Native unit indexes for a test.
 312     int32_t  nativeLimit = 0;
 313     int32_t  nativeDest  = 0;
 314
 315     int32_t  u16Start    = 0; // UTF-16 indexes for a test.
 316     int32_t  u16Limit    = 0; //   used when performing the same operation in a Unicode String
 317     int32_t  u16Dest     = 0;
 318
 319     // Iterate over a whole series of source index, length and a target indexes.
 320     // This is done with code point indexes; these will be later translated to native
 321     //   indexes using the cpMap.
 322     for (srcLengthType=1; srcLengthType<=3; srcLengthType++) {
 323         switch (srcLengthType) {
 324             case 1: srcLength = 1; break;
 325             case 2: srcLength = 5; break;
 326             case 3: srcLength = cpCount / 3;
 327         }
 328         for (srcPosType=1; srcPosType<=5; srcPosType++) {
 329             switch (srcPosType) {
 330                 case 1: srcIndex = 0; break;
 331                 case 2: srcIndex = 1; break;
 332                 case 3: srcIndex = cpCount - srcLength; break;
 333                 case 4: srcIndex = cpCount - srcLength - 1; break;
 334                 case 5: srcIndex = cpCount / 2; break;
 335             }
 336             if (srcIndex < 0 || srcIndex + srcLength > cpCount) {
 337                 // filter out bogus test cases -
 338                 //   those with a source range that falls of an edge of the string.
 339                 continue;
 340             }
 341
 342             //
 343             // Copy and move tests.
 344             //   iterate over a variety of destination positions.
 345             //
 346             for (destPosType=1; destPosType<=4; destPosType++) {
 347                 switch (destPosType) {
 348                     case 1: destIndex = 0; break;
 349                     case 2: destIndex = 1; break;
 350                     case 3: destIndex = srcIndex - 1; break;
 351                     case 4: destIndex = srcIndex + srcLength + 1; break;
 352                     case 5: destIndex = cpCount-1; break;
 353                     case 6: destIndex = cpCount; break;
 354                 }
 355                 if (destIndex<0 || destIndex>cpCount) {
 356                     // filter out bogus test cases.
 357                     continue;
 358                 }
 359
 360                 nativeStart = nativeMap[srcIndex].nativeIdx;
 361                 nativeLimit = nativeMap[srcIndex+srcLength].nativeIdx;
 362                 nativeDest  = nativeMap[destIndex].nativeIdx;
 363
 364                 u16Start    = u16Map[srcIndex].nativeIdx;
 365                 u16Limit    = u16Map[srcIndex+srcLength].nativeIdx;
 366                 u16Dest     = u16Map[destIndex].nativeIdx;
 367
 368                 gFailed = FALSE;
 369                 TestCopyMove(us, ut, FALSE,
 370                     nativeStart, nativeLimit, nativeDest,
 371                     u16Start, u16Limit, u16Dest);
 372
 373                 TestCopyMove(us, ut, TRUE,
 374                     nativeStart, nativeLimit, nativeDest,
 375                     u16Start, u16Limit, u16Dest);
 376
 377                 if (gFailed) {
 378                     return;
 379                 }
 380             }
 381
 382             //
 383             //  Replace tests.
 384             //
 385             UnicodeString fullRepString("This is an arbitrary string that will be used as replacement text");
 386             for (int32_t replStrLen=0; replStrLen<20; replStrLen++) {
 387                 UnicodeString repStr(fullRepString, 0, replStrLen);
 388                 TestReplace(us, ut,
 389                     nativeStart, nativeLimit,
 390                     u16Start, u16Limit,
 391                     repStr);
 392                 if (gFailed) {
 393                     return;
 394                 }
 395             }
 396
 397         }
 398     }
 399
 400 }
 401
 402 //
 403 //   TestCopyMove    run a single test case for utext_copy.
 404 //                   Test cases are created in TestCMR and dispatched here for execution.
 405 //
 406 void UTextTest::TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
 407                     int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
 408                     int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
 409 {
 410     UErrorCode      status   = U_ZERO_ERROR;
 411     UText          *targetUT = NULL;
 412     gTestNum++;
 413     gFailed = FALSE;
 414
 415     //
 416     //  clone the UText.  The test will be run in the cloned copy
 417     //  so that we don't alter the original.
 418     //
 419     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
 420     TEST_SUCCESS(status);
 421     UnicodeString targetUS(us);    // And copy the reference string.
 422
 423     // do the test operation first in the reference
 424     targetUS.copy(u16Start, u16Limit, u16Dest);
 425     if (move) {
 426         // delete out the source range.
 427         if (u16Limit < u16Dest) {
 428             targetUS.removeBetween(u16Start, u16Limit);
 429         } else {
 430             int32_t amtCopied = u16Limit - u16Start;
 431             targetUS.removeBetween(u16Start+amtCopied, u16Limit+amtCopied);
 432         }
 433     }
 434
 435     // Do the same operation in the UText under test
 436     utext_copy(targetUT, nativeStart, nativeLimit, nativeDest, move, &status);
 437     if (nativeDest > nativeStart && nativeDest < nativeLimit) {
 438         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
 439     } else {
 440         TEST_SUCCESS(status);
 441
 442         // Compare the results of the two parallel tests
 443         int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
 444         int64_t  uti = 0;    // UText position, native index.
 445         int32_t  cpi;        // char32 position (code point index)
 446         UChar32  usc;        // code point from Unicode String
 447         UChar32  utc;        // code point from UText
 448         utext_setNativeIndex(targetUT, 0);
 449         for (cpi=0; ; cpi++) {
 450             usc = targetUS.char32At(usi);
 451             utc = utext_next32(targetUT);
 452             if (utc < 0) {
 453                 break;
 454             }
 455             TEST_ASSERT(uti == usi);
 456             TEST_ASSERT(utc == usc);
 457             usi = targetUS.moveIndex32(usi, 1);
 458             uti = utext_getNativeIndex(targetUT);
 459             if (gFailed) {
 460                 goto cleanupAndReturn;
 461             }
 462         }
 463         int64_t expectedNativeLength = utext_nativeLength(ut);
 464         if (move == FALSE) {
 465             expectedNativeLength += nativeLimit - nativeStart;
 466         }
 467         uti = utext_getNativeIndex(targetUT);
 468         TEST_ASSERT(uti == expectedNativeLength);
 469     }
 470
 471 cleanupAndReturn:
 472     utext_close(targetUT);
 473 }
 474
 475
 476 //
 477 //  TestReplace   Test a single Replace operation.
 478 //
 479 void UTextTest::TestReplace(
 480             const UnicodeString &us,     // reference UnicodeString in which to do the replace
 481             UText         *ut,                // UnicodeText object under test.
 482             int32_t       nativeStart,        // Range to be replaced, in UText native units.
 483             int32_t       nativeLimit,
 484             int32_t       u16Start,           // Range to be replaced, in UTF-16 units
 485             int32_t       u16Limit,           //    for use in the reference UnicodeString.
 486             const UnicodeString &repStr)      // The replacement string
 487 {
 488     UErrorCode      status   = U_ZERO_ERROR;
 489     UText          *targetUT = NULL;
 490     gTestNum++;
 491     gFailed = FALSE;
 492
 493     //
 494     //  clone the target UText.  The test will be run in the cloned copy
 495     //  so that we don't alter the original.
 496     //
 497     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
 498     TEST_SUCCESS(status);
 499     UnicodeString targetUS(us);    // And copy the reference string.
 500
 501     //
 502     // Do the replace operation in the Unicode String, to
 503     //   produce a reference result.
 504     //
 505     targetUS.replace(u16Start, u16Limit-u16Start, repStr);
 506
 507     //
 508     // Do the replace on the UText under test
 509     //
 510     const UChar *rs = repStr.getBuffer();
 511     int32_t  rsLen = repStr.length();
 512     int32_t actualDelta = utext_replace(targetUT, nativeStart, nativeLimit, rs, rsLen, &status);
 513     int32_t expectedDelta = repStr.length() - (nativeLimit - nativeStart);
 514     TEST_ASSERT(actualDelta == expectedDelta);
 515
 516     //
 517     // Compare the results
 518     //
 519     int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
 520     int64_t  uti = 0;    // UText position, native index.
 521     int32_t  cpi;        // char32 position (code point index)
 522     UChar32  usc;        // code point from Unicode String
 523     UChar32  utc;        // code point from UText
 524     int64_t  expectedNativeLength = 0;
 525     utext_setNativeIndex(targetUT, 0);
 526     for (cpi=0; ; cpi++) {
 527         usc = targetUS.char32At(usi);
 528         utc = utext_next32(targetUT);
 529         if (utc < 0) {
 530             break;
 531         }
 532         TEST_ASSERT(uti == usi);
 533         TEST_ASSERT(utc == usc);
 534         usi = targetUS.moveIndex32(usi, 1);
 535         uti = utext_getNativeIndex(targetUT);
 536         if (gFailed) {
 537             goto cleanupAndReturn;
 538         }
 539     }
 540     expectedNativeLength = utext_nativeLength(ut) + expectedDelta;
 541     uti = utext_getNativeIndex(targetUT);
 542     TEST_ASSERT(uti == expectedNativeLength);
 543
 544 cleanupAndReturn:
 545     utext_close(targetUT);
 546 }
 547
 548 //
 549 //  TestAccess      Test the read only access functions on a UText, including cloning.
 550 //                  The text is accessed in a variety of ways, and compared with
 551 //                  the reference UnicodeString.
 552 //
 553 void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
 554     // Run the standard tests on the caller-supplied UText.
 555     TestAccessNoClone(us, ut, cpCount, cpMap);
 556
 557     // Re-run tests on a shallow clone.
 558     utext_setNativeIndex(ut, 0);
 559     UErrorCode status = U_ZERO_ERROR;
 560     UText *shallowClone = utext_clone(NULL, ut, FALSE /*deep*/, FALSE /*readOnly*/, &status);
 561     TEST_SUCCESS(status);
 562     TestAccessNoClone(us, shallowClone, cpCount, cpMap);
 563
 564     //
 565     // Rerun again on a deep clone.
 566     // Note that text providers are not required to provide deep cloning,
 567     //   so unsupported errors are ignored.
 568     //
 569     status = U_ZERO_ERROR;
 570     utext_setNativeIndex(shallowClone, 0);
 571     UText *deepClone = utext_clone(NULL, shallowClone, TRUE, FALSE, &status);
 572     utext_close(shallowClone);
 573     if (status != U_UNSUPPORTED_ERROR) {
 574         TEST_SUCCESS(status);
 575         TestAccessNoClone(us, deepClone, cpCount, cpMap);
 576     }
 577     utext_close(deepClone);
 578 }
 579
 580
 581 //
 582 //  TestAccessNoClone()    Test the read only access functions on a UText.
 583 //                         The text is accessed in a variety of ways, and compared with
 584 //                         the reference UnicodeString.
 585 //
 586 void UTextTest::TestAccessNoClone(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
 587     UErrorCode  status = U_ZERO_ERROR;
 588     gTestNum++;
 589
 590     //
 591     //  Check the length from the UText
 592     //
 593     int64_t expectedLen = cpMap[cpCount].nativeIdx;
 594     int64_t utlen = utext_nativeLength(ut);
 595     TEST_ASSERT(expectedLen == utlen);
 596
 597     //
 598     //  Iterate forwards, verify that we get the correct code points
 599     //   at the correct native offsets.
 600     //
 601     int         i = 0;
 602     int64_t     index;
 603     int64_t     expectedIndex = 0;
 604     int64_t     foundIndex = 0;
 605     UChar32     expectedC;
 606     UChar32     foundC;
 607     int64_t     len;
 608
 609     for (i=0; i<cpCount; i++) {
 610         expectedIndex = cpMap[i].nativeIdx;
 611         foundIndex    = utext_getNativeIndex(ut);
 612         TEST_ASSERT(expectedIndex == foundIndex);
 613         expectedC     = cpMap[i].cp;
 614         foundC        = utext_next32(ut);
 615         TEST_ASSERT(expectedC == foundC);
 616         foundIndex    = utext_getPreviousNativeIndex(ut);
 617         TEST_ASSERT(expectedIndex == foundIndex);
 618         if (gFailed) {
 619             return;
 620         }
 621     }
 622     foundC = utext_next32(ut);
 623     TEST_ASSERT(foundC == U_SENTINEL);
 624
 625     // Repeat above, using macros
 626     utext_setNativeIndex(ut, 0);
 627     for (i=0; i<cpCount; i++) {
 628         expectedIndex = cpMap[i].nativeIdx;
 629         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
 630         TEST_ASSERT(expectedIndex == foundIndex);
 631         expectedC     = cpMap[i].cp;
 632         foundC        = UTEXT_NEXT32(ut);
 633         TEST_ASSERT(expectedC == foundC);
 634         if (gFailed) {
 635             return;
 636         }
 637     }
 638     foundC = UTEXT_NEXT32(ut);
 639     TEST_ASSERT(foundC == U_SENTINEL);
 640
 641     //
 642     //  Forward iteration (above) should have left index at the
 643     //   end of the input, which should == length().
 644     //
 645     len = utext_nativeLength(ut);
 646     foundIndex  = utext_getNativeIndex(ut);
 647     TEST_ASSERT(len == foundIndex);
 648
 649     //
 650     // Iterate backwards over entire test string
 651     //
 652     len = utext_getNativeIndex(ut);
 653     utext_setNativeIndex(ut, len);
 654     for (i=cpCount-1; i>=0; i--) {
 655         expectedC     = cpMap[i].cp;
 656         expectedIndex = cpMap[i].nativeIdx;
 657         int64_t prevIndex = utext_getPreviousNativeIndex(ut);
 658         foundC        = utext_previous32(ut);
 659         foundIndex    = utext_getNativeIndex(ut);
 660         TEST_ASSERT(expectedIndex == foundIndex);
 661         TEST_ASSERT(expectedC == foundC);
 662         TEST_ASSERT(prevIndex == foundIndex);
 663         if (gFailed) {
 664             return;
 665         }
 666     }
 667
 668     //
 669     //  Backwards iteration, above, should have left our iterator
 670     //   position at zero, and continued backwards iterationshould fail.
 671     //
 672     foundIndex = utext_getNativeIndex(ut);
 673     TEST_ASSERT(foundIndex == 0);
 674     foundIndex = utext_getPreviousNativeIndex(ut);
 675     TEST_ASSERT(foundIndex == 0);
 676
 677
 678     foundC = utext_previous32(ut);
 679     TEST_ASSERT(foundC == U_SENTINEL);
 680     foundIndex = utext_getNativeIndex(ut);
 681     TEST_ASSERT(foundIndex == 0);
 682     foundIndex = utext_getPreviousNativeIndex(ut);
 683     TEST_ASSERT(foundIndex == 0);
 684
 685
 686     // And again, with the macros
 687     utext_setNativeIndex(ut, len);
 688     for (i=cpCount-1; i>=0; i--) {
 689         expectedC     = cpMap[i].cp;
 690         expectedIndex = cpMap[i].nativeIdx;
 691         foundC        = UTEXT_PREVIOUS32(ut);
 692         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
 693         TEST_ASSERT(expectedIndex == foundIndex);
 694         TEST_ASSERT(expectedC == foundC);
 695         if (gFailed) {
 696             return;
 697         }
 698     }
 699
 700     //
 701     //  Backwards iteration, above, should have left our iterator
 702     //   position at zero, and continued backwards iterationshould fail.
 703     //
 704     foundIndex = UTEXT_GETNATIVEINDEX(ut);
 705     TEST_ASSERT(foundIndex == 0);
 706
 707     foundC = UTEXT_PREVIOUS32(ut);
 708     TEST_ASSERT(foundC == U_SENTINEL);
 709     foundIndex = UTEXT_GETNATIVEINDEX(ut);
 710     TEST_ASSERT(foundIndex == 0);
 711     if (gFailed) {
 712         return;
 713     }
 714
 715     //
 716     //  next32From(), prevous32From(), Iterate in a somewhat random order.
 717     //
 718     int  cpIndex = 0;
 719     for (i=0; i<cpCount; i++) {
 720         cpIndex = (cpIndex + 9973) % cpCount;
 721         index         = cpMap[cpIndex].nativeIdx;
 722         expectedC     = cpMap[cpIndex].cp;
 723         foundC        = utext_next32From(ut, index);
 724         TEST_ASSERT(expectedC == foundC);
 725         if (gFailed) {
 726             return;
 727         }
 728     }
 729
 730     cpIndex = 0;
 731     for (i=0; i<cpCount; i++) {
 732         cpIndex = (cpIndex + 9973) % cpCount;
 733         index         = cpMap[cpIndex+1].nativeIdx;
 734         expectedC     = cpMap[cpIndex].cp;
 735         foundC        = utext_previous32From(ut, index);
 736         TEST_ASSERT(expectedC == foundC);
 737         if (gFailed) {
 738             return;
 739         }
 740     }
 741
 742
 743     //
 744     // moveIndex(int32_t delta);
 745     //
 746
 747     // Walk through frontwards, incrementing by one
 748     utext_setNativeIndex(ut, 0);
 749     for (i=1; i<=cpCount; i++) {
 750         utext_moveIndex32(ut, 1);
 751         index = utext_getNativeIndex(ut);
 752         expectedIndex = cpMap[i].nativeIdx;
 753         TEST_ASSERT(expectedIndex == index);
 754         index = UTEXT_GETNATIVEINDEX(ut);
 755         TEST_ASSERT(expectedIndex == index);
 756     }
 757
 758     // Walk through frontwards, incrementing by two
 759     utext_setNativeIndex(ut, 0);
 760     for (i=2; i<cpCount; i+=2) {
 761         utext_moveIndex32(ut, 2);
 762         index = utext_getNativeIndex(ut);
 763         expectedIndex = cpMap[i].nativeIdx;
 764         TEST_ASSERT(expectedIndex == index);
 765         index = UTEXT_GETNATIVEINDEX(ut);
 766         TEST_ASSERT(expectedIndex == index);
 767     }
 768
 769     // walk through the string backwards, decrementing by one.
 770     i = cpMap[cpCount].nativeIdx;
 771     utext_setNativeIndex(ut, i);
 772     for (i=cpCount; i>=0; i--) {
 773         expectedIndex = cpMap[i].nativeIdx;
 774         index = utext_getNativeIndex(ut);
 775         TEST_ASSERT(expectedIndex == index);
 776         index = UTEXT_GETNATIVEINDEX(ut);
 777         TEST_ASSERT(expectedIndex == index);
 778         utext_moveIndex32(ut, -1);
 779     }
 780
 781
 782     // walk through backwards, decrementing by three
 783     i = cpMap[cpCount].nativeIdx;
 784     utext_setNativeIndex(ut, i);
 785     for (i=cpCount; i>=0; i-=3) {
 786         expectedIndex = cpMap[i].nativeIdx;
 787         index = utext_getNativeIndex(ut);
 788         TEST_ASSERT(expectedIndex == index);
 789         index = UTEXT_GETNATIVEINDEX(ut);
 790         TEST_ASSERT(expectedIndex == index);
 791         utext_moveIndex32(ut, -3);
 792     }
 793
 794
 795     //
 796     // Extract
 797     //
 798     int bufSize = us.length() + 10;
 799     UChar *buf = new UChar[bufSize];
 800     status = U_ZERO_ERROR;
 801     expectedLen = us.length();
 802     len = utext_extract(ut, 0, utlen, buf, bufSize, &status);
 803     TEST_SUCCESS(status);
 804     TEST_ASSERT(len == expectedLen);
 805     int compareResult = us.compare(buf, -1);
 806     TEST_ASSERT(compareResult == 0);
 807
 808     status = U_ZERO_ERROR;
 809     len = utext_extract(ut, 0, utlen, NULL, 0, &status);
 810     if (utlen == 0) {
 811         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 812     } else {
 813         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 814     }
 815     TEST_ASSERT(len == expectedLen);
 816
 817     status = U_ZERO_ERROR;
 818     u_memset(buf, 0x5555, bufSize);
 819     len = utext_extract(ut, 0, utlen, buf, 1, &status);
 820     if (us.length() == 0) {
 821         TEST_SUCCESS(status);
 822         TEST_ASSERT(buf[0] == 0);
 823     } else {
 824         // Buf len == 1, extracting a single 16 bit value.
 825         // If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
 826         //   or whether the lead surrogate of the pair is extracted.
 827         //   It's a buffer overflow error in either case.
 828         TEST_ASSERT(buf[0] == us.charAt(0) ||
 829                     (buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0))));
 830         TEST_ASSERT(buf[1] == 0x5555);
 831         if (us.length() == 1) {
 832             TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 833         } else {
 834             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 835         }
 836     }
 837
 838     delete []buf;
 839 }
 840
 841 //
 842 //  ErrorTest()    Check various error and edge cases.
 843 //
 844 void UTextTest::ErrorTest()
 845 {
 846     // Close of an unitialized UText.  Shouldn't blow up.
 847     {
 848         UText  ut;
 849         memset(&ut, 0, sizeof(UText));
 850         utext_close(&ut);
 851         utext_close(NULL);
 852     }
 853
 854     // Double-close of a UText.  Shouldn't blow up.  UText should still be usable.
 855     {
 856         UErrorCode status = U_ZERO_ERROR;
 857         UText ut = UTEXT_INITIALIZER;
 858         UnicodeString s("Hello, World");
 859         UText *ut2 = utext_openUnicodeString(&ut, &s, &status);
 860         TEST_SUCCESS(status);
 861         TEST_ASSERT(ut2 == &ut);
 862
 863         UText *ut3 = utext_close(&ut);
 864         TEST_ASSERT(ut3 == &ut);
 865
 866         UText *ut4 = utext_close(&ut);
 867         TEST_ASSERT(ut4 == &ut);
 868
 869         utext_openUnicodeString(&ut, &s, &status);
 870         TEST_SUCCESS(status);
 871         utext_close(&ut);
 872     }
 873
 874     // Re-use of a UText, chaining through each of the types of UText
 875     //   (If it doesn't blow up, and doesn't leak, it's probably working fine)
 876     {
 877         UErrorCode status = U_ZERO_ERROR;
 878         UText ut = UTEXT_INITIALIZER;
 879         UText  *utp;
 880         UnicodeString s1("Hello, World");
 881         UChar s2[] = {(UChar)0x41, (UChar)0x42, (UChar)0};
 882         const char  *s3 = "\x66\x67\x68";
 883
 884         utp = utext_openUnicodeString(&ut, &s1, &status);
 885         TEST_SUCCESS(status);
 886         TEST_ASSERT(utp == &ut);
 887
 888         utp = utext_openConstUnicodeString(&ut, &s1, &status);
 889         TEST_SUCCESS(status);
 890         TEST_ASSERT(utp == &ut);
 891
 892         utp = utext_openUTF8(&ut, s3, -1, &status);
 893         TEST_SUCCESS(status);
 894         TEST_ASSERT(utp == &ut);
 895
 896         utp = utext_openUChars(&ut, s2, -1, &status);
 897         TEST_SUCCESS(status);
 898         TEST_ASSERT(utp == &ut);
 899
 900         utp = utext_close(&ut);
 901         TEST_ASSERT(utp == &ut);
 902
 903         utp = utext_openUnicodeString(&ut, &s1, &status);
 904         TEST_SUCCESS(status);
 905         TEST_ASSERT(utp == &ut);
 906     }
 907
 908     // Invalid parameters on open
 909     //
 910     {
 911         UErrorCode status = U_ZERO_ERROR;
 912         UText ut = UTEXT_INITIALIZER;
 913
 914         utext_openUChars(&ut, NULL, 5, &status);
 915         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
 916
 917         status = U_ZERO_ERROR;
 918         utext_openUChars(&ut, NULL, -1, &status);
 919         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
 920
 921         status = U_ZERO_ERROR;
 922         utext_openUTF8(&ut, NULL, 4, &status);
 923         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
 924
 925         status = U_ZERO_ERROR;
 926         utext_openUTF8(&ut, NULL, -1, &status);
 927         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
 928     }
 929
 930     //
 931     //  UTF-8 with malformed sequences.
 932     //    These should come through as the Unicode replacement char, \ufffd
 933     //
 934     {
 935         UErrorCode status = U_ZERO_ERROR;
 936         UText *ut = NULL;
 937         const char *badUTF8 = "\x41\x81\x42\xf0\x81\x81\x43";
 938         UChar32  c;
 939
 940         ut = utext_openUTF8(NULL, badUTF8, -1, &status);
 941         TEST_SUCCESS(status);
 942         c = utext_char32At(ut, 1);
 943         TEST_ASSERT(c == 0xfffd);
 944         c = utext_char32At(ut, 3);
 945         TEST_ASSERT(c == 0xfffd);
 946         c = utext_char32At(ut, 5);
 947         TEST_ASSERT(c == 0xfffd);
 948         c = utext_char32At(ut, 6);
 949         TEST_ASSERT(c == 0x43);
 950
 951         UChar buf[10];
 952         int n = utext_extract(ut, 0, 9, buf, 10, &status);
 953         TEST_SUCCESS(status);
 954         TEST_ASSERT(n==5);
 955         TEST_ASSERT(buf[1] == 0xfffd);
 956         TEST_ASSERT(buf[3] == 0xfffd);
 957         TEST_ASSERT(buf[2] == 0x42);
 958         utext_close(ut);
 959     }
 960
 961
 962     //
 963     //  isLengthExpensive - does it make the exptected transitions after
 964     //                      getting the length of a nul terminated string?
 965     //
 966     {
 967         UErrorCode status = U_ZERO_ERROR;
 968         UnicodeString sa("Hello, this is a string");
 969         UBool  isExpensive;
 970
 971         UChar sb[100];
 972         memset(sb, 0x20, sizeof(sb));
 973         sb[99] = 0;
 974
 975         UText *uta = utext_openUnicodeString(NULL, &sa, &status);
 976         TEST_SUCCESS(status);
 977         isExpensive = utext_isLengthExpensive(uta);
 978         TEST_ASSERT(isExpensive == FALSE);
 979         utext_close(uta);
 980
 981         UText *utb = utext_openUChars(NULL, sb, -1, &status);
 982         TEST_SUCCESS(status);
 983         isExpensive = utext_isLengthExpensive(utb);
 984         TEST_ASSERT(isExpensive == TRUE);
 985         int64_t  len = utext_nativeLength(utb);
 986         TEST_ASSERT(len == 99);
 987         isExpensive = utext_isLengthExpensive(utb);
 988         TEST_ASSERT(isExpensive == FALSE);
 989         utext_close(utb);
 990     }
 991
 992     //
 993     // Index to positions not on code point boundaries.
 994     //
 995     {
 996         const char *u8str =         "\xc8\x81\xe1\x82\x83\xf1\x84\x85\x86";
 997         int32_t startMap[] =        {   0,  0,  2,  2,  2,  5,  5,  5,  5,  9,  9};
 998         int32_t nextMap[]  =        {   2,  2,  5,  5,  5,  9,  9,  9,  9,  9,  9};
 999         int32_t prevMap[]  =        {   0,  0,  0,  0,  0,  2,  2,  2,  2,  5,  5};
1000         UChar32  c32Map[] =    {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
1001         UChar32  pr32Map[] =   {    -1,   -1,  0x201,  0x201,  0x201,   0x1083,   0x1083,   0x1083,   0x1083, 0x044146, 0x044146};
1002
1003         // extractLen is the size, in UChars, of what will be extracted between index and index+1.
1004         //  is zero when both index positions lie within the same code point.
1005         int32_t  exLen[] =          {   0,  1,   0,  0,  1,  0,  0,  0,  2,  0,  0};
1006
1007
1008         UErrorCode status = U_ZERO_ERROR;
1009         UText *ut = utext_openUTF8(NULL, u8str, -1, &status);
1010         TEST_SUCCESS(status);
1011
1012         // Check setIndex
1013         int32_t i;
1014         int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
1015         for (i=0; i<startMapLimit; i++) {
1016             utext_setNativeIndex(ut, i);
1017             int64_t cpIndex = utext_getNativeIndex(ut);
1018             TEST_ASSERT(cpIndex == startMap[i]);
1019             cpIndex = UTEXT_GETNATIVEINDEX(ut);
1020             TEST_ASSERT(cpIndex == startMap[i]);
1021         }
1022
1023         // Check char32At
1024         for (i=0; i<startMapLimit; i++) {
1025             UChar32 c32 = utext_char32At(ut, i);
1026             TEST_ASSERT(c32 == c32Map[i]);
1027             int64_t cpIndex = utext_getNativeIndex(ut);
1028             TEST_ASSERT(cpIndex == startMap[i]);
1029         }
1030
1031         // Check utext_next32From
1032         for (i=0; i<startMapLimit; i++) {
1033             UChar32 c32 = utext_next32From(ut, i);
1034             TEST_ASSERT(c32 == c32Map[i]);
1035             int64_t cpIndex = utext_getNativeIndex(ut);
1036             TEST_ASSERT(cpIndex == nextMap[i]);
1037         }
1038
1039         // check utext_previous32From
1040         for (i=0; i<startMapLimit; i++) {
1041             gTestNum++;
1042             UChar32 c32 = utext_previous32From(ut, i);
1043             TEST_ASSERT(c32 == pr32Map[i]);
1044             int64_t cpIndex = utext_getNativeIndex(ut);
1045             TEST_ASSERT(cpIndex == prevMap[i]);
1046         }
1047
1048         // check Extract
1049         //   Extract from i to i+1, which may be zero or one code points,
1050         //     depending on whether the indices straddle a cp boundary.
1051         for (i=0; i<startMapLimit; i++) {
1052             UChar buf[3];
1053             status = U_ZERO_ERROR;
1054             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
1055             TEST_SUCCESS(status);
1056             TEST_ASSERT(extractedLen == exLen[i]);
1057             if (extractedLen > 0) {
1058                 UChar32  c32;
1059                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1060                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
1061                 TEST_ASSERT(c32 == c32Map[i]);
1062             }
1063         }
1064
1065         utext_close(ut);
1066     }
1067
1068
1069     {    //  Similar test, with utf16 instead of utf8
1070          //  TODO:  merge the common parts of these tests.
1071
1072         UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
1073         int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
1074         int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
1075         int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
1076         UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
1077         UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
1078         int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};
1079
1080         u16str = u16str.unescape();
1081         UErrorCode status = U_ZERO_ERROR;
1082         UText *ut = utext_openUnicodeString(NULL, &u16str, &status);
1083         TEST_SUCCESS(status);
1084
1085         int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
1086         int i;
1087         for (i=0; i<startMapLimit; i++) {
1088             utext_setNativeIndex(ut, i);
1089             int64_t cpIndex = utext_getNativeIndex(ut);
1090             TEST_ASSERT(cpIndex == startMap[i]);
1091         }
1092
1093         // Check char32At
1094         for (i=0; i<startMapLimit; i++) {
1095             UChar32 c32 = utext_char32At(ut, i);
1096             TEST_ASSERT(c32 == c32Map[i]);
1097             int64_t cpIndex = utext_getNativeIndex(ut);
1098             TEST_ASSERT(cpIndex == startMap[i]);
1099         }
1100
1101         // Check utext_next32From
1102         for (i=0; i<startMapLimit; i++) {
1103             UChar32 c32 = utext_next32From(ut, i);
1104             TEST_ASSERT(c32 == c32Map[i]);
1105             int64_t cpIndex = utext_getNativeIndex(ut);
1106             TEST_ASSERT(cpIndex == nextMap[i]);
1107         }
1108
1109         // check utext_previous32From
1110         for (i=0; i<startMapLimit; i++) {
1111             UChar32 c32 = utext_previous32From(ut, i);
1112             TEST_ASSERT(c32 == pr32Map[i]);
1113             int64_t cpIndex = utext_getNativeIndex(ut);
1114             TEST_ASSERT(cpIndex == prevMap[i]);
1115         }
1116
1117         // check Extract
1118         //   Extract from i to i+1, which may be zero or one code points,
1119         //     depending on whether the indices straddle a cp boundary.
1120         for (i=0; i<startMapLimit; i++) {
1121             UChar buf[3];
1122             status = U_ZERO_ERROR;
1123             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
1124             TEST_SUCCESS(status);
1125             TEST_ASSERT(extractedLen == exLen[i]);
1126             if (extractedLen > 0) {
1127                 UChar32  c32;
1128                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1129                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
1130                 TEST_ASSERT(c32 == c32Map[i]);
1131             }
1132         }
1133
1134         utext_close(ut);
1135     }
1136
1137     {    //  Similar test, with UText over Replaceable
1138          //  TODO:  merge the common parts of these tests.
1139
1140         UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
1141         int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
1142         int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
1143         int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
1144         UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
1145         UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
1146         int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};
1147
1148         u16str = u16str.unescape();
1149         UErrorCode status = U_ZERO_ERROR;
1150         UText *ut = utext_openReplaceable(NULL, &u16str, &status);
1151         TEST_SUCCESS(status);
1152
1153         int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
1154         int i;
1155         for (i=0; i<startMapLimit; i++) {
1156             utext_setNativeIndex(ut, i);
1157             int64_t cpIndex = utext_getNativeIndex(ut);
1158             TEST_ASSERT(cpIndex == startMap[i]);
1159         }
1160
1161         // Check char32At
1162         for (i=0; i<startMapLimit; i++) {
1163             UChar32 c32 = utext_char32At(ut, i);
1164             TEST_ASSERT(c32 == c32Map[i]);
1165             int64_t cpIndex = utext_getNativeIndex(ut);
1166             TEST_ASSERT(cpIndex == startMap[i]);
1167         }
1168
1169         // Check utext_next32From
1170         for (i=0; i<startMapLimit; i++) {
1171             UChar32 c32 = utext_next32From(ut, i);
1172             TEST_ASSERT(c32 == c32Map[i]);
1173             int64_t cpIndex = utext_getNativeIndex(ut);
1174             TEST_ASSERT(cpIndex == nextMap[i]);
1175         }
1176
1177         // check utext_previous32From
1178         for (i=0; i<startMapLimit; i++) {
1179             UChar32 c32 = utext_previous32From(ut, i);
1180             TEST_ASSERT(c32 == pr32Map[i]);
1181             int64_t cpIndex = utext_getNativeIndex(ut);
1182             TEST_ASSERT(cpIndex == prevMap[i]);
1183         }
1184
1185         // check Extract
1186         //   Extract from i to i+1, which may be zero or one code points,
1187         //     depending on whether the indices straddle a cp boundary.
1188         for (i=0; i<startMapLimit; i++) {
1189             UChar buf[3];
1190             status = U_ZERO_ERROR;
1191             int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
1192             TEST_SUCCESS(status);
1193             TEST_ASSERT(extractedLen == exLen[i]);
1194             if (extractedLen > 0) {
1195                 UChar32  c32;
1196                 /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
1197                 U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
1198                 TEST_ASSERT(c32 == c32Map[i]);
1199             }
1200         }
1201
1202         utext_close(ut);
1203     }
1204 }
1205
1206
1207 void UTextTest::FreezeTest() {
1208     // Check isWritable() and freeze() behavior.
1209     //
1210
1211     UnicodeString  ustr("Hello, World.");
1212     const char u8str[] = {char(0x31), (char)0x32, (char)0x33, 0};
1213     const UChar u16str[] = {(UChar)0x31, (UChar)0x32, (UChar)0x44, 0};
1214
1215     UErrorCode status = U_ZERO_ERROR;
1216     UText  *ut        = NULL;
1217     UText  *ut2       = NULL;
1218
1219     ut = utext_openUTF8(ut, u8str, -1, &status);
1220     TEST_SUCCESS(status);
1221     UBool writable = utext_isWritable(ut);
1222     TEST_ASSERT(writable == FALSE);
1223     utext_copy(ut, 1, 2, 0, TRUE, &status);
1224     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1225
1226     status = U_ZERO_ERROR;
1227     ut = utext_openUChars(ut, u16str, -1, &status);
1228     TEST_SUCCESS(status);
1229     writable = utext_isWritable(ut);
1230     TEST_ASSERT(writable == FALSE);
1231     utext_copy(ut, 1, 2, 0, TRUE, &status);
1232     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1233
1234     status = U_ZERO_ERROR;
1235     ut = utext_openUnicodeString(ut, &ustr, &status);
1236     TEST_SUCCESS(status);
1237     writable = utext_isWritable(ut);
1238     TEST_ASSERT(writable == TRUE);
1239     utext_freeze(ut);
1240     writable = utext_isWritable(ut);
1241     TEST_ASSERT(writable == FALSE);
1242     utext_copy(ut, 1, 2, 0, TRUE, &status);
1243     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1244
1245     status = U_ZERO_ERROR;
1246     ut = utext_openUnicodeString(ut, &ustr, &status);
1247     TEST_SUCCESS(status);
1248     ut2 = utext_clone(ut2, ut, FALSE, FALSE, &status);  // clone with readonly = false
1249     TEST_SUCCESS(status);
1250     writable = utext_isWritable(ut2);
1251     TEST_ASSERT(writable == TRUE);
1252     ut2 = utext_clone(ut2, ut, FALSE, TRUE, &status);  // clone with readonly = true
1253     TEST_SUCCESS(status);
1254     writable = utext_isWritable(ut2);
1255     TEST_ASSERT(writable == FALSE);
1256     utext_copy(ut2, 1, 2, 0, TRUE, &status);
1257     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1258
1259     status = U_ZERO_ERROR;
1260     ut = utext_openConstUnicodeString(ut, (const UnicodeString *)&ustr, &status);
1261     TEST_SUCCESS(status);
1262     writable = utext_isWritable(ut);
1263     TEST_ASSERT(writable == FALSE);
1264     utext_copy(ut, 1, 2, 0, TRUE, &status);
1265     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1266
1267     // Deep Clone of a frozen UText should re-enable writing in the copy.
1268     status = U_ZERO_ERROR;
1269     ut = utext_openUnicodeString(ut, &ustr, &status);
1270     TEST_SUCCESS(status);
1271     utext_freeze(ut);
1272     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
1273     TEST_SUCCESS(status);
1274     writable = utext_isWritable(ut2);
1275     TEST_ASSERT(writable == TRUE);
1276
1277
1278     // Deep clone of a frozen UText, where the base type is intrinsically non-writable,
1279     //  should NOT enable writing in the copy.
1280     status = U_ZERO_ERROR;
1281     ut = utext_openUChars(ut, u16str, -1, &status);
1282     TEST_SUCCESS(status);
1283     utext_freeze(ut);
1284     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
1285     TEST_SUCCESS(status);
1286     writable = utext_isWritable(ut2);
1287     TEST_ASSERT(writable == FALSE);
1288
1289     // cleanup
1290     utext_close(ut);
1291     utext_close(ut2);
1292 }
1293
1294
1295 //
1296 //  Fragmented UText
1297 //      A UText type that works with a chunk size of 1.
1298 //      Intended to test for edge cases.
1299 //      Input comes from a UnicodeString.
1300 //
1301 //       ut.b    the character.  Put into both halves.
1302 //
1303
1304 U_CDECL_BEGIN
1305 static UBool U_CALLCONV
1306 fragTextAccess(UText *ut, int64_t index, UBool forward) {
1307     const UnicodeString *us = (const UnicodeString *)ut->context;
1308     UChar  c;
1309     int32_t length = us->length();
1310     if (forward && index>=0 && index<length) {
1311         c = us->charAt((int32_t)index);
1312         ut->b = c | c<<16;
1313         ut->chunkOffset = 0;
1314         ut->chunkLength = 1;
1315         ut->chunkNativeStart = index;
1316         ut->chunkNativeLimit = index+1;
1317         return true;
1318     }
1319     if (!forward && index>0 && index <=length) {
1320         c = us->charAt((int32_t)index-1);
1321         ut->b = c | c<<16;
1322         ut->chunkOffset = 1;
1323         ut->chunkLength = 1;
1324         ut->chunkNativeStart = index-1;
1325         ut->chunkNativeLimit = index;
1326         return true;
1327     }
1328     ut->b = 0;
1329     ut->chunkOffset = 0;
1330     ut->chunkLength = 0;
1331     if (index <= 0) {
1332         ut->chunkNativeStart = 0;
1333         ut->chunkNativeLimit = 0;
1334     } else {
1335         ut->chunkNativeStart = length;
1336         ut->chunkNativeLimit = length;
1337     }
1338     return false;
1339 }
1340
1341 // Function table to be used with this fragmented text provider.
1342 //   Initialized in the open function.
1343 static UTextFuncs  fragmentFuncs;
1344
1345 // Clone function for fragmented text provider.
1346 //   Didn't really want to provide this, but it's easier to provide it than to keep it
1347 //   out of the tests.
1348 //
1349 UText *
1350 cloneFragmentedUnicodeString(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
1351     if (U_FAILURE(*status)) {
1352         return NULL;
1353     }
1354     if (deep) {
1355         *status = U_UNSUPPORTED_ERROR;
1356         return NULL;
1357     }
1358     dest = utext_openUnicodeString(dest, (UnicodeString *)src->context, status);
1359     utext_setNativeIndex(dest, utext_getNativeIndex(src));
1360     return dest;
1361 }
1362
1363 U_CDECL_END
1364
1365 // Open function for the fragmented text provider.
1366 UText *
1367 openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
1368     ut = utext_openUnicodeString(ut, s, status);
1369     if (U_FAILURE(*status)) {
1370         return ut;
1371     }
1372
1373     // Copy of the function table from the stock UnicodeString UText,
1374     //   and replace the entry for the access function.
1375     memcpy(&fragmentFuncs, ut->pFuncs, sizeof(fragmentFuncs));
1376     fragmentFuncs.access = fragTextAccess;
1377     fragmentFuncs.clone  = cloneFragmentedUnicodeString;
1378     ut->pFuncs = &fragmentFuncs;
1379
1380     ut->chunkContents = (UChar *)&ut->b;
1381     ut->pFuncs->access(ut, 0, TRUE);
1382     return ut;
1383 }
1384
1385 // Regression test for Ticket 5560
1386 //   Clone fails to update chunkContentPointer in the cloned copy.
1387 //   This is only an issue for UText types that work in a local buffer,
1388 //      (UTF-8 wrapper, for example)
1389 //
1390 //   The test:
//     1.  Create an initial UText
1392 //     2.  Deep clone it.  Contents should match original.
1393 //     3.  Reset original to something different.
1394 //     4.  Check that clone contents did not change.
1395 //
1396 void UTextTest::Ticket5560() {
1397     /* The following two strings are in UTF-8 even on EBCDIC platforms. */
1398     static const char s1[] = {0x41,0x42,0x43,0x44,0x45,0x46,0}; /* "ABCDEF" */
1399     static const char s2[] = {0x31,0x32,0x33,0x34,0x35,0x36,0}; /* "123456" */
1400         UErrorCode status = U_ZERO_ERROR;
1401
1402         UText ut1 = UTEXT_INITIALIZER;
1403         UText ut2 = UTEXT_INITIALIZER;
1404
1405         utext_openUTF8(&ut1, s1, -1, &status);
1406         UChar c = utext_next32(&ut1);
1407         TEST_ASSERT(c == 0x41);  // c == 'A'
1408
1409         utext_clone(&ut2, &ut1, TRUE, FALSE, &status);
1410         TEST_SUCCESS(status);
1411     c = utext_next32(&ut2);
1412         TEST_ASSERT(c == 0x42);  // c == 'B'
1413     c = utext_next32(&ut1);
1414         TEST_ASSERT(c == 0x42);  // c == 'B'
1415
1416         utext_openUTF8(&ut1, s2, -1, &status);
1417         c = utext_next32(&ut1);
1418         TEST_ASSERT(c == 0x31);  // c == '1'
1419     c = utext_next32(&ut2);
1420         TEST_ASSERT(c == 0x43);  // c == 'C'
1421
1422     utext_close(&ut1);
1423     utext_close(&ut2);
1424 }
1425
1426
1427 // Test for Ticket 6847
1428 //
1429 void UTextTest::Ticket6847() {
1430     const int STRLEN = 90;
1431     UChar s[STRLEN+1];
1432     u_memset(s, 0x41, STRLEN);
1433     s[STRLEN] = 0;
1434
1435     UErrorCode status = U_ZERO_ERROR;
1436     UText *ut = utext_openUChars(NULL, s, -1, &status);
1437
1438     utext_setNativeIndex(ut, 0);
1439     int32_t count = 0;
1440     UChar32 c = 0;
1441     int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1442     TEST_ASSERT(nativeIndex == 0);
1443     while ((c = utext_next32(ut)) != U_SENTINEL) {
1444         TEST_ASSERT(c == 0x41);
1445         TEST_ASSERT(count < STRLEN);
1446         if (count >= STRLEN) {
1447             break;
1448         }
1449         count++;
1450         nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1451         TEST_ASSERT(nativeIndex == count);
1452     }
1453     TEST_ASSERT(count == STRLEN);
1454     nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1455     TEST_ASSERT(nativeIndex == STRLEN);
1456     utext_close(ut);
1457 }
1458
1459
1460 void UTextTest::Ticket10562() {
1461     // Note: failures show as a heap error when the test is run under valgrind.
1462     UErrorCode status = U_ZERO_ERROR;
1463
1464     const char *utf8_string = "\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41";
1465     UText *utf8Text = utext_openUTF8(NULL, utf8_string, -1, &status);
1466     TEST_SUCCESS(status);
1467     UText *deepClone = utext_clone(NULL, utf8Text, TRUE, FALSE, &status);
1468     TEST_SUCCESS(status);
1469     UText *shallowClone = utext_clone(NULL, deepClone, FALSE, FALSE, &status);
1470     TEST_SUCCESS(status);
1471     utext_close(shallowClone);
1472     utext_close(deepClone);
1473     utext_close(utf8Text);
1474
1475     status = U_ZERO_ERROR;
1476     UnicodeString usString("Hello, World.");
1477     UText *usText = utext_openUnicodeString(NULL, &usString, &status);
1478     TEST_SUCCESS(status);
1479     UText *usDeepClone = utext_clone(NULL, usText, TRUE, FALSE, &status);
1480     TEST_SUCCESS(status);
1481     UText *usShallowClone = utext_clone(NULL, usDeepClone, FALSE, FALSE, &status);
1482     TEST_SUCCESS(status);
1483     utext_close(usShallowClone);
1484     utext_close(usDeepClone);
1485     utext_close(usText);
1486 }
1487
1488
1489 void UTextTest::Ticket10983() {
1490     // Note: failure shows as a seg fault when the defect is present.
1491
1492     UErrorCode status = U_ZERO_ERROR;
1493     UnicodeString s("Hello, World");
1494     UText *ut = utext_openConstUnicodeString(NULL, &s, &status);
1495     TEST_SUCCESS(status);
1496
1497     status = U_INVALID_STATE_ERROR;
1498     UText *cloned = utext_clone(NULL, ut, TRUE, TRUE, &status);
1499     TEST_ASSERT(cloned == NULL);
1500     TEST_ASSERT(status == U_INVALID_STATE_ERROR);
1501
1502     utext_close(ut);
1503 }