icuSources/test/intltest/utxttest.cpp

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 2005-2016, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /************************************************************************
   7 *   Tests for the UText and UTextIterator text abstraction classes
   8 *
   9 ************************************************************************/
  10
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdlib.h>
  14 #include "unicode/utypes.h"
  15 #include "unicode/utext.h"
  16 #include "unicode/utf8.h"
  17 #include "unicode/ustring.h"
  18 #include "unicode/uchriter.h"
  19 #include "cmemory.h"
  20 #include "cstr.h"
  21 #include "utxttest.h"
  22
  23 static UBool  gFailed = FALSE;
  24 static int    gTestNum = 0;
  25
  26 // Forward decl
  27 UText *openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status);
  28
  29 #define TEST_ASSERT(x) \
  30 { if ((x)==FALSE) {errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__);\
  31                      gFailed = TRUE;\
  32    }}
  33
  34
  35 #define TEST_SUCCESS(status) \
  36 { if (U_FAILURE(status)) {errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
  37        gTestNum, __FILE__, __LINE__, u_errorName(status)); \
  38        gFailed = TRUE;\
  39    }}
  40
  41 UTextTest::UTextTest() {
  42 }
  43
  44 UTextTest::~UTextTest() {
  45 }
  46
  47
  48 void
  49 UTextTest::runIndexedTest(int32_t index, UBool exec,
  50                           const char* &name, char* /*par*/) {
  51     switch (index) {
  52         case 0: name = "TextTest";
  53             if (exec) TextTest();    break;
  54         case 1: name = "ErrorTest";
  55             if (exec) ErrorTest();   break;
  56         case 2: name = "FreezeTest";
  57             if (exec) FreezeTest();  break;
  58         case 3: name = "Ticket5560";
  59             if (exec) Ticket5560();  break;
  60         case 4: name = "Ticket6847";
  61             if (exec) Ticket6847();  break;
  62         case 5: name = "Ticket10562";
  63             if (exec) Ticket10562();  break;
  64         case 6: name = "Ticket10983";
  65             if (exec) Ticket10983();  break;
  66         case 7: name = "Ticket12130";
  67             if (exec) Ticket12130(); break;
  68         default: name = "";          break;
  69     }
  70 }
  71
  72 //
  73 // Quick and dirty random number generator.
  74 //   (don't use library so that results are portable.
  75 static uint32_t m_seed = 1;
  76 static uint32_t m_rand()
  77 {
  78     m_seed = m_seed * 1103515245 + 12345;
  79     return (uint32_t)(m_seed/65536) % 32768;
  80 }
  81
  82
  83 //
  84 //   TextTest()
  85 //
  86 //       Top Level function for UText testing.
  87 //       Specifies the strings to be tested, with the acutal testing itself
  88 //       being carried out in another function, TestString().
  89 //
  90 void  UTextTest::TextTest() {
  91     int32_t i, j;
  92
  93     TestString("abcd\\U00010001xyz");
  94     TestString("");
  95
  96     // Supplementary chars at start or end
  97     TestString("\\U00010001");
  98     TestString("abc\\U00010001");
  99     TestString("\\U00010001abc");
 100
 101     // Test simple strings of lengths 1 to 60, looking for glitches at buffer boundaries
 102     UnicodeString s;
 103     for (i=1; i<60; i++) {
 104         s.truncate(0);
 105         for (j=0; j<i; j++) {
 106             if (j+0x30 == 0x5c) {
 107                 // backslash.  Needs to be escaped
 108                 s.append((UChar)0x5c);
 109             }
 110             s.append(UChar(j+0x30));
 111         }
 112         TestString(s);
 113     }
 114
 115    // Test strings with odd-aligned supplementary chars,
 116    //    looking for glitches at buffer boundaries
 117     for (i=1; i<60; i++) {
 118         s.truncate(0);
 119         s.append((UChar)0x41);
 120         for (j=0; j<i; j++) {
 121             s.append(UChar32(j+0x11000));
 122         }
 123         TestString(s);
 124     }
 125
 126     // String of chars of randomly varying size in utf-8 representation.
 127     //   Exercise the mapping, and the varying sized buffer.
 128     //
 129     s.truncate(0);
 130     UChar32  c1 = 0;
 131     UChar32  c2 = 0x100;
 132     UChar32  c3 = 0xa000;
 133     UChar32  c4 = 0x11000;
 134     for (i=0; i<1000; i++) {
 135         int len8 = m_rand()%4 + 1;
 136         switch (len8) {
 137             case 1:
 138                 c1 = (c1+1)%0x80;
 139                 // don't put 0 into string (0 terminated strings for some tests)
 140                 // don't put '\', will cause unescape() to fail.
 141                 if (c1==0x5c || c1==0) {
 142                     c1++;
 143                 }
 144                 s.append(c1);
 145                 break;
 146             case 2:
 147                 s.append(c2++);
 148                 break;
 149             case 3:
 150                 s.append(c3++);
 151                 break;
 152             case 4:
 153                 s.append(c4++);
 154                 break;
 155         }
 156     }
 157     TestString(s);
 158 }
 159
 160
 161 //
 162 //  TestString()     Run a suite of UText tests on a string.
 163 //                   The test string is unescaped before use.
 164 //
 165 void UTextTest::TestString(const UnicodeString &s) {
 166     int32_t       i;
 167     int32_t       j;
 168     UChar32       c;
 169     int32_t       cpCount = 0;
 170     UErrorCode    status  = U_ZERO_ERROR;
 171     UText        *ut      = NULL;
 172     int32_t       saLen;
 173
 174     UnicodeString sa = s.unescape();
 175     saLen = sa.length();
 176
 177     //
 178     // Build up a mapping between code points and UTF-16 code unit indexes.
 179     //
 180     m *cpMap = new m[sa.length() + 1];
 181     j = 0;
 182     for (i=0; i<sa.length(); i=sa.moveIndex32(i, 1)) {
 183         c = sa.char32At(i);
 184         cpMap[j].nativeIdx = i;
 185         cpMap[j].cp = c;
 186         j++;
 187         cpCount++;
 188     }
 189     cpMap[j].nativeIdx = i;   // position following the last char in utf-16 string.
 190
 191
 192     // UChar * test, null terminated
 193     status = U_ZERO_ERROR;
 194     UChar *buf = new UChar[saLen+1];
 195     sa.extract(buf, saLen+1, status);
 196     TEST_SUCCESS(status);
 197     ut = utext_openUChars(NULL, buf, -1, &status);
 198     TEST_SUCCESS(status);
 199     TestAccess(sa, ut, cpCount, cpMap);
 200     utext_close(ut);
 201     delete [] buf;
 202
 203     // UChar * test, with length
 204     status = U_ZERO_ERROR;
 205     buf = new UChar[saLen+1];
 206     sa.extract(buf, saLen+1, status);
 207     TEST_SUCCESS(status);
 208     ut = utext_openUChars(NULL, buf, saLen, &status);
 209     TEST_SUCCESS(status);
 210     TestAccess(sa, ut, cpCount, cpMap);
 211     utext_close(ut);
 212     delete [] buf;
 213
 214
 215     // UnicodeString test
 216     status = U_ZERO_ERROR;
 217     ut = utext_openUnicodeString(NULL, &sa, &status);
 218     TEST_SUCCESS(status);
 219     TestAccess(sa, ut, cpCount, cpMap);
 220     TestCMR(sa, ut, cpCount, cpMap, cpMap);
 221     utext_close(ut);
 222
 223
 224     // Const UnicodeString test
 225     status = U_ZERO_ERROR;
 226     ut = utext_openConstUnicodeString(NULL, &sa, &status);
 227     TEST_SUCCESS(status);
 228     TestAccess(sa, ut, cpCount, cpMap);
 229     utext_close(ut);
 230
 231
 232     // Replaceable test.  (UnicodeString inherits Replaceable)
 233     status = U_ZERO_ERROR;
 234     ut = utext_openReplaceable(NULL, &sa, &status);
 235     TEST_SUCCESS(status);
 236     TestAccess(sa, ut, cpCount, cpMap);
 237     TestCMR(sa, ut, cpCount, cpMap, cpMap);
 238     utext_close(ut);
 239
 240     // Character Iterator Tests
 241     status = U_ZERO_ERROR;
 242     const UChar *cbuf = sa.getBuffer();
 243     CharacterIterator *ci = new UCharCharacterIterator(cbuf, saLen, status);
 244     TEST_SUCCESS(status);
 245     ut = utext_openCharacterIterator(NULL, ci, &status);
 246     TEST_SUCCESS(status);
 247     TestAccess(sa, ut, cpCount, cpMap);
 248     utext_close(ut);
 249     delete ci;
 250
 251
 252     // Fragmented UnicodeString  (Chunk size of one)
 253     //
 254     status = U_ZERO_ERROR;
 255     ut = openFragmentedUnicodeString(NULL, &sa, &status);
 256     TEST_SUCCESS(status);
 257     TestAccess(sa, ut, cpCount, cpMap);
 258     utext_close(ut);
 259
 260     //
 261     // UTF-8 test
 262     //
 263
 264     // Convert the test string from UnicodeString to (char *) in utf-8 format
 265     int32_t u8Len = sa.extract(0, sa.length(), NULL, 0, "utf-8");
 266     char *u8String = new char[u8Len + 1];
 267     sa.extract(0, sa.length(), u8String, u8Len+1, "utf-8");
 268
 269     // Build up the map of code point indices in the utf-8 string
 270     m * u8Map = new m[sa.length() + 1];
 271     i = 0;   // native utf-8 index
 272     for (j=0; j<cpCount ; j++) {  // code point number
 273         u8Map[j].nativeIdx = i;
 274         U8_NEXT(u8String, i, u8Len, c)
 275         u8Map[j].cp = c;
 276     }
 277     u8Map[cpCount].nativeIdx = u8Len;   // position following the last char in utf-8 string.
 278
 279     // Do the test itself
 280     status = U_ZERO_ERROR;
 281     ut = utext_openUTF8(NULL, u8String, -1, &status);
 282     TEST_SUCCESS(status);
 283     TestAccess(sa, ut, cpCount, u8Map);
 284     utext_close(ut);
 285
 286
 287
 288     delete []cpMap;
 289     delete []u8Map;
 290     delete []u8String;
 291 }
 292
 293 //  TestCMR   test Copy, Move and Replace operations.
 294 //              us         UnicodeString containing the test text.
 295 //              ut         UText containing the same test text.
 296 //              cpCount    number of code points in the test text.
 297 //              nativeMap  Mapping from code points to native indexes for the UText.
 298 //              u16Map     Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
 299 //
 300 //     This function runs a whole series of opertions on each incoming UText.
 301 //     The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
 302 //
 303 void UTextTest::TestCMR(const UnicodeString &us, UText *ut, int cpCount, m *nativeMap, m *u16Map) {
 304     TEST_ASSERT(utext_isWritable(ut) == TRUE);
 305
 306     int  srcLengthType;       // Loop variables for selecting the postion and length
 307     int  srcPosType;          //   of the block to operate on within the source text.
 308     int  destPosType;
 309
 310     int  srcIndex  = 0;       // Code Point indexes of the block to operate on for
 311     int  srcLength = 0;       //   a specific test.
 312
 313     int  destIndex = 0;       // Code point index of the destination for a copy/move test.
 314
 315     int32_t  nativeStart = 0; // Native unit indexes for a test.
 316     int32_t  nativeLimit = 0;
 317     int32_t  nativeDest  = 0;
 318
 319     int32_t  u16Start    = 0; // UTF-16 indexes for a test.
 320     int32_t  u16Limit    = 0; //   used when performing the same operation in a Unicode String
 321     int32_t  u16Dest     = 0;
 322
 323     // Iterate over a whole series of source index, length and a target indexes.
 324     // This is done with code point indexes; these will be later translated to native
 325     //   indexes using the cpMap.
 326     for (srcLengthType=1; srcLengthType<=3; srcLengthType++) {
 327         switch (srcLengthType) {
 328             case 1: srcLength = 1; break;
 329             case 2: srcLength = 5; break;
 330             case 3: srcLength = cpCount / 3;
 331         }
 332         for (srcPosType=1; srcPosType<=5; srcPosType++) {
 333             switch (srcPosType) {
 334                 case 1: srcIndex = 0; break;
 335                 case 2: srcIndex = 1; break;
 336                 case 3: srcIndex = cpCount - srcLength; break;
 337                 case 4: srcIndex = cpCount - srcLength - 1; break;
 338                 case 5: srcIndex = cpCount / 2; break;
 339             }
 340             if (srcIndex < 0 || srcIndex + srcLength > cpCount) {
 341                 // filter out bogus test cases -
 342                 //   those with a source range that falls of an edge of the string.
 343                 continue;
 344             }
 345
 346             //
 347             // Copy and move tests.
 348             //   iterate over a variety of destination positions.
 349             //
 350             for (destPosType=1; destPosType<=4; destPosType++) {
 351                 switch (destPosType) {
 352                     case 1: destIndex = 0; break;
 353                     case 2: destIndex = 1; break;
 354                     case 3: destIndex = srcIndex - 1; break;
 355                     case 4: destIndex = srcIndex + srcLength + 1; break;
 356                     case 5: destIndex = cpCount-1; break;
 357                     case 6: destIndex = cpCount; break;
 358                 }
 359                 if (destIndex<0 || destIndex>cpCount) {
 360                     // filter out bogus test cases.
 361                     continue;
 362                 }
 363
 364                 nativeStart = nativeMap[srcIndex].nativeIdx;
 365                 nativeLimit = nativeMap[srcIndex+srcLength].nativeIdx;
 366                 nativeDest  = nativeMap[destIndex].nativeIdx;
 367
 368                 u16Start    = u16Map[srcIndex].nativeIdx;
 369                 u16Limit    = u16Map[srcIndex+srcLength].nativeIdx;
 370                 u16Dest     = u16Map[destIndex].nativeIdx;
 371
 372                 gFailed = FALSE;
 373                 TestCopyMove(us, ut, FALSE,
 374                     nativeStart, nativeLimit, nativeDest,
 375                     u16Start, u16Limit, u16Dest);
 376
 377                 TestCopyMove(us, ut, TRUE,
 378                     nativeStart, nativeLimit, nativeDest,
 379                     u16Start, u16Limit, u16Dest);
 380
 381                 if (gFailed) {
 382                     return;
 383                 }
 384             }
 385
 386             //
 387             //  Replace tests.
 388             //
 389             UnicodeString fullRepString("This is an arbitrary string that will be used as replacement text");
 390             for (int32_t replStrLen=0; replStrLen<20; replStrLen++) {
 391                 UnicodeString repStr(fullRepString, 0, replStrLen);
 392                 TestReplace(us, ut,
 393                     nativeStart, nativeLimit,
 394                     u16Start, u16Limit,
 395                     repStr);
 396                 if (gFailed) {
 397                     return;
 398                 }
 399             }
 400
 401         }
 402     }
 403
 404 }
 405
 406 //
 407 //   TestCopyMove    run a single test case for utext_copy.
 408 //                   Test cases are created in TestCMR and dispatched here for execution.
 409 //
 410 void UTextTest::TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
 411                     int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
 412                     int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
 413 {
 414     UErrorCode      status   = U_ZERO_ERROR;
 415     UText          *targetUT = NULL;
 416     gTestNum++;
 417     gFailed = FALSE;
 418
 419     //
 420     //  clone the UText.  The test will be run in the cloned copy
 421     //  so that we don't alter the original.
 422     //
 423     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
 424     TEST_SUCCESS(status);
 425     UnicodeString targetUS(us);    // And copy the reference string.
 426
 427     // do the test operation first in the reference
 428     targetUS.copy(u16Start, u16Limit, u16Dest);
 429     if (move) {
 430         // delete out the source range.
 431         if (u16Limit < u16Dest) {
 432             targetUS.removeBetween(u16Start, u16Limit);
 433         } else {
 434             int32_t amtCopied = u16Limit - u16Start;
 435             targetUS.removeBetween(u16Start+amtCopied, u16Limit+amtCopied);
 436         }
 437     }
 438
 439     // Do the same operation in the UText under test
 440     utext_copy(targetUT, nativeStart, nativeLimit, nativeDest, move, &status);
 441     if (nativeDest > nativeStart && nativeDest < nativeLimit) {
 442         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
 443     } else {
 444         TEST_SUCCESS(status);
 445
 446         // Compare the results of the two parallel tests
 447         int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
 448         int64_t  uti = 0;    // UText position, native index.
 449         int32_t  cpi;        // char32 position (code point index)
 450         UChar32  usc;        // code point from Unicode String
 451         UChar32  utc;        // code point from UText
 452         utext_setNativeIndex(targetUT, 0);
 453         for (cpi=0; ; cpi++) {
 454             usc = targetUS.char32At(usi);
 455             utc = utext_next32(targetUT);
 456             if (utc < 0) {
 457                 break;
 458             }
 459             TEST_ASSERT(uti == usi);
 460             TEST_ASSERT(utc == usc);
 461             usi = targetUS.moveIndex32(usi, 1);
 462             uti = utext_getNativeIndex(targetUT);
 463             if (gFailed) {
 464                 goto cleanupAndReturn;
 465             }
 466         }
 467         int64_t expectedNativeLength = utext_nativeLength(ut);
 468         if (move == FALSE) {
 469             expectedNativeLength += nativeLimit - nativeStart;
 470         }
 471         uti = utext_getNativeIndex(targetUT);
 472         TEST_ASSERT(uti == expectedNativeLength);
 473     }
 474
 475 cleanupAndReturn:
 476     utext_close(targetUT);
 477 }
 478
 479
 480 //
 481 //  TestReplace   Test a single Replace operation.
 482 //
 483 void UTextTest::TestReplace(
 484             const UnicodeString &us,     // reference UnicodeString in which to do the replace
 485             UText         *ut,                // UnicodeText object under test.
 486             int32_t       nativeStart,        // Range to be replaced, in UText native units.
 487             int32_t       nativeLimit,
 488             int32_t       u16Start,           // Range to be replaced, in UTF-16 units
 489             int32_t       u16Limit,           //    for use in the reference UnicodeString.
 490             const UnicodeString &repStr)      // The replacement string
 491 {
 492     UErrorCode      status   = U_ZERO_ERROR;
 493     UText          *targetUT = NULL;
 494     gTestNum++;
 495     gFailed = FALSE;
 496
 497     //
 498     //  clone the target UText.  The test will be run in the cloned copy
 499     //  so that we don't alter the original.
 500     //
 501     targetUT = utext_clone(NULL, ut, TRUE, FALSE, &status);
 502     TEST_SUCCESS(status);
 503     UnicodeString targetUS(us);    // And copy the reference string.
 504
 505     //
 506     // Do the replace operation in the Unicode String, to
 507     //   produce a reference result.
 508     //
 509     targetUS.replace(u16Start, u16Limit-u16Start, repStr);
 510
 511     //
 512     // Do the replace on the UText under test
 513     //
 514     const UChar *rs = repStr.getBuffer();
 515     int32_t  rsLen = repStr.length();
 516     int32_t actualDelta = utext_replace(targetUT, nativeStart, nativeLimit, rs, rsLen, &status);
 517     int32_t expectedDelta = repStr.length() - (nativeLimit - nativeStart);
 518     TEST_ASSERT(actualDelta == expectedDelta);
 519
 520     //
 521     // Compare the results
 522     //
 523     int32_t  usi = 0;    // UnicodeString postion, utf-16 index.
 524     int64_t  uti = 0;    // UText position, native index.
 525     int32_t  cpi;        // char32 position (code point index)
 526     UChar32  usc;        // code point from Unicode String
 527     UChar32  utc;        // code point from UText
 528     int64_t  expectedNativeLength = 0;
 529     utext_setNativeIndex(targetUT, 0);
 530     for (cpi=0; ; cpi++) {
 531         usc = targetUS.char32At(usi);
 532         utc = utext_next32(targetUT);
 533         if (utc < 0) {
 534             break;
 535         }
 536         TEST_ASSERT(uti == usi);
 537         TEST_ASSERT(utc == usc);
 538         usi = targetUS.moveIndex32(usi, 1);
 539         uti = utext_getNativeIndex(targetUT);
 540         if (gFailed) {
 541             goto cleanupAndReturn;
 542         }
 543     }
 544     expectedNativeLength = utext_nativeLength(ut) + expectedDelta;
 545     uti = utext_getNativeIndex(targetUT);
 546     TEST_ASSERT(uti == expectedNativeLength);
 547
 548 cleanupAndReturn:
 549     utext_close(targetUT);
 550 }
 551
 552 //
 553 //  TestAccess      Test the read only access functions on a UText, including cloning.
 554 //                  The text is accessed in a variety of ways, and compared with
 555 //                  the reference UnicodeString.
 556 //
 557 void UTextTest::TestAccess(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
 558     // Run the standard tests on the caller-supplied UText.
 559     TestAccessNoClone(us, ut, cpCount, cpMap);
 560
 561     // Re-run tests on a shallow clone.
 562     utext_setNativeIndex(ut, 0);
 563     UErrorCode status = U_ZERO_ERROR;
 564     UText *shallowClone = utext_clone(NULL, ut, FALSE /*deep*/, FALSE /*readOnly*/, &status);
 565     TEST_SUCCESS(status);
 566     TestAccessNoClone(us, shallowClone, cpCount, cpMap);
 567
 568     //
 569     // Rerun again on a deep clone.
 570     // Note that text providers are not required to provide deep cloning,
 571     //   so unsupported errors are ignored.
 572     //
 573     status = U_ZERO_ERROR;
 574     utext_setNativeIndex(shallowClone, 0);
 575     UText *deepClone = utext_clone(NULL, shallowClone, TRUE, FALSE, &status);
 576     utext_close(shallowClone);
 577     if (status != U_UNSUPPORTED_ERROR) {
 578         TEST_SUCCESS(status);
 579         TestAccessNoClone(us, deepClone, cpCount, cpMap);
 580     }
 581     utext_close(deepClone);
 582 }
 583
 584
 585 //
 586 //  TestAccessNoClone()    Test the read only access functions on a UText.
 587 //                         The text is accessed in a variety of ways, and compared with
 588 //                         the reference UnicodeString.
 589 //
 590 void UTextTest::TestAccessNoClone(const UnicodeString &us, UText *ut, int cpCount, m *cpMap) {
 591     UErrorCode  status = U_ZERO_ERROR;
 592     gTestNum++;
 593
 594     //
 595     //  Check the length from the UText
 596     //
 597     int64_t expectedLen = cpMap[cpCount].nativeIdx;
 598     int64_t utlen = utext_nativeLength(ut);
 599     TEST_ASSERT(expectedLen == utlen);
 600
 601     //
 602     //  Iterate forwards, verify that we get the correct code points
 603     //   at the correct native offsets.
 604     //
 605     int         i = 0;
 606     int64_t     index;
 607     int64_t     expectedIndex = 0;
 608     int64_t     foundIndex = 0;
 609     UChar32     expectedC;
 610     UChar32     foundC;
 611     int64_t     len;
 612
 613     for (i=0; i<cpCount; i++) {
 614         expectedIndex = cpMap[i].nativeIdx;
 615         foundIndex    = utext_getNativeIndex(ut);
 616         TEST_ASSERT(expectedIndex == foundIndex);
 617         expectedC     = cpMap[i].cp;
 618         foundC        = utext_next32(ut);
 619         TEST_ASSERT(expectedC == foundC);
 620         foundIndex    = utext_getPreviousNativeIndex(ut);
 621         TEST_ASSERT(expectedIndex == foundIndex);
 622         if (gFailed) {
 623             return;
 624         }
 625     }
 626     foundC = utext_next32(ut);
 627     TEST_ASSERT(foundC == U_SENTINEL);
 628
 629     // Repeat above, using macros
 630     utext_setNativeIndex(ut, 0);
 631     for (i=0; i<cpCount; i++) {
 632         expectedIndex = cpMap[i].nativeIdx;
 633         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
 634         TEST_ASSERT(expectedIndex == foundIndex);
 635         expectedC     = cpMap[i].cp;
 636         foundC        = UTEXT_NEXT32(ut);
 637         TEST_ASSERT(expectedC == foundC);
 638         if (gFailed) {
 639             return;
 640         }
 641     }
 642     foundC = UTEXT_NEXT32(ut);
 643     TEST_ASSERT(foundC == U_SENTINEL);
 644
 645     //
 646     //  Forward iteration (above) should have left index at the
 647     //   end of the input, which should == length().
 648     //
 649     len = utext_nativeLength(ut);
 650     foundIndex  = utext_getNativeIndex(ut);
 651     TEST_ASSERT(len == foundIndex);
 652
 653     //
 654     // Iterate backwards over entire test string
 655     //
 656     len = utext_getNativeIndex(ut);
 657     utext_setNativeIndex(ut, len);
 658     for (i=cpCount-1; i>=0; i--) {
 659         expectedC     = cpMap[i].cp;
 660         expectedIndex = cpMap[i].nativeIdx;
 661         int64_t prevIndex = utext_getPreviousNativeIndex(ut);
 662         foundC        = utext_previous32(ut);
 663         foundIndex    = utext_getNativeIndex(ut);
 664         TEST_ASSERT(expectedIndex == foundIndex);
 665         TEST_ASSERT(expectedC == foundC);
 666         TEST_ASSERT(prevIndex == foundIndex);
 667         if (gFailed) {
 668             return;
 669         }
 670     }
 671
 672     //
 673     //  Backwards iteration, above, should have left our iterator
 674     //   position at zero, and continued backwards iterationshould fail.
 675     //
 676     foundIndex = utext_getNativeIndex(ut);
 677     TEST_ASSERT(foundIndex == 0);
 678     foundIndex = utext_getPreviousNativeIndex(ut);
 679     TEST_ASSERT(foundIndex == 0);
 680
 681
 682     foundC = utext_previous32(ut);
 683     TEST_ASSERT(foundC == U_SENTINEL);
 684     foundIndex = utext_getNativeIndex(ut);
 685     TEST_ASSERT(foundIndex == 0);
 686     foundIndex = utext_getPreviousNativeIndex(ut);
 687     TEST_ASSERT(foundIndex == 0);
 688
 689
 690     // And again, with the macros
 691     utext_setNativeIndex(ut, len);
 692     for (i=cpCount-1; i>=0; i--) {
 693         expectedC     = cpMap[i].cp;
 694         expectedIndex = cpMap[i].nativeIdx;
 695         foundC        = UTEXT_PREVIOUS32(ut);
 696         foundIndex    = UTEXT_GETNATIVEINDEX(ut);
 697         TEST_ASSERT(expectedIndex == foundIndex);
 698         TEST_ASSERT(expectedC == foundC);
 699         if (gFailed) {
 700             return;
 701         }
 702     }
 703
 704     //
 705     //  Backwards iteration, above, should have left our iterator
 706     //   position at zero, and continued backwards iterationshould fail.
 707     //
 708     foundIndex = UTEXT_GETNATIVEINDEX(ut);
 709     TEST_ASSERT(foundIndex == 0);
 710
 711     foundC = UTEXT_PREVIOUS32(ut);
 712     TEST_ASSERT(foundC == U_SENTINEL);
 713     foundIndex = UTEXT_GETNATIVEINDEX(ut);
 714     TEST_ASSERT(foundIndex == 0);
 715     if (gFailed) {
 716         return;
 717     }
 718
 719     //
 720     //  next32From(), prevous32From(), Iterate in a somewhat random order.
 721     //
 722     int  cpIndex = 0;
 723     for (i=0; i<cpCount; i++) {
 724         cpIndex = (cpIndex + 9973) % cpCount;
 725         index         = cpMap[cpIndex].nativeIdx;
 726         expectedC     = cpMap[cpIndex].cp;
 727         foundC        = utext_next32From(ut, index);
 728         TEST_ASSERT(expectedC == foundC);
 729         if (gFailed) {
 730             return;
 731         }
 732     }
 733
 734     cpIndex = 0;
 735     for (i=0; i<cpCount; i++) {
 736         cpIndex = (cpIndex + 9973) % cpCount;
 737         index         = cpMap[cpIndex+1].nativeIdx;
 738         expectedC     = cpMap[cpIndex].cp;
 739         foundC        = utext_previous32From(ut, index);
 740         TEST_ASSERT(expectedC == foundC);
 741         if (gFailed) {
 742             return;
 743         }
 744     }
 745
 746
 747     //
 748     // moveIndex(int32_t delta);
 749     //
 750
 751     // Walk through frontwards, incrementing by one
 752     utext_setNativeIndex(ut, 0);
 753     for (i=1; i<=cpCount; i++) {
 754         utext_moveIndex32(ut, 1);
 755         index = utext_getNativeIndex(ut);
 756         expectedIndex = cpMap[i].nativeIdx;
 757         TEST_ASSERT(expectedIndex == index);
 758         index = UTEXT_GETNATIVEINDEX(ut);
 759         TEST_ASSERT(expectedIndex == index);
 760     }
 761
 762     // Walk through frontwards, incrementing by two
 763     utext_setNativeIndex(ut, 0);
 764     for (i=2; i<cpCount; i+=2) {
 765         utext_moveIndex32(ut, 2);
 766         index = utext_getNativeIndex(ut);
 767         expectedIndex = cpMap[i].nativeIdx;
 768         TEST_ASSERT(expectedIndex == index);
 769         index = UTEXT_GETNATIVEINDEX(ut);
 770         TEST_ASSERT(expectedIndex == index);
 771     }
 772
 773     // walk through the string backwards, decrementing by one.
 774     i = cpMap[cpCount].nativeIdx;
 775     utext_setNativeIndex(ut, i);
 776     for (i=cpCount; i>=0; i--) {
 777         expectedIndex = cpMap[i].nativeIdx;
 778         index = utext_getNativeIndex(ut);
 779         TEST_ASSERT(expectedIndex == index);
 780         index = UTEXT_GETNATIVEINDEX(ut);
 781         TEST_ASSERT(expectedIndex == index);
 782         utext_moveIndex32(ut, -1);
 783     }
 784
 785
 786     // walk through backwards, decrementing by three
 787     i = cpMap[cpCount].nativeIdx;
 788     utext_setNativeIndex(ut, i);
 789     for (i=cpCount; i>=0; i-=3) {
 790         expectedIndex = cpMap[i].nativeIdx;
 791         index = utext_getNativeIndex(ut);
 792         TEST_ASSERT(expectedIndex == index);
 793         index = UTEXT_GETNATIVEINDEX(ut);
 794         TEST_ASSERT(expectedIndex == index);
 795         utext_moveIndex32(ut, -3);
 796     }
 797
 798
 799     //
 800     // Extract
 801     //
 802     int bufSize = us.length() + 10;
 803     UChar *buf = new UChar[bufSize];
 804     status = U_ZERO_ERROR;
 805     expectedLen = us.length();
 806     len = utext_extract(ut, 0, utlen, buf, bufSize, &status);
 807     TEST_SUCCESS(status);
 808     TEST_ASSERT(len == expectedLen);
 809     int compareResult = us.compare(buf, -1);
 810     TEST_ASSERT(compareResult == 0);
 811
 812     status = U_ZERO_ERROR;
 813     len = utext_extract(ut, 0, utlen, NULL, 0, &status);
 814     if (utlen == 0) {
 815         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 816     } else {
 817         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 818     }
 819     TEST_ASSERT(len == expectedLen);
 820
 821     status = U_ZERO_ERROR;
 822     u_memset(buf, 0x5555, bufSize);
 823     len = utext_extract(ut, 0, utlen, buf, 1, &status);
 824     if (us.length() == 0) {
 825         TEST_SUCCESS(status);
 826         TEST_ASSERT(buf[0] == 0);
 827     } else {
 828         // Buf len == 1, extracting a single 16 bit value.
 829         // If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
 830         //   or whether the lead surrogate of the pair is extracted.
 831         //   It's a buffer overflow error in either case.
 832         TEST_ASSERT(buf[0] == us.charAt(0) ||
 833                     (buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0))));
 834         TEST_ASSERT(buf[1] == 0x5555);
 835         if (us.length() == 1) {
 836             TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 837         } else {
 838             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 839         }
 840     }
 841
 842     delete []buf;
 843 }
 844
//
//  ErrorTest()    Check various error and edge cases.
//
//      Each braced section below is an independent scenario with its own
//      local status and UText:
//        - closing a zeroed or NULL UText, and closing the same UText twice
//        - re-opening one stack UText over several different text providers
//        - NULL source pointers passed to the open functions
//        - malformed UTF-8 input
//        - utext_isLengthExpensive() before and after the length is requested
//        - native indexes that do not fall on a code point boundary, for
//          UTF-8, UnicodeString and Replaceable based UTexts.
//
//      Failures are reported through TEST_ASSERT / TEST_SUCCESS.
//
void UTextTest::ErrorTest()
{
    // Close of an uninitialized UText.  Shouldn't blow up.
    //   Both a zero-filled UText and a NULL pointer are passed to utext_close().
    {
        UText  ut;
        memset(&ut, 0, sizeof(UText));
        utext_close(&ut);
        utext_close(NULL);
    }

    // Double-close of a UText.  Shouldn't blow up.  UText should still be usable.
    //   The UText lives on the stack, so open and close are both expected to
    //   hand back the address of that same struct.
    {
        UErrorCode status = U_ZERO_ERROR;
        UText ut = UTEXT_INITIALIZER;
        UnicodeString s("Hello, World");
        UText *ut2 = utext_openUnicodeString(&ut, &s, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(ut2 == &ut);

        UText *ut3 = utext_close(&ut);
        TEST_ASSERT(ut3 == &ut);

        UText *ut4 = utext_close(&ut);
        TEST_ASSERT(ut4 == &ut);

        // Re-open after the double close to show the struct is still usable.
        utext_openUnicodeString(&ut, &s, &status);
        TEST_SUCCESS(status);
        utext_close(&ut);
    }

    // Re-use of a UText, chaining through each of the types of UText
    //   (If it doesn't blow up, and doesn't leak, it's probably working fine)
    {
        UErrorCode status = U_ZERO_ERROR;
        UText ut = UTEXT_INITIALIZER;
        UText  *utp;
        UnicodeString s1("Hello, World");
        UChar s2[] = {(UChar)0x41, (UChar)0x42, (UChar)0};
        const char  *s3 = "\x66\x67\x68";

        // Each open below re-targets the same UText without an intervening close.
        utp = utext_openUnicodeString(&ut, &s1, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(utp == &ut);

        utp = utext_openConstUnicodeString(&ut, &s1, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(utp == &ut);

        utp = utext_openUTF8(&ut, s3, -1, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(utp == &ut);

        utp = utext_openUChars(&ut, s2, -1, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(utp == &ut);

        utp = utext_close(&ut);
        TEST_ASSERT(utp == &ut);

        // NOTE(review): this final open has no matching utext_close() in this
        //   block - confirm that is intentional.
        utp = utext_openUnicodeString(&ut, &s1, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(utp == &ut);
    }

    // Invalid parameters on open
    //   A NULL source pointer must be rejected with U_ILLEGAL_ARGUMENT_ERROR,
    //   both with an explicit length and with length -1.
    //
    {
        UErrorCode status = U_ZERO_ERROR;
        UText ut = UTEXT_INITIALIZER;

        utext_openUChars(&ut, NULL, 5, &status);
        TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

        status = U_ZERO_ERROR;
        utext_openUChars(&ut, NULL, -1, &status);
        TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

        status = U_ZERO_ERROR;
        utext_openUTF8(&ut, NULL, 4, &status);
        TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

        status = U_ZERO_ERROR;
        utext_openUTF8(&ut, NULL, -1, &status);
        TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
    }

    //
    //  UTF-8 with malformed sequences.
    //    These should come through as the Unicode replacement char, \ufffd
    //
    {
        UErrorCode status = U_ZERO_ERROR;
        UText *ut = NULL;
        // 'A', a stray trail byte, 'B', a truncated 4-byte sequence, 'C'.
        const char *badUTF8 = "\x41\x81\x42\xf0\x81\x81\x43";
        UChar32  c;

        ut = utext_openUTF8(NULL, badUTF8, -1, &status);
        TEST_SUCCESS(status);
        c = utext_char32At(ut, 1);
        TEST_ASSERT(c == 0xfffd);
        c = utext_char32At(ut, 3);
        TEST_ASSERT(c == 0xfffd);
        c = utext_char32At(ut, 5);
        TEST_ASSERT(c == 0xfffd);
        c = utext_char32At(ut, 6);
        TEST_ASSERT(c == 0x43);

        // Extracting the whole text is expected to produce 5 UChars, with the
        // malformed input showing up as U+FFFD at positions 1 and 3.
        UChar buf[10];
        int n = utext_extract(ut, 0, 9, buf, 10, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(n==5);
        TEST_ASSERT(buf[1] == 0xfffd);
        TEST_ASSERT(buf[3] == 0xfffd);
        TEST_ASSERT(buf[2] == 0x42);
        utext_close(ut);
    }


    //
    //  isLengthExpensive - does it make the expected transitions after
    //                      getting the length of a nul terminated string?
    //
    {
        UErrorCode status = U_ZERO_ERROR;
        UnicodeString sa("Hello, this is a string");
        UBool  isExpensive;

        // memset() works on bytes, so every UChar becomes 0x2020; all that
        // matters here is that the first 99 UChars are non-zero and the last is 0.
        UChar sb[100];
        memset(sb, 0x20, sizeof(sb));
        sb[99] = 0;

        UText *uta = utext_openUnicodeString(NULL, &sa, &status);
        TEST_SUCCESS(status);
        isExpensive = utext_isLengthExpensive(uta);
        TEST_ASSERT(isExpensive == FALSE);
        utext_close(uta);

        // Opened with length -1: expensive until utext_nativeLength() has been
        // called, not expensive afterwards.
        UText *utb = utext_openUChars(NULL, sb, -1, &status);
        TEST_SUCCESS(status);
        isExpensive = utext_isLengthExpensive(utb);
        TEST_ASSERT(isExpensive == TRUE);
        int64_t  len = utext_nativeLength(utb);
        TEST_ASSERT(len == 99);
        isExpensive = utext_isLengthExpensive(utb);
        TEST_ASSERT(isExpensive == FALSE);
        utext_close(utb);
    }

    //
    // Index to positions not on code point boundaries.
    //
    //   The text is three UTF-8 sequences of 2, 3 and 4 bytes
    //   (U+0201, U+1083, U+44146).  Every table below is indexed by the native
    //   (byte) index i that is handed to the function under test:
    //     startMap[i]  index reported after setNativeIndex(i) / char32At(i)
    //     nextMap[i]   index reported after next32From(i)
    //     prevMap[i]   index reported after previous32From(i)
    //     c32Map[i]    code point expected from char32At(i) / next32From(i)
    //     pr32Map[i]   code point expected from previous32From(i)
    //   -1 entries are the expected result when there is no character.
    //
    {
        const char *u8str =         "\xc8\x81\xe1\x82\x83\xf1\x84\x85\x86";
        int32_t startMap[] =        {   0,  0,  2,  2,  2,  5,  5,  5,  5,  9,  9};
        int32_t nextMap[]  =        {   2,  2,  5,  5,  5,  9,  9,  9,  9,  9,  9};
        int32_t prevMap[]  =        {   0,  0,  0,  0,  0,  2,  2,  2,  2,  5,  5};
        UChar32  c32Map[] =    {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
        UChar32  pr32Map[] =   {    -1,   -1,  0x201,  0x201,  0x201,   0x1083,   0x1083,   0x1083,   0x1083, 0x044146, 0x044146};

        // extractLen is the size, in UChars, of what will be extracted between index and index+1.
        //  is zero when both index positions lie within the same code point.
        int32_t  exLen[] =          {   0,  1,   0,  0,  1,  0,  0,  0,  2,  0,  0};


        UErrorCode status = U_ZERO_ERROR;
        UText *ut = utext_openUTF8(NULL, u8str, -1, &status);
        TEST_SUCCESS(status);

        // Check setIndex
        //   The index is read back through both the function and the macro form.
        int32_t i;
        int32_t startMapLimit = UPRV_LENGTHOF(startMap);
        for (i=0; i<startMapLimit; i++) {
            utext_setNativeIndex(ut, i);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == startMap[i]);
            cpIndex = UTEXT_GETNATIVEINDEX(ut);
            TEST_ASSERT(cpIndex == startMap[i]);
        }

        // Check char32At
        for (i=0; i<startMapLimit; i++) {
            UChar32 c32 = utext_char32At(ut, i);
            TEST_ASSERT(c32 == c32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == startMap[i]);
        }

        // Check utext_next32From
        for (i=0; i<startMapLimit; i++) {
            UChar32 c32 = utext_next32From(ut, i);
            TEST_ASSERT(c32 == c32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == nextMap[i]);
        }

        // check utext_previous32From
        //   gTestNum is bumped per iteration, so a failure message from
        //   TEST_ASSERT distinguishes the iterations of this loop.
        for (i=0; i<startMapLimit; i++) {
            gTestNum++;
            UChar32 c32 = utext_previous32From(ut, i);
            TEST_ASSERT(c32 == pr32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == prevMap[i]);
        }

        // check Extract
        //   Extract from i to i+1, which may be zero or one code points,
        //     depending on whether the indices straddle a cp boundary.
        for (i=0; i<startMapLimit; i++) {
            UChar buf[3];
            status = U_ZERO_ERROR;
            int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
            TEST_SUCCESS(status);
            TEST_ASSERT(extractedLen == exLen[i]);
            if (extractedLen > 0) {
                UChar32  c32;
                /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
                U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
                TEST_ASSERT(c32 == c32Map[i]);
            }
        }

        utext_close(ut);
    }


    {    //  Similar test, with utf16 instead of utf8
         //  TODO:  merge the common parts of these tests.
         //
         //  After unescape() the string is U+1000, U+11000, U+2000, U+22000,
         //  i.e. 6 UTF-16 code units; native indexes 2 and 5 fall in the middle
         //  of a surrogate pair.  The tables have the same meaning as in the
         //  UTF-8 section, indexed by UTF-16 code unit index.

        UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
        int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
        int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
        int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
        UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
        UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
        int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};

        u16str = u16str.unescape();
        UErrorCode status = U_ZERO_ERROR;
        UText *ut = utext_openUnicodeString(NULL, &u16str, &status);
        TEST_SUCCESS(status);

        // Check setIndex
        int32_t startMapLimit = UPRV_LENGTHOF(startMap);
        int i;
        for (i=0; i<startMapLimit; i++) {
            utext_setNativeIndex(ut, i);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == startMap[i]);
        }

        // Check char32At
        for (i=0; i<startMapLimit; i++) {
            UChar32 c32 = utext_char32At(ut, i);
            TEST_ASSERT(c32 == c32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == startMap[i]);
        }

        // Check utext_next32From
        for (i=0; i<startMapLimit; i++) {
            UChar32 c32 = utext_next32From(ut, i);
            TEST_ASSERT(c32 == c32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == nextMap[i]);
        }

        // check utext_previous32From
        for (i=0; i<startMapLimit; i++) {
            UChar32 c32 = utext_previous32From(ut, i);
            TEST_ASSERT(c32 == pr32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == prevMap[i]);
        }

        // check Extract
        //   Extract from i to i+1, which may be zero or one code points,
        //     depending on whether the indices straddle a cp boundary.
        for (i=0; i<startMapLimit; i++) {
            UChar buf[3];
            status = U_ZERO_ERROR;
            int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
            TEST_SUCCESS(status);
            TEST_ASSERT(extractedLen == exLen[i]);
            if (extractedLen > 0) {
                UChar32  c32;
                /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
                U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
                TEST_ASSERT(c32 == c32Map[i]);
            }
        }

        utext_close(ut);
    }

    {    //  Similar test, with UText over Replaceable
         //  TODO:  merge the common parts of these tests.
         //
         //  Same text and expected-value tables as the UnicodeString section;
         //  the only difference is that the UText is opened with
         //  utext_openReplaceable().

        UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
        int32_t startMap[]  ={ 0,     1,   1,    3,     4,  4,     6,  6};
        int32_t nextMap[]  = { 1,     3,   3,    4,     6,  6,     6,  6};
        int32_t prevMap[]  = { 0,     0,   0,    1,     3,  3,     4,  4};
        UChar32  c32Map[] =  {0x1000, 0x11000, 0x11000, 0x2000,  0x22000, 0x22000, -1, -1};
        UChar32  pr32Map[] = {    -1, 0x1000,  0x1000,  0x11000, 0x2000,  0x2000,   0x22000,   0x22000};
        int32_t  exLen[] =   {   1,  0,   2,  1,  0,  2,  0,  0,};

        u16str = u16str.unescape();
        UErrorCode status = U_ZERO_ERROR;
        UText *ut = utext_openReplaceable(NULL, &u16str, &status);
        TEST_SUCCESS(status);

        // Check setIndex
        int32_t startMapLimit = UPRV_LENGTHOF(startMap);
        int i;
        for (i=0; i<startMapLimit; i++) {
            utext_setNativeIndex(ut, i);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == startMap[i]);
        }

        // Check char32At
        for (i=0; i<startMapLimit; i++) {
            UChar32 c32 = utext_char32At(ut, i);
            TEST_ASSERT(c32 == c32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == startMap[i]);
        }

        // Check utext_next32From
        for (i=0; i<startMapLimit; i++) {
            UChar32 c32 = utext_next32From(ut, i);
            TEST_ASSERT(c32 == c32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == nextMap[i]);
        }

        // check utext_previous32From
        for (i=0; i<startMapLimit; i++) {
            UChar32 c32 = utext_previous32From(ut, i);
            TEST_ASSERT(c32 == pr32Map[i]);
            int64_t cpIndex = utext_getNativeIndex(ut);
            TEST_ASSERT(cpIndex == prevMap[i]);
        }

        // check Extract
        //   Extract from i to i+1, which may be zero or one code points,
        //     depending on whether the indices straddle a cp boundary.
        for (i=0; i<startMapLimit; i++) {
            UChar buf[3];
            status = U_ZERO_ERROR;
            int32_t  extractedLen = utext_extract(ut, i, i+1, buf, 3, &status);
            TEST_SUCCESS(status);
            TEST_ASSERT(extractedLen == exLen[i]);
            if (extractedLen > 0) {
                UChar32  c32;
                /* extractedLen-extractedLen == 0 is used to get around a compiler warning. */
                U16_GET(buf, 0, extractedLen-extractedLen, extractedLen, c32);
                TEST_ASSERT(c32 == c32Map[i]);
            }
        }

        utext_close(ut);
    }
}
1209
1210
1211 void UTextTest::FreezeTest() {
1212     // Check isWritable() and freeze() behavior.
1213     //
1214
1215     UnicodeString  ustr("Hello, World.");
1216     const char u8str[] = {char(0x31), (char)0x32, (char)0x33, 0};
1217     const UChar u16str[] = {(UChar)0x31, (UChar)0x32, (UChar)0x44, 0};
1218
1219     UErrorCode status = U_ZERO_ERROR;
1220     UText  *ut        = NULL;
1221     UText  *ut2       = NULL;
1222
1223     ut = utext_openUTF8(ut, u8str, -1, &status);
1224     TEST_SUCCESS(status);
1225     UBool writable = utext_isWritable(ut);
1226     TEST_ASSERT(writable == FALSE);
1227     utext_copy(ut, 1, 2, 0, TRUE, &status);
1228     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1229
1230     status = U_ZERO_ERROR;
1231     ut = utext_openUChars(ut, u16str, -1, &status);
1232     TEST_SUCCESS(status);
1233     writable = utext_isWritable(ut);
1234     TEST_ASSERT(writable == FALSE);
1235     utext_copy(ut, 1, 2, 0, TRUE, &status);
1236     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1237
1238     status = U_ZERO_ERROR;
1239     ut = utext_openUnicodeString(ut, &ustr, &status);
1240     TEST_SUCCESS(status);
1241     writable = utext_isWritable(ut);
1242     TEST_ASSERT(writable == TRUE);
1243     utext_freeze(ut);
1244     writable = utext_isWritable(ut);
1245     TEST_ASSERT(writable == FALSE);
1246     utext_copy(ut, 1, 2, 0, TRUE, &status);
1247     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1248
1249     status = U_ZERO_ERROR;
1250     ut = utext_openUnicodeString(ut, &ustr, &status);
1251     TEST_SUCCESS(status);
1252     ut2 = utext_clone(ut2, ut, FALSE, FALSE, &status);  // clone with readonly = false
1253     TEST_SUCCESS(status);
1254     writable = utext_isWritable(ut2);
1255     TEST_ASSERT(writable == TRUE);
1256     ut2 = utext_clone(ut2, ut, FALSE, TRUE, &status);  // clone with readonly = true
1257     TEST_SUCCESS(status);
1258     writable = utext_isWritable(ut2);
1259     TEST_ASSERT(writable == FALSE);
1260     utext_copy(ut2, 1, 2, 0, TRUE, &status);
1261     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1262
1263     status = U_ZERO_ERROR;
1264     ut = utext_openConstUnicodeString(ut, (const UnicodeString *)&ustr, &status);
1265     TEST_SUCCESS(status);
1266     writable = utext_isWritable(ut);
1267     TEST_ASSERT(writable == FALSE);
1268     utext_copy(ut, 1, 2, 0, TRUE, &status);
1269     TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
1270
1271     // Deep Clone of a frozen UText should re-enable writing in the copy.
1272     status = U_ZERO_ERROR;
1273     ut = utext_openUnicodeString(ut, &ustr, &status);
1274     TEST_SUCCESS(status);
1275     utext_freeze(ut);
1276     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
1277     TEST_SUCCESS(status);
1278     writable = utext_isWritable(ut2);
1279     TEST_ASSERT(writable == TRUE);
1280
1281
1282     // Deep clone of a frozen UText, where the base type is intrinsically non-writable,
1283     //  should NOT enable writing in the copy.
1284     status = U_ZERO_ERROR;
1285     ut = utext_openUChars(ut, u16str, -1, &status);
1286     TEST_SUCCESS(status);
1287     utext_freeze(ut);
1288     ut2 = utext_clone(ut2, ut, TRUE, FALSE, &status);   // deep clone
1289     TEST_SUCCESS(status);
1290     writable = utext_isWritable(ut2);
1291     TEST_ASSERT(writable == FALSE);
1292
1293     // cleanup
1294     utext_close(ut);
1295     utext_close(ut2);
1296 }
1297
1298
1299 //
1300 //  Fragmented UText
1301 //      A UText type that works with a chunk size of 1.
1302 //      Intended to test for edge cases.
1303 //      Input comes from a UnicodeString.
1304 //
1305 //       ut.b    the character.  Put into both halves.
1306 //
1307
1308 U_CDECL_BEGIN
1309 static UBool U_CALLCONV
1310 fragTextAccess(UText *ut, int64_t index, UBool forward) {
1311     const UnicodeString *us = (const UnicodeString *)ut->context;
1312     UChar  c;
1313     int32_t length = us->length();
1314     if (forward && index>=0 && index<length) {
1315         c = us->charAt((int32_t)index);
1316         ut->b = c | c<<16;
1317         ut->chunkOffset = 0;
1318         ut->chunkLength = 1;
1319         ut->chunkNativeStart = index;
1320         ut->chunkNativeLimit = index+1;
1321         return true;
1322     }
1323     if (!forward && index>0 && index <=length) {
1324         c = us->charAt((int32_t)index-1);
1325         ut->b = c | c<<16;
1326         ut->chunkOffset = 1;
1327         ut->chunkLength = 1;
1328         ut->chunkNativeStart = index-1;
1329         ut->chunkNativeLimit = index;
1330         return true;
1331     }
1332     ut->b = 0;
1333     ut->chunkOffset = 0;
1334     ut->chunkLength = 0;
1335     if (index <= 0) {
1336         ut->chunkNativeStart = 0;
1337         ut->chunkNativeLimit = 0;
1338     } else {
1339         ut->chunkNativeStart = length;
1340         ut->chunkNativeLimit = length;
1341     }
1342     return false;
1343 }
1344
// Function table to be used with this fragmented text provider.
//   Initialized in the open function, openFragmentedUnicodeString(), which
//   copies the function table of the UText it has just opened and then
//   replaces the access and clone entries.
//   There is a single shared table; it is rewritten on every open.
static UTextFuncs  fragmentFuncs;
1348
1349 // Clone function for fragmented text provider.
1350 //   Didn't really want to provide this, but it's easier to provide it than to keep it
1351 //   out of the tests.
1352 //
1353 UText *
1354 cloneFragmentedUnicodeString(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
1355     if (U_FAILURE(*status)) {
1356         return NULL;
1357     }
1358     if (deep) {
1359         *status = U_UNSUPPORTED_ERROR;
1360         return NULL;
1361     }
1362     dest = utext_openUnicodeString(dest, (UnicodeString *)src->context, status);
1363     utext_setNativeIndex(dest, utext_getNativeIndex(src));
1364     return dest;
1365 }
1366
1367 U_CDECL_END
1368
1369 // Open function for the fragmented text provider.
1370 UText *
1371 openFragmentedUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
1372     ut = utext_openUnicodeString(ut, s, status);
1373     if (U_FAILURE(*status)) {
1374         return ut;
1375     }
1376
1377     // Copy of the function table from the stock UnicodeString UText,
1378     //   and replace the entry for the access function.
1379     memcpy(&fragmentFuncs, ut->pFuncs, sizeof(fragmentFuncs));
1380     fragmentFuncs.access = fragTextAccess;
1381     fragmentFuncs.clone  = cloneFragmentedUnicodeString;
1382     ut->pFuncs = &fragmentFuncs;
1383
1384     ut->chunkContents = (UChar *)&ut->b;
1385     ut->pFuncs->access(ut, 0, TRUE);
1386     return ut;
1387 }
1388
1389 // Regression test for Ticket 5560
1390 //   Clone fails to update chunkContentPointer in the cloned copy.
1391 //   This is only an issue for UText types that work in a local buffer,
1392 //      (UTF-8 wrapper, for example)
1393 //
1394 //   The test:
//     1.  Create an initial UText
1396 //     2.  Deep clone it.  Contents should match original.
1397 //     3.  Reset original to something different.
1398 //     4.  Check that clone contents did not change.
1399 //
1400 void UTextTest::Ticket5560() {
1401     /* The following two strings are in UTF-8 even on EBCDIC platforms. */
1402     static const char s1[] = {0x41,0x42,0x43,0x44,0x45,0x46,0}; /* "ABCDEF" */
1403     static const char s2[] = {0x31,0x32,0x33,0x34,0x35,0x36,0}; /* "123456" */
1404         UErrorCode status = U_ZERO_ERROR;
1405
1406         UText ut1 = UTEXT_INITIALIZER;
1407         UText ut2 = UTEXT_INITIALIZER;
1408
1409         utext_openUTF8(&ut1, s1, -1, &status);
1410         UChar c = utext_next32(&ut1);
1411         TEST_ASSERT(c == 0x41);  // c == 'A'
1412
1413         utext_clone(&ut2, &ut1, TRUE, FALSE, &status);
1414         TEST_SUCCESS(status);
1415     c = utext_next32(&ut2);
1416         TEST_ASSERT(c == 0x42);  // c == 'B'
1417     c = utext_next32(&ut1);
1418         TEST_ASSERT(c == 0x42);  // c == 'B'
1419
1420         utext_openUTF8(&ut1, s2, -1, &status);
1421         c = utext_next32(&ut1);
1422         TEST_ASSERT(c == 0x31);  // c == '1'
1423     c = utext_next32(&ut2);
1424         TEST_ASSERT(c == 0x43);  // c == 'C'
1425
1426     utext_close(&ut1);
1427     utext_close(&ut2);
1428 }
1429
1430
1431 // Test for Ticket 6847
1432 //
1433 void UTextTest::Ticket6847() {
1434     const int STRLEN = 90;
1435     UChar s[STRLEN+1];
1436     u_memset(s, 0x41, STRLEN);
1437     s[STRLEN] = 0;
1438
1439     UErrorCode status = U_ZERO_ERROR;
1440     UText *ut = utext_openUChars(NULL, s, -1, &status);
1441
1442     utext_setNativeIndex(ut, 0);
1443     int32_t count = 0;
1444     UChar32 c = 0;
1445     int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1446     TEST_ASSERT(nativeIndex == 0);
1447     while ((c = utext_next32(ut)) != U_SENTINEL) {
1448         TEST_ASSERT(c == 0x41);
1449         TEST_ASSERT(count < STRLEN);
1450         if (count >= STRLEN) {
1451             break;
1452         }
1453         count++;
1454         nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1455         TEST_ASSERT(nativeIndex == count);
1456     }
1457     TEST_ASSERT(count == STRLEN);
1458     nativeIndex = UTEXT_GETNATIVEINDEX(ut);
1459     TEST_ASSERT(nativeIndex == STRLEN);
1460     utext_close(ut);
1461 }
1462
1463
1464 void UTextTest::Ticket10562() {
1465     // Note: failures show as a heap error when the test is run under valgrind.
1466     UErrorCode status = U_ZERO_ERROR;
1467
1468     const char *utf8_string = "\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41";
1469     UText *utf8Text = utext_openUTF8(NULL, utf8_string, -1, &status);
1470     TEST_SUCCESS(status);
1471     UText *deepClone = utext_clone(NULL, utf8Text, TRUE, FALSE, &status);
1472     TEST_SUCCESS(status);
1473     UText *shallowClone = utext_clone(NULL, deepClone, FALSE, FALSE, &status);
1474     TEST_SUCCESS(status);
1475     utext_close(shallowClone);
1476     utext_close(deepClone);
1477     utext_close(utf8Text);
1478
1479     status = U_ZERO_ERROR;
1480     UnicodeString usString("Hello, World.");
1481     UText *usText = utext_openUnicodeString(NULL, &usString, &status);
1482     TEST_SUCCESS(status);
1483     UText *usDeepClone = utext_clone(NULL, usText, TRUE, FALSE, &status);
1484     TEST_SUCCESS(status);
1485     UText *usShallowClone = utext_clone(NULL, usDeepClone, FALSE, FALSE, &status);
1486     TEST_SUCCESS(status);
1487     utext_close(usShallowClone);
1488     utext_close(usDeepClone);
1489     utext_close(usText);
1490 }
1491
1492
1493 void UTextTest::Ticket10983() {
1494     // Note: failure shows as a seg fault when the defect is present.
1495
1496     UErrorCode status = U_ZERO_ERROR;
1497     UnicodeString s("Hello, World");
1498     UText *ut = utext_openConstUnicodeString(NULL, &s, &status);
1499     TEST_SUCCESS(status);
1500
1501     status = U_INVALID_STATE_ERROR;
1502     UText *cloned = utext_clone(NULL, ut, TRUE, TRUE, &status);
1503     TEST_ASSERT(cloned == NULL);
1504     TEST_ASSERT(status == U_INVALID_STATE_ERROR);
1505
1506     utext_close(ut);
1507 }
1508
1509 // Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
1510 //                leaves the iteration position set incorrectly when the
1511 //                actual string length is not yet known.
1512 //
1513 //                The test text needs to be long enough that UText defers getting the length.
1514
1515 void UTextTest::Ticket12130() {
1516     UErrorCode status = U_ZERO_ERROR;
1517
1518     const char *text8 =
1519         "Fundamentally, computers just deal with numbers. They store letters and other characters "
1520         "by assigning a number for each one. Before Unicode was invented, there were hundreds "
1521         "of different encoding systems for assigning these numbers. No single encoding could "
1522         "contain enough characters: for example, the European Union alone requires several "
1523         "different encodings to cover all its languages. Even for a single language like "
1524         "English no single encoding was adequate for all the letters, punctuation, and technical "
1525         "symbols in common use.";
1526
1527     UnicodeString str(text8);
1528     const UChar *ustr = str.getTerminatedBuffer();
1529     UText ut = UTEXT_INITIALIZER;
1530     utext_openUChars(&ut, ustr, -1, &status);
1531     UChar extractBuffer[50];
1532
1533     for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
1534         int32_t endIdx = startIdx + 20;
1535
1536         u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
1537         utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
1538         if (U_FAILURE(status)) {
1539             errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
1540             return;
1541         }
1542         int64_t ni  = utext_getNativeIndex(&ut);
1543         int64_t expectedni = startIdx + 20;
1544         if (expectedni > str.length()) {
1545             expectedni = str.length();
1546         }
1547         if (expectedni != ni) {
1548             errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
1549         }
1550         if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
1551             errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
1552                     __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
1553         }
1554     }
1555     utext_close(&ut);
1556
1557     // Similar utext extract, this time with the string length provided to the UText in advance,
1558     // and a buffer of larger than required capacity.
1559
1560     utext_openUChars(&ut, ustr, str.length(), &status);
1561     for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
1562         int32_t endIdx = startIdx + 20;
1563         u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
1564         utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
1565         if (U_FAILURE(status)) {
1566             errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
1567             return;
1568         }
1569         int64_t ni  = utext_getNativeIndex(&ut);
1570         int64_t expectedni = startIdx + 20;
1571         if (expectedni > str.length()) {
1572             expectedni = str.length();
1573         }
1574         if (expectedni != ni) {
1575             errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
1576         }
1577         if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
1578             errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
1579                     __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
1580         }
1581     }
1582     utext_close(&ut);
1583 }