+// Regression test for Ticket 5560
+// Clone fails to update chunkContentPointer in the cloned copy.
+// This is only an issue for UText types that work in a local buffer,
+// (UTF-8 wrapper, for example)
+//
+// The test:
+// 1. Create an initial UText
+// 2. Deep clone it. Contents should match original.
+// 3. Reset original to something different.
+// 4. Check that clone contents did not change.
+//
+void UTextTest::Ticket5560() {
+    /* The following two strings are in UTF-8 even on EBCDIC platforms. */
+    static const char s1[] = {0x41,0x42,0x43,0x44,0x45,0x46,0}; /* "ABCDEF" */
+    static const char s2[] = {0x31,0x32,0x33,0x34,0x35,0x36,0}; /* "123456" */
+    UErrorCode status = U_ZERO_ERROR;
+
+    UText ut1 = UTEXT_INITIALIZER;
+    UText ut2 = UTEXT_INITIALIZER;
+
+    // Step 1: open the original over s1 and consume its first character.
+    utext_openUTF8(&ut1, s1, -1, &status);
+    TEST_SUCCESS(status);
+    // Hold the result of utext_next32() in a UChar32 so that no value is
+    // narrowed before it is compared.
+    UChar32 c = utext_next32(&ut1);
+    TEST_ASSERT(c == 0x41);  // c == 'A'
+
+    // Step 2: deep clone. The clone picks up at the original's position, and
+    // the two iterate independently, so each of them yields 'B' next.
+    utext_clone(&ut2, &ut1, TRUE, FALSE, &status);
+    TEST_SUCCESS(status);
+    c = utext_next32(&ut2);
+    TEST_ASSERT(c == 0x42);  // c == 'B'
+    c = utext_next32(&ut1);
+    TEST_ASSERT(c == 0x42);  // c == 'B'
+
+    // Step 3: re-open the original over different text.
+    utext_openUTF8(&ut1, s2, -1, &status);
+    TEST_SUCCESS(status);
+    c = utext_next32(&ut1);
+    TEST_ASSERT(c == 0x31);  // c == '1'
+
+    // Step 4: the clone must still be reading the "ABCDEF" contents.
+    c = utext_next32(&ut2);
+    TEST_ASSERT(c == 0x43);  // c == 'C'
+
+    utext_close(&ut1);
+    utext_close(&ut2);
+}
+
+
+// Test for Ticket 6847
+//
+void UTextTest::Ticket6847() {
+    // Build a NUL-terminated string of STRLEN 'A' (0x41) code units.
+    const int STRLEN = 90;
+    UChar s[STRLEN+1];
+    u_memset(s, 0x41, STRLEN);
+    s[STRLEN] = 0;
+
+    // Open with length -1, i.e. without telling the UText the length up front.
+    UErrorCode status = U_ZERO_ERROR;
+    UText *ut = utext_openUChars(NULL, s, -1, &status);
+    TEST_SUCCESS(status);
+    if (U_FAILURE(status)) {
+        // Nothing usable was opened; do not iterate over it.
+        return;
+    }
+
+    utext_setNativeIndex(ut, 0);
+    int32_t count = 0;
+    UChar32 c = 0;
+    int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
+    TEST_ASSERT(nativeIndex == 0);
+
+    // Walk forward one character at a time; after each step the native index
+    // must equal the number of characters consumed so far.
+    while ((c = utext_next32(ut)) != U_SENTINEL) {
+        TEST_ASSERT(c == 0x41);
+        TEST_ASSERT(count < STRLEN);
+        if (count >= STRLEN) {
+            // Iteration ran past the end of the string; stop rather than loop on.
+            break;
+        }
+        count++;
+        nativeIndex = UTEXT_GETNATIVEINDEX(ut);
+        TEST_ASSERT(nativeIndex == count);
+    }
+
+    // Every character was seen, and the index rests at the end of the string.
+    TEST_ASSERT(count == STRLEN);
+    nativeIndex = UTEXT_GETNATIVEINDEX(ut);
+    TEST_ASSERT(nativeIndex == STRLEN);
+    utext_close(ut);
+}
+
+
+void UTextTest::Ticket10562() {
+    // Note: failures show as a heap error when the test is run under valgrind.
+    UErrorCode status = U_ZERO_ERROR;
+
+    // Sequence applied to each freshly opened UText: check that the open
+    // succeeded, deep clone the source, shallow clone that deep clone, then
+    // close all three, newest first.
+    auto cloneChainThenClose = [&](UText *source) {
+        TEST_SUCCESS(status);
+        UText *deepCopy = utext_clone(NULL, source, TRUE, FALSE, &status);
+        TEST_SUCCESS(status);
+        UText *shallowCopy = utext_clone(NULL, deepCopy, FALSE, FALSE, &status);
+        TEST_SUCCESS(status);
+        utext_close(shallowCopy);
+        utext_close(deepCopy);
+        utext_close(source);
+    };
+
+    // First pass: a UText over a UTF-8 string made of 'A' (0x41) bytes.
+    const char *utf8_string = "\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41";
+    cloneChainThenClose(utext_openUTF8(NULL, utf8_string, -1, &status));
+
+    // Second pass: a UText over a UnicodeString, starting from a clean status.
+    status = U_ZERO_ERROR;
+    UnicodeString usString("Hello, World.");
+    cloneChainThenClose(utext_openUnicodeString(NULL, &usString, &status));
+}
+
+
+void UTextTest::Ticket10983() {
+    // Note: failure shows as a seg fault when the defect is present.
+
+    // Open a UText over a const UnicodeString; the open itself must succeed.
+    UnicodeString source("Hello, World");
+    UErrorCode openStatus = U_ZERO_ERROR;
+    UText *constText = utext_openConstUnicodeString(NULL, &source, &openStatus);
+    TEST_SUCCESS(openStatus);
+
+    // Call utext_clone() with an error code already set. The expected outcome
+    // is a NULL result and the incoming error code left as it was.
+    UErrorCode cloneStatus = U_INVALID_STATE_ERROR;
+    UText *copy = utext_clone(NULL, constText, TRUE, TRUE, &cloneStatus);
+    TEST_ASSERT(copy == NULL);
+    TEST_ASSERT(cloneStatus == U_INVALID_STATE_ERROR);
+
+    utext_close(constText);
+}
+
+// Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
+// leaves the iteration position set incorrectly when the
+// actual string length is not yet known.
+//
+// The test text needs to be long enough that UText defers getting the length.
+
+void UTextTest::Ticket12130() {
+    UErrorCode status = U_ZERO_ERROR;
+
+    const char *text8 =
+        "Fundamentally, computers just deal with numbers. They store letters and other characters "
+        "by assigning a number for each one. Before Unicode was invented, there were hundreds "
+        "of different encoding systems for assigning these numbers. No single encoding could "
+        "contain enough characters: for example, the European Union alone requires several "
+        "different encodings to cover all its languages. Even for a single language like "
+        "English no single encoding was adequate for all the letters, punctuation, and technical "
+        "symbols in common use.";
+
+    UnicodeString str(text8);
+    const UChar *ustr = str.getTerminatedBuffer();
+    UText ut = UTEXT_INITIALIZER;
+    UChar extractBuffer[50];
+
+    // Number of code units requested by each utext_extract() call.
+    const int32_t kExtractLength = 20;
+
+    // Opens `ut` over `ustr` with the given length argument, then for every start
+    // index extracts kExtractLength code units and checks both the extracted text
+    // and the iteration position left behind. `ut` is closed on every exit path.
+    // Returns false if utext_extract() reported an error, true otherwise.
+    auto checkExtractFromEveryIndex = [&](int64_t lengthForOpen) -> bool {
+        utext_openUChars(&ut, ustr, lengthForOpen, &status);
+        for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
+            int32_t endIdx = startIdx + kExtractLength;
+
+            // Zero the buffer first so the extracted text is always NUL terminated.
+            u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
+            utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
+            if (U_FAILURE(status)) {
+                errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
+                utext_close(&ut);
+                return false;
+            }
+
+            // The position is expected at the end of the extracted range,
+            // clipped to the end of the string.
+            int64_t ni = utext_getNativeIndex(&ut);
+            int64_t expectedni = endIdx;
+            if (expectedni > str.length()) {
+                expectedni = str.length();
+            }
+            if (expectedni != ni) {
+                // The format uses %d, so pass int-sized arguments; the expected
+                // value is bounded by str.length(), which is an int32_t.
+                errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__,
+                      static_cast<int32_t>(expectedni), static_cast<int32_t>(ni));
+            }
+            if (0 != str.tempSubString(startIdx, kExtractLength).compare(extractBuffer)) {
+                errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
+                      __FILE__, __LINE__, CStr(str.tempSubString(startIdx, kExtractLength))(), CStr(UnicodeString(extractBuffer))());
+            }
+        }
+        utext_close(&ut);
+        return true;
+    };
+
+    // First pass: length -1, so the UText has to discover the string length itself.
+    if (!checkExtractFromEveryIndex(-1)) {
+        return;
+    }
+
+    // Similar utext extract, this time with the string length provided to the UText in advance,
+    // and a buffer of larger than required capacity.
+    checkExtractFromEveryIndex(str.length());
+}
+
+// Ticket 13344 The macro form of UTEXT_SETNATIVEINDEX failed when target was a trail surrogate
+// of a supplementary character.
+
+void UTextTest::Ticket13344() {
+    UErrorCode status = U_ZERO_ERROR;
+    // "abc", then one supplementary character (a surrogate pair at native
+    // indexes 3 and 4), then " xyz".
+    const char16_t *str = u"abc\U0010abcd xyz";
+    LocalUTextPointer ut(utext_openUChars(NULL, str, -1, &status));
+
+    assertSuccess("UTextTest::Ticket13344-status", status);
+
+    // One row per index to set: the index requested, the index expected
+    // afterwards, and the assertion labels for the macro and function passes.
+    struct Probe {
+        int64_t requested;
+        int64_t expected;
+        const char *macroLabel;
+        const char *functionLabel;
+    };
+    static const Probe probes[] = {
+        // Lead surrogate: the position stays where it was put.
+        {3, 3, "UTextTest::Ticket13344-lead", "UTextTest::Ticket13344-lead-2"},
+        // Trail surrogate: the position moves back to the lead surrogate.
+        {4, 3, "UTextTest::Ticket13344-trail", "UTextTest::Ticket13344-trail-2"},
+        // BMP character following the pair.
+        {5, 5, "UTextTest::Ticket13344-bmp", "UTextTest::Ticket13344-bmp-2"},
+    };
+
+    // Pass 1: the macro form, which is what the ticket reported as failing.
+    for (const Probe &probe : probes) {
+        UTEXT_SETNATIVEINDEX(ut.getAlias(), probe.requested);
+        assertEquals(probe.macroLabel, probe.expected, utext_getNativeIndex(ut.getAlias()));
+    }
+
+    // Pass 2: the function form, with the same expectations.
+    for (const Probe &probe : probes) {
+        utext_setNativeIndex(ut.getAlias(), probe.requested);
+        assertEquals(probe.functionLabel, probe.expected, utext_getNativeIndex(ut.getAlias()));
+    }
+}