/********************************************************************
- * COPYRIGHT:
- * Copyright (c) 2005-2009, International Business Machines Corporation and
+ * COPYRIGHT:
+ * Copyright (c) 2005-2016, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/************************************************************************
*
************************************************************************/
-#include "unicode/utypes.h"
-
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
-#include <unicode/utext.h>
-#include <unicode/utf8.h>
-#include <unicode/ustring.h>
-#include <unicode/uchriter.h>
+#include "unicode/utypes.h"
+#include "unicode/utext.h"
+#include "unicode/utf8.h"
+#include "unicode/ustring.h"
+#include "unicode/uchriter.h"
+#include "cmemory.h"
+#include "cstr.h"
#include "utxttest.h"
static UBool gFailed = FALSE;
if (exec) Ticket5560(); break;
case 4: name = "Ticket6847";
if (exec) Ticket6847(); break;
+ case 5: name = "Ticket10562";
+ if (exec) Ticket10562(); break;
+ case 6: name = "Ticket10983";
+ if (exec) Ticket10983(); break;
+ case 7: name = "Ticket12130";
+ if (exec) Ticket12130(); break;
default: name = ""; break;
}
}
for (i=0; i<1000; i++) {
int len8 = m_rand()%4 + 1;
switch (len8) {
- case 1:
+ case 1:
c1 = (c1+1)%0x80;
// don't put 0 into string (0 terminated strings for some tests)
// don't put '\', will cause unescape() to fail.
j++;
cpCount++;
}
- cpMap[j].nativeIdx = i; // position following the last char in utf-16 string.
+ cpMap[j].nativeIdx = i; // position following the last char in utf-16 string.
// UChar * test, null terminated
TestAccess(sa, ut, cpCount, cpMap);
utext_close(ut);
delete ci;
-
+
// Fragmented UnicodeString (Chunk size of one)
//
- delete []cpMap;
- delete []u8Map;
- delete []u8String;
+ delete []cpMap;
+ delete []u8Map;
+ delete []u8String;
}
// TestCMR test Copy, Move and Replace operations.
//
// This function runs a whole series of operations on each incoming UText.
// The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
-//
+//
void UTextTest::TestCMR(const UnicodeString &us, UText *ut, int cpCount, m *nativeMap, m *u16Map) {
TEST_ASSERT(utext_isWritable(ut) == TRUE);
int srcLengthType; // Loop variables for selecting the postion and length
int srcPosType; // of the block to operate on within the source text.
- int destPosType;
+ int destPosType;
int srcIndex = 0; // Code Point indexes of the block to operate on for
int srcLength = 0; // a specific test.
case 5: srcIndex = cpCount / 2; break;
}
if (srcIndex < 0 || srcIndex + srcLength > cpCount) {
- // filter out bogus test cases -
+ // filter out bogus test cases -
// those with a source range that falls of an edge of the string.
continue;
}
//
void UTextTest::TestCopyMove(const UnicodeString &us, UText *ut, UBool move,
int32_t nativeStart, int32_t nativeLimit, int32_t nativeDest,
- int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
+ int32_t u16Start, int32_t u16Limit, int32_t u16Dest)
{
UErrorCode status = U_ZERO_ERROR;
UText *targetUT = NULL;
// Compare the results of the two parallel tests
int32_t usi = 0; // UnicodeString postion, utf-16 index.
int64_t uti = 0; // UText position, native index.
- int32_t cpi; // char32 position (code point index)
+ int32_t cpi; // char32 position (code point index)
UChar32 usc; // code point from Unicode String
UChar32 utc; // code point from UText
utext_setNativeIndex(targetUT, 0);
cleanupAndReturn:
utext_close(targetUT);
}
-
+
//
// TestReplace Test a single Replace operation.
//
void UTextTest::TestReplace(
- const UnicodeString &us, // reference UnicodeString in which to do the replace
+ const UnicodeString &us, // reference UnicodeString in which to do the replace
UText *ut, // UnicodeText object under test.
- int32_t nativeStart, // Range to be replaced, in UText native units.
+ int32_t nativeStart, // Range to be replaced, in UText native units.
int32_t nativeLimit,
int32_t u16Start, // Range to be replaced, in UTF-16 units
int32_t u16Limit, // for use in the reference UnicodeString.
UnicodeString targetUS(us); // And copy the reference string.
//
- // Do the replace operation in the Unicode String, to
+ // Do the replace operation in the Unicode String, to
// produce a reference result.
//
targetUS.replace(u16Start, u16Limit-u16Start, repStr);
//
int32_t usi = 0; // UnicodeString postion, utf-16 index.
int64_t uti = 0; // UText position, native index.
- int32_t cpi; // char32 position (code point index)
+ int32_t cpi; // char32 position (code point index)
UChar32 usc; // code point from Unicode String
UChar32 utc; // code point from UText
int64_t expectedNativeLength = 0;
}
utext_close(deepClone);
}
-
+
//
// TestAccessNoClone() Test the read only access functions on a UText.
foundIndex = utext_getNativeIndex(ut);
TEST_ASSERT(expectedIndex == foundIndex);
expectedC = cpMap[i].cp;
- foundC = utext_next32(ut);
+ foundC = utext_next32(ut);
TEST_ASSERT(expectedC == foundC);
foundIndex = utext_getPreviousNativeIndex(ut);
TEST_ASSERT(expectedIndex == foundIndex);
}
foundC = utext_next32(ut);
TEST_ASSERT(foundC == U_SENTINEL);
-
+
// Repeat above, using macros
utext_setNativeIndex(ut, 0);
for (i=0; i<cpCount; i++) {
foundIndex = UTEXT_GETNATIVEINDEX(ut);
TEST_ASSERT(expectedIndex == foundIndex);
expectedC = cpMap[i].cp;
- foundC = UTEXT_NEXT32(ut);
+ foundC = UTEXT_NEXT32(ut);
TEST_ASSERT(expectedC == foundC);
if (gFailed) {
return;
// or whether the lead surrogate of the pair is extracted.
// It's a buffer overflow error in either case.
TEST_ASSERT(buf[0] == us.charAt(0) ||
- buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0)));
+ (buf[0] == 0x5555 && U_IS_SUPPLEMENTARY(us.char32At(0))));
TEST_ASSERT(buf[1] == 0x5555);
if (us.length() == 1) {
TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
delete []buf;
}
-
-
//
// ErrorTest() Check various error and edge cases.
//
-void UTextTest::ErrorTest()
+void UTextTest::ErrorTest()
{
// Close of an unitialized UText. Shouldn't blow up.
{
- UText ut;
+ UText ut;
memset(&ut, 0, sizeof(UText));
utext_close(&ut);
utext_close(NULL);
TEST_ASSERT(utp == &ut);
}
+ // Invalid parameters on open
+ //
+ {
+ UErrorCode status = U_ZERO_ERROR;
+ UText ut = UTEXT_INITIALIZER;
+
+ utext_openUChars(&ut, NULL, 5, &status);
+ TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
+
+ status = U_ZERO_ERROR;
+ utext_openUChars(&ut, NULL, -1, &status);
+ TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
+
+ status = U_ZERO_ERROR;
+ utext_openUTF8(&ut, NULL, 4, &status);
+ TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
+
+ status = U_ZERO_ERROR;
+ utext_openUTF8(&ut, NULL, -1, &status);
+ TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
+ }
+
//
// UTF-8 with malformed sequences.
// These should come through as the Unicode replacement char, \ufffd
{
UErrorCode status = U_ZERO_ERROR;
UText *ut = NULL;
- const char *badUTF8 = "\x41\x81\x42\xf0\x81\x81\x43";
+ const char *badUTF8 = "\x41\x81\x42\xf0\x81\x81\x43";
UChar32 c;
ut = utext_openUTF8(NULL, badUTF8, -1, &status);
int32_t startMap[] = { 0, 0, 2, 2, 2, 5, 5, 5, 5, 9, 9};
int32_t nextMap[] = { 2, 2, 5, 5, 5, 9, 9, 9, 9, 9, 9};
int32_t prevMap[] = { 0, 0, 0, 0, 0, 2, 2, 2, 2, 5, 5};
- UChar32 c32Map[] = {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
- UChar32 pr32Map[] = { -1, -1, 0x201, 0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146};
+ UChar32 c32Map[] = {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
+ UChar32 pr32Map[] = { -1, -1, 0x201, 0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146};
// extractLen is the size, in UChars, of what will be extracted between index and index+1.
// is zero when both index positions lie within the same code point.
// Check setIndex
int32_t i;
- int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
+ int32_t startMapLimit = UPRV_LENGTHOF(startMap);
for (i=0; i<startMapLimit; i++) {
utext_setNativeIndex(ut, i);
int64_t cpIndex = utext_getNativeIndex(ut);
int64_t cpIndex = utext_getNativeIndex(ut);
TEST_ASSERT(cpIndex == nextMap[i]);
}
-
+
// check utext_previous32From
for (i=0; i<startMapLimit; i++) {
gTestNum++;
{ // Similar test, with utf16 instead of utf8
// TODO: merge the common parts of these tests.
-
+
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
- UChar32 c32Map[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
- UChar32 pr32Map[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
+ UChar32 c32Map[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
+ UChar32 pr32Map[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
int32_t exLen[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
u16str = u16str.unescape();
UText *ut = utext_openUnicodeString(NULL, &u16str, &status);
TEST_SUCCESS(status);
- int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
+ int32_t startMapLimit = UPRV_LENGTHOF(startMap);
int i;
for (i=0; i<startMapLimit; i++) {
utext_setNativeIndex(ut, i);
int64_t cpIndex = utext_getNativeIndex(ut);
TEST_ASSERT(cpIndex == nextMap[i]);
}
-
+
// check utext_previous32From
for (i=0; i<startMapLimit; i++) {
UChar32 c32 = utext_previous32From(ut, i);
{ // Similar test, with UText over Replaceable
// TODO: merge the common parts of these tests.
-
+
UnicodeString u16str("\\u1000\\U00011000\\u2000\\U00022000", -1, US_INV);
int32_t startMap[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
int32_t nextMap[] = { 1, 3, 3, 4, 6, 6, 6, 6};
int32_t prevMap[] = { 0, 0, 0, 1, 3, 3, 4, 4};
- UChar32 c32Map[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
- UChar32 pr32Map[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
+ UChar32 c32Map[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
+ UChar32 pr32Map[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
int32_t exLen[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
u16str = u16str.unescape();
UText *ut = utext_openReplaceable(NULL, &u16str, &status);
TEST_SUCCESS(status);
- int32_t startMapLimit = sizeof(startMap) / sizeof(int32_t);
+ int32_t startMapLimit = UPRV_LENGTHOF(startMap);
int i;
for (i=0; i<startMapLimit; i++) {
utext_setNativeIndex(ut, i);
int64_t cpIndex = utext_getNativeIndex(ut);
TEST_ASSERT(cpIndex == nextMap[i]);
}
-
+
// check utext_previous32From
for (i=0; i<startMapLimit; i++) {
UChar32 c32 = utext_previous32From(ut, i);
//
UnicodeString ustr("Hello, World.");
- const char u8str[] = {char(0x31), (char)0x32, (char)0x33, 0};
+ const char u8str[] = {char(0x31), (char)0x32, (char)0x33, 0};
const UChar u16str[] = {(UChar)0x31, (UChar)0x32, (UChar)0x44, 0};
UErrorCode status = U_ZERO_ERROR;
TEST_ASSERT(writable == FALSE);
utext_copy(ut, 1, 2, 0, TRUE, &status);
TEST_ASSERT(status == U_NO_WRITE_PERMISSION);
-
+
status = U_ZERO_ERROR;
ut = utext_openUnicodeString(ut, &ustr, &status);
TEST_SUCCESS(status);
ut->chunkNativeStart = index-1;
ut->chunkNativeLimit = index;
return true;
- }
+ }
ut->b = 0;
ut->chunkOffset = 0;
ut->chunkLength = 0;
// 1. Create an initial UText
// 2. Deep clone it. Contents should match original.
// 3. Reset original to something different.
-// 4. Check that clone contents did not change.
+// 4. Check that clone contents did not change.
//
void UTextTest::Ticket5560() {
/* The following two strings are in UTF-8 even on EBCDIC platforms. */
utext_setNativeIndex(ut, 0);
int32_t count = 0;
UChar32 c = 0;
- int32_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
+ int64_t nativeIndex = UTEXT_GETNATIVEINDEX(ut);
TEST_ASSERT(nativeIndex == 0);
while ((c = utext_next32(ut)) != U_SENTINEL) {
TEST_ASSERT(c == 0x41);
utext_close(ut);
}
+
+void UTextTest::Ticket10562() {
+    // Note: failures show as a heap error when the test is run under valgrind.
+    //
+    // For each of two text providers the same sequence is performed:
+    //   open a UText, deep-clone it, shallow-clone the deep clone,
+    //   then close the three UTexts newest-first.
+
+    // Pass 1: UText over a UTF-8 string.
+    {
+        UErrorCode status = U_ZERO_ERROR;
+        const char *utf8_string = "\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41";
+
+        UText *source = utext_openUTF8(NULL, utf8_string, -1, &status);
+        TEST_SUCCESS(status);
+        UText *deepCopy = utext_clone(NULL, source, TRUE, FALSE, &status);
+        TEST_SUCCESS(status);
+        UText *shallowCopy = utext_clone(NULL, deepCopy, FALSE, FALSE, &status);
+        TEST_SUCCESS(status);
+
+        utext_close(shallowCopy);
+        utext_close(deepCopy);
+        utext_close(source);
+    }
+
+    // Pass 2: UText over a UnicodeString.
+    {
+        UErrorCode status = U_ZERO_ERROR;
+        UnicodeString usString("Hello, World.");
+
+        UText *source = utext_openUnicodeString(NULL, &usString, &status);
+        TEST_SUCCESS(status);
+        UText *deepCopy = utext_clone(NULL, source, TRUE, FALSE, &status);
+        TEST_SUCCESS(status);
+        UText *shallowCopy = utext_clone(NULL, deepCopy, FALSE, FALSE, &status);
+        TEST_SUCCESS(status);
+
+        utext_close(shallowCopy);
+        utext_close(deepCopy);
+        utext_close(source);
+    }
+}
+
+
+void UTextTest::Ticket10983() {
+    // Note: failure shows as a seg fault when the defect is present.
+
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString greeting("Hello, World");
+    UText *constText = utext_openConstUnicodeString(NULL, &greeting, &status);
+    TEST_SUCCESS(status);
+
+    // Call utext_clone() with a failure code already stored in status.
+    // The assertions require that no clone is produced and that the
+    // incoming error code is left as it was.
+    status = U_INVALID_STATE_ERROR;
+    UText *cloned = utext_clone(NULL, constText, TRUE, TRUE, &status);
+    TEST_ASSERT(cloned == NULL);
+    TEST_ASSERT(status == U_INVALID_STATE_ERROR);
+
+    utext_close(constText);
+}
+
+// Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
+//                leaves the iteration position set incorrectly when the
+//                actual string length is not yet known.
+//
+//                The test text needs to be long enough that UText defers getting the length.
+//
+//                The same checks are run twice over the same text:
+//                  pass 0 opens the UText with length -1 (length not supplied),
+//                  pass 1 opens it with the string length provided in advance.
+//                In both passes the extract buffer (50 UChars) is larger than the
+//                20 code units requested.
+
+void UTextTest::Ticket12130() {
+    UErrorCode status = U_ZERO_ERROR;
+
+    const char *text8 =
+        "Fundamentally, computers just deal with numbers. They store letters and other characters "
+        "by assigning a number for each one. Before Unicode was invented, there were hundreds "
+        "of different encoding systems for assigning these numbers. No single encoding could "
+        "contain enough characters: for example, the European Union alone requires several "
+        "different encodings to cover all its languages. Even for a single language like "
+        "English no single encoding was adequate for all the letters, punctuation, and technical "
+        "symbols in common use.";
+
+    UnicodeString str(text8);
+    const UChar *ustr = str.getTerminatedBuffer();
+    const int32_t extractLength = 20;      // number of code units requested per extract
+    UChar extractBuffer[50];
+
+    for (int32_t pass = 0; pass < 2; ++pass) {
+        // Pass 0: length unknown to the UText (-1).  Pass 1: length supplied.
+        const int32_t openLength = (pass == 0) ? -1 : str.length();
+
+        UText ut = UTEXT_INITIALIZER;
+        utext_openUChars(&ut, ustr, openLength, &status);
+        if (U_FAILURE(status)) {
+            errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
+            utext_close(&ut);
+            return;
+        }
+
+        for (int32_t startIdx = 0; startIdx < str.length(); ++startIdx) {
+            const int32_t endIdx = startIdx + extractLength;
+
+            u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
+            utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
+            if (U_FAILURE(status)) {
+                errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
+                // Close the UText before bailing out so it is not left open.
+                utext_close(&ut);
+                return;
+            }
+
+            // After the extract, the iteration position is expected to be at the end
+            // of the extracted range, pinned to the end of the string.
+            int64_t ni = utext_getNativeIndex(&ut);
+            int64_t expectedni = endIdx;
+            if (expectedni > str.length()) {
+                expectedni = str.length();
+            }
+            if (expectedni != ni) {
+                // The indexes are 64 bit; cast explicitly so that the varargs match
+                // the format specifiers.
+                errln("%s:%d utext_getNativeIndex() expected %lld, got %lld",
+                      __FILE__, __LINE__, (long long)expectedni, (long long)ni);
+            }
+            if (0 != str.tempSubString(startIdx, extractLength).compare(extractBuffer)) {
+                errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
+                      __FILE__, __LINE__,
+                      CStr(str.tempSubString(startIdx, extractLength))(),
+                      CStr(UnicodeString(extractBuffer))());
+            }
+        }
+        utext_close(&ut);
+    }
+}