+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
-* Copyright (C) 2003-2010, International Business Machines
+* Copyright (C) 2003-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: convtest.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "unicode/uniset.h"
#include "unicode/ustring.h"
#include "unicode/ures.h"
+#include "unicode/utf16.h"
#include "convtest.h"
+#include "cmemory.h"
#include "unicode/tstdtmod.h"
#include <string.h>
#include <stdlib.h>
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
enum {
// characters used in test data for callbacks
SUB_CB='?',
void
ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if (exec) logln("TestSuite ConversionTest: ");
- switch (index) {
+ TESTCASE_AUTO_BEGIN;  // test dispatch: replaces the hand-numbered switch (index) removed on the line above
#if !UCONFIG_NO_FILE_IO
- case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
- case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
- case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
-#else
- case 0:
- case 1:
- case 2: name="skip"; break;
+ TESTCASE_AUTO(TestToUnicode);  // this and the next two entries are compiled in only when UCONFIG_NO_FILE_IO is 0
+ TESTCASE_AUTO(TestFromUnicode);
+ TESTCASE_AUTO(TestGetUnicodeSet);  // NOTE(review): the removed #else branch reserved indexes 0-2 as "skip"; no placeholder remains — confirm TESTCASE_AUTO numbering does not need one
#endif
- case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
- default: name=""; break; //needed to end loop
- }
+ TESTCASE_AUTO(TestGetUnicodeSet2);  // was the explicit "case 3" in the removed switch
+ TESTCASE_AUTO(TestDefaultIgnorableCallback);  // newly registered by this change
+ TESTCASE_AUTO(TestUTF8ToUTF8Overflow);  // newly registered by this change
+ TESTCASE_AUTO_END;  // stands where the removed default: case (name="", "needed to end loop") was; presumably plays the same role — confirm in intltest.h
}
// test data interface ----------------------------------------------------- ***
// read a substitution string, separated by an equal sign
p=s.getBuffer()+index+1;
length=s.length()-(index+1);
- if(length<0 || length>=LENGTHOF(cc.subString)) {
+ if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
} else {
u_memcpy(cc.subString, p, length);
if(!diffSet.isEmpty()) {
diffSet.toPattern(s, TRUE);
if(s.length()>100) {
- s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
charset, i);
if(!diffSet.isEmpty()) {
diffSet.toPattern(s, TRUE);
if(s.length()>100) {
- s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
charset, i);
LocalUConverterPointer cnv;
char buffer[1024];
int32_t i;
- for(i=0; i<LENGTHOF(cnvNames); ++i) {
+ for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
UErrorCode errorCode=U_ZERO_ERROR;
cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
if(U_FAILURE(errorCode)) {
// First try to see if we have different sets because ucnv_getUnicodeSet()
// added strings: The above conversion method does not tell us what strings might be convertible.
// Remove strings from the set and compare again.
- // Unfortunately, there are no good, direct set methods for finding out whether there are strings
- // in the set, nor for enumerating or removing just them.
- // Intersect all code points with the set. The intersection will not contain strings.
- UnicodeSet temp(0, 0x10ffff);
- temp.retainAll(set);
- set=temp;
+ set.removeAllStrings();
}
if(set!=expected) {
UnicodeSet diffSet;
if(!diffSet.isEmpty()) {
diffSet.toPattern(out, TRUE);
if(out.length()>100) {
- out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
cnvNames[i], which);
if(!diffSet.isEmpty()) {
diffSet.toPattern(out, TRUE);
if(out.length()>100) {
- out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
cnvNames[i], which);
delete [] s0;
}
+// Tests that every code point with the Default_Ignorable_Code_Point property is skipped
+// (no error, zero bytes written) when the converter has no mapping for it, and that every
+// other code point except U+0000 yields at least one output byte.
+// If there are any failures, the hard-coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated.
+void
+ConversionTest::TestDefaultIgnorableCallback() {
+    UErrorCode status = U_ZERO_ERROR;
+    const char *cnv_name = "euc-jp-2007";
+    const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
+    const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
+
+    // The sets live on the stack and the converter in a LocalUConverterPointer,
+    // so none of the early returns below can leak them.
+    UnicodeSet set_ignorable(pattern_ignorable, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
+        return;
+    }
+
+    UnicodeSet set_not_ignorable(pattern_not_ignorable, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
+        return;
+    }
+
+    LocalUConverterPointer cnv(cnv_open(cnv_name, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
+        return;
+    }
+
+    // set callback for the converter; report a failure here instead of letting
+    // the per-iteration status reset in the loops below hide it
+    ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
+    if (U_FAILURE(status)) {
+        errln("Unable to set the substitute callback on converter: %s - %s", cnv_name, u_errorName(status));
+        return;
+    }
+
+    UChar32 input[1];
+    char output[10];
+
+    // test default ignorables are ignored
+    const int32_t ignorableCount = set_ignorable.size();
+    for (int32_t i = 0; i < ignorableCount; i++) {
+        status = U_ZERO_ERROR;
+        input[0] = set_ignorable.charAt(i);
+
+        const int32_t outputLength = ucnv_fromUChars(cnv.getAlias(), output, UPRV_LENGTHOF(output),
+                UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status) || outputLength != 0) {
+            errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
+        }
+    }
+
+    // test non-ignorables are not ignored
+    const int32_t notIgnorableCount = set_not_ignorable.size();
+    for (int32_t i = 0; i < notIgnorableCount; i++) {
+        status = U_ZERO_ERROR;
+        input[0] = set_not_ignorable.charAt(i);
+
+        // The source is handed over as a NUL-terminated string (length -1),
+        // so U+0000 would be seen as empty input; skip it.
+        if (input[0] == 0) {
+            continue;
+        }
+
+        const int32_t outputLength = ucnv_fromUChars(cnv.getAlias(), output, UPRV_LENGTHOF(output),
+                UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status) || outputLength <= 0) {
+            errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
+        }
+    }
+}
+/* Checks ucnv_convertEx() between two UTF-8 converters in three scenarios:
+ * a target buffer too small for a 3-byte input, a target buffer too small for a
+ * 5-byte input (each followed by a second call that finishes the conversion), and
+ * ill-formed input that is first cut off after two bytes and then resumed at a trail byte.
+ */
+void
+ConversionTest::TestUTF8ToUTF8Overflow() {
+ IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
+ LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));  // passed as the second (source) converter in every ucnv_convertEx() call below
+ LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));  // passed as the first (target) converter in every ucnv_convertEx() call below
+ static const char *text = "aä"; // ä: 2 bytes
+ const char *source = text;
+ const char *sourceLimit = text + strlen(text);
+ char result[20];
+ char *target = result;
+ const char *targetLimit = result + sizeof(result);
+ UChar buffer16[20];  // pivot buffer handed to ucnv_convertEx()
+ UChar *pivotSource = buffer16;
+ UChar *pivotTarget = buffer16;
+ const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
+ int32_t length;
+
+ // Convert with insufficient target capacity.
+ result[2] = 5;  // sentinel just past the 2-byte target limit used in the call below
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, result + 2, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, FALSE, errorCode);  // reset=FALSE, flush=FALSE
+ assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+ length = (int32_t)(target - result);
+ assertEquals("number of bytes written", 2, length);
+ assertEquals("next byte not clobbered", 5, result[2]);
+
+ // Convert the rest and flush.
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, targetLimit, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, TRUE, errorCode);  // reset=FALSE so this call continues after the overflowed one; flush=TRUE
+
+ assertSuccess("UTF-8->UTF-8", errorCode);
+ length = (int32_t)(target - result);  // counts bytes from both calls, since target was not rewound
+ assertEquals("3 bytes", 3, length);
+ if (length == 3) {
+ assertTrue("result same as input", memcmp(text, result, length) == 0);
+ }
+
+ ucnv_reset(cnv1.getAlias());
+ ucnv_reset(cnv2.getAlias());
+ memset(result, 0, sizeof(result));
+ static const char *text2 = "a🚲"; // U+1F6B2 bicycle: 4 bytes
+ source = text2;
+ sourceLimit = text2 + strlen(text2);
+ target = result;
+ pivotSource = pivotTarget = buffer16;  // rewind the pivot buffer for the second scenario
+
+ // Convert with insufficient target capacity.
+ result[3] = 5;  // sentinel just past the 3-byte target limit used in the call below
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, result + 3, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, FALSE, errorCode);  // reset=FALSE, flush=FALSE
+ assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+ length = (int32_t)(target - result);
+ assertEquals("text2 number of bytes written", 3, length);
+ assertEquals("text2 next byte not clobbered", 5, result[3]);
+
+ // Convert the rest and flush.
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, targetLimit, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, TRUE, errorCode);  // reset=FALSE, flush=TRUE
+
+ assertSuccess("text2 UTF-8->UTF-8", errorCode);
+ length = (int32_t)(target - result);
+ assertEquals("text2 5 bytes", 5, length);
+ if (length == 5) {
+ assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
+ }
+
+ ucnv_reset(cnv1.getAlias());
+ ucnv_reset(cnv2.getAlias());
+ memset(result, 0, sizeof(result));
+ static const char *illFormed = "\xf1\x91\x93\x96\x91\x94"; // U+514D6 + two more trail bytes
+ source = illFormed;
+ sourceLimit = illFormed + strlen(illFormed);
+ target = result;
+ pivotSource = pivotTarget = buffer16;  // rewind the pivot buffer for the third scenario
+
+ ucnv_setToUCallBack(cnv1.getAlias(), UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, errorCode);  // STOP callback on the source converter, so the errors asserted below are reported to the caller
+
+ // Convert only two bytes and flush (but expect failure).
+ char errorBytes[10];
+ int8_t errorLength;
+ result[0] = 5;  // NOTE(review): unlike the earlier sentinels, result[0] is never asserted on afterwards
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, targetLimit, &source, source + 2,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, TRUE, errorCode);  // source limit is source + 2, i.e. only the first two input bytes are offered
+ assertEquals("illFormed truncated", U_TRUNCATED_CHAR_FOUND, errorCode.reset());
+ length = (int32_t)(target - result);
+ assertEquals("illFormed number of bytes written", 0, length);
+ errorLength = UPRV_LENGTHOF(errorBytes);  // in: capacity of errorBytes; the assert below expects the invalid-byte count back
+ ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
+ assertEquals("illFormed truncated errorLength", 2, (int32_t)errorLength);
+ if (errorLength == 2) {
+ assertEquals("illFormed truncated errorBytes", 0xf191,
+ ((int32_t)(uint8_t)errorBytes[0] << 8) | (uint8_t)errorBytes[1]);
+ }
+
+ // Continue conversion starting with a trail byte.
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, targetLimit, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, TRUE, errorCode);  // sourceLimit is the end of illFormed, as set before the truncated call
+
+ assertEquals("illFormed trail byte", U_ILLEGAL_CHAR_FOUND, errorCode.reset());
+ length = (int32_t)(target - result);
+ assertEquals("illFormed trail byte number of bytes written", 0, length);
+ errorLength = UPRV_LENGTHOF(errorBytes);  // reset to the capacity before asking for the invalid bytes again
+ ucnv_getInvalidChars(cnv1.getAlias(), errorBytes, &errorLength, errorCode);
+ assertEquals("illFormed trail byte errorLength", 1, (int32_t)errorLength);
+ if (errorLength == 1) {
+ assertEquals("illFormed trail byte errorBytes", 0x93, (int32_t)(uint8_t)errorBytes[0]);
+ }
+}
+
// open testdata or ICU data converter ------------------------------------- ***
UConverter *
ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
+ if(name!=NULL && *name=='+') {
+ // Converter names that start with '+' are ignored in ICU4J tests.
+ ++name;
+ }
if(name!=NULL && *name=='*') {
/* loadTestData(): set the data directory */
return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
- } else if(name!=NULL && *name=='+') {
- return ucnv_open((name+1), &errorCode);
} else {
return ucnv_open(name, &errorCode);
}
// open the converter
IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
+ // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078
if(errorCode.isFailure()) {
errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
int32_t i, step;
ok=TRUE;
- for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+ for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
step=steps[i].step;
if(step<0 && !cc.finalFlush) {
// skip ucnv_getNextUChar() if !finalFlush because
cc.offsets=NULL;
}
else {
- memset(resultOffsets, -1, LENGTHOF(resultOffsets));
+ memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
}
- memset(result, -1, LENGTHOF(result));
+ memset(result, -1, UPRV_LENGTHOF(result));
errorCode.reset();
resultLength=stepToUnicode(cc, cnv.getAlias(),
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
step==0 ? resultOffsets : NULL,
step, errorCode);
ok=checkToUnicode(
errorCode.reset();
resultLength=ucnv_toUChars(cnv.getAlias(),
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
(const char *)cc.bytes, cc.bytesLength,
errorCode);
ok=checkToUnicode(
targetLimit=resultLimit;
flush=cc.finalFlush;
- pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer);
+ pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
} else {
// start with empty partial buffers
sourceLimit=source;
// convert unicode to utf8
char utf8[256];
cc.utf8=utf8;
- u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length,
+ u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
cc.unicode, cc.unicodeLength,
&errorCode);
if(U_FAILURE(errorCode)) {
int32_t i, step;
ok=TRUE;
- for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+ for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
step=steps[i].step;
- memset(resultOffsets, -1, LENGTHOF(resultOffsets));
- memset(result, -1, LENGTHOF(result));
+ memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
+ memset(result, -1, UPRV_LENGTHOF(result));
errorCode=U_ZERO_ERROR;
resultLength=stepFromUnicode(cc, cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
step==0 ? resultOffsets : NULL,
step, &errorCode);
ok=checkFromUnicode(
if(cc.utf8Length>=0) {
errorCode=U_ZERO_ERROR;
resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
step, &errorCode);
ok=checkFromUnicode(
cc, cnv, steps[i].utf8Name,
errorCode=U_ZERO_ERROR;
resultLength=ucnv_fromUChars(cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
cc.unicode, cc.unicodeLength,
&errorCode);
ok=checkFromUnicode(
msg=NULL;
errorCode=U_ZERO_ERROR;
- resultInvalidLength=LENGTHOF(resultInvalidUChars);
+ resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
if(U_FAILURE(errorCode)) {
errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",