/*
*******************************************************************************
*
-* Copyright (C) 2003-2008, International Business Machines
+* Copyright (C) 2003-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#include "unicode/ustring.h"
#include "unicode/ures.h"
#include "convtest.h"
+#include "cmemory.h"
#include "unicode/tstdtmod.h"
#include <string.h>
#include <stdlib.h>
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
enum {
// characters used in test data for callbacks
SUB_CB='?',
ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if (exec) logln("TestSuite ConversionTest: ");
switch (index) {
+#if !UCONFIG_NO_FILE_IO
case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
- case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
+ case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break;
+#else
+ case 0:
+ case 1:
+ case 2:
+ case 3: name="skip"; break;
+#endif
+ case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
default: name=""; break; //needed to end loop
}
}
delete dataModule;
}
else {
- dataerrln("[DATA] Could not load test conversion data");
+ dataerrln("Could not load test conversion data");
}
}
// read a substitution string, separated by an equal sign
p=s.getBuffer()+index+1;
length=s.length()-(index+1);
- if(length<0 || length>=LENGTHOF(cc.subString)) {
+ if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
} else {
u_memcpy(cc.subString, p, length);
delete dataModule;
}
else {
- dataerrln("[DATA] Could not load test conversion data");
+ dataerrln("Could not load test conversion data");
}
}
ParsePosition pos;
UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
UnicodeSet *cnvSetPtr = &cnvSet;
- UConverter *cnv;
+ LocalUConverterPointer cnv;
TestDataModule *dataModule;
TestData *testData;
logln("TestGetUnicodeSet[%d] %s", i, charset);
- cnv=cnv_open(charset, errorCode);
+ cnv.adoptInstead(cnv_open(charset, errorCode));
if(U_FAILURE(errorCode)) {
- errln("error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
+ errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
charset, i, u_errorName(errorCode));
errorCode=U_ZERO_ERROR;
continue;
}
- ucnv_getUnicodeSet(cnv, (USet *)cnvSetPtr, (UConverterUnicodeSet)which, &errorCode);
- ucnv_close(cnv);
+ ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
if(U_FAILURE(errorCode)) {
errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
if(!diffSet.isEmpty()) {
diffSet.toPattern(s, TRUE);
if(s.length()>100) {
- s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
charset, i);
if(!diffSet.isEmpty()) {
diffSet.toPattern(s, TRUE);
if(s.length()>100) {
- s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
charset, i);
delete dataModule;
}
else {
- dataerrln("[DATA] Could not load test conversion data");
+ dataerrln("Could not load test conversion data");
}
}
"ISO-2022-CN-EXT",
"LMBCS"
};
+ LocalUConverterPointer cnv;
char buffer[1024];
int32_t i;
- for(i=0; i<LENGTHOF(cnvNames); ++i) {
+ for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
UErrorCode errorCode=U_ZERO_ERROR;
- UConverter *cnv=cnv_open(cnvNames[i], errorCode);
+ cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
if(U_FAILURE(errorCode)) {
- errln("failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
+ errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
continue;
}
UnicodeSet expected;
- ucnv_setFromUCallBack(cnv, getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
+ ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
if(U_FAILURE(errorCode)) {
errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
- ucnv_close(cnv);
continue;
}
UConverterUnicodeSet which;
for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
- ucnv_setFallback(cnv, TRUE);
+ ucnv_setFallback(cnv.getAlias(), TRUE);
}
expected.add(0, cpLimit-1);
s=s0;
do {
char *t=buffer;
flush=(UBool)(s==s0+s0Length);
- ucnv_fromUnicode(cnv, &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
+ ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
if(U_FAILURE(errorCode)) {
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
}
} while(!flush);
UnicodeSet set;
- ucnv_getUnicodeSet(cnv, (USet *)&set, which, &errorCode);
+ ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
if(cpLimit<0x110000) {
set.remove(cpLimit, 0x10ffff);
}
if(!diffSet.isEmpty()) {
diffSet.toPattern(out, TRUE);
if(out.length()>100) {
- out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
cnvNames[i], which);
if(!diffSet.isEmpty()) {
diffSet.toPattern(out, TRUE);
if(out.length()>100) {
- out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
cnvNames[i], which);
}
}
}
- ucnv_close(cnv);
}
delete [] s0;
}
+// Checks how the from-Unicode substitution callback treats code points with the
+// Default_Ignorable_Code_Point property, using the "euc-jp-2007" converter:
+//  - every code point in [:Default_Ignorable_Code_Point:] must convert to zero
+//    output bytes without an error;
+//  - every code point in [:^Default_Ignorable_Code_Point:] (U+0000 excepted)
+//    must convert to at least one output byte without an error.
+// If there are any failures, the hard-coded list (IS_DEFAULT_IGNORABLE_CODE_POINT)
+// in ucnv_err.c should be updated.
+void
+ConversionTest::TestDefaultIgnorableCallback() {
+    UErrorCode status = U_ZERO_ERROR;
+    const char *cnv_name = "euc-jp-2007";
+    const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
+    const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
+
+    // The sets are automatic objects and the converter is held by a
+    // LocalUConverterPointer, so none of the early returns below can leak them.
+    UnicodeSet set_ignorable(pattern_ignorable, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
+        return;
+    }
+
+    UnicodeSet set_not_ignorable(pattern_not_ignorable, status);
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
+        return;
+    }
+
+    LocalUConverterPointer cnv(cnv_open(cnv_name, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
+        return;
+    }
+
+    // set callback for the converter; the loops below reset status, so a
+    // failure here has to be reported now or it would be lost.
+    ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
+    if (U_FAILURE(status)) {
+        errln("Unable to set the substitution callback on converter: %s - %s", cnv_name, u_errorName(status));
+        return;
+    }
+
+    UChar32 input[1];
+    char output[10];
+    int32_t outputLength;
+
+    // test default ignorables are ignored
+    int32_t size = set_ignorable.size();
+    for (int32_t i = 0; i < size; i++) {
+        status = U_ZERO_ERROR;
+
+        input[0] = set_ignorable.charAt(i);
+
+        outputLength = ucnv_fromUChars(cnv.getAlias(), output, UPRV_LENGTHOF(output),
+                                       UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status) || outputLength != 0) {
+            errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
+        }
+    }
+
+    // test non-ignorables are not ignored
+    size = set_not_ignorable.size();
+    for (int32_t i = 0; i < size; i++) {
+        status = U_ZERO_ERROR;
+
+        input[0] = set_not_ignorable.charAt(i);
+
+        // U+0000 cannot be tested this way: the source is passed as a
+        // NUL-terminated buffer (length -1), so it would read as an empty string.
+        if (input[0] == 0) {
+            continue;
+        }
+
+        outputLength = ucnv_fromUChars(cnv.getAlias(), output, UPRV_LENGTHOF(output),
+                                       UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status) || outputLength <= 0) {
+            errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
+        }
+    }
+}
+
// open testdata or ICU data converter ------------------------------------- ***
UConverter *
ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
+ if(name!=NULL && *name=='+') {
+ // Converter names that start with '+' are ignored in ICU4J tests.
+ ++name;
+ }
if(name!=NULL && *name=='*') {
/* loadTestData(): set the data directory */
return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
UBool
ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
- UConverter *cnv;
- UErrorCode errorCode;
-
// open the converter
- errorCode=U_ZERO_ERROR;
- cnv=cnv_open(cc.charset, errorCode);
- if(U_FAILURE(errorCode)) {
- errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
- cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
+ IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
+ LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
+ if(errorCode.isFailure()) {
+ errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
+ cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
+ errorCode.reset();
return FALSE;
}
// set the callback
if(callback!=NULL) {
- ucnv_setToUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
+ ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
if(U_FAILURE(errorCode)) {
errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
- ucnv_close(cnv);
return FALSE;
}
}
int32_t i, step;
ok=TRUE;
- for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+ for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
step=steps[i].step;
if(step<0 && !cc.finalFlush) {
// skip ucnv_getNextUChar() if !finalFlush because
cc.offsets=NULL;
}
else {
- memset(resultOffsets, -1, LENGTHOF(resultOffsets));
+ memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
}
- memset(result, -1, LENGTHOF(result));
- errorCode=U_ZERO_ERROR;
- resultLength=stepToUnicode(cc, cnv,
- result, LENGTHOF(result),
+ memset(result, -1, UPRV_LENGTHOF(result));
+ errorCode.reset();
+ resultLength=stepToUnicode(cc, cnv.getAlias(),
+ result, UPRV_LENGTHOF(result),
step==0 ? resultOffsets : NULL,
- step, &errorCode);
+ step, errorCode);
ok=checkToUnicode(
- cc, cnv, steps[i].name,
+ cc, cnv.getAlias(), steps[i].name,
result, resultLength,
cc.offsets!=NULL ? resultOffsets : NULL,
errorCode);
- if(U_FAILURE(errorCode) || !cc.finalFlush) {
+ if(errorCode.isFailure() || !cc.finalFlush) {
// reset if an error occurred or we did not flush
// otherwise do nothing to make sure that flushing resets
- ucnv_resetToUnicode(cnv);
+ ucnv_resetToUnicode(cnv.getAlias());
}
- if (resultOffsets[resultLength] != -1) {
+ if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
cc.caseNr, cc.charset, resultLength);
}
// test ucnv_toUChars()
memset(result, 0, sizeof(result));
- errorCode=U_ZERO_ERROR;
- resultLength=ucnv_toUChars(cnv,
- result, LENGTHOF(result),
+ errorCode.reset();
+ resultLength=ucnv_toUChars(cnv.getAlias(),
+ result, UPRV_LENGTHOF(result),
(const char *)cc.bytes, cc.bytesLength,
- &errorCode);
+ errorCode);
ok=checkToUnicode(
- cc, cnv, "toUChars",
+ cc, cnv.getAlias(), "toUChars",
result, resultLength,
NULL,
errorCode);
// test preflighting
// keep the correct result for simple checking
- errorCode=U_ZERO_ERROR;
- resultLength=ucnv_toUChars(cnv,
+ errorCode.reset();
+ resultLength=ucnv_toUChars(cnv.getAlias(),
NULL, 0,
(const char *)cc.bytes, cc.bytesLength,
- &errorCode);
- if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {
- errorCode=U_ZERO_ERROR;
+ errorCode);
+ if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
+ errorCode.reset();
}
ok=checkToUnicode(
- cc, cnv, "preflight toUChars",
+ cc, cnv.getAlias(), "preflight toUChars",
result, resultLength,
NULL,
errorCode);
break;
}
- ucnv_close(cnv);
+ errorCode.reset(); // all errors have already been reported
return ok;
}
targetLimit=resultLimit;
flush=cc.finalFlush;
- pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer);
+ pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
} else {
// start with empty partial buffers
sourceLimit=source;
errorCode=U_ZERO_ERROR;
cnv=cnv_open(cc.charset, errorCode);
if(U_FAILURE(errorCode)) {
- errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
+ errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
return FALSE;
}
// convert unicode to utf8
char utf8[256];
cc.utf8=utf8;
- u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length,
+ u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
cc.unicode, cc.unicodeLength,
&errorCode);
if(U_FAILURE(errorCode)) {
int32_t i, step;
ok=TRUE;
- for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+ for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
step=steps[i].step;
- memset(resultOffsets, -1, LENGTHOF(resultOffsets));
- memset(result, -1, LENGTHOF(result));
+ memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
+ memset(result, -1, UPRV_LENGTHOF(result));
errorCode=U_ZERO_ERROR;
resultLength=stepFromUnicode(cc, cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
step==0 ? resultOffsets : NULL,
step, &errorCode);
ok=checkFromUnicode(
if(cc.utf8Length>=0) {
errorCode=U_ZERO_ERROR;
resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
step, &errorCode);
ok=checkFromUnicode(
cc, cnv, steps[i].utf8Name,
errorCode=U_ZERO_ERROR;
resultLength=ucnv_fromUChars(cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
cc.unicode, cc.unicodeLength,
&errorCode);
ok=checkFromUnicode(
msg=NULL;
errorCode=U_ZERO_ERROR;
- resultInvalidLength=LENGTHOF(resultInvalidUChars);
+ resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
if(U_FAILURE(errorCode)) {
errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",